af_vsock.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware vSockets Driver
 *
 * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
 */

/* Implementation notes:
 *
 * - There are two kinds of sockets: those created by user action (such as
 * calling socket(2)) and those created by incoming connection request packets.
 *
 * - There are two "global" tables, one for bound sockets (sockets that have
 * specified an address that they are responsible for) and one for connected
 * sockets (sockets that have established a connection with another socket).
 * These tables are "global" in that all sockets on the system are placed
 * within them. - Note, though, that the bound table contains an extra entry
 * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
 * that list. The bound table is used solely for lookup of sockets when packets
 * are received and that's not necessary for SOCK_DGRAM sockets since we create
 * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM
 * sockets out of the bound hash buckets will reduce the chance of collisions
 * when looking for SOCK_STREAM sockets and prevents us from having to check
 * the socket type in the hash table lookups.
 *
 * - Sockets created by user action will either be "client" sockets that
 * initiate a connection or "server" sockets that listen for connections; we do
 * not support simultaneous connects (two "client" sockets connecting).
 *
 * - "Server" sockets are referred to as listener sockets throughout this
 * implementation because they are in the TCP_LISTEN state. When a
 * connection request is received (the second kind of socket mentioned above),
 * we create a new socket and refer to it as a pending socket. These pending
 * sockets are placed on the pending connection list of the listener socket.
 * When future packets are received for the address the listener socket is
 * bound to, we check if the source of the packet is from one that has an
 * existing pending connection. If it does, we process the packet for the
 * pending socket. When that socket reaches the connected state, it is removed
 * from the listener socket's pending list and enqueued in the listener
 * socket's accept queue. Callers of accept(2) will accept connected sockets
 * from the listener socket's accept queue. If the socket cannot be accepted
 * for some reason then it is marked rejected. Once the connection is
 * accepted, it is owned by the user process and the responsibility for cleanup
 * falls with that user process.
 *
 * - It is possible that these pending sockets will never reach the connected
 * state; in fact, we may never receive another packet after the connection
 * request. Because of this, we must schedule a cleanup function to run in the
 * future, after some amount of time passes where a connection should have been
 * established. This function ensures that the socket is off all lists so it
 * cannot be retrieved, then drops all references to the socket so it is
 * cleaned up (sock_put() -> sk_free() -> our sk_destruct implementation).
 * Note this function will also cleanup rejected sockets, those that reach the
 * connected state but leave it before they have been accepted.
 *
 * - Lock ordering for pending or accept queue sockets is:
 *
 *     lock_sock(listener);
 *     lock_sock_nested(pending, SINGLE_DEPTH_NESTING);
 *
 * Using explicit nested locking keeps lockdep happy since normally only one
 * lock of a given class may be taken at a time.
 *
 * - Sockets created by user action will be cleaned up when the user process
 * calls close(2), causing our release implementation to be called. Our release
 * implementation will perform some cleanup then drop the last reference so our
 * sk_destruct implementation is invoked. Our sk_destruct implementation will
 * perform additional cleanup that's common for both types of sockets.
 *
 * - A socket's reference count is what ensures that the structure won't be
 * freed. Each entry in a list (such as the "global" bound and connected tables
 * and the listener socket's pending list and connected queue) ensures a
 * reference. When we defer work until process context and pass a socket as our
 * argument, we must ensure the reference count is increased to ensure the
 * socket isn't freed before the function is run; the deferred function will
 * then drop the reference.
 *
 * - sk->sk_state uses the TCP state constants because they are widely used by
 * other address families and exposed to userspace tools like ss(8):
 *
 *   TCP_CLOSE - unconnected
 *   TCP_SYN_SENT - connecting
 *   TCP_ESTABLISHED - connected
 *   TCP_CLOSING - disconnecting
 *   TCP_LISTEN - listening
 */
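
/* Illustrative sketch (not part of the original file): how the pieces
 * described above are exercised from userspace. A "client" socket connects to
 * a listening peer on the host; the port number 1234 is an arbitrary example
 * value, while AF_VSOCK, struct sockaddr_vm and VMADDR_CID_HOST come from the
 * userspace headers <sys/socket.h> and <linux/vm_sockets.h>.
 *
 *	#include <sys/socket.h>
 *	#include <linux/vm_sockets.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *		struct sockaddr_vm addr = {
 *			.svm_family = AF_VSOCK,
 *			.svm_cid = VMADDR_CID_HOST,
 *			.svm_port = 1234,
 *		};
 *
 *		if (fd < 0)
 *			return 1;
 *		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
 *			return 1;
 *		write(fd, "ping", 4);
 *		close(fd);
 *		return 0;
 *	}
 *
 * On the kernel side this walks the path described in the notes: the socket
 * is autobound, moved to TCP_SYN_SENT by vsock_connect(), and ends up in the
 * connected table as TCP_ESTABLISHED once the peer replies.
 */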
#include <linux/compat.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/cred.h>
#include <linux/errqueue.h>
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
#include <linux/poll.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/smp.h>
#include <linux/socket.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
#include <net/sock.h>
#include <net/af_vsock.h>
#include <uapi/linux/vm_sockets.h>
#include <uapi/asm-generic/ioctls.h>

static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
static void vsock_sk_destruct(struct sock *sk);
static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
static void vsock_close(struct sock *sk, long timeout);

/* Protocol family. */
struct proto vsock_proto = {
	.name = "AF_VSOCK",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct vsock_sock),
	.close = vsock_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot = vsock_bpf_update_proto,
#endif
};

/* The default peer timeout indicates how long we will wait for a peer response
 * to a control message.
 */
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)

#define VSOCK_DEFAULT_BUFFER_SIZE     (1024 * 256)
#define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)
#define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128

/* Transport used for host->guest communication */
static const struct vsock_transport *transport_h2g;
/* Transport used for guest->host communication */
static const struct vsock_transport *transport_g2h;
/* Transport used for DGRAM communication */
static const struct vsock_transport *transport_dgram;
/* Transport used for local communication */
static const struct vsock_transport *transport_local;

static DEFINE_MUTEX(vsock_register_mutex);

/**** UTILS ****/

/* Each bound VSocket is stored in the bind hash table and each connected
 * VSocket is stored in the connected hash table.
 *
 * Unbound sockets are all put on the same list attached to the end of the hash
 * table (vsock_unbound_sockets). Bound sockets are added to the hash table in
 * the bucket that their local address hashes to (vsock_bound_sockets(addr)
 * represents the list that addr hashes to).
 *
 * Specifically, we initialize the vsock_bind_table array to a size of
 * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
 * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
 * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
 * mods with VSOCK_HASH_SIZE to ensure this.
 */
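
/* Worked example (illustrative; assumes VSOCK_HASH_SIZE is 251, its value in
 * net/af_vsock.h at the time of writing): a socket bound to port 1030 hashes
 * to bucket 1030 % 251 = 26, i.e. vsock_bind_table[26], while the unbound
 * list is always vsock_bind_table[251], one past the last hash bucket.
 */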
#define MAX_PORT_RETRIES	24

#define VSOCK_HASH(addr)	((addr)->svm_port % VSOCK_HASH_SIZE)
#define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
#define vsock_unbound_sockets	  (&vsock_bind_table[VSOCK_HASH_SIZE])

/* XXX This can probably be implemented in a better way. */
#define VSOCK_CONN_HASH(src, dst) \
	(((src)->svm_cid ^ (dst)->svm_port) % VSOCK_HASH_SIZE)
#define vsock_connected_sockets(src, dst) \
	(&vsock_connected_table[VSOCK_CONN_HASH(src, dst)])
#define vsock_connected_sockets_vsk(vsk) \
	vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)

struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
EXPORT_SYMBOL_GPL(vsock_bind_table);
struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
EXPORT_SYMBOL_GPL(vsock_connected_table);
DEFINE_SPINLOCK(vsock_table_lock);
EXPORT_SYMBOL_GPL(vsock_table_lock);

/* Autobind this socket to the local address if necessary. */
static int vsock_auto_bind(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);
	struct sockaddr_vm local_addr;

	if (vsock_addr_bound(&vsk->local_addr))
		return 0;
	vsock_addr_init(&local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
	return __vsock_bind(sk, &local_addr);
}

static void vsock_init_tables(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vsock_bind_table); i++)
		INIT_LIST_HEAD(&vsock_bind_table[i]);

	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
		INIT_LIST_HEAD(&vsock_connected_table[i]);
}

static void __vsock_insert_bound(struct list_head *list,
				 struct vsock_sock *vsk)
{
	sock_hold(&vsk->sk);
	list_add(&vsk->bound_table, list);
}

static void __vsock_insert_connected(struct list_head *list,
				     struct vsock_sock *vsk)
{
	sock_hold(&vsk->sk);
	list_add(&vsk->connected_table, list);
}

static void __vsock_remove_bound(struct vsock_sock *vsk)
{
	list_del_init(&vsk->bound_table);
	sock_put(&vsk->sk);
}

static void __vsock_remove_connected(struct vsock_sock *vsk)
{
	list_del_init(&vsk->connected_table);
	sock_put(&vsk->sk);
}

static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
{
	struct vsock_sock *vsk;

	list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
		if (vsock_addr_equals_addr(addr, &vsk->local_addr))
			return sk_vsock(vsk);

		if (addr->svm_port == vsk->local_addr.svm_port &&
		    (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
		     addr->svm_cid == VMADDR_CID_ANY))
			return sk_vsock(vsk);
	}

	return NULL;
}

static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
						  struct sockaddr_vm *dst)
{
	struct vsock_sock *vsk;

	list_for_each_entry(vsk, vsock_connected_sockets(src, dst),
			    connected_table) {
		if (vsock_addr_equals_addr(src, &vsk->remote_addr) &&
		    dst->svm_port == vsk->local_addr.svm_port) {
			return sk_vsock(vsk);
		}
	}

	return NULL;
}

static void vsock_insert_unbound(struct vsock_sock *vsk)
{
	spin_lock_bh(&vsock_table_lock);
	__vsock_insert_bound(vsock_unbound_sockets, vsk);
	spin_unlock_bh(&vsock_table_lock);
}

void vsock_insert_connected(struct vsock_sock *vsk)
{
	struct list_head *list = vsock_connected_sockets(
		&vsk->remote_addr, &vsk->local_addr);

	spin_lock_bh(&vsock_table_lock);
	__vsock_insert_connected(list, vsk);
	spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_insert_connected);

void vsock_remove_bound(struct vsock_sock *vsk)
{
	spin_lock_bh(&vsock_table_lock);
	if (__vsock_in_bound_table(vsk))
		__vsock_remove_bound(vsk);
	spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_bound);

void vsock_remove_connected(struct vsock_sock *vsk)
{
	spin_lock_bh(&vsock_table_lock);
	if (__vsock_in_connected_table(vsk))
		__vsock_remove_connected(vsk);
	spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_connected);

struct sock *vsock_find_bound_socket(struct sockaddr_vm *addr)
{
	struct sock *sk;

	spin_lock_bh(&vsock_table_lock);
	sk = __vsock_find_bound_socket(addr);
	if (sk)
		sock_hold(sk);
	spin_unlock_bh(&vsock_table_lock);

	return sk;
}
EXPORT_SYMBOL_GPL(vsock_find_bound_socket);

struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
					 struct sockaddr_vm *dst)
{
	struct sock *sk;

	spin_lock_bh(&vsock_table_lock);
	sk = __vsock_find_connected_socket(src, dst);
	if (sk)
		sock_hold(sk);
	spin_unlock_bh(&vsock_table_lock);

	return sk;
}
EXPORT_SYMBOL_GPL(vsock_find_connected_socket);

void vsock_remove_sock(struct vsock_sock *vsk)
{
	/* Transport reassignment must not remove the binding. */
	if (sock_flag(sk_vsock(vsk), SOCK_DEAD))
		vsock_remove_bound(vsk);

	vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);

void vsock_for_each_connected_socket(struct vsock_transport *transport,
				     void (*fn)(struct sock *sk))
{
	int i;

	spin_lock_bh(&vsock_table_lock);

	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
		struct vsock_sock *vsk;

		list_for_each_entry(vsk, &vsock_connected_table[i],
				    connected_table) {
			if (vsk->transport != transport)
				continue;

			fn(sk_vsock(vsk));
		}
	}

	spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_for_each_connected_socket);

void vsock_add_pending(struct sock *listener, struct sock *pending)
{
	struct vsock_sock *vlistener;
	struct vsock_sock *vpending;

	vlistener = vsock_sk(listener);
	vpending = vsock_sk(pending);

	sock_hold(pending);
	sock_hold(listener);
	list_add_tail(&vpending->pending_links, &vlistener->pending_links);
}
EXPORT_SYMBOL_GPL(vsock_add_pending);

void vsock_remove_pending(struct sock *listener, struct sock *pending)
{
	struct vsock_sock *vpending = vsock_sk(pending);

	list_del_init(&vpending->pending_links);
	sock_put(listener);
	sock_put(pending);
}
EXPORT_SYMBOL_GPL(vsock_remove_pending);

void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
{
	struct vsock_sock *vlistener;
	struct vsock_sock *vconnected;

	vlistener = vsock_sk(listener);
	vconnected = vsock_sk(connected);

	sock_hold(connected);
	sock_hold(listener);
	list_add_tail(&vconnected->accept_queue, &vlistener->accept_queue);
}
EXPORT_SYMBOL_GPL(vsock_enqueue_accept);

static bool vsock_use_local_transport(unsigned int remote_cid)
{
	lockdep_assert_held(&vsock_register_mutex);

	if (!transport_local)
		return false;

	if (remote_cid == VMADDR_CID_LOCAL)
		return true;

	if (transport_g2h) {
		return remote_cid == transport_g2h->get_local_cid();
	} else {
		return remote_cid == VMADDR_CID_HOST;
	}
}

static void vsock_deassign_transport(struct vsock_sock *vsk)
{
	if (!vsk->transport)
		return;

	vsk->transport->destruct(vsk);
	module_put(vsk->transport->module);
	vsk->transport = NULL;
}
/* Assign a transport to a socket and call the .init transport callback.
 *
 * Note: for connection oriented sockets this must be called when
 * vsk->remote_addr is set (e.g. during connect() or when a connection request
 * on a listener socket is received).
 * The vsk->remote_addr is used to decide which transport to use:
 *  - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
 *    g2h is not loaded, will use local transport;
 *  - remote CID <= VMADDR_CID_HOST or h2g is not loaded or the remote flags
 *    field includes the VMADDR_FLAG_TO_HOST flag, will use guest->host
 *    transport;
 *  - remote CID > VMADDR_CID_HOST will use host->guest transport;
 */
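
/* Worked examples of the rules above (the CIDs 42 and 43 are arbitrary values
 * assumed for the sketch): a guest connecting to VMADDR_CID_HOST (2) picks
 * transport_g2h; a host-side socket connecting to a guest with CID 42 picks
 * transport_h2g; a connection to VMADDR_CID_LOCAL (1) picks transport_local;
 * and a guest connecting to CID 43 with VMADDR_FLAG_TO_HOST set in svm_flags
 * is forced onto transport_g2h so the host can forward the packets.
 */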
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
{
	const struct vsock_transport *new_transport;
	struct sock *sk = sk_vsock(vsk);
	unsigned int remote_cid = vsk->remote_addr.svm_cid;
	__u8 remote_flags;
	int ret;

	/* If the packet is coming with the source and destination CIDs higher
	 * than VMADDR_CID_HOST, then a vsock channel where all the packets are
	 * forwarded to the host should be established. Then the host will
	 * need to forward the packets to the guest.
	 *
	 * The flag is set on the (listen) receive path (psk is not NULL). On
	 * the connect path the flag can be set by the user space application.
	 */
	if (psk && vsk->local_addr.svm_cid > VMADDR_CID_HOST &&
	    vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
		vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST;

	remote_flags = vsk->remote_addr.svm_flags;

	mutex_lock(&vsock_register_mutex);

	switch (sk->sk_type) {
	case SOCK_DGRAM:
		new_transport = transport_dgram;
		break;
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		if (vsock_use_local_transport(remote_cid))
			new_transport = transport_local;
		else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
			 (remote_flags & VMADDR_FLAG_TO_HOST))
			new_transport = transport_g2h;
		else
			new_transport = transport_h2g;
		break;
	default:
		ret = -ESOCKTNOSUPPORT;
		goto err;
	}

	if (vsk->transport && vsk->transport == new_transport) {
		ret = 0;
		goto err;
	}

	/* We increase the module refcnt to prevent the transport unloading
	 * while there are open sockets assigned to it.
	 */
	if (!new_transport || !try_module_get(new_transport->module)) {
		ret = -ENODEV;
		goto err;
	}

	/* It's safe to release the mutex after a successful try_module_get().
	 * Whichever transport `new_transport` points at, it won't go away
	 * until the last module_put() below or in vsock_deassign_transport().
	 */
	mutex_unlock(&vsock_register_mutex);

	if (vsk->transport) {
		/* transport->release() must be called with sock lock acquired.
		 * This path can only be taken during vsock_connect(), where we
		 * have already held the sock lock. In the other cases, this
		 * function is called on a new socket which is not assigned to
		 * any transport.
		 */
		vsk->transport->release(vsk);
		vsock_deassign_transport(vsk);

		/* transport's release() and destruct() can touch some socket
		 * state, since we are reassigning the socket to a new transport
		 * during vsock_connect(), let's reset these fields to have a
		 * clean state.
		 */
		sock_reset_flag(sk, SOCK_DONE);
		sk->sk_state = TCP_CLOSE;
		vsk->peer_shutdown = 0;
	}

	if (sk->sk_type == SOCK_SEQPACKET) {
		if (!new_transport->seqpacket_allow ||
		    !new_transport->seqpacket_allow(remote_cid)) {
			module_put(new_transport->module);
			return -ESOCKTNOSUPPORT;
		}
	}

	ret = new_transport->init(vsk, psk);
	if (ret) {
		module_put(new_transport->module);
		return ret;
	}

	vsk->transport = new_transport;

	return 0;
err:
	mutex_unlock(&vsock_register_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);

/*
 * Provide safe access to static transport_{h2g,g2h,dgram,local} callbacks.
 * Otherwise we may race with module removal. Do not use on `vsk->transport`.
 */
static u32 vsock_registered_transport_cid(const struct vsock_transport **transport)
{
	u32 cid = VMADDR_CID_ANY;

	mutex_lock(&vsock_register_mutex);
	if (*transport)
		cid = (*transport)->get_local_cid();
	mutex_unlock(&vsock_register_mutex);

	return cid;
}

bool vsock_find_cid(unsigned int cid)
{
	if (cid == vsock_registered_transport_cid(&transport_g2h))
		return true;

	if (transport_h2g && cid == VMADDR_CID_HOST)
		return true;

	if (transport_local && cid == VMADDR_CID_LOCAL)
		return true;

	return false;
}
EXPORT_SYMBOL_GPL(vsock_find_cid);

static struct sock *vsock_dequeue_accept(struct sock *listener)
{
	struct vsock_sock *vlistener;
	struct vsock_sock *vconnected;

	vlistener = vsock_sk(listener);

	if (list_empty(&vlistener->accept_queue))
		return NULL;

	vconnected = list_entry(vlistener->accept_queue.next,
				struct vsock_sock, accept_queue);

	list_del_init(&vconnected->accept_queue);
	sock_put(listener);
	/* The caller will need a reference on the connected socket so we let
	 * it call sock_put().
	 */

	return sk_vsock(vconnected);
}

static bool vsock_is_accept_queue_empty(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	return list_empty(&vsk->accept_queue);
}

static bool vsock_is_pending(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	return !list_empty(&vsk->pending_links);
}

static int vsock_send_shutdown(struct sock *sk, int mode)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (!vsk->transport)
		return -ENODEV;

	return vsk->transport->shutdown(vsk, mode);
}

static void vsock_pending_work(struct work_struct *work)
{
	struct sock *sk;
	struct sock *listener;
	struct vsock_sock *vsk;
	bool cleanup;

	vsk = container_of(work, struct vsock_sock, pending_work.work);
	sk = sk_vsock(vsk);
	listener = vsk->listener;
	cleanup = true;

	lock_sock(listener);
	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);

	if (vsock_is_pending(sk)) {
		vsock_remove_pending(listener, sk);
		sk_acceptq_removed(listener);
	} else if (!vsk->rejected) {
		/* We are not on the pending list and accept() did not reject
		 * us, so we must have been accepted by our user process. We
		 * just need to drop our references to the sockets and be on
		 * our way.
		 */
		cleanup = false;
		goto out;
	}

	/* We need to remove ourself from the global connected sockets list so
	 * incoming packets can't find this socket, and to reduce the reference
	 * count.
	 */
	vsock_remove_connected(vsk);

	sk->sk_state = TCP_CLOSE;

out:
	release_sock(sk);
	release_sock(listener);
	if (cleanup)
		sock_put(sk);

	sock_put(sk);
	sock_put(listener);
}
/**** SOCKET OPERATIONS ****/

static int __vsock_bind_connectible(struct vsock_sock *vsk,
				    struct sockaddr_vm *addr)
{
	static u32 port;
	struct sockaddr_vm new_addr;

	if (!port)
		port = get_random_u32_above(LAST_RESERVED_PORT);

	vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port);

	if (addr->svm_port == VMADDR_PORT_ANY) {
		bool found = false;
		unsigned int i;

		for (i = 0; i < MAX_PORT_RETRIES; i++) {
			if (port == VMADDR_PORT_ANY ||
			    port <= LAST_RESERVED_PORT)
				port = LAST_RESERVED_PORT + 1;

			new_addr.svm_port = port++;

			if (!__vsock_find_bound_socket(&new_addr)) {
				found = true;
				break;
			}
		}

		if (!found)
			return -EADDRNOTAVAIL;
	} else {
		/* If port is in reserved range, ensure caller
		 * has necessary privileges.
		 */
		if (addr->svm_port <= LAST_RESERVED_PORT &&
		    !capable(CAP_NET_BIND_SERVICE)) {
			return -EACCES;
		}

		if (__vsock_find_bound_socket(&new_addr))
			return -EADDRINUSE;
	}

	vsock_addr_init(&vsk->local_addr, new_addr.svm_cid, new_addr.svm_port);

	/* Remove connection oriented sockets from the unbound list and add
	 * them to the hash table for easy lookup by their address. The unbound
	 * list is simply an extra entry at the end of the hash table, a trick
	 * used by AF_UNIX.
	 */
	__vsock_remove_bound(vsk);
	__vsock_insert_bound(vsock_bound_sockets(&vsk->local_addr), vsk);

	return 0;
}

static int __vsock_bind_dgram(struct vsock_sock *vsk,
			      struct sockaddr_vm *addr)
{
	return vsk->transport->dgram_bind(vsk, addr);
}

static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
{
	struct vsock_sock *vsk = vsock_sk(sk);
	int retval;

	/* First ensure this socket isn't already bound. */
	if (vsock_addr_bound(&vsk->local_addr))
		return -EINVAL;

	/* Now bind to the provided address or select appropriate values if
	 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that
	 * like AF_INET prevents binding to a non-local IP address (in most
	 * cases), we only allow binding to a local CID.
	 */
	if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
		return -EADDRNOTAVAIL;

	switch (sk->sk_socket->type) {
	case SOCK_STREAM:
	case SOCK_SEQPACKET:
		spin_lock_bh(&vsock_table_lock);
		retval = __vsock_bind_connectible(vsk, addr);
		spin_unlock_bh(&vsock_table_lock);
		break;

	case SOCK_DGRAM:
		retval = __vsock_bind_dgram(vsk, addr);
		break;

	default:
		retval = -EINVAL;
		break;
	}

	return retval;
}

static void vsock_connect_timeout(struct work_struct *work);

static struct sock *__vsock_create(struct net *net,
				   struct socket *sock,
				   struct sock *parent,
				   gfp_t priority,
				   unsigned short type,
				   int kern)
{
	struct sock *sk;
	struct vsock_sock *psk;
	struct vsock_sock *vsk;

	sk = sk_alloc(net, AF_VSOCK, priority, &vsock_proto, kern);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk);

	/* sk->sk_type is normally set in sock_init_data, but only if sock is
	 * non-NULL. We make sure that our sockets always have a type by
	 * setting it here if needed.
	 */
	if (!sock)
		sk->sk_type = type;

	vsk = vsock_sk(sk);
	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);

	sk->sk_destruct = vsock_sk_destruct;
	sk->sk_backlog_rcv = vsock_queue_rcv_skb;
	sock_reset_flag(sk, SOCK_DONE);

	INIT_LIST_HEAD(&vsk->bound_table);
	INIT_LIST_HEAD(&vsk->connected_table);
	vsk->listener = NULL;
	INIT_LIST_HEAD(&vsk->pending_links);
	INIT_LIST_HEAD(&vsk->accept_queue);
	vsk->rejected = false;
	vsk->sent_request = false;
	vsk->ignore_connecting_rst = false;
	vsk->peer_shutdown = 0;
	INIT_DELAYED_WORK(&vsk->connect_work, vsock_connect_timeout);
	INIT_DELAYED_WORK(&vsk->pending_work, vsock_pending_work);

	psk = parent ? vsock_sk(parent) : NULL;
	if (parent) {
		vsk->trusted = psk->trusted;
		vsk->owner = get_cred(psk->owner);
		vsk->connect_timeout = psk->connect_timeout;
		vsk->buffer_size = psk->buffer_size;
		vsk->buffer_min_size = psk->buffer_min_size;
		vsk->buffer_max_size = psk->buffer_max_size;
		security_sk_clone(parent, sk);
	} else {
		vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
		vsk->owner = get_current_cred();
		vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
		vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
		vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
		vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
	}

	return sk;
}
static bool sock_type_connectible(u16 type)
{
	return (type == SOCK_STREAM) || (type == SOCK_SEQPACKET);
}

static void __vsock_release(struct sock *sk, int level)
{
	struct vsock_sock *vsk;
	struct sock *pending;

	vsk = vsock_sk(sk);
	pending = NULL;	/* Compiler warning. */

	/* When "level" is SINGLE_DEPTH_NESTING, use the nested
	 * version to avoid the warning "possible recursive locking
	 * detected". When "level" is 0, lock_sock_nested(sk, level)
	 * is the same as lock_sock(sk).
	 */
	lock_sock_nested(sk, level);

	/* Indicate to vsock_remove_sock() that the socket is being released
	 * and can be removed from the bound_table. Unlike transport
	 * reassignment case, where the socket must remain bound despite
	 * vsock_remove_sock() being called from the transport release()
	 * callback.
	 */
	sock_set_flag(sk, SOCK_DEAD);

	if (vsk->transport)
		vsk->transport->release(vsk);
	else if (sock_type_connectible(sk->sk_type))
		vsock_remove_sock(vsk);

	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;

	skb_queue_purge(&sk->sk_receive_queue);

	/* Clean up any sockets that never were accepted. */
	while ((pending = vsock_dequeue_accept(sk)) != NULL) {
		__vsock_release(pending, SINGLE_DEPTH_NESTING);
		sock_put(pending);
	}

	release_sock(sk);
	sock_put(sk);
}

static void vsock_sk_destruct(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* Flush MSG_ZEROCOPY leftovers. */
	__skb_queue_purge(&sk->sk_error_queue);

	vsock_deassign_transport(vsk);

	/* When clearing these addresses, there's no need to set the family and
	 * possibly register the address family with the kernel.
	 */
	vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
	vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);

	put_cred(vsk->owner);
}

static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;

	err = sock_queue_rcv_skb(sk, skb);
	if (err)
		kfree_skb(skb);

	return err;
}

struct sock *vsock_create_connected(struct sock *parent)
{
	return __vsock_create(sock_net(parent), NULL, parent, GFP_KERNEL,
			      parent->sk_type, 0);
}
EXPORT_SYMBOL_GPL(vsock_create_connected);

s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
	if (WARN_ON(!vsk->transport))
		return 0;

	return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);

s64 vsock_connectible_has_data(struct vsock_sock *vsk)
{
	struct sock *sk = sk_vsock(vsk);

	if (WARN_ON(!vsk->transport))
		return 0;

	if (sk->sk_type == SOCK_SEQPACKET)
		return vsk->transport->seqpacket_has_data(vsk);
	else
		return vsock_stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_connectible_has_data);

s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
	if (WARN_ON(!vsk->transport))
		return 0;

	return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);

void vsock_data_ready(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (vsock_stream_has_data(vsk) >= sk->sk_rcvlowat ||
	    sock_flag(sk, SOCK_DONE))
		sk->sk_data_ready(sk);
}
EXPORT_SYMBOL_GPL(vsock_data_ready);

/* Dummy callback required by sockmap.
 * See unconditional call of saved_close() in sock_map_close().
 */
static void vsock_close(struct sock *sk, long timeout)
{
}

static int vsock_release(struct socket *sock)
{
	struct sock *sk = sock->sk;

	if (!sk)
		return 0;

	sk->sk_prot->close(sk, 0);
	__vsock_release(sk, 0);
	sock->sk = NULL;
	sock->state = SS_FREE;

	return 0;
}

static int
vsock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	int err;
	struct sock *sk;
	struct sockaddr_vm *vm_addr;

	sk = sock->sk;

	if (vsock_addr_cast(addr, addr_len, &vm_addr) != 0)
		return -EINVAL;

	lock_sock(sk);
	err = __vsock_bind(sk, vm_addr);
	release_sock(sk);

	return err;
}

static int vsock_getname(struct socket *sock,
			 struct sockaddr *addr, int peer)
{
	int err;
	struct sock *sk;
	struct vsock_sock *vsk;
	struct sockaddr_vm *vm_addr;

	sk = sock->sk;
	vsk = vsock_sk(sk);
	err = 0;

	lock_sock(sk);

	if (peer) {
		if (sock->state != SS_CONNECTED) {
			err = -ENOTCONN;
			goto out;
		}
		vm_addr = &vsk->remote_addr;
	} else {
		vm_addr = &vsk->local_addr;
	}

	if (!vm_addr) {
		err = -EINVAL;
		goto out;
	}

	/* sys_getsockname() and sys_getpeername() pass us a
	 * MAX_SOCK_ADDR-sized buffer and don't set addr_len. Unfortunately
	 * that macro is defined in socket.c instead of .h, so we hardcode its
	 * value here.
	 */
	BUILD_BUG_ON(sizeof(*vm_addr) > 128);
	memcpy(addr, vm_addr, sizeof(*vm_addr));
	err = sizeof(*vm_addr);

out:
	release_sock(sk);
	return err;
}

static int vsock_shutdown(struct socket *sock, int mode)
{
	int err;
	struct sock *sk;

	/* User level uses SHUT_RD (0) and SHUT_WR (1), but the kernel uses
	 * RCV_SHUTDOWN (1) and SEND_SHUTDOWN (2), so we must increment mode
	 * here like the other address families do. Note also that the
	 * increment makes SHUT_RDWR (2) into RCV_SHUTDOWN | SEND_SHUTDOWN (3),
	 * which is what we want.
	 */
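	/* Concretely (illustrative): SHUT_RD (0) becomes 1 (RCV_SHUTDOWN),
	 * SHUT_WR (1) becomes 2 (SEND_SHUTDOWN), and SHUT_RDWR (2) becomes 3
	 * (RCV_SHUTDOWN | SEND_SHUTDOWN).
	 */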
	mode++;

	if ((mode & ~SHUTDOWN_MASK) || !mode)
		return -EINVAL;

	/* If this is a connection oriented socket and it is not connected then
	 * bail out immediately. If it is a DGRAM socket then we must first
	 * kick the socket so that it wakes up from any sleeping calls, for
	 * example recv(), and then afterwards return the error.
	 */

	sk = sock->sk;

	lock_sock(sk);
	if (sock->state == SS_UNCONNECTED) {
		err = -ENOTCONN;
		if (sock_type_connectible(sk->sk_type))
			goto out;
	} else {
		sock->state = SS_DISCONNECTING;
		err = 0;
	}

	/* Receive and send shutdowns are treated alike. */
	mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
	if (mode) {
		sk->sk_shutdown |= mode;
		sk->sk_state_change(sk);

		if (sock_type_connectible(sk->sk_type)) {
			sock_reset_flag(sk, SOCK_DONE);
			vsock_send_shutdown(sk, mode);
		}
	}

out:
	release_sock(sk);
	return err;
}

static __poll_t vsock_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk;
	__poll_t mask;
	struct vsock_sock *vsk;

	sk = sock->sk;
	vsk = vsock_sk(sk);

	poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		/* Signify that there has been an error on this socket. */
		mask |= EPOLLERR;

	/* INET sockets treat local write shutdown and peer write shutdown as a
	 * case of EPOLLHUP set.
	 */
	if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
	    ((sk->sk_shutdown & SEND_SHUTDOWN) &&
	     (vsk->peer_shutdown & SEND_SHUTDOWN))) {
		mask |= EPOLLHUP;
	}

	if (sk->sk_shutdown & RCV_SHUTDOWN ||
	    vsk->peer_shutdown & SEND_SHUTDOWN) {
		mask |= EPOLLRDHUP;
	}

	if (sk_is_readable(sk))
		mask |= EPOLLIN | EPOLLRDNORM;

	if (sock->type == SOCK_DGRAM) {
		/* For datagram sockets we can read if there is something in
		 * the queue and write as long as the socket isn't shutdown for
		 * sending.
		 */
		if (!skb_queue_empty_lockless(&sk->sk_receive_queue) ||
		    (sk->sk_shutdown & RCV_SHUTDOWN)) {
			mask |= EPOLLIN | EPOLLRDNORM;
		}

		if (!(sk->sk_shutdown & SEND_SHUTDOWN))
			mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	} else if (sock_type_connectible(sk->sk_type)) {
		const struct vsock_transport *transport;

		lock_sock(sk);

		transport = vsk->transport;

		/* Listening sockets that have connections in their accept
		 * queue can be read.
		 */
		if (sk->sk_state == TCP_LISTEN
		    && !vsock_is_accept_queue_empty(sk))
			mask |= EPOLLIN | EPOLLRDNORM;

		/* If there is something in the queue then we can read. */
		if (transport && transport->stream_is_active(vsk) &&
		    !(sk->sk_shutdown & RCV_SHUTDOWN)) {
			bool data_ready_now = false;
			int target = sock_rcvlowat(sk, 0, INT_MAX);
			int ret = transport->notify_poll_in(
					vsk, target, &data_ready_now);
			if (ret < 0) {
				mask |= EPOLLERR;
			} else {
				if (data_ready_now)
					mask |= EPOLLIN | EPOLLRDNORM;
			}
		}

		/* Sockets whose connections have been closed, reset, or
		 * terminated should also be considered read, and we check the
		 * shutdown flag for that.
		 */
		if (sk->sk_shutdown & RCV_SHUTDOWN ||
		    vsk->peer_shutdown & SEND_SHUTDOWN) {
			mask |= EPOLLIN | EPOLLRDNORM;
		}

		/* Connected sockets that can produce data can be written. */
		if (transport && sk->sk_state == TCP_ESTABLISHED) {
			if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
				bool space_avail_now = false;
				int ret = transport->notify_poll_out(
						vsk, 1, &space_avail_now);
				if (ret < 0) {
					mask |= EPOLLERR;
				} else {
					if (space_avail_now)
						/* Remove EPOLLWRBAND since INET
						 * sockets are not setting it.
						 */
						mask |= EPOLLOUT | EPOLLWRNORM;
				}
			}
		}

		/* Simulate INET socket poll behaviors, which sets
		 * EPOLLOUT|EPOLLWRNORM when peer is closed and nothing to
		 * read, but local send is not shutdown.
		 */
		if (sk->sk_state == TCP_CLOSE || sk->sk_state == TCP_CLOSING) {
			if (!(sk->sk_shutdown & SEND_SHUTDOWN))
				mask |= EPOLLOUT | EPOLLWRNORM;
		}

		release_sock(sk);
	}

	return mask;
}

static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	if (WARN_ON_ONCE(!vsk->transport))
		return -ENODEV;

	return vsk->transport->read_skb(vsk, read_actor);
}

static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	int err;
	struct sock *sk;
	struct vsock_sock *vsk;
	struct sockaddr_vm *remote_addr;
	const struct vsock_transport *transport;

	if (msg->msg_flags & MSG_OOB)
		return -EOPNOTSUPP;

	/* For now, MSG_DONTWAIT is always assumed... */
	err = 0;
	sk = sock->sk;
	vsk = vsock_sk(sk);

	lock_sock(sk);

	transport = vsk->transport;

	err = vsock_auto_bind(vsk);
	if (err)
		goto out;

	/* If the provided message contains an address, use that. Otherwise
	 * fall back on the socket's remote handle (if it has been connected).
	 */
	if (msg->msg_name &&
	    vsock_addr_cast(msg->msg_name, msg->msg_namelen,
			    &remote_addr) == 0) {
		/* Ensure this address is of the right type and is a valid
		 * destination.
		 */

		if (remote_addr->svm_cid == VMADDR_CID_ANY)
			remote_addr->svm_cid = transport->get_local_cid();

		if (!vsock_addr_bound(remote_addr)) {
			err = -EINVAL;
			goto out;
		}
	} else if (sock->state == SS_CONNECTED) {
		remote_addr = &vsk->remote_addr;

		if (remote_addr->svm_cid == VMADDR_CID_ANY)
			remote_addr->svm_cid = transport->get_local_cid();

		/* XXX Should connect() or this function ensure remote_addr is
		 * bound?
		 */
		if (!vsock_addr_bound(&vsk->remote_addr)) {
			err = -EINVAL;
			goto out;
		}
	} else {
		err = -EINVAL;
		goto out;
	}

	if (!transport->dgram_allow(remote_addr->svm_cid,
				    remote_addr->svm_port)) {
		err = -EINVAL;
		goto out;
	}

	err = transport->dgram_enqueue(vsk, remote_addr, msg, len);

out:
	release_sock(sk);
	return err;
}

static int vsock_dgram_connect(struct socket *sock,
			       struct sockaddr *addr, int addr_len, int flags)
{
	int err;
	struct sock *sk;
	struct vsock_sock *vsk;
	struct sockaddr_vm *remote_addr;

	sk = sock->sk;
	vsk = vsock_sk(sk);

	err = vsock_addr_cast(addr, addr_len, &remote_addr);
	if (err == -EAFNOSUPPORT && remote_addr->svm_family == AF_UNSPEC) {
		lock_sock(sk);
		vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY,
				VMADDR_PORT_ANY);
		sock->state = SS_UNCONNECTED;
		release_sock(sk);
		return 0;
	} else if (err != 0)
		return -EINVAL;

	lock_sock(sk);

	err = vsock_auto_bind(vsk);
	if (err)
		goto out;

	if (!vsk->transport->dgram_allow(remote_addr->svm_cid,
					 remote_addr->svm_port)) {
		err = -EINVAL;
		goto out;
	}

	memcpy(&vsk->remote_addr, remote_addr, sizeof(vsk->remote_addr));
	sock->state = SS_CONNECTED;
	/* sock map disallows redirection of non-TCP sockets with sk_state !=
	 * TCP_ESTABLISHED (see sock_map_redirect_allowed()), so we set
	 * TCP_ESTABLISHED here to allow redirection of connected vsock dgrams.
	 *
	 * This doesn't seem to be an abnormal state for datagram sockets, as
	 * the same approach can be seen in other datagram socket types as well
	 * (such as unix sockets).
	 */
	sk->sk_state = TCP_ESTABLISHED;

out:
	release_sock(sk);
	return err;
}
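
/* Illustrative sketch (not part of the original file): connecting a vsock
 * datagram socket from userspace, then using send() without an explicit
 * destination. Datagram support depends on the registered dgram transport
 * (e.g. VMCI); the CID/port values below are arbitrary assumptions.
 *
 *	int fd = socket(AF_VSOCK, SOCK_DGRAM, 0);
 *	struct sockaddr_vm addr = {
 *		.svm_family = AF_VSOCK,
 *		.svm_cid = 42,
 *		.svm_port = 9999,
 *	};
 *
 *	connect(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	send(fd, "hello", 5, 0);
 *
 * After vsock_dgram_connect() succeeds, vsock_dgram_sendmsg() falls back on
 * vsk->remote_addr for messages that carry no msg_name, as seen above.
 */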
  1121. int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
  1122. size_t len, int flags)
  1123. {
  1124. struct sock *sk = sock->sk;
  1125. struct vsock_sock *vsk = vsock_sk(sk);
  1126. return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
  1127. }
  1128. int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
  1129. size_t len, int flags)
  1130. {
  1131. #ifdef CONFIG_BPF_SYSCALL
  1132. struct sock *sk = sock->sk;
  1133. const struct proto *prot;
  1134. prot = READ_ONCE(sk->sk_prot);
  1135. if (prot != &vsock_proto)
  1136. return prot->recvmsg(sk, msg, len, flags, NULL);
  1137. #endif
  1138. return __vsock_dgram_recvmsg(sock, msg, len, flags);
  1139. }
  1140. EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
  1141. static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
  1142. int __user *arg)
  1143. {
  1144. struct sock *sk = sock->sk;
  1145. struct vsock_sock *vsk;
  1146. int ret;
  1147. vsk = vsock_sk(sk);
  1148. switch (cmd) {
  1149. case SIOCOUTQ: {
  1150. ssize_t n_bytes;
  1151. if (!vsk->transport || !vsk->transport->unsent_bytes) {
  1152. ret = -EOPNOTSUPP;
  1153. break;
  1154. }
  1155. if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
  1156. ret = -EINVAL;
  1157. break;
  1158. }
  1159. n_bytes = vsk->transport->unsent_bytes(vsk);
  1160. if (n_bytes < 0) {
  1161. ret = n_bytes;
  1162. break;
  1163. }
  1164. ret = put_user(n_bytes, arg);
  1165. break;
  1166. }
  1167. default:
  1168. ret = -ENOIOCTLCMD;
  1169. }
  1170. return ret;
  1171. }
  1172. static int vsock_ioctl(struct socket *sock, unsigned int cmd,
  1173. unsigned long arg)
  1174. {
  1175. int ret;
  1176. lock_sock(sock->sk);
  1177. ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
  1178. release_sock(sock->sk);
  1179. return ret;
  1180. }
  1181. static const struct proto_ops vsock_dgram_ops = {
  1182. .family = PF_VSOCK,
  1183. .owner = THIS_MODULE,
  1184. .release = vsock_release,
  1185. .bind = vsock_bind,
  1186. .connect = vsock_dgram_connect,
  1187. .socketpair = sock_no_socketpair,
  1188. .accept = sock_no_accept,
  1189. .getname = vsock_getname,
  1190. .poll = vsock_poll,
  1191. .ioctl = vsock_ioctl,
  1192. .listen = sock_no_listen,
  1193. .shutdown = vsock_shutdown,
  1194. .sendmsg = vsock_dgram_sendmsg,
  1195. .recvmsg = vsock_dgram_recvmsg,
  1196. .mmap = sock_no_mmap,
  1197. .read_skb = vsock_read_skb,
  1198. };
  1199. static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
  1200. {
  1201. const struct vsock_transport *transport = vsk->transport;
  1202. if (!transport || !transport->cancel_pkt)
  1203. return -EOPNOTSUPP;
  1204. return transport->cancel_pkt(vsk);
  1205. }
  1206. static void vsock_connect_timeout(struct work_struct *work)
  1207. {
  1208. struct sock *sk;
  1209. struct vsock_sock *vsk;
  1210. vsk = container_of(work, struct vsock_sock, connect_work.work);
  1211. sk = sk_vsock(vsk);
  1212. lock_sock(sk);
  1213. if (sk->sk_state == TCP_SYN_SENT &&
  1214. (sk->sk_shutdown != SHUTDOWN_MASK)) {
  1215. sk->sk_state = TCP_CLOSE;
  1216. sk->sk_socket->state = SS_UNCONNECTED;
  1217. sk->sk_err = ETIMEDOUT;
  1218. sk_error_report(sk);
  1219. vsock_transport_cancel_pkt(vsk);
  1220. }
  1221. release_sock(sk);
  1222. sock_put(sk);
  1223. }
static int vsock_connect(struct socket *sock, struct sockaddr *addr,
                         int addr_len, int flags)
{
        int err;
        struct sock *sk;
        struct vsock_sock *vsk;
        const struct vsock_transport *transport;
        struct sockaddr_vm *remote_addr;
        long timeout;
        DEFINE_WAIT(wait);

        err = 0;
        sk = sock->sk;
        vsk = vsock_sk(sk);

        lock_sock(sk);

        /* XXX AF_UNSPEC should make us disconnect like AF_INET. */
        switch (sock->state) {
        case SS_CONNECTED:
                err = -EISCONN;
                goto out;
        case SS_DISCONNECTING:
                err = -EINVAL;
                goto out;
        case SS_CONNECTING:
                /* This continues on so we can move sock into the SS_CONNECTED
                 * state once the connection has completed (at which point err
                 * will be set to zero also).  Otherwise, we will either wait
                 * for the connection or return -EALREADY should this be a
                 * non-blocking call.
                 */
                err = -EALREADY;
                if (flags & O_NONBLOCK)
                        goto out;
                break;
        default:
                if ((sk->sk_state == TCP_LISTEN) ||
                    vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
                        err = -EINVAL;
                        goto out;
                }

                /* Set the remote address that we are connecting to. */
                memcpy(&vsk->remote_addr, remote_addr,
                       sizeof(vsk->remote_addr));

                err = vsock_assign_transport(vsk, NULL);
                if (err)
                        goto out;

                transport = vsk->transport;

                /* The hypervisor and well-known contexts do not have socket
                 * endpoints.
                 */
                if (!transport ||
                    !transport->stream_allow(remote_addr->svm_cid,
                                             remote_addr->svm_port)) {
                        err = -ENETUNREACH;
                        goto out;
                }

                if (vsock_msgzerocopy_allow(transport)) {
                        set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
                } else if (sock_flag(sk, SOCK_ZEROCOPY)) {
                        /* If this option was set before 'connect()',
                         * when transport was unknown, check that this
                         * feature is supported here.
                         */
                        err = -EOPNOTSUPP;
                        goto out;
                }

                err = vsock_auto_bind(vsk);
                if (err)
                        goto out;

                sk->sk_state = TCP_SYN_SENT;

                err = transport->connect(vsk);
                if (err < 0)
                        goto out;

                /* sk_err might have been set as a result of an earlier
                 * (failed) connect attempt.
                 */
                sk->sk_err = 0;

                /* Mark sock as connecting and set the error code to in
                 * progress in case this is a non-blocking connect.
                 */
                sock->state = SS_CONNECTING;
                err = -EINPROGRESS;
        }

        /* The receive path will handle all communication until we are able to
         * enter the connected state.  Here we wait for the connection to be
         * completed or a notification of an error.
         */
        timeout = vsk->connect_timeout;
        prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

        /* If the socket is already closing or it is in an error state, there
         * is no point in waiting.
         */
        while (sk->sk_state != TCP_ESTABLISHED &&
               sk->sk_state != TCP_CLOSING && sk->sk_err == 0) {
                if (flags & O_NONBLOCK) {
                        /* If we're not going to block, we schedule a timeout
                         * function to generate a timeout on the connection
                         * attempt, in case the peer doesn't respond in a
                         * timely manner. We hold on to the socket until the
                         * timeout fires.
                         */
                        sock_hold(sk);

                        /* If the timeout function is already scheduled,
                         * reschedule it, then ungrab the socket refcount to
                         * keep it balanced.
                         */
                        if (mod_delayed_work(system_wq, &vsk->connect_work,
                                             timeout))
                                sock_put(sk);

                        /* Skip ahead to preserve error code set above. */
                        goto out_wait;
                }

                release_sock(sk);
                timeout = schedule_timeout(timeout);
                lock_sock(sk);

                if (signal_pending(current)) {
                        err = sock_intr_errno(timeout);
                        sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
                        sock->state = SS_UNCONNECTED;
                        vsock_transport_cancel_pkt(vsk);
                        vsock_remove_connected(vsk);
                        goto out_wait;
                } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
                        err = -ETIMEDOUT;
                        sk->sk_state = TCP_CLOSE;
                        sock->state = SS_UNCONNECTED;
                        vsock_transport_cancel_pkt(vsk);
                        goto out_wait;
                }

                prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
        }

        if (sk->sk_err) {
                err = -sk->sk_err;
                sk->sk_state = TCP_CLOSE;
                sock->state = SS_UNCONNECTED;
        } else {
                err = 0;
        }

out_wait:
        finish_wait(sk_sleep(sk), &wait);
out:
        release_sock(sk);
        return err;
}
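
/* Dequeue an established child socket from the listener's accept queue,
 * waiting up to the listener's receive timeout.  If the listener hit an
 * error in the meantime, the child is only marked rejected and left to the
 * cleanup path instead of being grafted onto the new socket.
 */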
static int vsock_accept(struct socket *sock, struct socket *newsock,
                        struct proto_accept_arg *arg)
{
        struct sock *listener;
        int err;
        struct sock *connected;
        struct vsock_sock *vconnected;
        long timeout;
        DEFINE_WAIT(wait);

        err = 0;
        listener = sock->sk;

        lock_sock(listener);

        if (!sock_type_connectible(sock->type)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        if (listener->sk_state != TCP_LISTEN) {
                err = -EINVAL;
                goto out;
        }

        /* Wait for children sockets to appear; these are the new sockets
         * created upon connection establishment.
         */
        timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK);
        prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);

        while ((connected = vsock_dequeue_accept(listener)) == NULL &&
               listener->sk_err == 0) {
                release_sock(listener);
                timeout = schedule_timeout(timeout);
                finish_wait(sk_sleep(listener), &wait);
                lock_sock(listener);

                if (signal_pending(current)) {
                        err = sock_intr_errno(timeout);
                        goto out;
                } else if (timeout == 0) {
                        err = -EAGAIN;
                        goto out;
                }

                prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE);
        }
        finish_wait(sk_sleep(listener), &wait);

        if (listener->sk_err)
                err = -listener->sk_err;

        if (connected) {
                sk_acceptq_removed(listener);

                lock_sock_nested(connected, SINGLE_DEPTH_NESTING);
                vconnected = vsock_sk(connected);

                /* If the listener socket has received an error, then we should
                 * reject this socket and return.  Note that we simply mark the
                 * socket rejected, drop our reference, and let the cleanup
                 * function handle the cleanup; the fact that we found it in
                 * the listener's accept queue guarantees that the cleanup
                 * function hasn't run yet.
                 */
                if (err) {
                        vconnected->rejected = true;
                } else {
                        newsock->state = SS_CONNECTED;
                        sock_graft(connected, newsock);
                        if (vsock_msgzerocopy_allow(vconnected->transport))
                                set_bit(SOCK_SUPPORT_ZC,
                                        &connected->sk_socket->flags);
                }

                release_sock(connected);
                sock_put(connected);
        }

out:
        release_sock(listener);
        return err;
}
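
/* Move a bound, connectible socket into TCP_LISTEN.  Illustrative server
 * sketch (assumes <sys/socket.h> and <linux/vm_sockets.h>; VMADDR_CID_ANY
 * listens on all local CIDs, the port is arbitrary):
 *
 *	struct sockaddr_vm sa = {
 *		.svm_family = AF_VSOCK,
 *		.svm_cid = VMADDR_CID_ANY,
 *		.svm_port = 1234,
 *	};
 *	int fd = socket(AF_VSOCK, SOCK_STREAM, 0);
 *
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *	listen(fd, 16);
 *	int conn = accept(fd, NULL, NULL);
 */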
static int vsock_listen(struct socket *sock, int backlog)
{
        int err;
        struct sock *sk;
        struct vsock_sock *vsk;

        sk = sock->sk;

        lock_sock(sk);

        if (!sock_type_connectible(sk->sk_type)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        if (sock->state != SS_UNCONNECTED) {
                err = -EINVAL;
                goto out;
        }

        vsk = vsock_sk(sk);

        if (!vsock_addr_bound(&vsk->local_addr)) {
                err = -EINVAL;
                goto out;
        }

        sk->sk_max_ack_backlog = backlog;
        sk->sk_state = TCP_LISTEN;

        err = 0;

out:
        release_sock(sk);
        return err;
}
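
/* Clamp a requested buffer size into [buffer_min_size, buffer_max_size]
 * and notify the transport of the result.  For example, with a 4 KiB
 * minimum and a 64 KiB maximum, a request for 1 MiB is stored as 64 KiB.
 */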
static void vsock_update_buffer_size(struct vsock_sock *vsk,
                                     const struct vsock_transport *transport,
                                     u64 val)
{
        if (val > vsk->buffer_max_size)
                val = vsk->buffer_max_size;

        if (val < vsk->buffer_min_size)
                val = vsk->buffer_min_size;

        if (val != vsk->buffer_size &&
            transport && transport->notify_buffer_size)
                transport->notify_buffer_size(vsk, &val);

        vsk->buffer_size = val;
}
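
/* Set AF_VSOCK-level options (buffer sizing, connect timeout) and, at
 * SOL_SOCKET level, SO_ZEROCOPY; all other SOL_SOCKET options are passed
 * through to sock_setsockopt().  Note that the buffer options take a
 * 64-bit value.  Illustrative sketch:
 *
 *	unsigned long long size = 64 * 1024;
 *
 *	setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE,
 *		   &size, sizeof(size));
 */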
static int vsock_connectible_setsockopt(struct socket *sock,
                                        int level,
                                        int optname,
                                        sockptr_t optval,
                                        unsigned int optlen)
{
        int err;
        struct sock *sk;
        struct vsock_sock *vsk;
        const struct vsock_transport *transport;
        u64 val;

        if (level != AF_VSOCK && level != SOL_SOCKET)
                return -ENOPROTOOPT;

#define COPY_IN(_v)                                                     \
        do {                                                            \
                if (optlen < sizeof(_v)) {                              \
                        err = -EINVAL;                                  \
                        goto exit;                                      \
                }                                                       \
                if (copy_from_sockptr(&_v, optval, sizeof(_v)) != 0) {  \
                        err = -EFAULT;                                  \
                        goto exit;                                      \
                }                                                       \
        } while (0)

        err = 0;
        sk = sock->sk;
        vsk = vsock_sk(sk);

        lock_sock(sk);

        transport = vsk->transport;

        if (level == SOL_SOCKET) {
                int zerocopy;

                if (optname != SO_ZEROCOPY) {
                        release_sock(sk);
                        return sock_setsockopt(sock, level, optname, optval, optlen);
                }

                /* Use 'int' type here, because variable to
                 * set this option usually has this type.
                 */
                COPY_IN(zerocopy);

                if (zerocopy < 0 || zerocopy > 1) {
                        err = -EINVAL;
                        goto exit;
                }

                if (transport && !vsock_msgzerocopy_allow(transport)) {
                        err = -EOPNOTSUPP;
                        goto exit;
                }

                sock_valbool_flag(sk, SOCK_ZEROCOPY, zerocopy);
                goto exit;
        }

        switch (optname) {
        case SO_VM_SOCKETS_BUFFER_SIZE:
                COPY_IN(val);
                vsock_update_buffer_size(vsk, transport, val);
                break;

        case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
                COPY_IN(val);
                vsk->buffer_max_size = val;
                vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
                break;

        case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
                COPY_IN(val);
                vsk->buffer_min_size = val;
                vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
                break;

        case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
        case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD: {
                struct __kernel_sock_timeval tv;

                err = sock_copy_user_timeval(&tv, optval, optlen,
                                             optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
                if (err)
                        break;
                if (tv.tv_sec >= 0 && tv.tv_usec < USEC_PER_SEC &&
                    tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)) {
                        vsk->connect_timeout = tv.tv_sec * HZ +
                                DIV_ROUND_UP((unsigned long)tv.tv_usec, (USEC_PER_SEC / HZ));
                        if (vsk->connect_timeout == 0)
                                vsk->connect_timeout =
                                        VSOCK_DEFAULT_CONNECT_TIMEOUT;
                } else {
                        err = -ERANGE;
                }
                break;
        }

        default:
                err = -ENOPROTOOPT;
                break;
        }

#undef COPY_IN

exit:
        release_sock(sk);
        return err;
}
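
/* Read back AF_VSOCK-level options.  A union is used so a single buffer
 * can hold either the u64 buffer-size values or whichever timeval layout
 * the _OLD/_NEW connect-timeout variant requires; the copied length is
 * trimmed to the caller's optlen.
 */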
static int vsock_connectible_getsockopt(struct socket *sock,
                                        int level, int optname,
                                        char __user *optval,
                                        int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct vsock_sock *vsk = vsock_sk(sk);

        union {
                u64 val64;
                struct old_timeval32 tm32;
                struct __kernel_old_timeval tm;
                struct __kernel_sock_timeval stm;
        } v;

        int lv = sizeof(v.val64);
        int len;

        if (level != AF_VSOCK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;

        memset(&v, 0, sizeof(v));

        switch (optname) {
        case SO_VM_SOCKETS_BUFFER_SIZE:
                v.val64 = vsk->buffer_size;
                break;

        case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
                v.val64 = vsk->buffer_max_size;
                break;

        case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
                v.val64 = vsk->buffer_min_size;
                break;

        case SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW:
        case SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD:
                lv = sock_get_timeout(vsk->connect_timeout, &v,
                                      optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
                break;

        default:
                return -ENOPROTOOPT;
        }

        if (len < lv)
                return -EINVAL;
        if (len > lv)
                len = lv;

        if (copy_to_user(optval, &v, len))
                return -EFAULT;

        if (put_user(len, optlen))
                return -EFAULT;

        return 0;
}
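
/* Send on a connected stream/seqpacket socket.  The fast path enqueues
 * directly to the transport; when the produce queue is full, the caller
 * sleeps under wait_woken() until space, an error, or a shutdown appears.
 * The transport's notify_send_* callbacks bracket every step so a
 * transport can maintain its flow-control state.  Returns the number of
 * bytes written for SOCK_STREAM, but for SOCK_SEQPACKET only when the
 * whole record was sent.
 */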
static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
                                     size_t len)
{
        struct sock *sk;
        struct vsock_sock *vsk;
        const struct vsock_transport *transport;
        ssize_t total_written;
        long timeout;
        int err;
        struct vsock_transport_send_notify_data send_data;
        DEFINE_WAIT_FUNC(wait, woken_wake_function);

        sk = sock->sk;
        vsk = vsock_sk(sk);
        total_written = 0;
        err = 0;

        if (msg->msg_flags & MSG_OOB)
                return -EOPNOTSUPP;

        lock_sock(sk);

        transport = vsk->transport;

        /* Callers should not provide a destination with connection oriented
         * sockets.
         */
        if (msg->msg_namelen) {
                err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
                goto out;
        }

        /* Send data only if both sides are not shutdown in the direction. */
        if (sk->sk_shutdown & SEND_SHUTDOWN ||
            vsk->peer_shutdown & RCV_SHUTDOWN) {
                err = -EPIPE;
                goto out;
        }

        if (!transport || sk->sk_state != TCP_ESTABLISHED ||
            !vsock_addr_bound(&vsk->local_addr)) {
                err = -ENOTCONN;
                goto out;
        }

        if (!vsock_addr_bound(&vsk->remote_addr)) {
                err = -EDESTADDRREQ;
                goto out;
        }

        if (msg->msg_flags & MSG_ZEROCOPY &&
            !vsock_msgzerocopy_allow(transport)) {
                err = -EOPNOTSUPP;
                goto out;
        }

        /* Wait for room in the produce queue to enqueue our user's data. */
        timeout = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);

        err = transport->notify_send_init(vsk, &send_data);
        if (err < 0)
                goto out;

        while (total_written < len) {
                ssize_t written;

                add_wait_queue(sk_sleep(sk), &wait);
                while (vsock_stream_has_space(vsk) == 0 &&
                       sk->sk_err == 0 &&
                       !(sk->sk_shutdown & SEND_SHUTDOWN) &&
                       !(vsk->peer_shutdown & RCV_SHUTDOWN)) {

                        /* Don't wait for non-blocking sockets. */
                        if (timeout == 0) {
                                err = -EAGAIN;
                                remove_wait_queue(sk_sleep(sk), &wait);
                                goto out_err;
                        }

                        err = transport->notify_send_pre_block(vsk, &send_data);
                        if (err < 0) {
                                remove_wait_queue(sk_sleep(sk), &wait);
                                goto out_err;
                        }

                        release_sock(sk);
                        timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout);
                        lock_sock(sk);
                        if (signal_pending(current)) {
                                err = sock_intr_errno(timeout);
                                remove_wait_queue(sk_sleep(sk), &wait);
                                goto out_err;
                        } else if (timeout == 0) {
                                err = -EAGAIN;
                                remove_wait_queue(sk_sleep(sk), &wait);
                                goto out_err;
                        }
                }
                remove_wait_queue(sk_sleep(sk), &wait);

                /* These checks occur both as part of and after the loop
                 * conditional since we need to check before and after
                 * sleeping.
                 */
                if (sk->sk_err) {
                        err = -sk->sk_err;
                        goto out_err;
                } else if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
                           (vsk->peer_shutdown & RCV_SHUTDOWN)) {
                        err = -EPIPE;
                        goto out_err;
                }

                err = transport->notify_send_pre_enqueue(vsk, &send_data);
                if (err < 0)
                        goto out_err;

                /* Note that enqueue will only write as many bytes as are free
                 * in the produce queue, so we don't need to ensure len is
                 * smaller than the queue size.  It is the caller's
                 * responsibility to check how many bytes we were able to send.
                 */
                if (sk->sk_type == SOCK_SEQPACKET) {
                        written = transport->seqpacket_enqueue(vsk,
                                                msg, len - total_written);
                } else {
                        written = transport->stream_enqueue(vsk,
                                                msg, len - total_written);
                }

                if (written < 0) {
                        err = written;
                        goto out_err;
                }

                total_written += written;

                err = transport->notify_send_post_enqueue(
                                vsk, written, &send_data);
                if (err < 0)
                        goto out_err;
        }

out_err:
        if (total_written > 0) {
                /* Return number of written bytes only if:
                 * 1) SOCK_STREAM socket.
                 * 2) SOCK_SEQPACKET socket when whole buffer is sent.
                 */
                if (sk->sk_type == SOCK_STREAM || total_written == len)
                        err = total_written;
        }
out:
        if (sk->sk_type == SOCK_STREAM)
                err = sk_stream_error(sk, msg->msg_flags, err);

        release_sock(sk);
        return err;
}
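
/* Wait until the socket has readable data or can make no further
 * progress.  Returns the positive amount reported by the transport, 0 when
 * an error or shutdown ended the wait with nothing queued, -EAGAIN on
 * timeout, a signal errno, or -ENOMEM when the transport reported a
 * negative count (see the XXX comment below).
 */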
static int vsock_connectible_wait_data(struct sock *sk,
                                       struct wait_queue_entry *wait,
                                       long timeout,
                                       struct vsock_transport_recv_notify_data *recv_data,
                                       size_t target)
{
        const struct vsock_transport *transport;
        struct vsock_sock *vsk;
        s64 data;
        int err;

        vsk = vsock_sk(sk);
        err = 0;
        transport = vsk->transport;

        while (1) {
                prepare_to_wait(sk_sleep(sk), wait, TASK_INTERRUPTIBLE);
                data = vsock_connectible_has_data(vsk);
                if (data != 0)
                        break;

                if (sk->sk_err != 0 ||
                    (sk->sk_shutdown & RCV_SHUTDOWN) ||
                    (vsk->peer_shutdown & SEND_SHUTDOWN)) {
                        break;
                }

                /* Don't wait for non-blocking sockets. */
                if (timeout == 0) {
                        err = -EAGAIN;
                        break;
                }

                if (recv_data) {
                        err = transport->notify_recv_pre_block(vsk, target, recv_data);
                        if (err < 0)
                                break;
                }

                release_sock(sk);
                timeout = schedule_timeout(timeout);
                lock_sock(sk);

                if (signal_pending(current)) {
                        err = sock_intr_errno(timeout);
                        break;
                } else if (timeout == 0) {
                        err = -EAGAIN;
                        break;
                }
        }

        finish_wait(sk_sleep(sk), wait);

        if (err)
                return err;

        /* Internal transport error when checking for available
         * data. XXX This should be changed to a connection
         * reset in a later change.
         */
        if (data < 0)
                return -ENOMEM;

        return data;
}
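
/* Stream receive: keep dequeueing until at least 'target' bytes
 * (SO_RCVLOWAT, capped at len) have been copied, the peer shuts down, or
 * an error occurs.  A target at or above the receive high-water mark can
 * never be satisfied, so it fails with -ENOMEM up front.
 */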
static int __vsock_stream_recvmsg(struct sock *sk, struct msghdr *msg,
                                  size_t len, int flags)
{
        struct vsock_transport_recv_notify_data recv_data;
        const struct vsock_transport *transport;
        struct vsock_sock *vsk;
        ssize_t copied;
        size_t target;
        long timeout;
        int err;
        DEFINE_WAIT(wait);

        vsk = vsock_sk(sk);
        transport = vsk->transport;

        /* We must not copy less than target bytes into the user's buffer
         * before returning successfully, so we wait for the consume queue to
         * have that much data to consume before dequeueing.  Note that this
         * makes it impossible to handle cases where target is greater than the
         * queue size.
         */
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
        if (target >= transport->stream_rcvhiwat(vsk)) {
                err = -ENOMEM;
                goto out;
        }
        timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
        copied = 0;

        err = transport->notify_recv_init(vsk, target, &recv_data);
        if (err < 0)
                goto out;

        while (1) {
                ssize_t read;

                err = vsock_connectible_wait_data(sk, &wait, timeout,
                                                  &recv_data, target);
                if (err <= 0)
                        break;

                err = transport->notify_recv_pre_dequeue(vsk, target,
                                                         &recv_data);
                if (err < 0)
                        break;

                read = transport->stream_dequeue(vsk, msg, len - copied, flags);
                if (read < 0) {
                        err = read;
                        break;
                }

                copied += read;

                err = transport->notify_recv_post_dequeue(vsk, target, read,
                                                !(flags & MSG_PEEK), &recv_data);
                if (err < 0)
                        goto out;

                if (read >= target || flags & MSG_PEEK)
                        break;

                target -= read;
        }

        if (sk->sk_err)
                err = -sk->sk_err;
        else if (sk->sk_shutdown & RCV_SHUTDOWN)
                err = 0;

        if (copied > 0)
                err = copied;

out:
        return err;
}
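
/* Seqpacket receive: dequeue exactly one record.  If the record is larger
 * than the user buffer, MSG_TRUNC is set in msg_flags; passing MSG_TRUNC
 * in flags makes the call return the full record length rather than the
 * number of bytes copied.  E.g. a 1024-byte record read into a 512-byte
 * buffer returns 512 with MSG_TRUNC set in msg_flags, or 1024 if MSG_TRUNC
 * was also requested in flags.
 */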
static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
                                     size_t len, int flags)
{
        const struct vsock_transport *transport;
        struct vsock_sock *vsk;
        ssize_t msg_len;
        long timeout;
        int err = 0;
        DEFINE_WAIT(wait);

        vsk = vsock_sk(sk);
        transport = vsk->transport;

        timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

        err = vsock_connectible_wait_data(sk, &wait, timeout, NULL, 0);
        if (err <= 0)
                goto out;

        msg_len = transport->seqpacket_dequeue(vsk, msg, flags);

        if (msg_len < 0) {
                err = msg_len;
                goto out;
        }

        if (sk->sk_err) {
                err = -sk->sk_err;
        } else if (sk->sk_shutdown & RCV_SHUTDOWN) {
                err = 0;
        } else {
                /* User sets MSG_TRUNC, so return real length of
                 * packet.
                 */
                if (flags & MSG_TRUNC)
                        err = msg_len;
                else
                        err = len - msg_data_left(msg);

                /* Always set MSG_TRUNC if real length of packet is
                 * bigger than user's buffer.
                 */
                if (msg_len > len)
                        msg->msg_flags |= MSG_TRUNC;
        }

out:
        return err;
}

int
__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                            int flags)
{
        struct sock *sk;
        struct vsock_sock *vsk;
        const struct vsock_transport *transport;
        int err;

        sk = sock->sk;

        if (unlikely(flags & MSG_ERRQUEUE))
                return sock_recv_errqueue(sk, msg, len, SOL_VSOCK, VSOCK_RECVERR);

        vsk = vsock_sk(sk);
        err = 0;

        lock_sock(sk);

        transport = vsk->transport;

        if (!transport || sk->sk_state != TCP_ESTABLISHED) {
                /* Recvmsg is supposed to return 0 if a peer performs an
                 * orderly shutdown. Differentiate between that case and when a
                 * peer has not connected or a local shutdown occurred with the
                 * SOCK_DONE flag.
                 */
                if (sock_flag(sk, SOCK_DONE))
                        err = 0;
                else
                        err = -ENOTCONN;

                goto out;
        }

        if (flags & MSG_OOB) {
                err = -EOPNOTSUPP;
                goto out;
        }

        /* We don't check peer_shutdown flag here since peer may actually shut
         * down, but there can be data in the queue that a local socket can
         * receive.
         */
        if (sk->sk_shutdown & RCV_SHUTDOWN) {
                err = 0;
                goto out;
        }

        /* It is valid on Linux to pass in a zero-length receive buffer.  This
         * is not an error.  We may as well bail out now.
         */
        if (!len) {
                err = 0;
                goto out;
        }

        if (sk->sk_type == SOCK_STREAM)
                err = __vsock_stream_recvmsg(sk, msg, len, flags);
        else
                err = __vsock_seqpacket_recvmsg(sk, msg, len, flags);

out:
        release_sock(sk);
        return err;
}

int
vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
                          int flags)
{
#ifdef CONFIG_BPF_SYSCALL
        struct sock *sk = sock->sk;
        const struct proto *prot;

        prot = READ_ONCE(sk->sk_prot);
        if (prot != &vsock_proto)
                return prot->recvmsg(sk, msg, len, flags, NULL);
#endif

        return __vsock_connectible_recvmsg(sock, msg, len, flags);
}
EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg);
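
/* SO_RCVLOWAT handler: the low-water mark may not exceed the socket's
 * buffer size, and the transport gets a chance to veto or act on the new
 * value (e.g. by updating its flow-control state) before it is stored.
 */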
static int vsock_set_rcvlowat(struct sock *sk, int val)
{
        const struct vsock_transport *transport;
        struct vsock_sock *vsk;

        vsk = vsock_sk(sk);

        if (val > vsk->buffer_size)
                return -EINVAL;

        transport = vsk->transport;

        if (transport && transport->notify_set_rcvlowat) {
                int err;

                err = transport->notify_set_rcvlowat(vsk, val);
                if (err)
                        return err;
        }

        WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
        return 0;
}

static const struct proto_ops vsock_stream_ops = {
        .family = PF_VSOCK,
        .owner = THIS_MODULE,
        .release = vsock_release,
        .bind = vsock_bind,
        .connect = vsock_connect,
        .socketpair = sock_no_socketpair,
        .accept = vsock_accept,
        .getname = vsock_getname,
        .poll = vsock_poll,
        .ioctl = vsock_ioctl,
        .listen = vsock_listen,
        .shutdown = vsock_shutdown,
        .setsockopt = vsock_connectible_setsockopt,
        .getsockopt = vsock_connectible_getsockopt,
        .sendmsg = vsock_connectible_sendmsg,
        .recvmsg = vsock_connectible_recvmsg,
        .mmap = sock_no_mmap,
        .set_rcvlowat = vsock_set_rcvlowat,
        .read_skb = vsock_read_skb,
};

static const struct proto_ops vsock_seqpacket_ops = {
        .family = PF_VSOCK,
        .owner = THIS_MODULE,
        .release = vsock_release,
        .bind = vsock_bind,
        .connect = vsock_connect,
        .socketpair = sock_no_socketpair,
        .accept = vsock_accept,
        .getname = vsock_getname,
        .poll = vsock_poll,
        .ioctl = vsock_ioctl,
        .listen = vsock_listen,
        .shutdown = vsock_shutdown,
        .setsockopt = vsock_connectible_setsockopt,
        .getsockopt = vsock_connectible_getsockopt,
        .sendmsg = vsock_connectible_sendmsg,
        .recvmsg = vsock_connectible_recvmsg,
        .mmap = sock_no_mmap,
        .read_skb = vsock_read_skb,
};
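
/* socket(2) backend for AF_VSOCK.  Datagram sockets get their (single)
 * transport immediately; connectible sockets defer transport assignment
 * until connect(), when the remote CID is known.  Illustrative sketch of
 * the three supported types:
 *
 *	int dgram = socket(AF_VSOCK, SOCK_DGRAM, 0);
 *	int strm  = socket(AF_VSOCK, SOCK_STREAM, 0);
 *	int seq   = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
 */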
static int vsock_create(struct net *net, struct socket *sock,
                        int protocol, int kern)
{
        struct vsock_sock *vsk;
        struct sock *sk;
        int ret;

        if (!sock)
                return -EINVAL;

        if (protocol && protocol != PF_VSOCK)
                return -EPROTONOSUPPORT;

        switch (sock->type) {
        case SOCK_DGRAM:
                sock->ops = &vsock_dgram_ops;
                break;
        case SOCK_STREAM:
                sock->ops = &vsock_stream_ops;
                break;
        case SOCK_SEQPACKET:
                sock->ops = &vsock_seqpacket_ops;
                break;
        default:
                return -ESOCKTNOSUPPORT;
        }

        sock->state = SS_UNCONNECTED;

        sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
        if (!sk)
                return -ENOMEM;

        vsk = vsock_sk(sk);

        if (sock->type == SOCK_DGRAM) {
                ret = vsock_assign_transport(vsk, NULL);
                if (ret < 0) {
                        sock_put(sk);
                        return ret;
                }
        }

        /* SOCK_DGRAM doesn't have 'setsockopt' callback set in its
         * proto_ops, so there is no handler for custom logic.
         */
        if (sock_type_connectible(sock->type))
                set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);

        vsock_insert_unbound(vsk);

        return 0;
}

static const struct net_proto_family vsock_family_ops = {
        .family = AF_VSOCK,
        .create = vsock_create,
        .owner = THIS_MODULE,
};
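
/* Back-end for ioctls on the /dev/vsock misc device.
 * IOCTL_VM_SOCKETS_GET_LOCAL_CID returns the guest CID when a
 * guest-to-host transport is registered, falling back to host-to-guest and
 * then local.  Illustrative userspace sketch (assumes <fcntl.h>,
 * <sys/ioctl.h> and <linux/vm_sockets.h>):
 *
 *	unsigned int cid;
 *	int fd = open("/dev/vsock", O_RDONLY);
 *
 *	ioctl(fd, IOCTL_VM_SOCKETS_GET_LOCAL_CID, &cid);
 */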
static long vsock_dev_do_ioctl(struct file *filp,
                               unsigned int cmd, void __user *ptr)
{
        u32 __user *p = ptr;
        int retval = 0;
        u32 cid;

        switch (cmd) {
        case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
                /* To be compatible with the VMCI behavior, we prioritize the
                 * guest CID instead of the well-known host CID
                 * (VMADDR_CID_HOST).
                 */
                cid = vsock_registered_transport_cid(&transport_g2h);
                if (cid == VMADDR_CID_ANY)
                        cid = vsock_registered_transport_cid(&transport_h2g);
                if (cid == VMADDR_CID_ANY)
                        cid = vsock_registered_transport_cid(&transport_local);

                if (put_user(cid, p) != 0)
                        retval = -EFAULT;
                break;

        default:
                retval = -ENOIOCTLCMD;
        }

        return retval;
}

static long vsock_dev_ioctl(struct file *filp,
                            unsigned int cmd, unsigned long arg)
{
        return vsock_dev_do_ioctl(filp, cmd, (void __user *)arg);
}

#ifdef CONFIG_COMPAT
static long vsock_dev_compat_ioctl(struct file *filp,
                                   unsigned int cmd, unsigned long arg)
{
        return vsock_dev_do_ioctl(filp, cmd, compat_ptr(arg));
}
#endif

static const struct file_operations vsock_device_ops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = vsock_dev_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = vsock_dev_compat_ioctl,
#endif
        .open           = nonseekable_open,
};

static struct miscdevice vsock_device = {
        .name           = "vsock",
        .fops           = &vsock_device_ops,
};

static int __init vsock_init(void)
{
        int err = 0;

        vsock_init_tables();

        vsock_proto.owner = THIS_MODULE;
        vsock_device.minor = MISC_DYNAMIC_MINOR;
        err = misc_register(&vsock_device);
        if (err) {
                pr_err("Failed to register misc device\n");
                goto err_reset_transport;
        }

        err = proto_register(&vsock_proto, 1);  /* we want our slab */
        if (err) {
                pr_err("Cannot register vsock protocol\n");
                goto err_deregister_misc;
        }

        err = sock_register(&vsock_family_ops);
        if (err) {
                pr_err("could not register af_vsock (%d) address family: %d\n",
                       AF_VSOCK, err);
                goto err_unregister_proto;
        }

        vsock_bpf_build_proto();

        return 0;

err_unregister_proto:
        proto_unregister(&vsock_proto);
err_deregister_misc:
        misc_deregister(&vsock_device);
err_reset_transport:
        return err;
}

static void __exit vsock_exit(void)
{
        misc_deregister(&vsock_device);
        sock_unregister(AF_VSOCK);
        proto_unregister(&vsock_proto);
}

const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
{
        return vsk->transport;
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
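
/* Register a transport for the roles named in 'features' (H2G, G2H,
 * DGRAM, LOCAL).  Registration is all-or-nothing: if any requested role is
 * already taken, -EBUSY is returned and no role is assigned.  Illustrative
 * sketch with a hypothetical transport:
 *
 *	static const struct vsock_transport my_transport = { ... };
 *
 *	err = vsock_core_register(&my_transport, VSOCK_TRANSPORT_F_G2H);
 */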
int vsock_core_register(const struct vsock_transport *t, int features)
{
        const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
        int err = mutex_lock_interruptible(&vsock_register_mutex);

        if (err)
                return err;

        t_h2g = transport_h2g;
        t_g2h = transport_g2h;
        t_dgram = transport_dgram;
        t_local = transport_local;

        if (features & VSOCK_TRANSPORT_F_H2G) {
                if (t_h2g) {
                        err = -EBUSY;
                        goto err_busy;
                }
                t_h2g = t;
        }

        if (features & VSOCK_TRANSPORT_F_G2H) {
                if (t_g2h) {
                        err = -EBUSY;
                        goto err_busy;
                }
                t_g2h = t;
        }

        if (features & VSOCK_TRANSPORT_F_DGRAM) {
                if (t_dgram) {
                        err = -EBUSY;
                        goto err_busy;
                }
                t_dgram = t;
        }

        if (features & VSOCK_TRANSPORT_F_LOCAL) {
                if (t_local) {
                        err = -EBUSY;
                        goto err_busy;
                }
                t_local = t;
        }

        transport_h2g = t_h2g;
        transport_g2h = t_g2h;
        transport_dgram = t_dgram;
        transport_local = t_local;

err_busy:
        mutex_unlock(&vsock_register_mutex);
        return err;
}
EXPORT_SYMBOL_GPL(vsock_core_register);

void vsock_core_unregister(const struct vsock_transport *t)
{
        mutex_lock(&vsock_register_mutex);

        if (transport_h2g == t)
                transport_h2g = NULL;

        if (transport_g2h == t)
                transport_g2h = NULL;

        if (transport_dgram == t)
                transport_dgram = NULL;

        if (transport_local == t)
                transport_local = NULL;

        mutex_unlock(&vsock_register_mutex);
}
EXPORT_SYMBOL_GPL(vsock_core_unregister);

module_init(vsock_init);
module_exit(vsock_exit);

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Virtual Socket Family");
MODULE_VERSION("1.0.2.0-k");
MODULE_LICENSE("GPL v2");