// SPDX-License-Identifier: GPL-2.0-only
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */

#define KMSG_COMPONENT "smc"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/workqueue.h>
#include <linux/in.h>
#include <linux/sched/signal.h>
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>
#include <linux/splice.h>

#include <net/sock.h>
#include <net/inet_common.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#endif
#include <net/tcp.h>
#include <net/smc.h>
#include <asm/ioctls.h>

#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "smc_netns.h"

#include "smc.h"
#include "smc_clc.h"
#include "smc_llc.h"
#include "smc_cdc.h"
#include "smc_core.h"
#include "smc_ib.h"
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_netlink.h"
#include "smc_tx.h"
#include "smc_rx.h"
#include "smc_close.h"
#include "smc_stats.h"
#include "smc_tracepoint.h"
#include "smc_sysctl.h"
#include "smc_loopback.h"
#include "smc_inet.h"

static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link group
                                                 * creation on server
                                                 */
static DEFINE_MUTEX(smc_client_lgr_pending);    /* serialize link group
                                                 * creation on client
                                                 */

static struct workqueue_struct *smc_tcp_ls_wq;  /* wq for tcp listen work */
struct workqueue_struct *smc_hs_wq;             /* wq for handshake work */
struct workqueue_struct *smc_close_wq;          /* wq for close work */

static void smc_tcp_listen_work(struct work_struct *);
static void smc_connect_work(struct work_struct *);

int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
        void *hdr;

        if (cb_ctx->pos[0])
                goto out;

        hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
                          &smc_gen_nl_family, NLM_F_MULTI,
                          SMC_NETLINK_DUMP_HS_LIMITATION);
        if (!hdr)
                return -ENOMEM;

        if (nla_put_u8(skb, SMC_NLA_HS_LIMITATION_ENABLED,
                       sock_net(skb->sk)->smc.limit_smc_hs))
                goto err;

        genlmsg_end(skb, hdr);
        cb_ctx->pos[0] = 1;
out:
        return skb->len;
err:
        genlmsg_cancel(skb, hdr);
        return -EMSGSIZE;
}

int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
{
        sock_net(skb->sk)->smc.limit_smc_hs = true;
        return 0;
}

int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info)
{
        sock_net(skb->sk)->smc.limit_smc_hs = false;
        return 0;
}
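
/* Propagate SO_KEEPALIVE to the internal TCP (CLC) socket, which carries
 * the actual keepalive probes on behalf of the SMC socket.
 */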
static void smc_set_keepalive(struct sock *sk, int val)
{
        struct smc_sock *smc = smc_sk(sk);

        smc->clcsock->sk->sk_prot->keepalive(smc->clcsock->sk, val);
}

static struct sock *smc_tcp_syn_recv_sock(const struct sock *sk,
                                          struct sk_buff *skb,
                                          struct request_sock *req,
                                          struct dst_entry *dst,
                                          struct request_sock *req_unhash,
                                          bool *own_req)
{
        struct smc_sock *smc;
        struct sock *child;

        smc = smc_clcsock_user_data(sk);

        if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
            sk->sk_max_ack_backlog)
                goto drop;

        if (sk_acceptq_is_full(&smc->sk)) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
                goto drop;
        }

        /* passthrough to original syn recv sock fct */
        child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
                                               own_req);
        /* child must not inherit smc or its ops */
        if (child) {
                rcu_assign_sk_user_data(child, NULL);

                /* v4-mapped sockets don't inherit parent ops. Don't restore. */
                if (inet_csk(child)->icsk_af_ops == inet_csk(sk)->icsk_af_ops)
                        inet_csk(child)->icsk_af_ops = smc->ori_af_ops;
        }
        return child;

drop:
        dst_release(dst);
        tcp_listendrop(sk);
        return NULL;
}
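
/* Report whether the SMC handshake path is congested: either the smc socket
 * is already gone or the handshake workqueue is backed up. The TCP listen
 * path consults this to avoid offering SMC for new connections.
 */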
static bool smc_hs_congested(const struct sock *sk)
{
        const struct smc_sock *smc;

        smc = smc_clcsock_user_data(sk);

        if (!smc)
                return true;

        if (workqueue_congested(WORK_CPU_UNBOUND, smc_hs_wq))
                return true;

        return false;
}

struct smc_hashinfo smc_v4_hashinfo = {
        .lock = __RW_LOCK_UNLOCKED(smc_v4_hashinfo.lock),
};

struct smc_hashinfo smc_v6_hashinfo = {
        .lock = __RW_LOCK_UNLOCKED(smc_v6_hashinfo.lock),
};

int smc_hash_sk(struct sock *sk)
{
        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;
        struct hlist_head *head;

        head = &h->ht;

        write_lock_bh(&h->lock);
        sk_add_node(sk, head);
        write_unlock_bh(&h->lock);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

        return 0;
}

void smc_unhash_sk(struct sock *sk)
{
        struct smc_hashinfo *h = sk->sk_prot->h.smc_hash;

        write_lock_bh(&h->lock);
        if (sk_del_node_init(sk))
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
        write_unlock_bh(&h->lock);
}

/* This is called just before the user finally releases the sock lock, so
 * perform the work that was deferred while the user held the lock in BH
 * context.
 */
void smc_release_cb(struct sock *sk)
{
        struct smc_sock *smc = smc_sk(sk);

        if (smc->conn.tx_in_release_sock) {
                smc_tx_pending(&smc->conn);
                smc->conn.tx_in_release_sock = false;
        }
}

struct proto smc_proto = {
        .name           = "SMC",
        .owner          = THIS_MODULE,
        .keepalive      = smc_set_keepalive,
        .hash           = smc_hash_sk,
        .unhash         = smc_unhash_sk,
        .release_cb     = smc_release_cb,
        .obj_size       = sizeof(struct smc_sock),
        .h.smc_hash     = &smc_v4_hashinfo,
        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto);

struct proto smc_proto6 = {
        .name           = "SMC6",
        .owner          = THIS_MODULE,
        .keepalive      = smc_set_keepalive,
        .hash           = smc_hash_sk,
        .unhash         = smc_unhash_sk,
        .release_cb     = smc_release_cb,
        .obj_size       = sizeof(struct smc_sock),
        .h.smc_hash     = &smc_v6_hashinfo,
        .slab_flags     = SLAB_TYPESAFE_BY_RCU,
};
EXPORT_SYMBOL_GPL(smc_proto6);
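
/* Userspace usage sketch (illustrative assumption, not part of this file):
 * an AF_SMC socket is created like a TCP socket, with the protocol constant
 * selecting the address family of the internal CLC socket:
 *
 *   int fd  = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC);   // IPv4 clcsock
 *   int fd6 = socket(AF_SMC, SOCK_STREAM, SMCPROTO_SMC6);  // IPv6 clcsock
 */

/* Undo smc_fback_replace_callbacks(): detach the smc socket from the
 * clcsock and restore the clcsock's original sk_* callbacks.
 */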
static void smc_fback_restore_callbacks(struct smc_sock *smc)
{
        struct sock *clcsk = smc->clcsock->sk;

        write_lock_bh(&clcsk->sk_callback_lock);
        clcsk->sk_user_data = NULL;

        smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
        smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
        smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
        smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);

        write_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_restore_fallback_changes(struct smc_sock *smc)
{
        if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
                smc->clcsock->file->private_data = smc->sk.sk_socket;
                smc->clcsock->file = NULL;
                smc_fback_restore_callbacks(smc);
        }
}
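
/* Common release work: tear down the SMC connection on the active-close
 * path, or shut down and detach the clcsock on the fallback path.
 */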
static int __smc_release(struct smc_sock *smc)
{
        struct sock *sk = &smc->sk;
        int rc = 0;

        if (!smc->use_fallback) {
                rc = smc_close_active(smc);
                smc_sock_set_flag(sk, SOCK_DEAD);
                sk->sk_shutdown |= SHUTDOWN_MASK;
        } else {
                if (sk->sk_state != SMC_CLOSED) {
                        if (sk->sk_state != SMC_LISTEN &&
                            sk->sk_state != SMC_INIT)
                                sock_put(sk); /* passive closing */
                        if (sk->sk_state == SMC_LISTEN) {
                                /* wake up clcsock accept */
                                rc = kernel_sock_shutdown(smc->clcsock,
                                                          SHUT_RDWR);
                        }
                        sk->sk_state = SMC_CLOSED;
                        sk->sk_state_change(sk);
                }
                smc_restore_fallback_changes(smc);
        }

        sk->sk_prot->unhash(sk);

        if (sk->sk_state == SMC_CLOSED) {
                if (smc->clcsock) {
                        release_sock(sk);
                        smc_clcsock_release(smc);
                        lock_sock(sk);
                }
                if (!smc->use_fallback)
                        smc_conn_free(&smc->conn);
        }

        return rc;
}

int smc_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int old_state, rc = 0;

        if (!sk)
                goto out;

        sock_hold(sk); /* sock_put below */
        smc = smc_sk(sk);

        old_state = sk->sk_state;

        /* cleanup for a dangling non-blocking connect */
        if (smc->connect_nonblock && old_state == SMC_INIT)
                tcp_abort(smc->clcsock->sk, ECONNABORTED);

        if (cancel_work_sync(&smc->connect_work))
                sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */

        if (sk->sk_state == SMC_LISTEN)
                /* smc_close_non_accepted() is called and acquires
                 * sock lock for child sockets again
                 */
                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        else
                lock_sock(sk);

        if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE &&
            !smc->use_fallback)
                smc_close_active_abort(smc);

        rc = __smc_release(smc);

        /* detach socket */
        sock_orphan(sk);
        sock->sk = NULL;
        release_sock(sk);

        sock_put(sk); /* sock_hold above */
        sock_put(sk); /* final sock_put */
out:
        return rc;
}

static void smc_destruct(struct sock *sk)
{
        if (sk->sk_state != SMC_CLOSED)
                return;
        if (!sock_flag(sk, SOCK_DEAD))
                return;
        switch (sk->sk_family) {
        case AF_INET:
                inet_sock_destruct(sk);
                break;
#if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6:
                inet6_sock_destruct(sk);
                break;
#endif
        }
}

static struct lock_class_key smc_key;
static struct lock_class_key smc_slock_key;
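
/* Common smc sock state initialization; presumably shared between
 * smc_sock_alloc() and the AF_INET-based SMC socket creation path
 * (see smc_inet.h), which is why it is not static.
 */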
void smc_sk_init(struct net *net, struct sock *sk, int protocol)
{
        struct smc_sock *smc = smc_sk(sk);

        sk->sk_state = SMC_INIT;
        sk->sk_destruct = smc_destruct;
        sk->sk_protocol = protocol;
        WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
        WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        INIT_WORK(&smc->connect_work, smc_connect_work);
        INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
        INIT_LIST_HEAD(&smc->accept_q);
        sock_lock_init_class_and_name(sk, "slock-AF_SMC", &smc_slock_key,
                                      "sk_lock-AF_SMC", &smc_key);
        spin_lock_init(&smc->accept_q_lock);
        spin_lock_init(&smc->conn.send_lock);
        sk->sk_prot->hash(sk);
        mutex_init(&smc->clcsock_release_lock);
        smc_init_saved_callbacks(smc);

        smc->limit_smc_hs = net->smc.limit_smc_hs;
        smc->use_fallback = false; /* assume rdma capability first */
        smc->fallback_rsn = 0;
        smc_close_init(smc);
}

static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
                                   int protocol)
{
        struct proto *prot;
        struct sock *sk;

        prot = (protocol == SMCPROTO_SMC6) ? &smc_proto6 : &smc_proto;
        sk = sk_alloc(net, PF_SMC, GFP_KERNEL, prot, 0);
        if (!sk)
                return NULL;

        sock_init_data(sock, sk); /* sets sk_refcnt to 1 */
        smc_sk_init(net, sk, protocol);

        return sk;
}

int smc_bind(struct socket *sock, struct sockaddr *uaddr,
             int addr_len)
{
        struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
        struct sock *sk = sock->sk;
        struct smc_sock *smc;
        int rc;

        smc = smc_sk(sk);

        /* replicate tests from inet_bind(), to be safe wrt. future changes */
        rc = -EINVAL;
        if (addr_len < sizeof(struct sockaddr_in))
                goto out;

        rc = -EAFNOSUPPORT;
        if (addr->sin_family != AF_INET &&
            addr->sin_family != AF_INET6 &&
            addr->sin_family != AF_UNSPEC)
                goto out;
        /* accept AF_UNSPEC (mapped to AF_INET) only if s_addr is INADDR_ANY */
        if (addr->sin_family == AF_UNSPEC &&
            addr->sin_addr.s_addr != htonl(INADDR_ANY))
                goto out;

        lock_sock(sk);

        /* Check if socket is already active */
        rc = -EINVAL;
        if (sk->sk_state != SMC_INIT || smc->connect_nonblock)
                goto out_rel;

        smc->clcsock->sk->sk_reuse = sk->sk_reuse;
        smc->clcsock->sk->sk_reuseport = sk->sk_reuseport;
        rc = kernel_bind(smc->clcsock, uaddr, addr_len);

out_rel:
        release_sock(sk);
out:
        return rc;
}

/* copy only relevant settings and flags of SOL_SOCKET level from smc to
 * clc socket (since smc is not called for these options from net/core)
 */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
                             (1UL << SOCK_KEEPOPEN) | \
                             (1UL << SOCK_LINGER) | \
                             (1UL << SOCK_BROADCAST) | \
                             (1UL << SOCK_TIMESTAMP) | \
                             (1UL << SOCK_DBG) | \
                             (1UL << SOCK_RCVTSTAMP) | \
                             (1UL << SOCK_RCVTSTAMPNS) | \
                             (1UL << SOCK_LOCALROUTE) | \
                             (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE) | \
                             (1UL << SOCK_RXQ_OVFL) | \
                             (1UL << SOCK_WIFI_STATUS) | \
                             (1UL << SOCK_NOFCS) | \
                             (1UL << SOCK_FILTER_LOCKED) | \
                             (1UL << SOCK_TSTAMP_NEW))

/* if set via setsockopt(), keep the user-supplied buffer sizes - else the
 * IPv4 or SMC sysctl values apply
 */
static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
                                     unsigned long mask)
{
        nsk->sk_userlocks = osk->sk_userlocks;
        if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
                nsk->sk_sndbuf = osk->sk_sndbuf;
        if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
                nsk->sk_rcvbuf = osk->sk_rcvbuf;
}

static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
                                   unsigned long mask)
{
        /* options for which we don't get a setsockopt() call on the smc sock */
        nsk->sk_type = osk->sk_type;
        nsk->sk_sndtimeo = osk->sk_sndtimeo;
        nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
        nsk->sk_mark = READ_ONCE(osk->sk_mark);
        nsk->sk_priority = READ_ONCE(osk->sk_priority);
        nsk->sk_rcvlowat = osk->sk_rcvlowat;
        nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
        nsk->sk_err = osk->sk_err;

        nsk->sk_flags &= ~mask;
        nsk->sk_flags |= osk->sk_flags & mask;

        smc_adjust_sock_bufsizes(nsk, osk, mask);
}

static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
}

#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
                             (1UL << SOCK_KEEPOPEN) | \
                             (1UL << SOCK_LINGER) | \
                             (1UL << SOCK_DBG))

/* copy only settings and flags relevant for smc from clc to smc socket */
static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
        smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}

/* register the new vzalloced sndbuf on all links */
static int smcr_lgr_reg_sndbufs(struct smc_link *link,
                                struct smc_buf_desc *snd_desc)
{
        struct smc_link_group *lgr = link->lgr;
        int i, rc = 0;

        if (!snd_desc->is_vm)
                return -EINVAL;

        /* protect against parallel smcr_link_reg_buf() */
        down_write(&lgr->llc_conf_mutex);
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&lgr->lnk[i]))
                        continue;
                rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
                if (rc)
                        break;
        }
        up_write(&lgr->llc_conf_mutex);
        return rc;
}

/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
                             struct smc_buf_desc *rmb_desc)
{
        struct smc_link_group *lgr = link->lgr;
        bool do_slow = false;
        int i, rc = 0;

        rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
        if (rc)
                return rc;

        down_read(&lgr->llc_conf_mutex);
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&lgr->lnk[i]))
                        continue;
                if (!rmb_desc->is_reg_mr[link->link_idx]) {
                        up_read(&lgr->llc_conf_mutex);
                        goto slow_path;
                }
        }
        /* MRs are already registered on all links */
        goto fast_path;
slow_path:
        do_slow = true;
        /* protect against parallel smc_llc_cli_rkey_exchange() and
         * parallel smcr_link_reg_buf()
         */
        down_write(&lgr->llc_conf_mutex);
        for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
                if (!smc_link_active(&lgr->lnk[i]))
                        continue;
                rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
                if (rc)
                        goto out;
        }
fast_path:
        /* exchange confirm_rkey msg with peer */
        rc = smc_llc_do_confirm_rkey(link, rmb_desc);
        if (rc) {
                rc = -EFAULT;
                goto out;
        }
        rmb_desc->is_conf_rkey = true;
out:
        do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
        smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
        return rc;
}
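
/* Client side of the SMC-R first-contact link setup: wait for the server's
 * CONFIRM LINK, move the QP to RTS, register the buffers, answer with a
 * CONFIRM LINK response, and optionally handle an ADD LINK request.
 */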
static int smcr_clnt_conf_first_link(struct smc_sock *smc)
{
        struct smc_link *link = smc->conn.lnk;
        struct smc_llc_qentry *qentry;
        int rc;

        /* Receive CONFIRM LINK request from server over RoCE fabric.
         * Using twice the server's default timeout on the client side makes
         * it less likely that DECLINE messages from the two sides cross or
         * collide.
         */
        qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
                              SMC_LLC_CONFIRM_LINK);
        if (!qentry) {
                struct smc_clc_msg_decline dclc;

                rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
                return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
        }
        smc_llc_save_peer_uid(qentry);
        rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
        smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
        if (rc)
                return SMC_CLC_DECL_RMBE_EC;

        rc = smc_ib_modify_qp_rts(link);
        if (rc)
                return SMC_CLC_DECL_ERR_RDYLNK;

        smc_wr_remember_qp_attr(link);

        /* reg the sndbuf if it was vzalloced */
        if (smc->conn.sndbuf_desc->is_vm) {
                if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
                        return SMC_CLC_DECL_ERR_REGBUF;
        }

        /* reg the rmb */
        if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
                return SMC_CLC_DECL_ERR_REGBUF;

        /* confirm_rkey is implicit on 1st contact */
        smc->conn.rmb_desc->is_conf_rkey = true;

        /* send CONFIRM LINK response over RoCE fabric */
        rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
        if (rc < 0)
                return SMC_CLC_DECL_TIMEOUT_CL;

        smc_llc_link_active(link);
        smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

        if (link->lgr->max_links > 1) {
                /* optional 2nd link, receive ADD LINK request from server */
                qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
                                      SMC_LLC_ADD_LINK);
                if (!qentry) {
                        struct smc_clc_msg_decline dclc;

                        rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
                                              SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
                        if (rc == -EAGAIN)
                                rc = 0; /* no DECLINE received, go with one link */
                        return rc;
                }
                smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
                smc_llc_cli_add_link(link, qentry);
        }
        return 0;
}

static bool smc_isascii(char *hostname)
{
        int i;

        for (i = 0; i < SMC_MAX_HOSTNAME_LEN; i++)
                if (!isascii(hostname[i]))
                        return false;
        return true;
}
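
/* Save the peer information carried in the first-contact extension of a
 * V2 CLC accept/confirm: negotiated EID, peer OS, release and hostname.
 */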
static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
                                        struct smc_clc_msg_accept_confirm *clc)
{
        struct smc_clc_first_contact_ext *fce;
        int clc_v2_len;

        if (clc->hdr.version == SMC_V1 ||
            !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
                return;

        if (smc->conn.lgr->is_smcd) {
                memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid,
                       SMC_MAX_EID_LEN);
                clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1);
        } else {
                memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid,
                       SMC_MAX_EID_LEN);
                clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1);
        }
        fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
        smc->conn.lgr->peer_os = fce->os_type;
        smc->conn.lgr->peer_smc_release = fce->release;
        if (smc_isascii(fce->hostname))
                memcpy(smc->conn.lgr->peer_hostname, fce->hostname,
                       SMC_MAX_HOSTNAME_LEN);
}

static void smcr_conn_save_peer_info(struct smc_sock *smc,
                                     struct smc_clc_msg_accept_confirm *clc)
{
        int bufsize = smc_uncompress_bufsize(clc->r0.rmbe_size);

        smc->conn.peer_rmbe_idx = clc->r0.rmbe_idx;
        smc->conn.local_tx_ctrl.token = ntohl(clc->r0.rmbe_alert_token);
        smc->conn.peer_rmbe_size = bufsize;
        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
        smc->conn.tx_off = bufsize * (smc->conn.peer_rmbe_idx - 1);
}

static void smcd_conn_save_peer_info(struct smc_sock *smc,
                                     struct smc_clc_msg_accept_confirm *clc)
{
        int bufsize = smc_uncompress_bufsize(clc->d0.dmbe_size);

        smc->conn.peer_rmbe_idx = clc->d0.dmbe_idx;
        smc->conn.peer_token = ntohll(clc->d0.token);
        /* msg header takes up space in the buffer */
        smc->conn.peer_rmbe_size = bufsize - sizeof(struct smcd_cdc_msg);
        atomic_set(&smc->conn.peer_rmbe_space, smc->conn.peer_rmbe_size);
        smc->conn.tx_off = bufsize * smc->conn.peer_rmbe_idx;
}

static void smc_conn_save_peer_info(struct smc_sock *smc,
                                    struct smc_clc_msg_accept_confirm *clc)
{
        if (smc->conn.lgr->is_smcd)
                smcd_conn_save_peer_info(smc, clc);
        else
                smcr_conn_save_peer_info(smc, clc);
        smc_conn_save_peer_info_fce(smc, clc);
}

static void smc_link_save_peer_info(struct smc_link *link,
                                    struct smc_clc_msg_accept_confirm *clc,
                                    struct smc_init_info *ini)
{
        link->peer_qpn = ntoh24(clc->r0.qpn);
        memcpy(link->peer_gid, ini->peer_gid, SMC_GID_SIZE);
        memcpy(link->peer_mac, ini->peer_mac, sizeof(link->peer_mac));
        link->peer_psn = ntoh24(clc->r0.psn);
        link->peer_mtu = clc->r0.qp_mtu;
}

static void smc_stat_inc_fback_rsn_cnt(struct smc_sock *smc,
                                       struct smc_stats_fback *fback_arr)
{
        int cnt;

        for (cnt = 0; cnt < SMC_MAX_FBACK_RSN_CNT; cnt++) {
                if (fback_arr[cnt].fback_code == smc->fallback_rsn) {
                        fback_arr[cnt].count++;
                        break;
                }
                if (!fback_arr[cnt].fback_code) {
                        fback_arr[cnt].fback_code = smc->fallback_rsn;
                        fback_arr[cnt].count++;
                        break;
                }
        }
}

static void smc_stat_fallback(struct smc_sock *smc)
{
        struct net *net = sock_net(&smc->sk);

        mutex_lock(&net->smc.mutex_fback_rsn);
        if (smc->listen_smc) {
                smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->srv);
                net->smc.fback_rsn->srv_fback_cnt++;
        } else {
                smc_stat_inc_fback_rsn_cnt(smc, net->smc.fback_rsn->clnt);
                net->smc.fback_rsn->clnt_fback_cnt++;
        }
        mutex_unlock(&net->smc.mutex_fback_rsn);
}

/* must be called under rcu read lock */
static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key)
{
        struct socket_wq *wq;
        __poll_t flags;

        wq = rcu_dereference(smc->sk.sk_wq);
        if (!skwq_has_sleeper(wq))
                return;

        /* wake up smc sk->sk_wq */
        if (!key) {
                /* sk_state_change */
                wake_up_interruptible_all(&wq->wait);
        } else {
                flags = key_to_poll(key);
                if (flags & (EPOLLIN | EPOLLOUT))
                        /* sk_data_ready or sk_write_space */
                        wake_up_interruptible_sync_poll(&wq->wait, flags);
                else if (flags & EPOLLERR)
                        /* sk_error_report */
                        wake_up_interruptible_poll(&wq->wait, flags);
        }
}

static int smc_fback_mark_woken(wait_queue_entry_t *wait,
                                unsigned int mode, int sync, void *key)
{
        struct smc_mark_woken *mark =
                container_of(wait, struct smc_mark_woken, wait_entry);

        mark->woken = true;
        mark->key = key;
        return 0;
}
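
/* Forward a wakeup that arrived on the clcsock to the smc socket's wait
 * queue: temporarily hook a marker entry into the clcsock's wait queue so
 * the original callback records whether (and with which poll key) it woke
 * anyone, then re-issue that wakeup on the smc sk_wq.
 */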
static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,
                                     void (*clcsock_callback)(struct sock *sk))
{
        struct smc_mark_woken mark = { .woken = false };
        struct socket_wq *wq;

        init_waitqueue_func_entry(&mark.wait_entry,
                                  smc_fback_mark_woken);
        rcu_read_lock();
        wq = rcu_dereference(clcsk->sk_wq);
        if (!wq)
                goto out;
        add_wait_queue(sk_sleep(clcsk), &mark.wait_entry);
        clcsock_callback(clcsk);
        remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry);

        if (mark.woken)
                smc_fback_wakeup_waitqueue(smc, mark.key);
out:
        rcu_read_unlock();
}

static void smc_fback_state_change(struct sock *clcsk)
{
        struct smc_sock *smc;

        read_lock_bh(&clcsk->sk_callback_lock);
        smc = smc_clcsock_user_data(clcsk);
        if (smc)
                smc_fback_forward_wakeup(smc, clcsk,
                                         smc->clcsk_state_change);
        read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_data_ready(struct sock *clcsk)
{
        struct smc_sock *smc;

        read_lock_bh(&clcsk->sk_callback_lock);
        smc = smc_clcsock_user_data(clcsk);
        if (smc)
                smc_fback_forward_wakeup(smc, clcsk,
                                         smc->clcsk_data_ready);
        read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_write_space(struct sock *clcsk)
{
        struct smc_sock *smc;

        read_lock_bh(&clcsk->sk_callback_lock);
        smc = smc_clcsock_user_data(clcsk);
        if (smc)
                smc_fback_forward_wakeup(smc, clcsk,
                                         smc->clcsk_write_space);
        read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_error_report(struct sock *clcsk)
{
        struct smc_sock *smc;

        read_lock_bh(&clcsk->sk_callback_lock);
        smc = smc_clcsock_user_data(clcsk);
        if (smc)
                smc_fback_forward_wakeup(smc, clcsk,
                                         smc->clcsk_error_report);
        read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_replace_callbacks(struct smc_sock *smc)
{
        struct sock *clcsk = smc->clcsock->sk;

        write_lock_bh(&clcsk->sk_callback_lock);
        clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);

        smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
                               &smc->clcsk_state_change);
        smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
                               &smc->clcsk_data_ready);
        smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
                               &smc->clcsk_write_space);
        smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
                               &smc->clcsk_error_report);

        write_unlock_bh(&clcsk->sk_callback_lock);
}
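
/* Switch this socket to TCP fallback mode: from here on the clcsock carries
 * the payload. Transfer file and fasync ownership to the clcsock and
 * redirect its sk_* callbacks so waiters on the smc socket still get woken.
 */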
static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
        int rc = 0;

        mutex_lock(&smc->clcsock_release_lock);
        if (!smc->clcsock) {
                rc = -EBADF;
                goto out;
        }

        smc->use_fallback = true;
        smc->fallback_rsn = reason_code;
        smc_stat_fallback(smc);
        trace_smc_switch_to_fallback(smc, reason_code);
        if (smc->sk.sk_socket && smc->sk.sk_socket->file) {
                smc->clcsock->file = smc->sk.sk_socket->file;
                smc->clcsock->file->private_data = smc->clcsock;
                smc->clcsock->wq.fasync_list =
                        smc->sk.sk_socket->wq.fasync_list;
                smc->sk.sk_socket->wq.fasync_list = NULL;

                /* There might be some wait entries remaining
                 * in smc sk->sk_wq and they should be woken up
                 * as clcsock's wait queue is woken up.
                 */
                smc_fback_replace_callbacks(smc);
        }
out:
        mutex_unlock(&smc->clcsock_release_lock);
        return rc;
}

/* fall back during connect */
static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
{
        struct net *net = sock_net(&smc->sk);
        int rc = 0;

        rc = smc_switch_to_fallback(smc, reason_code);
        if (rc) { /* fallback fails */
                this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
                if (smc->sk.sk_state == SMC_INIT)
                        sock_put(&smc->sk); /* passive closing */
                return rc;
        }
        smc_copy_sock_settings_to_clc(smc);
        smc->connect_nonblock = 0;
        if (smc->sk.sk_state == SMC_INIT)
                smc->sk.sk_state = SMC_ACTIVE;
        return 0;
}

/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
                                        u8 version)
{
        struct net *net = sock_net(&smc->sk);
        int rc;

        if (reason_code < 0) { /* error, fallback is not possible */
                this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
                if (smc->sk.sk_state == SMC_INIT)
                        sock_put(&smc->sk); /* passive closing */
                return reason_code;
        }
        if (reason_code != SMC_CLC_DECL_PEERDECL) {
                rc = smc_clc_send_decline(smc, reason_code, version);
                if (rc < 0) {
                        this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
                        if (smc->sk.sk_state == SMC_INIT)
                                sock_put(&smc->sk); /* passive closing */
                        return rc;
                }
        }
        return smc_connect_fallback(smc, reason_code);
}

static void smc_conn_abort(struct smc_sock *smc, int local_first)
{
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        bool lgr_valid = false;

        if (smc_conn_lgr_valid(conn))
                lgr_valid = true;

        smc_conn_free(conn);
        if (local_first && lgr_valid)
                smc_lgr_cleanup_early(lgr);
}

/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
        /* PNET table look up: search active ib_device and port
         * within same PNETID that also contains the ethernet device
         * used for the internal TCP socket
         */
        smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
        if (!ini->check_smcrv2 && !ini->ib_dev)
                return SMC_CLC_DECL_NOSMCRDEV;
        if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
                return SMC_CLC_DECL_NOSMCRDEV;
        return 0;
}

/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
        /* Find ISM device with same PNETID as connecting interface */
        smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
        if (!ini->ism_dev[0])
                return SMC_CLC_DECL_NOSMCDDEV;
        else
                ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
        return 0;
}

/* is chid unique for the ism devices that are already determined? */
static bool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini,
                                           int cnt)
{
        int i = (!ini->ism_dev[0]) ? 1 : 0;

        for (; i < cnt; i++)
                if (ini->ism_chid[i] == chid)
                        return false;
        return true;
}

/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
 * PNETID matching net_device)
 */
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
                                       struct smc_init_info *ini)
{
        int rc = SMC_CLC_DECL_NOSMCDDEV;
        struct smcd_dev *smcd;
        int i = 1, entry = 1;
        bool is_emulated;
        u16 chid;

        if (smcd_indicated(ini->smc_type_v1))
                rc = 0;         /* already initialized for V1 */
        mutex_lock(&smcd_dev_list.mutex);
        list_for_each_entry(smcd, &smcd_dev_list.list, list) {
                if (smcd->going_away || smcd == ini->ism_dev[0])
                        continue;
                chid = smc_ism_get_chid(smcd);
                if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
                        continue;
                is_emulated = __smc_ism_is_emulated(chid);
                if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
                    smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
                        if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
                                /* It's the last GID-CHID entry left in CLC
                                 * Proposal SMC-Dv2 extension, but an Emulated-
                                 * ISM device will take two entries. So give
                                 * it up and try the next potential ISM device.
                                 */
                                continue;
                        ini->ism_dev[i] = smcd;
                        ini->ism_chid[i] = chid;
                        ini->is_smcd = true;
                        rc = 0;
                        i++;
                        entry = is_emulated ? entry + 2 : entry + 1;
                        if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
                                break;
                }
        }
        mutex_unlock(&smcd_dev_list.mutex);
        ini->ism_offered_cnt = i - 1;
        if (!ini->ism_dev[0] && !ini->ism_dev[1])
                ini->smcd_version = 0;

        return rc;
}

/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
                                      struct smc_init_info *ini)
{
        if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
                return SMC_CLC_DECL_ISMVLANERR;
        return 0;
}
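
/* Determine the ISM and RDMA devices (V1 and V2) that can be offered in the
 * CLC proposal, and derive the resulting SMC types; fails only if neither
 * SMC-D nor SMC-R is possible.
 */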
static int smc_find_proposal_devices(struct smc_sock *smc,
                                     struct smc_init_info *ini)
{
        int rc = 0;

        /* check if there is an ism device available */
        if (!(ini->smcd_version & SMC_V1) ||
            smc_find_ism_device(smc, ini) ||
            smc_connect_ism_vlan_setup(smc, ini))
                ini->smcd_version &= ~SMC_V1;
        /* else ISM V1 is supported for this connection */

        /* check if there is an rdma device available */
        if (!(ini->smcr_version & SMC_V1) ||
            smc_find_rdma_device(smc, ini))
                ini->smcr_version &= ~SMC_V1;
        /* else RDMA is supported for this connection */
        ini->smc_type_v1 = smc_indicated_type(ini->smcd_version & SMC_V1,
                                              ini->smcr_version & SMC_V1);

        /* check if there is an ism v2 device available */
        if (!(ini->smcd_version & SMC_V2) ||
            !smc_ism_is_v2_capable() ||
            smc_find_ism_v2_device_clnt(smc, ini))
                ini->smcd_version &= ~SMC_V2;

        /* check if there is an rdma v2 device available */
        ini->check_smcrv2 = true;
        ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr;
        if (!(ini->smcr_version & SMC_V2) ||
            smc->clcsock->sk->sk_family != AF_INET ||
            !smc_clc_ueid_count() ||
            smc_find_rdma_device(smc, ini))
                ini->smcr_version &= ~SMC_V2;
        ini->check_smcrv2 = false;
        ini->smc_type_v2 = smc_indicated_type(ini->smcd_version & SMC_V2,
                                              ini->smcr_version & SMC_V2);

        /* if neither ISM nor RDMA are supported, fallback */
        if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
                rc = SMC_CLC_DECL_NOSMCDEV;

        return rc;
}

/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
                                        struct smc_init_info *ini)
{
        if (!smcd_indicated(ini->smc_type_v1))
                return 0;
        if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
                return SMC_CLC_DECL_CNFERR;
        return 0;
}

#define SMC_CLC_MAX_ACCEPT_LEN \
        (sizeof(struct smc_clc_msg_accept_confirm) + \
         sizeof(struct smc_clc_first_contact_ext_v2x) + \
         sizeof(struct smc_clc_msg_trail))

/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
                           struct smc_clc_msg_accept_confirm *aclc,
                           struct smc_init_info *ini)
{
        int rc = 0;

        /* do inband token exchange */
        rc = smc_clc_send_proposal(smc, ini);
        if (rc)
                return rc;
        /* receive SMC Accept CLC message */
        return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN,
                                SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
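
/* Build the SMC-Rv2 GID list for the CLC confirm: the known GID first, plus
 * the GID of an alternate RoCE device, if one can be found for this link
 * group.
 */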
void smc_fill_gid_list(struct smc_link_group *lgr,
                       struct smc_gidlist *gidlist,
                       struct smc_ib_device *known_dev, u8 *known_gid)
{
        struct smc_init_info *alt_ini = NULL;

        memset(gidlist, 0, sizeof(*gidlist));
        memcpy(gidlist->list[gidlist->len++], known_gid, SMC_GID_SIZE);

        alt_ini = kzalloc(sizeof(*alt_ini), GFP_KERNEL);
        if (!alt_ini)
                goto out;

        alt_ini->vlan_id = lgr->vlan_id;
        alt_ini->check_smcrv2 = true;
        alt_ini->smcrv2.saddr = lgr->saddr;
        smc_pnet_find_alt_roce(lgr, alt_ini, known_dev);

        if (!alt_ini->smcrv2.ib_dev_v2)
                goto out;

        memcpy(gidlist->list[gidlist->len++], alt_ini->smcrv2.ib_gid_v2,
               SMC_GID_SIZE);

out:
        kfree(alt_ini);
}
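
/* SMC-Rv2 specific checks on the server's accept message: resolve the
 * next-hop MAC for the indirect case, verify direct/indirect consistency,
 * and validate the first-contact extension features.
 */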
  1038. static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
  1039. struct smc_clc_msg_accept_confirm *aclc,
  1040. struct smc_init_info *ini)
  1041. {
  1042. struct smc_clc_first_contact_ext *fce =
  1043. smc_get_clc_first_contact_ext(aclc, false);
  1044. struct net *net = sock_net(&smc->sk);
  1045. int rc;
  1046. if (!ini->first_contact_peer || aclc->hdr.version == SMC_V1)
  1047. return 0;
  1048. if (fce->v2_direct) {
  1049. memcpy(ini->smcrv2.nexthop_mac, &aclc->r0.lcl.mac, ETH_ALEN);
  1050. ini->smcrv2.uses_gateway = false;
  1051. } else {
  1052. if (smc_ib_find_route(net, smc->clcsock->sk->sk_rcv_saddr,
  1053. smc_ib_gid_to_ipv4(aclc->r0.lcl.gid),
  1054. ini->smcrv2.nexthop_mac,
  1055. &ini->smcrv2.uses_gateway))
  1056. return SMC_CLC_DECL_NOROUTE;
  1057. if (!ini->smcrv2.uses_gateway) {
  1058. /* mismatch: peer claims indirect, but its direct */
  1059. return SMC_CLC_DECL_NOINDIRECT;
  1060. }
  1061. }
  1062. ini->release_nr = fce->release;
  1063. rc = smc_clc_clnt_v2x_features_validate(fce, ini);
  1064. if (rc)
  1065. return rc;
  1066. return 0;
  1067. }
  1068. /* setup for RDMA connection of client */
  1069. static int smc_connect_rdma(struct smc_sock *smc,
  1070. struct smc_clc_msg_accept_confirm *aclc,
  1071. struct smc_init_info *ini)
  1072. {
  1073. int i, reason_code = 0;
  1074. struct smc_link *link;
  1075. u8 *eid = NULL;
  1076. ini->is_smcd = false;
  1077. ini->ib_clcqpn = ntoh24(aclc->r0.qpn);
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
	memcpy(ini->peer_systemid, aclc->r0.lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, aclc->r0.lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, aclc->r0.lcl.mac, ETH_ALEN);
	ini->max_conns = SMC_CONN_PER_LGR_MAX;
	ini->max_links = SMC_LINKS_ADD_LNK_MAX;

	reason_code = smc_connect_rdma_v2_prepare(smc, aclc, ini);
	if (reason_code)
		return reason_code;

	mutex_lock(&smc_client_lgr_pending);
	reason_code = smc_conn_create(smc, ini);
	if (reason_code) {
		mutex_unlock(&smc_client_lgr_pending);
		return reason_code;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (ini->first_contact_local) {
		link = smc->conn.lnk;
	} else {
		/* set link that was assigned by server */
		link = NULL;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *l = &smc->conn.lgr->lnk[i];

			if (l->peer_qpn == ntoh24(aclc->r0.qpn) &&
			    !memcmp(l->peer_gid, &aclc->r0.lcl.gid,
				    SMC_GID_SIZE) &&
			    (aclc->hdr.version > SMC_V1 ||
			     !memcmp(l->peer_mac, &aclc->r0.lcl.mac,
				     sizeof(l->peer_mac)))) {
				link = l;
				break;
			}
		}
		if (!link) {
			reason_code = SMC_CLC_DECL_NOSRVLINK;
			goto connect_abort;
		}
		smc_switch_link_and_count(&smc->conn, link);
	}

	/* create send buffer and rmb */
	if (smc_buf_create(smc, false)) {
		reason_code = SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	if (ini->first_contact_local)
		smc_link_save_peer_info(link, aclc, ini);

	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc)) {
		reason_code = SMC_CLC_DECL_ERR_RTOK;
		goto connect_abort;
	}

	smc_rx_init(smc);

	if (ini->first_contact_local) {
		if (smc_ib_ready_link(link)) {
			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
			goto connect_abort;
		}
	} else {
		/* reg sendbufs if they were vzalloced */
		if (smc->conn.sndbuf_desc->is_vm) {
			if (smcr_lgr_reg_sndbufs(link, smc->conn.sndbuf_desc)) {
				reason_code = SMC_CLC_DECL_ERR_REGBUF;
				goto connect_abort;
			}
		}
		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc)) {
			reason_code = SMC_CLC_DECL_ERR_REGBUF;
			goto connect_abort;
		}
	}

	if (aclc->hdr.version > SMC_V1) {
		eid = aclc->r1.eid;
		if (ini->first_contact_local)
			smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
					  link->smcibdev, link->gid);
	}

	reason_code = smc_clc_send_confirm(smc, ini->first_contact_local,
					   aclc->hdr.version, eid, ini);
	if (reason_code)
		goto connect_abort;

	smc_tx_init(smc);

	if (ini->first_contact_local) {
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_clnt_conf_first_link(smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
		if (reason_code)
			goto connect_abort;
	}
	mutex_unlock(&smc_client_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	smc_conn_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_client_lgr_pending);
	smc->connect_nonblock = 0;

	return reason_code;
}

/* The server has chosen one of the proposed ISM devices for the communication.
 * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
 */
static int
smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
			       struct smc_init_info *ini)
{
	int i;

	for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
		if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
			ini->ism_selected = i;
			return 0;
		}
	}

	return -EPROTO;
}

/* setup for ISM connection of client */
static int smc_connect_ism(struct smc_sock *smc,
			   struct smc_clc_msg_accept_confirm *aclc,
			   struct smc_init_info *ini)
{
	u8 *eid = NULL;
	int rc = 0;

	ini->is_smcd = true;
	ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;

	if (aclc->hdr.version == SMC_V2) {
		if (ini->first_contact_peer) {
			struct smc_clc_first_contact_ext *fce =
				smc_get_clc_first_contact_ext(aclc, true);

			ini->release_nr = fce->release;
			rc = smc_clc_clnt_v2x_features_validate(fce, ini);
			if (rc)
				return rc;
		}

		rc = smc_v2_determine_accepted_chid(aclc, ini);
		if (rc)
			return rc;

		if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
			ini->ism_peer_gid[ini->ism_selected].gid_ext =
						ntohll(aclc->d1.gid_ext);
		/* for non-Emulated-ISM devices, peer gid_ext remains 0. */
	}
	ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);

	/* there is only one lgr role for SMC-D; use server lock */
	mutex_lock(&smc_server_lgr_pending);
	rc = smc_conn_create(smc, ini);
	if (rc) {
		mutex_unlock(&smc_server_lgr_pending);
		return rc;
	}

	/* Create send and receive buffers */
	rc = smc_buf_create(smc, true);
	if (rc) {
		rc = (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB : SMC_CLC_DECL_MEM;
		goto connect_abort;
	}

	smc_conn_save_peer_info(smc, aclc);

	if (smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd)) {
		rc = smcd_buf_attach(smc);
		if (rc) {
			rc = SMC_CLC_DECL_MEM;	/* try to fallback */
			goto connect_abort;
		}
	}
	smc_rx_init(smc);
	smc_tx_init(smc);

	if (aclc->hdr.version > SMC_V1)
		eid = aclc->d1.eid;

	rc = smc_clc_send_confirm(smc, ini->first_contact_local,
				  aclc->hdr.version, eid, ini);
	if (rc)
		goto connect_abort;
	mutex_unlock(&smc_server_lgr_pending);

	smc_copy_sock_settings_to_clc(smc);
	smc->connect_nonblock = 0;
	if (smc->sk.sk_state == SMC_INIT)
		smc->sk.sk_state = SMC_ACTIVE;

	return 0;
connect_abort:
	smc_conn_abort(smc, ini->first_contact_local);
	mutex_unlock(&smc_server_lgr_pending);
	smc->connect_nonblock = 0;

	return rc;
}

/* check if received accept type and version matches a proposed one */
static int smc_connect_check_aclc(struct smc_init_info *ini,
				  struct smc_clc_msg_accept_confirm *aclc)
{
	if (aclc->hdr.version >= SMC_V2) {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v2)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
		     !smcd_indicated(ini->smc_type_v2)))
			return SMC_CLC_DECL_MODEUNSUPP;
	} else {
		if ((aclc->hdr.typev1 == SMC_TYPE_R &&
		     !smcr_indicated(ini->smc_type_v1)) ||
		    (aclc->hdr.typev1 == SMC_TYPE_D &&
		     !smcd_indicated(ini->smc_type_v1)))
			return SMC_CLC_DECL_MODEUNSUPP;
	}

	return 0;
}

/* perform steps before actually connecting */
static int __smc_connect(struct smc_sock *smc)
{
	u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
	struct smc_clc_msg_accept_confirm *aclc;
	struct smc_init_info *ini = NULL;
	u8 *buf = NULL;
	int rc = 0;

	if (smc->use_fallback)
		return smc_connect_fallback(smc, smc->fallback_rsn);

	/* if peer has not signalled SMC-capability, fall back */
	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(smc))
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
						    version);

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini)
		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
						    version);

	ini->smcd_version = SMC_V1 | SMC_V2;
	ini->smcr_version = SMC_V1 | SMC_V2;
	ini->smc_type_v1 = SMC_TYPE_B;
	ini->smc_type_v2 = SMC_TYPE_B;

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
		ini->smcd_version &= ~SMC_V1;
		ini->smcr_version = 0;
		ini->smc_type_v1 = SMC_TYPE_N;
	}

	rc = smc_find_proposal_devices(smc, ini);
	if (rc)
		goto fallback;

	buf = kzalloc(SMC_CLC_MAX_ACCEPT_LEN, GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto fallback;
	}
	aclc = (struct smc_clc_msg_accept_confirm *)buf;

	/* perform CLC handshake */
	rc = smc_connect_clc(smc, aclc, ini);
	if (rc) {
		/* -EAGAIN on timeout, see tcp_recvmsg() */
		if (rc == -EAGAIN) {
			rc = -ETIMEDOUT;
			smc->sk.sk_err = ETIMEDOUT;
		}
		goto vlan_cleanup;
	}

	/* check if smc modes and versions of CLC proposal and accept match */
	rc = smc_connect_check_aclc(ini, aclc);
	version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2;
	if (rc)
		goto vlan_cleanup;

	/* depending on previous steps, connect using rdma or ism */
	if (aclc->hdr.typev1 == SMC_TYPE_R) {
		ini->smcr_version = version;
		rc = smc_connect_rdma(smc, aclc, ini);
	} else if (aclc->hdr.typev1 == SMC_TYPE_D) {
		ini->smcd_version = version;
		rc = smc_connect_ism(smc, aclc, ini);
	}
	if (rc)
		goto vlan_cleanup;

	SMC_STAT_CLNT_SUCC_INC(sock_net(smc->clcsock->sk), aclc);
	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
	kfree(ini);
	return 0;

vlan_cleanup:
	smc_connect_ism_vlan_cleanup(smc, ini);
	kfree(buf);
fallback:
	kfree(ini);
	return smc_connect_decline_fallback(smc, rc, version);
}
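
/* worker to run a non-blocking connect: wait until the TCP handshake on
 * the internal clcsock has completed, then perform the SMC handshake via
 * __smc_connect()
 */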
static void smc_connect_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(work, struct smc_sock,
					    connect_work);
	long timeo = smc->sk.sk_sndtimeo;
	int rc = 0;

	if (!timeo)
		timeo = MAX_SCHEDULE_TIMEOUT;
	lock_sock(smc->clcsock->sk);
	if (smc->clcsock->sk->sk_err) {
		smc->sk.sk_err = smc->clcsock->sk->sk_err;
	} else if ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_SYN_SENT | TCPF_SYN_RECV)) {
		rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo);
		if ((rc == -EPIPE) &&
		    ((1 << smc->clcsock->sk->sk_state) &
					(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)))
			rc = 0;
	}
	release_sock(smc->clcsock->sk);
	lock_sock(&smc->sk);
	if (rc != 0 || smc->sk.sk_err) {
		smc->sk.sk_state = SMC_CLOSED;
		if (rc == -EPIPE || rc == -EAGAIN)
			smc->sk.sk_err = EPIPE;
		else if (rc == -ECONNREFUSED)
			smc->sk.sk_err = ECONNREFUSED;
		else if (signal_pending(current))
			smc->sk.sk_err = -sock_intr_errno(timeo);
		sock_put(&smc->sk); /* passive closing */
		goto out;
	}

	rc = __smc_connect(smc);
	if (rc < 0)
		smc->sk.sk_err = -rc;

out:
	if (!sock_flag(&smc->sk, SOCK_DEAD)) {
		if (smc->sk.sk_err) {
			smc->sk.sk_state_change(&smc->sk);
		} else { /* allow polling before and after fallback decision */
			smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
			smc->sk.sk_write_space(&smc->sk);
		}
	}
	release_sock(&smc->sk);
}
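
/* connect() entry point of an AF_SMC socket: validates address and socket
 * state, connects the internal TCP clcsock, and runs the SMC handshake
 * inline or, for O_NONBLOCK sockets, from the connect worker
 */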
int smc_connect(struct socket *sock, struct sockaddr *addr,
		int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -EINVAL;

	smc = smc_sk(sk);

	/* separate smc parameter checking to be safe */
	if (alen < sizeof(addr->sa_family))
		goto out_err;
	if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)
		goto out_err;

	lock_sock(sk);
	switch (sock->state) {
	default:
		rc = -EINVAL;
		goto out;
	case SS_CONNECTED:
		rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL;
		goto out;
	case SS_CONNECTING:
		if (sk->sk_state == SMC_ACTIVE)
			goto connected;
		break;
	case SS_UNCONNECTED:
		sock->state = SS_CONNECTING;
		break;
	}

	switch (sk->sk_state) {
	default:
		goto out;
	case SMC_CLOSED:
		rc = sock_error(sk) ? : -ECONNABORTED;
		sock->state = SS_UNCONNECTED;
		goto out;
	case SMC_ACTIVE:
		rc = -EISCONN;
		goto out;
	case SMC_INIT:
		break;
	}

	smc_copy_sock_settings_to_clc(smc);
	tcp_sk(smc->clcsock->sk)->syn_smc = 1;
	if (smc->connect_nonblock) {
		rc = -EALREADY;
		goto out;
	}
	rc = kernel_connect(smc->clcsock, addr, alen, flags);
	if (rc && rc != -EINPROGRESS)
		goto out;

	if (smc->use_fallback) {
		sock->state = rc ? SS_CONNECTING : SS_CONNECTED;
		goto out;
	}
	sock_hold(&smc->sk); /* sock put in passive closing */
	if (flags & O_NONBLOCK) {
		if (queue_work(smc_hs_wq, &smc->connect_work))
			smc->connect_nonblock = 1;
		rc = -EINPROGRESS;
		goto out;
	} else {
		rc = __smc_connect(smc);
		if (rc < 0)
			goto out;
	}

connected:
	rc = 0;
	sock->state = SS_CONNECTED;
out:
	release_sock(sk);
out_err:
	return rc;
}
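
/* accept a pending connection on the internal TCP clcsock and allocate a
 * fresh SMC socket for it; callbacks the new clcsock inherited from the
 * listen socket are restored to their originals
 */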
static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
{
	struct socket *new_clcsock = NULL;
	struct sock *lsk = &lsmc->sk;
	struct sock *new_sk;
	int rc = -EINVAL;

	release_sock(lsk);
	new_sk = smc_sock_alloc(sock_net(lsk), NULL, lsk->sk_protocol);
	if (!new_sk) {
		rc = -ENOMEM;
		lsk->sk_err = ENOMEM;
		*new_smc = NULL;
		lock_sock(lsk);
		goto out;
	}
	*new_smc = smc_sk(new_sk);

	mutex_lock(&lsmc->clcsock_release_lock);
	if (lsmc->clcsock)
		rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
	mutex_unlock(&lsmc->clcsock_release_lock);
	lock_sock(lsk);
	if (rc < 0 && rc != -EAGAIN)
		lsk->sk_err = -rc;
	if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
		new_sk->sk_prot->unhash(new_sk);
		if (new_clcsock)
			sock_release(new_clcsock);
		new_sk->sk_state = SMC_CLOSED;
		smc_sock_set_flag(new_sk, SOCK_DEAD);
		sock_put(new_sk); /* final */
		*new_smc = NULL;
		goto out;
	}

	/* new clcsock has inherited the smc listen-specific sk_data_ready
	 * function; switch it back to the original sk_data_ready function
	 */
	new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;

	/* if new clcsock has also inherited the fallback-specific callback
	 * functions, switch them back to the original ones.
	 */
	if (lsmc->use_fallback) {
		if (lsmc->clcsk_state_change)
			new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
		if (lsmc->clcsk_write_space)
			new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
		if (lsmc->clcsk_error_report)
			new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
}

/* add a just created sock to the accept queue of the listen sock as
 * candidate for a following socket accept call from user space
 */
static void smc_accept_enqueue(struct sock *parent, struct sock *sk)
{
	struct smc_sock *par = smc_sk(parent);

	sock_hold(sk); /* sock_put in smc_accept_unlink() */
	spin_lock(&par->accept_q_lock);
	list_add_tail(&smc_sk(sk)->accept_q, &par->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_added(parent);
}

/* remove a socket from the accept queue of its parental listening socket */
static void smc_accept_unlink(struct sock *sk)
{
	struct smc_sock *par = smc_sk(sk)->listen_smc;

	spin_lock(&par->accept_q_lock);
	list_del_init(&smc_sk(sk)->accept_q);
	spin_unlock(&par->accept_q_lock);
	sk_acceptq_removed(&smc_sk(sk)->listen_smc->sk);
	sock_put(sk); /* sock_hold in smc_accept_enqueue */
}

/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent,
				struct socket *new_sock)
{
	struct smc_sock *isk, *n;
	struct sock *new_sk;

	list_for_each_entry_safe(isk, n, &smc_sk(parent)->accept_q, accept_q) {
		new_sk = (struct sock *)isk;

		smc_accept_unlink(new_sk);
		if (new_sk->sk_state == SMC_CLOSED) {
			new_sk->sk_prot->unhash(new_sk);
			if (isk->clcsock) {
				sock_release(isk->clcsock);
				isk->clcsock = NULL;
			}
			sock_put(new_sk); /* final */
			continue;
		}
		if (new_sock) {
			sock_graft(new_sk, new_sock);
			new_sock->state = SS_CONNECTED;
			if (isk->use_fallback) {
				smc_sk(new_sk)->clcsock->file = new_sock->file;
				isk->clcsock->file->private_data = isk->clcsock;
			}
		}
		return new_sk;
	}
	return NULL;
}

/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	sock_hold(sk); /* sock_put below */
	lock_sock(sk);
	if (!sk->sk_lingertime)
		/* wait for peer closing */
		WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
	__smc_release(smc);
	release_sock(sk);
	sock_put(sk); /* sock_hold above */
	sock_put(sk); /* final sock_put */
}
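
/* server side of the CONFIRM LINK handshake for the first link of a new
 * link group; registers the connection buffers with the link first
 */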
static int smcr_serv_conf_first_link(struct smc_sock *smc)
{
	struct smc_link *link = smc->conn.lnk;
	struct smc_llc_qentry *qentry;
	int rc;

	/* reg the sndbuf if it was vzalloced */
	if (smc->conn.sndbuf_desc->is_vm) {
		if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc))
			return SMC_CLC_DECL_ERR_REGBUF;
	}

	/* reg the rmb */
	if (smcr_link_reg_buf(link, smc->conn.rmb_desc))
		return SMC_CLC_DECL_ERR_REGBUF;

	/* send CONFIRM LINK request to client over the RoCE fabric */
	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
	if (rc < 0)
		return SMC_CLC_DECL_TIMEOUT_CL;

	/* receive CONFIRM LINK response from client over the RoCE fabric */
	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
			      SMC_LLC_CONFIRM_LINK);
	if (!qentry) {
		struct smc_clc_msg_decline dclc;

		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
	}
	smc_llc_save_peer_uid(qentry);
	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
	if (rc)
		return SMC_CLC_DECL_RMBE_EC;

	/* confirm_rkey is implicit on 1st contact */
	smc->conn.rmb_desc->is_conf_rkey = true;

	smc_llc_link_active(link);
	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);

	if (link->lgr->max_links > 1) {
		down_write(&link->lgr->llc_conf_mutex);
		/* initial contact - try to establish second link */
		smc_llc_srv_add_link(link, NULL);
		up_write(&link->lgr->llc_conf_mutex);
	}
	return 0;
}

/* listen worker: finish */
static void smc_listen_out(struct smc_sock *new_smc)
{
	struct smc_sock *lsmc = new_smc->listen_smc;
	struct sock *newsmcsk = &new_smc->sk;

	if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
		atomic_dec(&lsmc->queued_smc_hs);

	release_sock(newsmcsk); /* lock in smc_listen_work() */
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
		smc_accept_enqueue(&lsmc->sk, newsmcsk);
		release_sock(&lsmc->sk);
	} else { /* no longer listening */
		smc_close_non_accepted(newsmcsk);
	}

	/* Wake up accept */
	lsmc->sk.sk_data_ready(&lsmc->sk);
	sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */
}

/* listen worker: finish in state connected */
static void smc_listen_out_connected(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;

	if (newsmcsk->sk_state == SMC_INIT)
		newsmcsk->sk_state = SMC_ACTIVE;

	smc_listen_out(new_smc);
}

/* listen worker: finish in error state */
static void smc_listen_out_err(struct smc_sock *new_smc)
{
	struct sock *newsmcsk = &new_smc->sk;
	struct net *net = sock_net(newsmcsk);

	this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
	if (newsmcsk->sk_state == SMC_INIT)
		sock_put(&new_smc->sk); /* passive closing */
	newsmcsk->sk_state = SMC_CLOSED;

	smc_listen_out(new_smc);
}

/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_first, u8 version)
{
	/* RDMA setup failed, switch back to TCP */
	smc_conn_abort(new_smc, local_first);
	if (reason_code < 0 ||
	    smc_switch_to_fallback(new_smc, reason_code)) {
		/* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}

/* listen worker: version checking */
static int smc_listen_v2_check(struct smc_sock *new_smc,
			       struct smc_clc_msg_proposal *pclc,
			       struct smc_init_info *ini)
{
	struct smc_clc_smcd_v2_extension *pclc_smcd_v2_ext;
	struct smc_clc_v2_extension *pclc_v2_ext;
	int rc = SMC_CLC_DECL_PEERNOSMC;

	ini->smc_type_v1 = pclc->hdr.typev1;
	ini->smc_type_v2 = pclc->hdr.typev2;
	ini->smcd_version = smcd_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	ini->smcr_version = smcr_indicated(ini->smc_type_v1) ? SMC_V1 : 0;
	if (pclc->hdr.version > SMC_V1) {
		if (smcd_indicated(ini->smc_type_v2))
			ini->smcd_version |= SMC_V2;
		if (smcr_indicated(ini->smc_type_v2))
			ini->smcr_version |= SMC_V2;
	}
	if (!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) {
		rc = SMC_CLC_DECL_PEERNOSMC;
		goto out;
	}
	pclc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!pclc_v2_ext) {
		ini->smcd_version &= ~SMC_V2;
		ini->smcr_version &= ~SMC_V2;
		rc = SMC_CLC_DECL_NOV2EXT;
		goto out;
	}
	pclc_smcd_v2_ext = smc_get_clc_smcd_v2_ext(pclc_v2_ext);
	if (ini->smcd_version & SMC_V2) {
		if (!smc_ism_is_v2_capable()) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOISM2SUPP;
		} else if (!pclc_smcd_v2_ext) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOV2DEXT;
		} else if (!pclc_v2_ext->hdr.eid_cnt &&
			   !pclc_v2_ext->hdr.flag.seid) {
			ini->smcd_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}
	if (ini->smcr_version & SMC_V2) {
		if (!pclc_v2_ext->hdr.eid_cnt) {
			ini->smcr_version &= ~SMC_V2;
			rc = SMC_CLC_DECL_NOUEID;
		}
	}

	ini->release_nr = pclc_v2_ext->hdr.flag.release;
	if (pclc_v2_ext->hdr.flag.release > SMC_RELEASE)
		ini->release_nr = SMC_RELEASE;

out:
	if (!ini->smcd_version && !ini->smcr_version)
		return rc;

	return 0;
}

/* listen worker: check prefixes */
static int smc_listen_prfx_check(struct smc_sock *new_smc,
				 struct smc_clc_msg_proposal *pclc)
{
	struct smc_clc_msg_proposal_prefix *pclc_prfx;
	struct socket *newclcsock = new_smc->clcsock;

	if (pclc->hdr.typev1 == SMC_TYPE_N)
		return 0;
	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
	if (!pclc_prfx)
		return -EPROTO;
	if (smc_clc_prfx_match(newclcsock, pclc_prfx))
		return SMC_CLC_DECL_DIFFPREFIX;

	return 0;
}

/* listen worker: initialize connection and buffers */
static int smc_listen_rdma_init(struct smc_sock *new_smc,
				struct smc_init_info *ini)
{
	int rc;

	/* allocate connection / link group */
	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* create send buffer and rmb */
	if (smc_buf_create(new_smc, false)) {
		smc_conn_abort(new_smc, ini->first_contact_local);
		return SMC_CLC_DECL_MEM;
	}

	return 0;
}

/* listen worker: initialize connection and buffers for SMC-D */
static int smc_listen_ism_init(struct smc_sock *new_smc,
			       struct smc_init_info *ini)
{
	int rc;

	rc = smc_conn_create(new_smc, ini);
	if (rc)
		return rc;

	/* Create send and receive buffers */
	rc = smc_buf_create(new_smc, true);
	if (rc) {
		smc_conn_abort(new_smc, ini->first_contact_local);
		return (rc == -ENOSPC) ? SMC_CLC_DECL_MAX_DMB :
					 SMC_CLC_DECL_MEM;
	}

	return 0;
}
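
/* check whether a given ISM device was already picked as an earlier match */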
static bool smc_is_already_selected(struct smcd_dev *smcd,
				    struct smc_init_info *ini,
				    int matches)
{
	int i;

	for (i = 0; i < matches; i++)
		if (smcd == ini->ism_dev[i])
			return true;

	return false;
}

/* check for ISM devices matching proposed ISM devices */
static void smc_check_ism_v2_match(struct smc_init_info *ini,
				   u16 proposed_chid,
				   struct smcd_gid *proposed_gid,
				   unsigned int *matches)
{
	struct smcd_dev *smcd;

	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		if (smcd->going_away)
			continue;
		if (smc_is_already_selected(smcd, ini, *matches))
			continue;
		if (smc_ism_get_chid(smcd) == proposed_chid &&
		    !smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
			ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
			if (__smc_ism_is_emulated(proposed_chid))
				ini->ism_peer_gid[*matches].gid_ext =
							proposed_gid->gid_ext;
			/* non-Emulated-ISM's peer gid_ext remains 0. */
			ini->ism_dev[*matches] = smcd;
			(*matches)++;
			break;
		}
	}
}
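
/* remember only the first decline reason seen during device search */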
static void smc_find_ism_store_rc(u32 rc, struct smc_init_info *ini)
{
	if (!ini->rc)
		ini->rc = rc;
}
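
/* listen worker: search for a local ISM device matching one of the
 * SMC-D V2 devices proposed by the client
 */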
static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_smcd_v2_extension *smcd_v2_ext;
	struct smc_clc_v2_extension *smc_v2_ext;
	struct smc_clc_msg_smcd *pclc_smcd;
	unsigned int matches = 0;
	struct smcd_gid smcd_gid;
	u8 smcd_version;
	u8 *eid = NULL;
	int i, rc;
	u16 chid;

	if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
		goto not_found;

	pclc_smcd = smc_get_clc_msg_smcd(pclc);
	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
	if (!pclc_smcd || !smc_v2_ext || !smcd_v2_ext)
		goto not_found;

	mutex_lock(&smcd_dev_list.mutex);
	if (pclc_smcd->ism.chid) {
		/* check for ISM device matching proposed native ISM device */
		smcd_gid.gid = ntohll(pclc_smcd->ism.gid);
		smcd_gid.gid_ext = 0;
		smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
				       &smcd_gid, &matches);
	}
	for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) {
		/* check for ISM devices matching proposed non-native ISM
		 * devices
		 */
		smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
		smcd_gid.gid_ext = 0;
		chid = ntohs(smcd_v2_ext->gidchid[i].chid);
		if (__smc_ism_is_emulated(chid)) {
			if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
			    chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
				/* each Emulated-ISM device takes two GID-CHID
				 * entries and CHID of the second entry repeats
				 * that of the first entry.
				 *
				 * So check if the next GID-CHID entry exists
				 * and both two entries' CHIDs are the same.
				 */
				continue;
			smcd_gid.gid_ext =
				ntohll(smcd_v2_ext->gidchid[++i].gid);
		}
		smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches);
	}
	mutex_unlock(&smcd_dev_list.mutex);

	if (!ini->ism_dev[0]) {
		smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini);
		goto not_found;
	}

	smc_ism_get_system_eid(&eid);
	if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
			       smcd_v2_ext->system_eid, eid))
		goto not_found;

	/* separate - outside the smcd_dev_list.lock */
	smcd_version = ini->smcd_version;
	for (i = 0; i < matches; i++) {
		ini->smcd_version = SMC_V2;
		ini->is_smcd = true;
		ini->ism_selected = i;
		rc = smc_listen_ism_init(new_smc, ini);
		if (rc) {
			smc_find_ism_store_rc(rc, ini);
			/* try next active ISM device */
			continue;
		}
		return; /* matching and usable V2 ISM device found */
	}
	/* no V2 ISM device could be initialized */
	ini->smcd_version = smcd_version;	/* restore original value */
	ini->negotiated_eid[0] = 0;

not_found:
	ini->smcd_version &= ~SMC_V2;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}
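
/* listen worker: check the SMC-D V1 device proposed by the client */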
static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	struct smc_clc_msg_smcd *pclc_smcd = smc_get_clc_msg_smcd(pclc);
	int rc = 0;

	/* check if ISM V1 is available */
	if (!(ini->smcd_version & SMC_V1) ||
	    !smcd_indicated(ini->smc_type_v1) ||
	    !pclc_smcd)
		goto not_found;

	ini->is_smcd = true; /* prepare ISM check */
	ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
	ini->ism_peer_gid[0].gid_ext = 0;
	rc = smc_find_ism_device(new_smc, ini);
	if (rc)
		goto not_found;

	ini->ism_selected = 0;
	rc = smc_listen_ism_init(new_smc, ini);
	if (!rc)
		return; /* V1 ISM device found */

not_found:
	smc_find_ism_store_rc(rc, ini);
	ini->smcd_version &= ~SMC_V1;
	ini->ism_dev[0] = NULL;
	ini->is_smcd = false;
}

/* listen worker: register buffers */
static int smc_listen_rdma_reg(struct smc_sock *new_smc, bool local_first)
{
	struct smc_connection *conn = &new_smc->conn;

	if (!local_first) {
		/* reg sendbufs if they were vzalloced */
		if (conn->sndbuf_desc->is_vm) {
			if (smcr_lgr_reg_sndbufs(conn->lnk,
						 conn->sndbuf_desc))
				return SMC_CLC_DECL_ERR_REGBUF;
		}
		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
			return SMC_CLC_DECL_ERR_REGBUF;
	}

	return 0;
}
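
/* listen worker: search for a local RDMA device usable for the SMC-R V2
 * proposal
 */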
static void smc_find_rdma_v2_device_serv(struct smc_sock *new_smc,
					 struct smc_clc_msg_proposal *pclc,
					 struct smc_init_info *ini)
{
	struct smc_clc_v2_extension *smc_v2_ext;
	u8 smcr_version;
	int rc;

	if (!(ini->smcr_version & SMC_V2) || !smcr_indicated(ini->smc_type_v2))
		goto not_found;

	smc_v2_ext = smc_get_clc_v2_ext(pclc);
	if (!smc_v2_ext ||
	    !smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext, NULL, NULL))
		goto not_found;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, smc_v2_ext->roce, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	ini->check_smcrv2 = true;
	ini->smcrv2.clc_sk = new_smc->clcsock->sk;
	ini->smcrv2.saddr = new_smc->clcsock->sk->sk_rcv_saddr;
	ini->smcrv2.daddr = smc_ib_gid_to_ipv4(smc_v2_ext->roce);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		smc_find_ism_store_rc(rc, ini);
		goto not_found;
	}
	if (!ini->smcrv2.uses_gateway)
		memcpy(ini->smcrv2.nexthop_mac, pclc->lcl.mac, ETH_ALEN);

	smcr_version = ini->smcr_version;
	ini->smcr_version = SMC_V2;
	rc = smc_listen_rdma_init(new_smc, ini);
	if (!rc) {
		rc = smc_listen_rdma_reg(new_smc, ini->first_contact_local);
		if (rc)
			smc_conn_abort(new_smc, ini->first_contact_local);
	}
	if (!rc)
		return;
	ini->smcr_version = smcr_version;
	smc_find_ism_store_rc(rc, ini);

not_found:
	ini->smcr_version &= ~SMC_V2;
	ini->smcrv2.ib_dev_v2 = NULL;
	ini->check_smcrv2 = false;
}
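
/* listen worker: check for a local RDMA device matching the SMC-R V1
 * proposal
 */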
static int smc_find_rdma_v1_device_serv(struct smc_sock *new_smc,
					struct smc_clc_msg_proposal *pclc,
					struct smc_init_info *ini)
{
	int rc;

	if (!(ini->smcr_version & SMC_V1) || !smcr_indicated(ini->smc_type_v1))
		return SMC_CLC_DECL_NOSMCDEV;

	/* prepare RDMA check */
	memcpy(ini->peer_systemid, pclc->lcl.id_for_peer, SMC_SYSTEMID_LEN);
	memcpy(ini->peer_gid, pclc->lcl.gid, SMC_GID_SIZE);
	memcpy(ini->peer_mac, pclc->lcl.mac, ETH_ALEN);
	rc = smc_find_rdma_device(new_smc, ini);
	if (rc) {
		/* no RDMA device found */
		return SMC_CLC_DECL_NOSMCDEV;
	}
	rc = smc_listen_rdma_init(new_smc, ini);
	if (rc)
		return rc;

	return smc_listen_rdma_reg(new_smc, ini->first_contact_local);
}

/* determine the local device matching to proposal */
static int smc_listen_find_device(struct smc_sock *new_smc,
				  struct smc_clc_msg_proposal *pclc,
				  struct smc_init_info *ini)
{
	int prfx_rc;

	/* check for ISM device matching V2 proposed device */
	smc_find_ism_v2_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	/* check for matching IP prefix and subnet length (V1) */
	prfx_rc = smc_listen_prfx_check(new_smc, pclc);
	if (prfx_rc)
		smc_find_ism_store_rc(prfx_rc, ini);

	/* get vlan id from IP device */
	if (smc_vlan_by_tcpsk(new_smc->clcsock, ini))
		return ini->rc ?: SMC_CLC_DECL_GETVLANERR;

	/* check for ISM device matching V1 proposed device */
	if (!prfx_rc)
		smc_find_ism_v1_device_serv(new_smc, pclc, ini);
	if (ini->ism_dev[0])
		return 0;

	if (!smcr_indicated(pclc->hdr.typev1) &&
	    !smcr_indicated(pclc->hdr.typev2))
		/* skip RDMA and decline */
		return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;

	/* check if RDMA V2 is available */
	smc_find_rdma_v2_device_serv(new_smc, pclc, ini);
	if (ini->smcrv2.ib_dev_v2)
		return 0;

	/* check if RDMA V1 is available */
	if (!prfx_rc) {
		int rc;

		rc = smc_find_rdma_v1_device_serv(new_smc, pclc, ini);
		smc_find_ism_store_rc(rc, ini);
		return (!rc) ? 0 : ini->rc;
	}

	return prfx_rc;
}

/* listen worker: finish RDMA setup */
static int smc_listen_rdma_finish(struct smc_sock *new_smc,
				  struct smc_clc_msg_accept_confirm *cclc,
				  bool local_first,
				  struct smc_init_info *ini)
{
	struct smc_link *link = new_smc->conn.lnk;
	int reason_code = 0;

	if (local_first)
		smc_link_save_peer_info(link, cclc, ini);

	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc))
		return SMC_CLC_DECL_ERR_RTOK;

	if (local_first) {
		if (smc_ib_ready_link(link))
			return SMC_CLC_DECL_ERR_RDYLNK;
		/* QP confirmation over RoCE fabric */
		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
		reason_code = smcr_serv_conf_first_link(new_smc);
		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
	}
	return reason_code;
}

/* setup for connection of server */
static void smc_listen_work(struct work_struct *work)
{
	struct smc_sock *new_smc = container_of(work, struct smc_sock,
						smc_listen_work);
	struct socket *newclcsock = new_smc->clcsock;
	struct smc_clc_msg_accept_confirm *cclc;
	struct smc_clc_msg_proposal_area *buf;
	struct smc_clc_msg_proposal *pclc;
	struct smc_init_info *ini = NULL;
	u8 proposal_version = SMC_V1;
	u8 accept_version;
	int rc = 0;

	lock_sock(&new_smc->sk); /* release in smc_listen_out() */
	if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
		return smc_listen_out_err(new_smc);

	if (new_smc->use_fallback) {
		smc_listen_out_connected(new_smc);
		return;
	}

	/* check if peer is smc capable */
	if (!tcp_sk(newclcsock->sk)->syn_smc) {
		rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC);
		if (rc)
			smc_listen_out_err(new_smc);
		else
			smc_listen_out_connected(new_smc);
		return;
	}

	/* do inband token exchange -
	 * wait for and receive SMC Proposal CLC message
	 */
	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}
	pclc = (struct smc_clc_msg_proposal *)buf;
	rc = smc_clc_wait_msg(new_smc, pclc, sizeof(*buf),
			      SMC_CLC_PROPOSAL, CLC_WAIT_TIME);
	if (rc)
		goto out_decl;

	if (pclc->hdr.version > SMC_V1)
		proposal_version = SMC_V2;

	/* IPSec connections opt out of SMC optimizations */
	if (using_ipsec(new_smc)) {
		rc = SMC_CLC_DECL_IPSEC;
		goto out_decl;
	}

	ini = kzalloc(sizeof(*ini), GFP_KERNEL);
	if (!ini) {
		rc = SMC_CLC_DECL_MEM;
		goto out_decl;
	}

	/* initial version checking */
	rc = smc_listen_v2_check(new_smc, pclc, ini);
	if (rc)
		goto out_decl;

	rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini);
	if (rc)
		goto out_decl;

	mutex_lock(&smc_server_lgr_pending);
	smc_rx_init(new_smc);
	smc_tx_init(new_smc);

	/* determine ISM or RoCE device used for connection */
	rc = smc_listen_find_device(new_smc, pclc, ini);
	if (rc)
		goto out_unlock;

	/* send SMC Accept CLC message */
	accept_version = ini->is_smcd ? ini->smcd_version : ini->smcr_version;
	rc = smc_clc_send_accept(new_smc, ini->first_contact_local,
				 accept_version, ini->negotiated_eid, ini);
	if (rc)
		goto out_unlock;

	/* SMC-D does not need this lock any more */
	if (ini->is_smcd)
		mutex_unlock(&smc_server_lgr_pending);

	/* receive SMC Confirm CLC message */
	memset(buf, 0, sizeof(*buf));
	cclc = (struct smc_clc_msg_accept_confirm *)buf;
	rc = smc_clc_wait_msg(new_smc, cclc, sizeof(*buf),
			      SMC_CLC_CONFIRM, CLC_WAIT_TIME);
	if (rc) {
		if (!ini->is_smcd)
			goto out_unlock;
		goto out_decl;
	}

	rc = smc_clc_v2x_features_confirm_check(cclc, ini);
	if (rc) {
		if (!ini->is_smcd)
			goto out_unlock;
		goto out_decl;
	}

	/* fce smc release version is needed in smc_listen_rdma_finish,
	 * so save fce info here.
	 */
	smc_conn_save_peer_info_fce(new_smc, cclc);

	/* finish worker */
	if (!ini->is_smcd) {
		rc = smc_listen_rdma_finish(new_smc, cclc,
					    ini->first_contact_local, ini);
		if (rc)
			goto out_unlock;
		mutex_unlock(&smc_server_lgr_pending);
	}
	smc_conn_save_peer_info(new_smc, cclc);

	if (ini->is_smcd &&
	    smc_ism_support_dmb_nocopy(new_smc->conn.lgr->smcd)) {
		rc = smcd_buf_attach(new_smc);
		if (rc)
			goto out_decl;
	}

	SMC_STAT_SERV_SUCC_INC(sock_net(newclcsock->sk), ini);
	/* smc_listen_out() will release smcsk */
	smc_listen_out_connected(new_smc);
	goto out_free;

out_unlock:
	mutex_unlock(&smc_server_lgr_pending);
out_decl:
	smc_listen_decline(new_smc, rc, ini ? ini->first_contact_local : 0,
			   proposal_version);
out_free:
	kfree(ini);
	kfree(buf);
}
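
/* worker draining the accept queue of the internal TCP listen socket;
 * schedules one smc_listen_work instance per accepted connection
 */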
static void smc_tcp_listen_work(struct work_struct *work)
{
	struct smc_sock *lsmc = container_of(work, struct smc_sock,
					     tcp_listen_work);
	struct sock *lsk = &lsmc->sk;
	struct smc_sock *new_smc;
	int rc = 0;

	lock_sock(lsk);
	while (lsk->sk_state == SMC_LISTEN) {
		rc = smc_clcsock_accept(lsmc, &new_smc);
		if (rc) /* clcsock accept queue empty or error */
			goto out;
		if (!new_smc)
			continue;

		if (tcp_sk(new_smc->clcsock->sk)->syn_smc)
			atomic_inc(&lsmc->queued_smc_hs);

		new_smc->listen_smc = lsmc;
		new_smc->use_fallback = lsmc->use_fallback;
		new_smc->fallback_rsn = lsmc->fallback_rsn;
		sock_hold(lsk); /* sock_put in smc_listen_work */
		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
		smc_copy_sock_settings_to_smc(new_smc);
		sock_hold(&new_smc->sk); /* sock_put in passive closing */
		if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
			sock_put(&new_smc->sk);
	}

out:
	release_sock(lsk);
	sock_put(&lsmc->sk); /* sock_hold in smc_clcsock_data_ready() */
}
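
/* data-ready callback installed on the listening clcsock; triggers the
 * TCP listen worker
 */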
static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
	struct smc_sock *lsmc;

	read_lock_bh(&listen_clcsock->sk_callback_lock);
	lsmc = smc_clcsock_user_data(listen_clcsock);
	if (!lsmc)
		goto out;
	lsmc->clcsk_data_ready(listen_clcsock);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
		if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
			sock_put(&lsmc->sk);
	}
out:
	read_unlock_bh(&listen_clcsock->sk_callback_lock);
}
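
/* listen() entry point: diverts the clcsock sk_data_ready callback and
 * syn_recv_sock op so that new connections are seen by SMC first, then
 * starts TCP listening on the clcsock
 */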
int smc_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	rc = -EINVAL;
	if ((sk->sk_state != SMC_INIT && sk->sk_state != SMC_LISTEN) ||
	    smc->connect_nonblock || sock->state != SS_UNCONNECTED)
		goto out;

	rc = 0;
	if (sk->sk_state == SMC_LISTEN) {
		sk->sk_max_ack_backlog = backlog;
		goto out;
	}
	/* some socket options are handled in core, so we could not apply
	 * them to the clc socket -- copy smc socket options to clc socket
	 */
	smc_copy_sock_settings_to_clc(smc);
	if (!smc->use_fallback)
		tcp_sk(smc->clcsock->sk)->syn_smc = 1;

	/* save original sk_data_ready function and establish
	 * smc-specific sk_data_ready function
	 */
	write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
	smc->clcsock->sk->sk_user_data =
		(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
	smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
			       smc_clcsock_data_ready, &smc->clcsk_data_ready);
	write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);

	/* save original ops */
	smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;

	smc->af_ops = *smc->ori_af_ops;
	smc->af_ops.syn_recv_sock = smc_tcp_syn_recv_sock;

	inet_csk(smc->clcsock->sk)->icsk_af_ops = &smc->af_ops;

	if (smc->limit_smc_hs)
		tcp_sk(smc->clcsock->sk)->smc_hs_congested = smc_hs_congested;

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc) {
		write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
		smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
				       &smc->clcsk_data_ready);
		smc->clcsock->sk->sk_user_data = NULL;
		write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
		goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	sk->sk_ack_backlog = 0;
	sk->sk_state = SMC_LISTEN;

out:
	release_sock(sk);
	return rc;
}
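
/* accept() entry point: wait for a handshake-complete socket on the accept
 * queue; with TCP_DEFER_ACCEPT set, additionally wait until data arrives
 */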
int smc_accept(struct socket *sock, struct socket *new_sock,
	       struct proto_accept_arg *arg)
{
	struct sock *sk = sock->sk, *nsk;
	DECLARE_WAITQUEUE(wait, current);
	struct smc_sock *lsmc;
	long timeo;
	int rc = 0;

	lsmc = smc_sk(sk);
	sock_hold(sk); /* sock_put below */
	lock_sock(sk);

	if (lsmc->sk.sk_state != SMC_LISTEN) {
		rc = -EINVAL;
		release_sock(sk);
		goto out;
	}

	/* Wait for an incoming connection */
	timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK);
	add_wait_queue_exclusive(sk_sleep(sk), &wait);
	while (!(nsk = smc_accept_dequeue(sk, new_sock))) {
		set_current_state(TASK_INTERRUPTIBLE);
		if (!timeo) {
			rc = -EAGAIN;
			break;
		}
		release_sock(sk);
		timeo = schedule_timeout(timeo);
		/* wakeup by sk_data_ready in smc_listen_work() */
		sched_annotate_sleep();
		lock_sock(sk);
		if (signal_pending(current)) {
			rc = sock_intr_errno(timeo);
			break;
		}
	}
	set_current_state(TASK_RUNNING);
	remove_wait_queue(sk_sleep(sk), &wait);

	if (!rc)
		rc = sock_error(nsk);
	release_sock(sk);
	if (rc)
		goto out;

	if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) {
		/* wait till data arrives on the socket */
		timeo = msecs_to_jiffies(lsmc->sockopt_defer_accept *
					 MSEC_PER_SEC);
		if (smc_sk(nsk)->use_fallback) {
			struct sock *clcsk = smc_sk(nsk)->clcsock->sk;

			lock_sock(clcsk);
			if (skb_queue_empty(&clcsk->sk_receive_queue))
				sk_wait_data(clcsk, &timeo, NULL);
			release_sock(clcsk);
		} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
			lock_sock(nsk);
			smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available);
			release_sock(nsk);
		}
	}

out:
	sock_put(sk); /* sock_hold above */
	return rc;
}

int smc_getname(struct socket *sock, struct sockaddr *addr,
		int peer)
{
	struct smc_sock *smc;

	if (peer && (sock->sk->sk_state != SMC_ACTIVE) &&
	    (sock->sk->sk_state != SMC_APPCLOSEWAIT1))
		return -ENOTCONN;

	smc = smc_sk(sock->sk);

	return smc->clcsock->ops->getname(smc->clcsock, addr, peer);
}
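
/* sendmsg() entry point: uses the TCP path in fallback mode and the SMC
 * tx path otherwise
 */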
int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc;

	smc = smc_sk(sk);
	lock_sock(sk);

	/* SMC does not support connect with fastopen */
	if (msg->msg_flags & MSG_FASTOPEN) {
		/* not connected yet, fallback */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
			if (rc)
				goto out;
		} else {
			rc = -EINVAL;
			goto out;
		}
	} else if ((sk->sk_state != SMC_ACTIVE) &&
		   (sk->sk_state != SMC_APPCLOSEWAIT1) &&
		   (sk->sk_state != SMC_INIT)) {
		rc = -EPIPE;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->sendmsg(smc->clcsock, msg, len);
	} else {
		rc = smc_tx_sendmsg(smc, msg, len);
		SMC_STAT_TX_PAYLOAD(smc, len, rc);
	}
out:
	release_sock(sk);
	return rc;
}
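
/* recvmsg() entry point: uses the TCP path in fallback mode and the SMC
 * rx path otherwise
 */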
int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if ((sk->sk_state == SMC_INIT) ||
	    (sk->sk_state == SMC_LISTEN) ||
	    (sk->sk_state == SMC_CLOSED))
		goto out;

	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}

	if (smc->use_fallback) {
		rc = smc->clcsock->ops->recvmsg(smc->clcsock, msg, len, flags);
	} else {
		msg->msg_namelen = 0;
		rc = smc_rx_recvmsg(smc, msg, NULL, len, flags);
		SMC_STAT_RX_PAYLOAD(smc, rc, rc);
	}

out:
	release_sock(sk);
	return rc;
}
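
/* report EPOLLIN on a listen socket whenever the accept queue is non-empty */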
static __poll_t smc_accept_poll(struct sock *parent)
{
	struct smc_sock *isk = smc_sk(parent);
	__poll_t mask = 0;

	spin_lock(&isk->accept_q_lock);
	if (!list_empty(&isk->accept_q))
		mask = EPOLLIN | EPOLLRDNORM;
	spin_unlock(&isk->accept_q_lock);

	return mask;
}

__poll_t smc_poll(struct file *file, struct socket *sock,
		  poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	__poll_t mask = 0;

	if (!sk)
		return EPOLLNVAL;

	smc = smc_sk(sock->sk);
	if (smc->use_fallback) {
		/* delegate to CLC child sock */
		mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
		sk->sk_err = smc->clcsock->sk->sk_err;
	} else {
		if (sk->sk_state != SMC_CLOSED)
			sock_poll_wait(file, sock, wait);
		if (sk->sk_err)
			mask |= EPOLLERR;
		if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
		    (sk->sk_state == SMC_CLOSED))
			mask |= EPOLLHUP;
		if (sk->sk_state == SMC_LISTEN) {
			/* woken up by sk_data_ready in smc_listen_work() */
			mask |= smc_accept_poll(sk);
		} else if (smc->use_fallback) { /* as result of connect_work() */
			mask |= smc->clcsock->ops->poll(file, smc->clcsock,
							wait);
			sk->sk_err = smc->clcsock->sk->sk_err;
		} else {
			if ((sk->sk_state != SMC_INIT &&
			     atomic_read(&smc->conn.sndbuf_space)) ||
			    sk->sk_shutdown & SEND_SHUTDOWN) {
				mask |= EPOLLOUT | EPOLLWRNORM;
			} else {
				sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				if (sk->sk_state != SMC_INIT) {
					/* Race breaker the same way as tcp_poll(). */
					smp_mb__after_atomic();
					if (atomic_read(&smc->conn.sndbuf_space))
						mask |= EPOLLOUT | EPOLLWRNORM;
				}
			}
			if (atomic_read(&smc->conn.bytes_to_rcv))
				mask |= EPOLLIN | EPOLLRDNORM;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
			if (sk->sk_state == SMC_APPCLOSEWAIT1)
				mask |= EPOLLIN;
			if (smc->conn.urg_state == SMC_URG_VALID)
				mask |= EPOLLPRI;
		}
	}

	return mask;
}
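
/* shutdown() entry point: delegates to the clcsock in fallback mode,
 * otherwise drives the SMC close state machine
 */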
int smc_shutdown(struct socket *sock, int how)
{
	struct sock *sk = sock->sk;
	bool do_shutdown = true;
	struct smc_sock *smc;
	int rc = -EINVAL;
	int old_state;
	int rc1 = 0;

	smc = smc_sk(sk);

	if ((how < SHUT_RD) || (how > SHUT_RDWR))
		return rc;

	lock_sock(sk);

	if (sock->state == SS_CONNECTING) {
		if (sk->sk_state == SMC_ACTIVE)
			sock->state = SS_CONNECTED;
		else if (sk->sk_state == SMC_PEERCLOSEWAIT1 ||
			 sk->sk_state == SMC_PEERCLOSEWAIT2 ||
			 sk->sk_state == SMC_APPCLOSEWAIT1 ||
			 sk->sk_state == SMC_APPCLOSEWAIT2 ||
			 sk->sk_state == SMC_APPFINCLOSEWAIT)
			sock->state = SS_DISCONNECTING;
	}

	rc = -ENOTCONN;
	if ((sk->sk_state != SMC_ACTIVE) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT1) &&
	    (sk->sk_state != SMC_PEERCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT1) &&
	    (sk->sk_state != SMC_APPCLOSEWAIT2) &&
	    (sk->sk_state != SMC_APPFINCLOSEWAIT))
		goto out;
	if (smc->use_fallback) {
		rc = kernel_sock_shutdown(smc->clcsock, how);
		sk->sk_shutdown = smc->clcsock->sk->sk_shutdown;
		if (sk->sk_shutdown == SHUTDOWN_MASK) {
			sk->sk_state = SMC_CLOSED;
			sk->sk_socket->state = SS_UNCONNECTED;
			sock_put(sk);
		}
		goto out;
	}
	switch (how) {
	case SHUT_RDWR:		/* shutdown in both directions */
		old_state = sk->sk_state;
		rc = smc_close_active(smc);
		if (old_state == SMC_ACTIVE &&
		    sk->sk_state == SMC_PEERCLOSEWAIT1)
			do_shutdown = false;
		break;
	case SHUT_WR:
		rc = smc_close_shutdown_write(smc);
		break;
	case SHUT_RD:
		rc = 0;
		/* nothing more to do because peer is not involved */
		break;
	}
	if (do_shutdown && smc->clcsock)
		rc1 = kernel_sock_shutdown(smc->clcsock, how);
	/* map sock_shutdown_cmd constants to sk_shutdown value range */
	sk->sk_shutdown |= how + 1;

	if (sk->sk_state == SMC_CLOSED)
		sock->state = SS_UNCONNECTED;
	else
		sock->state = SS_DISCONNECTING;
out:
	release_sock(sk);
	return rc ? rc : rc1;
}
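
/* handlers for the SOL_SMC level socket options */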
static int __smc_getsockopt(struct socket *sock, int level, int optname,
			    char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;
	int val, len;

	smc = smc_sk(sock->sk);

	if (get_user(len, optlen))
		return -EFAULT;

	len = min_t(int, len, sizeof(int));

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case SMC_LIMIT_HS:
		val = smc->limit_smc_hs;
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

static int __smc_setsockopt(struct socket *sock, int level, int optname,
			    sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	smc = smc_sk(sk);

	lock_sock(sk);
	switch (optname) {
	case SMC_LIMIT_HS:
		if (optlen < sizeof(int)) {
			rc = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(int))) {
			rc = -EFAULT;
			break;
		}

		smc->limit_smc_hs = !!val;
		rc = 0;
		break;
	default:
		rc = -EOPNOTSUPP;
		break;
	}
	release_sock(sk);

	return rc;
}

int smc_setsockopt(struct socket *sock, int level, int optname,
		   sockptr_t optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int val, rc;

	if (level == SOL_TCP && optname == TCP_ULP)
		return -EOPNOTSUPP;
	else if (level == SOL_SMC)
		return __smc_setsockopt(sock, level, optname, optval, optlen);

	smc = smc_sk(sk);

	/* generic setsockopts reaching us here always apply to the
	 * CLC socket
	 */
	mutex_lock(&smc->clcsock_release_lock);
	if (!smc->clcsock) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EBADF;
	}
	if (unlikely(!smc->clcsock->ops->setsockopt))
		rc = -EOPNOTSUPP;
	else
		rc = smc->clcsock->ops->setsockopt(smc->clcsock, level, optname,
						   optval, optlen);
	if (smc->clcsock->sk->sk_err) {
		sk->sk_err = smc->clcsock->sk->sk_err;
		sk_error_report(sk);
	}
	mutex_unlock(&smc->clcsock_release_lock);

	if (optlen < sizeof(int))
		return -EINVAL;
	if (copy_from_sockptr(&val, optval, sizeof(int)))
		return -EFAULT;

	lock_sock(sk);
	if (rc || smc->use_fallback)
		goto out;
	switch (optname) {
	case TCP_FASTOPEN:
	case TCP_FASTOPEN_CONNECT:
	case TCP_FASTOPEN_KEY:
	case TCP_FASTOPEN_NO_COOKIE:
		/* option not supported by SMC */
		if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
			rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP);
		} else {
			rc = -EINVAL;
		}
		break;
	case TCP_NODELAY:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (val) {
				SMC_STAT_INC(smc, ndly_cnt);
				smc_tx_pending(&smc->conn);
				cancel_delayed_work(&smc->conn.tx_work);
			}
		}
		break;
	case TCP_CORK:
		if (sk->sk_state != SMC_INIT &&
		    sk->sk_state != SMC_LISTEN &&
		    sk->sk_state != SMC_CLOSED) {
			if (!val) {
				SMC_STAT_INC(smc, cork_cnt);
				smc_tx_pending(&smc->conn);
				cancel_delayed_work(&smc->conn.tx_work);
			}
		}
		break;
	case TCP_DEFER_ACCEPT:
		smc->sockopt_defer_accept = val;
		break;
	default:
		break;
	}
out:
	release_sock(sk);

	return rc;
}

int smc_getsockopt(struct socket *sock, int level, int optname,
		   char __user *optval, int __user *optlen)
{
	struct smc_sock *smc;
	int rc;

	if (level == SOL_SMC)
		return __smc_getsockopt(sock, level, optname, optval, optlen);

	smc = smc_sk(sock->sk);
	mutex_lock(&smc->clcsock_release_lock);
	if (!smc->clcsock) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EBADF;
	}
	/* socket options apply to the CLC socket */
	if (unlikely(!smc->clcsock->ops->getsockopt)) {
		mutex_unlock(&smc->clcsock_release_lock);
		return -EOPNOTSUPP;
	}
	rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname,
					   optval, optlen);
	mutex_unlock(&smc->clcsock_release_lock);
	return rc;
}
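
/* ioctl() entry point: SIOCINQ, SIOCOUTQ, SIOCOUTQNSD and SIOCATMARK
 * report queue fill levels of the SMC connection
 */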
int smc_ioctl(struct socket *sock, unsigned int cmd,
	      unsigned long arg)
{
	union smc_host_cursor cons, urg;
	struct smc_connection *conn;
	struct smc_sock *smc;
	int answ;

	smc = smc_sk(sock->sk);
	conn = &smc->conn;
	lock_sock(&smc->sk);
	if (smc->use_fallback) {
		if (!smc->clcsock) {
			release_sock(&smc->sk);
			return -EBADF;
		}
		answ = smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
		release_sock(&smc->sk);
		return answ;
	}
	switch (cmd) {
	case SIOCINQ: /* same as FIONREAD */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = atomic_read(&smc->conn.bytes_to_rcv);
		break;
	case SIOCOUTQ:
		/* output queue size (not send + not acked) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc->conn.sndbuf_desc->len -
					atomic_read(&smc->conn.sndbuf_space);
		break;
	case SIOCOUTQNSD:
		/* output queue size (not send only) */
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED)
			answ = 0;
		else
			answ = smc_tx_prepared_sends(&smc->conn);
		break;
	case SIOCATMARK:
		if (smc->sk.sk_state == SMC_LISTEN) {
			release_sock(&smc->sk);
			return -EINVAL;
		}
		if (smc->sk.sk_state == SMC_INIT ||
		    smc->sk.sk_state == SMC_CLOSED) {
			answ = 0;
		} else {
			smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
			smc_curs_copy(&urg, &conn->urg_curs, conn);
			answ = smc_curs_diff(conn->rmb_desc->len,
					     &cons, &urg) == 1;
		}
		break;
	default:
		release_sock(&smc->sk);
		return -ENOIOCTLCMD;
	}
	release_sock(&smc->sk);

	return put_user(answ, (int __user *)arg);
}
/* Map the affected portions of the rmbe into an spd, note the number of bytes
 * to splice in conn->splice_pending, and press 'go'. Delays consumer cursor
 * updates until the respective page has been fully processed.
 * Note that subsequent recv() calls have to wait till all splice() processing
 * has completed.
 */
ssize_t smc_splice_read(struct socket *sock, loff_t *ppos,
			struct pipe_inode_info *pipe, size_t len,
			unsigned int flags)
{
	struct sock *sk = sock->sk;
	struct smc_sock *smc;
	int rc = -ENOTCONN;

	smc = smc_sk(sk);
	lock_sock(sk);
	if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) {
		/* socket was connected before, no more data to read */
		rc = 0;
		goto out;
	}
	if (sk->sk_state == SMC_INIT ||
	    sk->sk_state == SMC_LISTEN ||
	    sk->sk_state == SMC_CLOSED)
		goto out;
	if (sk->sk_state == SMC_PEERFINCLOSEWAIT) {
		rc = 0;
		goto out;
	}
	if (smc->use_fallback) {
		rc = smc->clcsock->ops->splice_read(smc->clcsock, ppos,
						    pipe, len, flags);
	} else {
		if (*ppos) {
			rc = -ESPIPE;
			goto out;
		}
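		/* splice and recvmsg use different non-blocking flags:
		 * map SPLICE_F_NONBLOCK to the MSG_DONTWAIT understood by
		 * smc_rx_recvmsg()
		 */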
		if (flags & SPLICE_F_NONBLOCK)
			flags = MSG_DONTWAIT;
		else
			flags = 0;
		SMC_STAT_INC(smc, splice_cnt);
		rc = smc_rx_recvmsg(smc, NULL, pipe, len, flags);
	}
out:
	release_sock(sk);
	return rc;
}

/* must look like tcp */
static const struct proto_ops smc_sock_ops = {
	.family		= PF_SMC,
	.owner		= THIS_MODULE,
	.release	= smc_release,
	.bind		= smc_bind,
	.connect	= smc_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= smc_accept,
	.getname	= smc_getname,
	.poll		= smc_poll,
	.ioctl		= smc_ioctl,
	.listen		= smc_listen,
	.shutdown	= smc_shutdown,
	.setsockopt	= smc_setsockopt,
	.getsockopt	= smc_getsockopt,
	.sendmsg	= smc_sendmsg,
	.recvmsg	= smc_recvmsg,
	.mmap		= sock_no_mmap,
	.splice_read	= smc_splice_read,
};
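
/* create the internal TCP socket that carries the CLC handshake and serves
 * as the data path if the connection falls back to TCP
 */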
int smc_create_clcsk(struct net *net, struct sock *sk, int family)
{
	struct smc_sock *smc = smc_sk(sk);
	int rc;

	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
			      &smc->clcsock);
	if (rc)
		return rc;

	/* smc_clcsock_release() does not wait for smc->clcsock->sk's
	 * destruction; its sk_state might not be TCP_CLOSE after
	 * smc->sk is close()d, and TCP timers can be fired later,
	 * which need the net ref.
	 */
	sk = smc->clcsock->sk;
	sk_net_refcnt_upgrade(sk);
	return 0;
}
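
/* allocate a new SMC socket; reuse the given clcsock when an existing TCP
 * socket is being converted (ULP path), otherwise create a fresh internal
 * TCP socket for the CLC handshake and fallback
 */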
static int __smc_create(struct net *net, struct socket *sock, int protocol,
			int kern, struct socket *clcsock)
{
	int family = (protocol == SMCPROTO_SMC6) ? PF_INET6 : PF_INET;
	struct smc_sock *smc;
	struct sock *sk;
	int rc;

	rc = -ESOCKTNOSUPPORT;
	if (sock->type != SOCK_STREAM)
		goto out;

	rc = -EPROTONOSUPPORT;
	if (protocol != SMCPROTO_SMC && protocol != SMCPROTO_SMC6)
		goto out;

	rc = -ENOBUFS;
	sock->ops = &smc_sock_ops;
	sock->state = SS_UNCONNECTED;
	sk = smc_sock_alloc(net, sock, protocol);
	if (!sk)
		goto out;

	/* create internal TCP socket for CLC handshake and fallback */
	smc = smc_sk(sk);

	rc = 0;
	if (clcsock)
		smc->clcsock = clcsock;
	else
		rc = smc_create_clcsk(net, sk, family);

	if (rc) {
		sk_common_release(sk);
		sock->sk = NULL;
	}
out:
	return rc;
}

static int smc_create(struct net *net, struct socket *sock, int protocol,
		      int kern)
{
	return __smc_create(net, sock, protocol, kern, NULL);
}

static const struct net_proto_family smc_sock_family_ops = {
	.family	= PF_SMC,
	.owner	= THIS_MODULE,
	.create	= smc_create,
};
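
/* The "smc" TCP ULP converts an existing, still unconnected TCP socket into
 * an SMC socket in place. A minimal userspace sketch (assuming the generic
 * TCP_ULP socket option as defined in <linux/tcp.h>):
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 *
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, "smc", sizeof("smc")) < 0)
 *		perror("TCP_ULP");  // e.g. kernel built without SMC
 *	// on success, fd behaves as an SMC socket from here on
 */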
static int smc_ulp_init(struct sock *sk)
{
	struct socket *tcp = sk->sk_socket;
	struct net *net = sock_net(sk);
	struct socket *smcsock;
	int protocol, ret;

	/* only TCP can be replaced */
	if (tcp->type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP ||
	    (sk->sk_family != AF_INET && sk->sk_family != AF_INET6))
		return -ESOCKTNOSUPPORT;
	/* don't handle wq now */
	if (tcp->state != SS_UNCONNECTED || !tcp->file || tcp->wq.fasync_list)
		return -ENOTCONN;

	if (sk->sk_family == AF_INET)
		protocol = SMCPROTO_SMC;
	else
		protocol = SMCPROTO_SMC6;

	smcsock = sock_alloc();
	if (!smcsock)
		return -ENFILE;

	smcsock->type = SOCK_STREAM;
	__module_get(THIS_MODULE); /* tried in __tcp_ulp_find_autoload */
	ret = __smc_create(net, smcsock, protocol, 1, tcp);
	if (ret) {
		sock_release(smcsock); /* does module_put() since ops won't be NULL */
		return ret;
	}

	/* replace the tcp socket with the smc socket */
	smcsock->file = tcp->file;
	smcsock->file->private_data = smcsock;
	smcsock->file->f_inode = SOCK_INODE(smcsock); /* replace inode when sock_close */
	smcsock->file->f_path.dentry->d_inode = SOCK_INODE(smcsock); /* dput() in __fput */
	tcp->file = NULL;

	return ret;
}

static void smc_ulp_clone(const struct request_sock *req, struct sock *newsk,
			  const gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(newsk);

	/* don't inherit ulp ops to the children of a listen socket */
	icsk->icsk_ulp_ops = NULL;
}

static struct tcp_ulp_ops smc_ulp_ops __read_mostly = {
	.name		= "smc",
	.owner		= THIS_MODULE,
	.init		= smc_ulp_init,
	.clone		= smc_ulp_clone,
};

unsigned int smc_net_id;

static __net_init int smc_net_init(struct net *net)
{
	int rc;

	rc = smc_sysctl_net_init(net);
	if (rc)
		return rc;
	return smc_pnet_net_init(net);
}

static void __net_exit smc_net_exit(struct net *net)
{
	smc_sysctl_net_exit(net);
	smc_pnet_net_exit(net);
}

static __net_init int smc_net_stat_init(struct net *net)
{
	return smc_stats_init(net);
}

static void __net_exit smc_net_stat_exit(struct net *net)
{
	smc_stats_exit(net);
}

static struct pernet_operations smc_net_ops = {
	.init = smc_net_init,
	.exit = smc_net_exit,
	.id   = &smc_net_id,
	.size = sizeof(struct smc_net),
};

static struct pernet_operations smc_net_stat_ops = {
	.init = smc_net_stat_init,
	.exit = smc_net_stat_exit,
};
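
/* module initialization: register the pernet subsystems, generic netlink,
 * the protocols, the PF_SMC socket family, the IB client and the TCP ULP;
 * any failure unwinds the earlier steps in reverse order via the out_*
 * labels below
 */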
static int __init smc_init(void)
{
	int rc;

	rc = register_pernet_subsys(&smc_net_ops);
	if (rc)
		return rc;

	rc = register_pernet_subsys(&smc_net_stat_ops);
	if (rc)
		goto out_pernet_subsys;

	rc = smc_ism_init();
	if (rc)
		goto out_pernet_subsys_stat;

	smc_clc_init();

	rc = smc_nl_init();
	if (rc)
		goto out_ism;

	rc = smc_pnet_init();
	if (rc)
		goto out_nl;

	rc = -ENOMEM;

	smc_tcp_ls_wq = alloc_workqueue("smc_tcp_ls_wq", 0, 0);
	if (!smc_tcp_ls_wq)
		goto out_pnet;

	smc_hs_wq = alloc_workqueue("smc_hs_wq", 0, 0);
	if (!smc_hs_wq)
		goto out_alloc_tcp_ls_wq;

	smc_close_wq = alloc_workqueue("smc_close_wq", 0, 0);
	if (!smc_close_wq)
		goto out_alloc_hs_wq;

	rc = smc_core_init();
	if (rc) {
		pr_err("%s: smc_core_init fails with %d\n", __func__, rc);
		goto out_alloc_wqs;
	}

	rc = smc_llc_init();
	if (rc) {
		pr_err("%s: smc_llc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = smc_cdc_init();
	if (rc) {
		pr_err("%s: smc_cdc_init fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto, 1);
	if (rc) {
		pr_err("%s: proto_register(v4) fails with %d\n", __func__, rc);
		goto out_core;
	}

	rc = proto_register(&smc_proto6, 1);
	if (rc) {
		pr_err("%s: proto_register(v6) fails with %d\n", __func__, rc);
		goto out_proto;
	}

	rc = sock_register(&smc_sock_family_ops);
	if (rc) {
		pr_err("%s: sock_register fails with %d\n", __func__, rc);
		goto out_proto6;
	}
	INIT_HLIST_HEAD(&smc_v4_hashinfo.ht);
	INIT_HLIST_HEAD(&smc_v6_hashinfo.ht);

	rc = smc_ib_register_client();
	if (rc) {
		pr_err("%s: ib_register fails with %d\n", __func__, rc);
		goto out_sock;
	}

	rc = smc_loopback_init();
	if (rc) {
		pr_err("%s: smc_loopback_init fails with %d\n", __func__, rc);
		goto out_ib;
	}

	rc = tcp_register_ulp(&smc_ulp_ops);
	if (rc) {
		pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
		goto out_lo;
	}

	rc = smc_inet_init();
	if (rc) {
		pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
		goto out_ulp;
	}

	static_branch_enable(&tcp_have_smc);
	return 0;

out_ulp:
	tcp_unregister_ulp(&smc_ulp_ops);
out_lo:
	smc_loopback_exit();
out_ib:
	smc_ib_unregister_client();
out_sock:
	sock_unregister(PF_SMC);
out_proto6:
	proto_unregister(&smc_proto6);
out_proto:
	proto_unregister(&smc_proto);
out_core:
	smc_core_exit();
out_alloc_wqs:
	destroy_workqueue(smc_close_wq);
out_alloc_hs_wq:
	destroy_workqueue(smc_hs_wq);
out_alloc_tcp_ls_wq:
	destroy_workqueue(smc_tcp_ls_wq);
out_pnet:
	smc_pnet_exit();
out_nl:
	smc_nl_exit();
out_ism:
	smc_clc_exit();
	smc_ism_exit();
out_pernet_subsys_stat:
	unregister_pernet_subsys(&smc_net_stat_ops);
out_pernet_subsys:
	unregister_pernet_subsys(&smc_net_ops);

	return rc;
}
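
/* module exit: tear everything down in reverse registration order;
 * rcu_barrier() makes sure outstanding RCU callbacks have run before the
 * module text goes away
 */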
static void __exit smc_exit(void)
{
	static_branch_disable(&tcp_have_smc);
	smc_inet_exit();
	tcp_unregister_ulp(&smc_ulp_ops);
	sock_unregister(PF_SMC);
	smc_core_exit();
	smc_loopback_exit();
	smc_ib_unregister_client();
	smc_ism_exit();
	destroy_workqueue(smc_close_wq);
	destroy_workqueue(smc_tcp_ls_wq);
	destroy_workqueue(smc_hs_wq);
	proto_unregister(&smc_proto6);
	proto_unregister(&smc_proto);
	smc_pnet_exit();
	smc_nl_exit();
	smc_clc_exit();
	unregister_pernet_subsys(&smc_net_stat_ops);
	unregister_pernet_subsys(&smc_net_ops);
	rcu_barrier();
}

module_init(smc_init);
module_exit(smc_exit);

MODULE_AUTHOR("Ursula Braun <ubraun@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("smc socket address family");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_SMC);
MODULE_ALIAS_TCP_ULP("smc");
/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1);
#if IS_ENABLED(CONFIG_IPV6)
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1);
#endif /* CONFIG_IPV6 */
MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);