smc_core.c

  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Shared Memory Communications over RDMA (SMC-R) and RoCE
  4. *
  5. * Basic Transport Functions exploiting Infiniband API
  6. *
  7. * Copyright IBM Corp. 2016
  8. *
  9. * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
  10. */
  11. #include <linux/socket.h>
  12. #include <linux/if_vlan.h>
  13. #include <linux/random.h>
  14. #include <linux/workqueue.h>
  15. #include <linux/wait.h>
  16. #include <linux/reboot.h>
  17. #include <linux/mutex.h>
  18. #include <linux/list.h>
  19. #include <linux/smc.h>
  20. #include <net/tcp.h>
  21. #include <net/sock.h>
  22. #include <rdma/ib_verbs.h>
  23. #include <rdma/ib_cache.h>
  24. #include "smc.h"
  25. #include "smc_clc.h"
  26. #include "smc_core.h"
  27. #include "smc_ib.h"
  28. #include "smc_wr.h"
  29. #include "smc_llc.h"
  30. #include "smc_cdc.h"
  31. #include "smc_close.h"
  32. #include "smc_ism.h"
  33. #include "smc_netlink.h"
  34. #include "smc_stats.h"
  35. #include "smc_tracepoint.h"
  36. #define SMC_LGR_NUM_INCR 256
  37. #define SMC_LGR_FREE_DELAY_SERV (600 * HZ)
  38. #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
  39. struct smc_lgr_list smc_lgr_list = { /* established link groups */
  40. .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
  41. .list = LIST_HEAD_INIT(smc_lgr_list.list),
  42. .num = 0,
  43. };
  44. static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
  45. static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
  46. static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
  47. struct smc_buf_desc *buf_desc);
  48. static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
  49. static void smc_link_down_work(struct work_struct *work);
  50. /* return head of link group list and its lock for a given link group */
  51. static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
  52. spinlock_t **lgr_lock)
  53. {
  54. if (lgr->is_smcd) {
  55. *lgr_lock = &lgr->smcd->lgr_lock;
  56. return &lgr->smcd->lgr_list;
  57. }
  58. *lgr_lock = &smc_lgr_list.lock;
  59. return &smc_lgr_list.list;
  60. }
  61. static void smc_ibdev_cnt_inc(struct smc_link *lnk)
  62. {
  63. atomic_inc(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  64. }
  65. static void smc_ibdev_cnt_dec(struct smc_link *lnk)
  66. {
  67. atomic_dec(&lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]);
  68. }
  69. static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
  70. {
  71. /* client link group creation always follows the server link group
  72. * creation. For the client, use a somewhat higher removal delay time;
  73. * otherwise there is a risk of out-of-sync link groups.
  74. */
  75. if (!lgr->freeing) {
  76. mod_delayed_work(system_wq, &lgr->free_work,
  77. (!lgr->is_smcd && lgr->role == SMC_CLNT) ?
  78. SMC_LGR_FREE_DELAY_CLNT :
  79. SMC_LGR_FREE_DELAY_SERV);
  80. }
  81. }
  82. /* Register connection's alert token in our lookup structure.
  83. * To use rbtrees we have to implement our own insert core.
  84. * Requires @conns_lock
  85. * @conn connection to register
  86. * Returns 0 on success, != 0 otherwise.
  87. */
  88. static void smc_lgr_add_alert_token(struct smc_connection *conn)
  89. {
  90. struct rb_node **link, *parent = NULL;
  91. u32 token = conn->alert_token_local;
  92. link = &conn->lgr->conns_all.rb_node;
  93. while (*link) {
  94. struct smc_connection *cur = rb_entry(*link,
  95. struct smc_connection, alert_node);
  96. parent = *link;
  97. if (cur->alert_token_local > token)
  98. link = &parent->rb_left;
  99. else
  100. link = &parent->rb_right;
  101. }
  102. /* Put the new node there */
  103. rb_link_node(&conn->alert_node, parent, link);
  104. rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
  105. }
  106. /* assign an SMC-R link to the connection */
  107. static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
  108. {
  109. enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
  110. SMC_LNK_ACTIVE;
  111. int i, j;
  112. /* do link balancing */
  113. conn->lnk = NULL; /* reset conn->lnk first */
  114. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  115. struct smc_link *lnk = &conn->lgr->lnk[i];
  116. if (lnk->state != expected || lnk->link_is_asym)
  117. continue;
  118. if (conn->lgr->role == SMC_CLNT) {
  119. conn->lnk = lnk; /* temporary, SMC server assigns link */
  120. break;
  121. }
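  /* on an odd connection count, try a later usable link first so that
   * connections alternate between the links of the group
   */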
  122. if (conn->lgr->conns_num % 2) {
  123. for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
  124. struct smc_link *lnk2;
  125. lnk2 = &conn->lgr->lnk[j];
  126. if (lnk2->state == expected &&
  127. !lnk2->link_is_asym) {
  128. conn->lnk = lnk2;
  129. break;
  130. }
  131. }
  132. }
  133. if (!conn->lnk)
  134. conn->lnk = lnk;
  135. break;
  136. }
  137. if (!conn->lnk)
  138. return SMC_CLC_DECL_NOACTLINK;
  139. atomic_inc(&conn->lnk->conn_cnt);
  140. return 0;
  141. }
  142. /* Register connection in link group by assigning an alert token
  143. * registered in a search tree.
  144. * Requires @conns_lock
  145. * Note that '0' is a reserved value and not assigned.
  146. */
  147. static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
  148. {
  149. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  150. static atomic_t nexttoken = ATOMIC_INIT(0);
  151. int rc;
  152. if (!conn->lgr->is_smcd) {
  153. rc = smcr_lgr_conn_assign_link(conn, first);
  154. if (rc) {
  155. conn->lgr = NULL;
  156. return rc;
  157. }
  158. }
  159. /* find a new alert_token_local value not yet used by some connection
  160. * in this link group
  161. */
  162. sock_hold(&smc->sk); /* sock_put in smc_lgr_unregister_conn() */
  163. while (!conn->alert_token_local) {
  164. conn->alert_token_local = atomic_inc_return(&nexttoken);
  165. if (smc_lgr_find_conn(conn->alert_token_local, conn->lgr))
  166. conn->alert_token_local = 0;
  167. }
  168. smc_lgr_add_alert_token(conn);
  169. conn->lgr->conns_num++;
  170. return 0;
  171. }
  172. /* Unregister connection and reset the alert token of the given connection.
  173. */
  174. static void __smc_lgr_unregister_conn(struct smc_connection *conn)
  175. {
  176. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  177. struct smc_link_group *lgr = conn->lgr;
  178. rb_erase(&conn->alert_node, &lgr->conns_all);
  179. if (conn->lnk)
  180. atomic_dec(&conn->lnk->conn_cnt);
  181. lgr->conns_num--;
  182. conn->alert_token_local = 0;
  183. sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
  184. }
  185. /* Unregister connection from lgr
  186. */
  187. static void smc_lgr_unregister_conn(struct smc_connection *conn)
  188. {
  189. struct smc_link_group *lgr = conn->lgr;
  190. if (!smc_conn_lgr_valid(conn))
  191. return;
  192. write_lock_bh(&lgr->conns_lock);
  193. if (conn->alert_token_local) {
  194. __smc_lgr_unregister_conn(conn);
  195. }
  196. write_unlock_bh(&lgr->conns_lock);
  197. }
  198. static void smc_lgr_buf_list_add(struct smc_link_group *lgr,
  199. bool is_rmb,
  200. struct list_head *buf_list,
  201. struct smc_buf_desc *buf_desc)
  202. {
  203. list_add(&buf_desc->list, buf_list);
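  /* for SMC-D, the registered buffer also reserves room for the CDC
   * message, so include it in the RMB accounting
   */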
  204. if (is_rmb) {
  205. lgr->alloc_rmbs += buf_desc->len;
  206. lgr->alloc_rmbs +=
  207. lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
  208. } else {
  209. lgr->alloc_sndbufs += buf_desc->len;
  210. }
  211. }
  212. static void smc_lgr_buf_list_del(struct smc_link_group *lgr,
  213. bool is_rmb,
  214. struct smc_buf_desc *buf_desc)
  215. {
  216. list_del(&buf_desc->list);
  217. if (is_rmb) {
  218. lgr->alloc_rmbs -= buf_desc->len;
  219. lgr->alloc_rmbs -=
  220. lgr->is_smcd ? sizeof(struct smcd_cdc_msg) : 0;
  221. } else {
  222. lgr->alloc_sndbufs -= buf_desc->len;
  223. }
  224. }
  225. int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb)
  226. {
  227. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  228. char hostname[SMC_MAX_HOSTNAME_LEN + 1];
  229. char smc_seid[SMC_MAX_EID_LEN + 1];
  230. struct nlattr *attrs;
  231. u8 *seid = NULL;
  232. u8 *host = NULL;
  233. void *nlh;
  234. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  235. &smc_gen_nl_family, NLM_F_MULTI,
  236. SMC_NETLINK_GET_SYS_INFO);
  237. if (!nlh)
  238. goto errmsg;
  239. if (cb_ctx->pos[0])
  240. goto errout;
  241. attrs = nla_nest_start(skb, SMC_GEN_SYS_INFO);
  242. if (!attrs)
  243. goto errout;
  244. if (nla_put_u8(skb, SMC_NLA_SYS_VER, SMC_V2))
  245. goto errattr;
  246. if (nla_put_u8(skb, SMC_NLA_SYS_REL, SMC_RELEASE))
  247. goto errattr;
  248. if (nla_put_u8(skb, SMC_NLA_SYS_IS_ISM_V2, smc_ism_is_v2_capable()))
  249. goto errattr;
  250. if (nla_put_u8(skb, SMC_NLA_SYS_IS_SMCR_V2, true))
  251. goto errattr;
  252. smc_clc_get_hostname(&host);
  253. if (host) {
  254. memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN);
  255. hostname[SMC_MAX_HOSTNAME_LEN] = 0;
  256. if (nla_put_string(skb, SMC_NLA_SYS_LOCAL_HOST, hostname))
  257. goto errattr;
  258. }
  259. if (smc_ism_is_v2_capable()) {
  260. smc_ism_get_system_eid(&seid);
  261. memcpy(smc_seid, seid, SMC_MAX_EID_LEN);
  262. smc_seid[SMC_MAX_EID_LEN] = 0;
  263. if (nla_put_string(skb, SMC_NLA_SYS_SEID, smc_seid))
  264. goto errattr;
  265. }
  266. nla_nest_end(skb, attrs);
  267. genlmsg_end(skb, nlh);
  268. cb_ctx->pos[0] = 1;
  269. return skb->len;
  270. errattr:
  271. nla_nest_cancel(skb, attrs);
  272. errout:
  273. genlmsg_cancel(skb, nlh);
  274. errmsg:
  275. return skb->len;
  276. }
  277. /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */
  278. static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr,
  279. struct sk_buff *skb,
  280. struct netlink_callback *cb,
  281. struct nlattr *v2_attrs)
  282. {
  283. char smc_host[SMC_MAX_HOSTNAME_LEN + 1];
  284. char smc_eid[SMC_MAX_EID_LEN + 1];
  285. if (nla_put_u8(skb, SMC_NLA_LGR_V2_VER, lgr->smc_version))
  286. goto errv2attr;
  287. if (nla_put_u8(skb, SMC_NLA_LGR_V2_REL, lgr->peer_smc_release))
  288. goto errv2attr;
  289. if (nla_put_u8(skb, SMC_NLA_LGR_V2_OS, lgr->peer_os))
  290. goto errv2attr;
  291. memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN);
  292. smc_host[SMC_MAX_HOSTNAME_LEN] = 0;
  293. if (nla_put_string(skb, SMC_NLA_LGR_V2_PEER_HOST, smc_host))
  294. goto errv2attr;
  295. memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN);
  296. smc_eid[SMC_MAX_EID_LEN] = 0;
  297. if (nla_put_string(skb, SMC_NLA_LGR_V2_NEG_EID, smc_eid))
  298. goto errv2attr;
  299. nla_nest_end(skb, v2_attrs);
  300. return 0;
  301. errv2attr:
  302. nla_nest_cancel(skb, v2_attrs);
  303. return -EMSGSIZE;
  304. }
  305. static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr,
  306. struct sk_buff *skb,
  307. struct netlink_callback *cb)
  308. {
  309. struct nlattr *v2_attrs;
  310. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2);
  311. if (!v2_attrs)
  312. goto errattr;
  313. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_DIRECT, !lgr->uses_gateway))
  314. goto errv2attr;
  315. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_CONNS, lgr->max_conns))
  316. goto errv2attr;
  317. if (nla_put_u8(skb, SMC_NLA_LGR_R_V2_MAX_LINKS, lgr->max_links))
  318. goto errv2attr;
  319. nla_nest_end(skb, v2_attrs);
  320. return 0;
  321. errv2attr:
  322. nla_nest_cancel(skb, v2_attrs);
  323. errattr:
  324. return -EMSGSIZE;
  325. }
  326. static int smc_nl_fill_lgr(struct smc_link_group *lgr,
  327. struct sk_buff *skb,
  328. struct netlink_callback *cb)
  329. {
  330. char smc_target[SMC_MAX_PNETID_LEN + 1];
  331. struct nlattr *attrs, *v2_attrs;
  332. attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCR);
  333. if (!attrs)
  334. goto errout;
  335. if (nla_put_u32(skb, SMC_NLA_LGR_R_ID, *((u32 *)&lgr->id)))
  336. goto errattr;
  337. if (nla_put_u32(skb, SMC_NLA_LGR_R_CONNS_NUM, lgr->conns_num))
  338. goto errattr;
  339. if (nla_put_u8(skb, SMC_NLA_LGR_R_ROLE, lgr->role))
  340. goto errattr;
  341. if (nla_put_u8(skb, SMC_NLA_LGR_R_TYPE, lgr->type))
  342. goto errattr;
  343. if (nla_put_u8(skb, SMC_NLA_LGR_R_BUF_TYPE, lgr->buf_type))
  344. goto errattr;
  345. if (nla_put_u8(skb, SMC_NLA_LGR_R_VLAN_ID, lgr->vlan_id))
  346. goto errattr;
  347. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_R_NET_COOKIE,
  348. lgr->net->net_cookie, SMC_NLA_LGR_R_PAD))
  349. goto errattr;
  350. memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN);
  351. smc_target[SMC_MAX_PNETID_LEN] = 0;
  352. if (nla_put_string(skb, SMC_NLA_LGR_R_PNETID, smc_target))
  353. goto errattr;
  354. if (nla_put_uint(skb, SMC_NLA_LGR_R_SNDBUF_ALLOC, lgr->alloc_sndbufs))
  355. goto errattr;
  356. if (nla_put_uint(skb, SMC_NLA_LGR_R_RMB_ALLOC, lgr->alloc_rmbs))
  357. goto errattr;
  358. if (lgr->smc_version > SMC_V1) {
  359. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_R_V2_COMMON);
  360. if (!v2_attrs)
  361. goto errattr;
  362. if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
  363. goto errattr;
  364. if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb))
  365. goto errattr;
  366. }
  367. nla_nest_end(skb, attrs);
  368. return 0;
  369. errattr:
  370. nla_nest_cancel(skb, attrs);
  371. errout:
  372. return -EMSGSIZE;
  373. }
  374. static int smc_nl_fill_lgr_link(struct smc_link_group *lgr,
  375. struct smc_link *link,
  376. struct sk_buff *skb,
  377. struct netlink_callback *cb)
  378. {
  379. char smc_ibname[IB_DEVICE_NAME_MAX];
  380. u8 smc_gid_target[41];
  381. struct nlattr *attrs;
  382. u32 link_uid = 0;
  383. void *nlh;
  384. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  385. &smc_gen_nl_family, NLM_F_MULTI,
  386. SMC_NETLINK_GET_LINK_SMCR);
  387. if (!nlh)
  388. goto errmsg;
  389. attrs = nla_nest_start(skb, SMC_GEN_LINK_SMCR);
  390. if (!attrs)
  391. goto errout;
  392. if (nla_put_u8(skb, SMC_NLA_LINK_ID, link->link_id))
  393. goto errattr;
  394. if (nla_put_u32(skb, SMC_NLA_LINK_STATE, link->state))
  395. goto errattr;
  396. if (nla_put_u32(skb, SMC_NLA_LINK_CONN_CNT,
  397. atomic_read(&link->conn_cnt)))
  398. goto errattr;
  399. if (nla_put_u8(skb, SMC_NLA_LINK_IB_PORT, link->ibport))
  400. goto errattr;
  401. if (nla_put_u32(skb, SMC_NLA_LINK_NET_DEV, link->ndev_ifidx))
  402. goto errattr;
  403. snprintf(smc_ibname, sizeof(smc_ibname), "%s", link->ibname);
  404. if (nla_put_string(skb, SMC_NLA_LINK_IB_DEV, smc_ibname))
  405. goto errattr;
  406. memcpy(&link_uid, link->link_uid, sizeof(link_uid));
  407. if (nla_put_u32(skb, SMC_NLA_LINK_UID, link_uid))
  408. goto errattr;
  409. memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid));
  410. if (nla_put_u32(skb, SMC_NLA_LINK_PEER_UID, link_uid))
  411. goto errattr;
  412. memset(smc_gid_target, 0, sizeof(smc_gid_target));
  413. smc_gid_be16_convert(smc_gid_target, link->gid);
  414. if (nla_put_string(skb, SMC_NLA_LINK_GID, smc_gid_target))
  415. goto errattr;
  416. memset(smc_gid_target, 0, sizeof(smc_gid_target));
  417. smc_gid_be16_convert(smc_gid_target, link->peer_gid);
  418. if (nla_put_string(skb, SMC_NLA_LINK_PEER_GID, smc_gid_target))
  419. goto errattr;
  420. nla_nest_end(skb, attrs);
  421. genlmsg_end(skb, nlh);
  422. return 0;
  423. errattr:
  424. nla_nest_cancel(skb, attrs);
  425. errout:
  426. genlmsg_cancel(skb, nlh);
  427. errmsg:
  428. return -EMSGSIZE;
  429. }
  430. static int smc_nl_handle_lgr(struct smc_link_group *lgr,
  431. struct sk_buff *skb,
  432. struct netlink_callback *cb,
  433. bool list_links)
  434. {
  435. void *nlh;
  436. int i;
  437. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  438. &smc_gen_nl_family, NLM_F_MULTI,
  439. SMC_NETLINK_GET_LGR_SMCR);
  440. if (!nlh)
  441. goto errmsg;
  442. if (smc_nl_fill_lgr(lgr, skb, cb))
  443. goto errout;
  444. genlmsg_end(skb, nlh);
  445. if (!list_links)
  446. goto out;
  447. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  448. if (!smc_link_usable(&lgr->lnk[i]))
  449. continue;
  450. if (smc_nl_fill_lgr_link(lgr, &lgr->lnk[i], skb, cb))
  451. goto errout;
  452. }
  453. out:
  454. return 0;
  455. errout:
  456. genlmsg_cancel(skb, nlh);
  457. errmsg:
  458. return -EMSGSIZE;
  459. }
  460. static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr,
  461. struct sk_buff *skb,
  462. struct netlink_callback *cb,
  463. bool list_links)
  464. {
  465. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  466. struct smc_link_group *lgr;
  467. int snum = cb_ctx->pos[0];
  468. int num = 0;
  469. spin_lock_bh(&smc_lgr->lock);
  470. list_for_each_entry(lgr, &smc_lgr->list, list) {
  471. if (num < snum)
  472. goto next;
  473. if (smc_nl_handle_lgr(lgr, skb, cb, list_links))
  474. goto errout;
  475. next:
  476. num++;
  477. }
  478. errout:
  479. spin_unlock_bh(&smc_lgr->lock);
  480. cb_ctx->pos[0] = num;
  481. }
  482. static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
  483. struct sk_buff *skb,
  484. struct netlink_callback *cb)
  485. {
  486. char smc_pnet[SMC_MAX_PNETID_LEN + 1];
  487. struct smcd_dev *smcd = lgr->smcd;
  488. struct smcd_gid smcd_gid;
  489. struct nlattr *attrs;
  490. void *nlh;
  491. nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
  492. &smc_gen_nl_family, NLM_F_MULTI,
  493. SMC_NETLINK_GET_LGR_SMCD);
  494. if (!nlh)
  495. goto errmsg;
  496. attrs = nla_nest_start(skb, SMC_GEN_LGR_SMCD);
  497. if (!attrs)
  498. goto errout;
  499. if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
  500. goto errattr;
  501. smcd->ops->get_local_gid(smcd, &smcd_gid);
  502. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
  503. smcd_gid.gid, SMC_NLA_LGR_D_PAD))
  504. goto errattr;
  505. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID,
  506. smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD))
  507. goto errattr;
  508. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid,
  509. SMC_NLA_LGR_D_PAD))
  510. goto errattr;
  511. if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID,
  512. lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD))
  513. goto errattr;
  514. if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
  515. goto errattr;
  516. if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
  517. goto errattr;
  518. if (nla_put_u32(skb, SMC_NLA_LGR_D_CHID, smc_ism_get_chid(lgr->smcd)))
  519. goto errattr;
  520. if (nla_put_uint(skb, SMC_NLA_LGR_D_SNDBUF_ALLOC, lgr->alloc_sndbufs))
  521. goto errattr;
  522. if (nla_put_uint(skb, SMC_NLA_LGR_D_DMB_ALLOC, lgr->alloc_rmbs))
  523. goto errattr;
  524. memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN);
  525. smc_pnet[SMC_MAX_PNETID_LEN] = 0;
  526. if (nla_put_string(skb, SMC_NLA_LGR_D_PNETID, smc_pnet))
  527. goto errattr;
  528. if (lgr->smc_version > SMC_V1) {
  529. struct nlattr *v2_attrs;
  530. v2_attrs = nla_nest_start(skb, SMC_NLA_LGR_D_V2_COMMON);
  531. if (!v2_attrs)
  532. goto errattr;
  533. if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs))
  534. goto errattr;
  535. }
  536. nla_nest_end(skb, attrs);
  537. genlmsg_end(skb, nlh);
  538. return 0;
  539. errattr:
  540. nla_nest_cancel(skb, attrs);
  541. errout:
  542. genlmsg_cancel(skb, nlh);
  543. errmsg:
  544. return -EMSGSIZE;
  545. }
  546. static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev,
  547. struct sk_buff *skb,
  548. struct netlink_callback *cb)
  549. {
  550. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  551. struct smc_link_group *lgr;
  552. int snum = cb_ctx->pos[1];
  553. int rc = 0, num = 0;
  554. spin_lock_bh(&dev->lgr_lock);
  555. list_for_each_entry(lgr, &dev->lgr_list, list) {
  556. if (!lgr->is_smcd)
  557. continue;
  558. if (num < snum)
  559. goto next;
  560. rc = smc_nl_fill_smcd_lgr(lgr, skb, cb);
  561. if (rc)
  562. goto errout;
  563. next:
  564. num++;
  565. }
  566. errout:
  567. spin_unlock_bh(&dev->lgr_lock);
  568. cb_ctx->pos[1] = num;
  569. return rc;
  570. }
  571. static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list,
  572. struct sk_buff *skb,
  573. struct netlink_callback *cb)
  574. {
  575. struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb);
  576. struct smcd_dev *smcd_dev;
  577. int snum = cb_ctx->pos[0];
  578. int rc = 0, num = 0;
  579. mutex_lock(&dev_list->mutex);
  580. list_for_each_entry(smcd_dev, &dev_list->list, list) {
  581. if (list_empty(&smcd_dev->lgr_list))
  582. continue;
  583. if (num < snum)
  584. goto next;
  585. rc = smc_nl_handle_smcd_lgr(smcd_dev, skb, cb);
  586. if (rc)
  587. goto errout;
  588. next:
  589. num++;
  590. }
  591. errout:
  592. mutex_unlock(&dev_list->mutex);
  593. cb_ctx->pos[0] = num;
  594. return rc;
  595. }
  596. int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
  597. {
  598. bool list_links = false;
  599. smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
  600. return skb->len;
  601. }
  602. int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb)
  603. {
  604. bool list_links = true;
  605. smc_nl_fill_lgr_list(&smc_lgr_list, skb, cb, list_links);
  606. return skb->len;
  607. }
  608. int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb)
  609. {
  610. smc_nl_fill_smcd_dev(&smcd_dev_list, skb, cb);
  611. return skb->len;
  612. }
  613. void smc_lgr_cleanup_early(struct smc_link_group *lgr)
  614. {
  615. spinlock_t *lgr_lock;
  616. if (!lgr)
  617. return;
  618. smc_lgr_list_head(lgr, &lgr_lock);
  619. spin_lock_bh(lgr_lock);
  620. /* do not use this link group for new connections */
  621. if (!list_empty(&lgr->list))
  622. list_del_init(&lgr->list);
  623. spin_unlock_bh(lgr_lock);
  624. __smc_lgr_terminate(lgr, true);
  625. }
  626. static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
  627. {
  628. int i;
  629. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  630. struct smc_link *lnk = &lgr->lnk[i];
  631. if (smc_link_sendable(lnk))
  632. lnk->state = SMC_LNK_INACTIVE;
  633. }
  634. wake_up_all(&lgr->llc_msg_waiter);
  635. wake_up_all(&lgr->llc_flow_waiter);
  636. }
  637. static void smc_lgr_free(struct smc_link_group *lgr);
  638. static void smc_lgr_free_work(struct work_struct *work)
  639. {
  640. struct smc_link_group *lgr = container_of(to_delayed_work(work),
  641. struct smc_link_group,
  642. free_work);
  643. spinlock_t *lgr_lock;
  644. bool conns;
  645. smc_lgr_list_head(lgr, &lgr_lock);
  646. spin_lock_bh(lgr_lock);
  647. if (lgr->freeing) {
  648. spin_unlock_bh(lgr_lock);
  649. return;
  650. }
  651. read_lock_bh(&lgr->conns_lock);
  652. conns = RB_EMPTY_ROOT(&lgr->conns_all);
  653. read_unlock_bh(&lgr->conns_lock);
  654. if (!conns) { /* number of lgr connections is no longer zero */
  655. spin_unlock_bh(lgr_lock);
  656. return;
  657. }
  658. list_del_init(&lgr->list); /* remove from smc_lgr_list */
  659. lgr->freeing = 1; /* this instance does the freeing, no new schedule */
  660. spin_unlock_bh(lgr_lock);
  661. cancel_delayed_work(&lgr->free_work);
  662. if (!lgr->is_smcd && !lgr->terminating)
  663. smc_llc_send_link_delete_all(lgr, true,
  664. SMC_LLC_DEL_PROG_INIT_TERM);
  665. if (lgr->is_smcd && !lgr->terminating)
  666. smc_ism_signal_shutdown(lgr);
  667. if (!lgr->is_smcd)
  668. smcr_lgr_link_deactivate_all(lgr);
  669. smc_lgr_free(lgr);
  670. }
  671. static void smc_lgr_terminate_work(struct work_struct *work)
  672. {
  673. struct smc_link_group *lgr = container_of(work, struct smc_link_group,
  674. terminate_work);
  675. __smc_lgr_terminate(lgr, true);
  676. }
  677. /* return next unique link id for the lgr */
  678. static u8 smcr_next_link_id(struct smc_link_group *lgr)
  679. {
  680. u8 link_id;
  681. int i;
  682. while (1) {
  683. again:
  684. link_id = ++lgr->next_link_id;
  685. if (!link_id) /* skip zero as link_id */
  686. link_id = ++lgr->next_link_id;
  687. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  688. if (smc_link_usable(&lgr->lnk[i]) &&
  689. lgr->lnk[i].link_id == link_id)
  690. goto again;
  691. }
  692. break;
  693. }
  694. return link_id;
  695. }
  696. static void smcr_copy_dev_info_to_link(struct smc_link *link)
  697. {
  698. struct smc_ib_device *smcibdev = link->smcibdev;
  699. snprintf(link->ibname, sizeof(link->ibname), "%s",
  700. smcibdev->ibdev->name);
  701. link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1];
  702. }
  703. int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
  704. u8 link_idx, struct smc_init_info *ini)
  705. {
  706. struct smc_ib_device *smcibdev;
  707. u8 rndvec[3];
  708. int rc;
  709. if (lgr->smc_version == SMC_V2) {
  710. lnk->smcibdev = ini->smcrv2.ib_dev_v2;
  711. lnk->ibport = ini->smcrv2.ib_port_v2;
  712. } else {
  713. lnk->smcibdev = ini->ib_dev;
  714. lnk->ibport = ini->ib_port;
  715. }
  716. get_device(&lnk->smcibdev->ibdev->dev);
  717. atomic_inc(&lnk->smcibdev->lnk_cnt);
  718. refcount_set(&lnk->refcnt, 1); /* link refcnt is set to 1 */
  719. lnk->clearing = 0;
  720. lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu;
  721. lnk->link_id = smcr_next_link_id(lgr);
  722. lnk->lgr = lgr;
  723. smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */
  724. lnk->link_idx = link_idx;
  725. lnk->wr_rx_id_compl = 0;
  726. smc_ibdev_cnt_inc(lnk);
  727. smcr_copy_dev_info_to_link(lnk);
  728. atomic_set(&lnk->conn_cnt, 0);
  729. smc_llc_link_set_uid(lnk);
  730. INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
  731. if (!lnk->smcibdev->initialized) {
  732. rc = (int)smc_ib_setup_per_ibdev(lnk->smcibdev);
  733. if (rc)
  734. goto out;
  735. }
  736. get_random_bytes(rndvec, sizeof(rndvec));
  737. lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
  738. (rndvec[2] << 16);
  739. rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
  740. ini->vlan_id, lnk->gid, &lnk->sgid_index,
  741. lgr->smc_version == SMC_V2 ?
  742. &ini->smcrv2 : NULL);
  743. if (rc)
  744. goto out;
  745. rc = smc_llc_link_init(lnk);
  746. if (rc)
  747. goto out;
  748. rc = smc_wr_alloc_link_mem(lnk);
  749. if (rc)
  750. goto clear_llc_lnk;
  751. rc = smc_ib_create_protection_domain(lnk);
  752. if (rc)
  753. goto free_link_mem;
  754. rc = smc_ib_create_queue_pair(lnk);
  755. if (rc)
  756. goto dealloc_pd;
  757. rc = smc_wr_create_link(lnk);
  758. if (rc)
  759. goto destroy_qp;
  760. lnk->state = SMC_LNK_ACTIVATING;
  761. return 0;
  762. destroy_qp:
  763. smc_ib_destroy_queue_pair(lnk);
  764. dealloc_pd:
  765. smc_ib_dealloc_protection_domain(lnk);
  766. free_link_mem:
  767. smc_wr_free_link_mem(lnk);
  768. clear_llc_lnk:
  769. smc_llc_link_clear(lnk, false);
  770. out:
  771. smc_ibdev_cnt_dec(lnk);
  772. put_device(&lnk->smcibdev->ibdev->dev);
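  /* save smcibdev before the link is wiped; the lnk_cnt accounting
   * below still needs it
   */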
  773. smcibdev = lnk->smcibdev;
  774. memset(lnk, 0, sizeof(struct smc_link));
  775. lnk->state = SMC_LNK_UNUSED;
  776. if (!atomic_dec_return(&smcibdev->lnk_cnt))
  777. wake_up(&smcibdev->lnks_deleted);
  778. smc_lgr_put(lgr); /* lgr_hold above */
  779. return rc;
  780. }
  781. /* create a new SMC link group */
  782. static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
  783. {
  784. struct smc_link_group *lgr;
  785. struct list_head *lgr_list;
  786. struct smcd_dev *smcd;
  787. struct smc_link *lnk;
  788. spinlock_t *lgr_lock;
  789. u8 link_idx;
  790. int rc = 0;
  791. int i;
  792. if (ini->is_smcd && ini->vlan_id) {
  793. if (smc_ism_get_vlan(ini->ism_dev[ini->ism_selected],
  794. ini->vlan_id)) {
  795. rc = SMC_CLC_DECL_ISMVLANERR;
  796. goto out;
  797. }
  798. }
  799. lgr = kzalloc(sizeof(*lgr), GFP_KERNEL);
  800. if (!lgr) {
  801. rc = SMC_CLC_DECL_MEM;
  802. goto ism_put_vlan;
  803. }
  804. lgr->tx_wq = alloc_workqueue("smc_tx_wq-%*phN", 0, 0,
  805. SMC_LGR_ID_SIZE, &lgr->id);
  806. if (!lgr->tx_wq) {
  807. rc = -ENOMEM;
  808. goto free_lgr;
  809. }
  810. lgr->is_smcd = ini->is_smcd;
  811. lgr->sync_err = 0;
  812. lgr->terminating = 0;
  813. lgr->freeing = 0;
  814. lgr->vlan_id = ini->vlan_id;
  815. refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
  816. init_rwsem(&lgr->sndbufs_lock);
  817. init_rwsem(&lgr->rmbs_lock);
  818. rwlock_init(&lgr->conns_lock);
  819. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  820. INIT_LIST_HEAD(&lgr->sndbufs[i]);
  821. INIT_LIST_HEAD(&lgr->rmbs[i]);
  822. }
  823. lgr->next_link_id = 0;
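  /* derive the lgr id from the global counter, which is bumped by
   * SMC_LGR_NUM_INCR for every new link group
   */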
  824. smc_lgr_list.num += SMC_LGR_NUM_INCR;
  825. memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
  826. INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
  827. INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work);
  828. lgr->conns_all = RB_ROOT;
  829. if (ini->is_smcd) {
  830. /* SMC-D specific settings */
  831. smcd = ini->ism_dev[ini->ism_selected];
  832. get_device(smcd->ops->get_dev(smcd));
  833. lgr->peer_gid.gid =
  834. ini->ism_peer_gid[ini->ism_selected].gid;
  835. lgr->peer_gid.gid_ext =
  836. ini->ism_peer_gid[ini->ism_selected].gid_ext;
  837. lgr->smcd = ini->ism_dev[ini->ism_selected];
  838. lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
  839. lgr_lock = &lgr->smcd->lgr_lock;
  840. lgr->smc_version = ini->smcd_version;
  841. lgr->peer_shutdown = 0;
  842. atomic_inc(&ini->ism_dev[ini->ism_selected]->lgr_cnt);
  843. } else {
  844. /* SMC-R specific settings */
  845. struct smc_ib_device *ibdev;
  846. int ibport;
  847. lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
  848. lgr->smc_version = ini->smcr_version;
  849. memcpy(lgr->peer_systemid, ini->peer_systemid,
  850. SMC_SYSTEMID_LEN);
  851. if (lgr->smc_version == SMC_V2) {
  852. ibdev = ini->smcrv2.ib_dev_v2;
  853. ibport = ini->smcrv2.ib_port_v2;
  854. lgr->saddr = ini->smcrv2.saddr;
  855. lgr->uses_gateway = ini->smcrv2.uses_gateway;
  856. memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac,
  857. ETH_ALEN);
  858. lgr->max_conns = ini->max_conns;
  859. lgr->max_links = ini->max_links;
  860. } else {
  861. ibdev = ini->ib_dev;
  862. ibport = ini->ib_port;
  863. lgr->max_conns = SMC_CONN_PER_LGR_MAX;
  864. lgr->max_links = SMC_LINKS_ADD_LNK_MAX;
  865. }
  866. memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1],
  867. SMC_MAX_PNETID_LEN);
  868. rc = smc_wr_alloc_lgr_mem(lgr);
  869. if (rc)
  870. goto free_wq;
  871. smc_llc_lgr_init(lgr, smc);
  872. link_idx = SMC_SINGLE_LINK;
  873. lnk = &lgr->lnk[link_idx];
  874. rc = smcr_link_init(lgr, lnk, link_idx, ini);
  875. if (rc) {
  876. smc_wr_free_lgr_mem(lgr);
  877. goto free_wq;
  878. }
  879. lgr->net = smc_ib_net(lnk->smcibdev);
  880. lgr_list = &smc_lgr_list.list;
  881. lgr_lock = &smc_lgr_list.lock;
  882. lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type;
  883. atomic_inc(&lgr_cnt);
  884. }
  885. smc->conn.lgr = lgr;
  886. spin_lock_bh(lgr_lock);
  887. list_add_tail(&lgr->list, lgr_list);
  888. spin_unlock_bh(lgr_lock);
  889. return 0;
  890. free_wq:
  891. destroy_workqueue(lgr->tx_wq);
  892. free_lgr:
  893. kfree(lgr);
  894. ism_put_vlan:
  895. if (ini->is_smcd && ini->vlan_id)
  896. smc_ism_put_vlan(ini->ism_dev[ini->ism_selected], ini->vlan_id);
  897. out:
  898. if (rc < 0) {
  899. if (rc == -ENOMEM)
  900. rc = SMC_CLC_DECL_MEM;
  901. else
  902. rc = SMC_CLC_DECL_INTERR;
  903. }
  904. return rc;
  905. }
  906. static int smc_write_space(struct smc_connection *conn)
  907. {
  908. int buffer_len = conn->peer_rmbe_size;
  909. union smc_host_cursor prod;
  910. union smc_host_cursor cons;
  911. int space;
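  /* writable space = peer RMB size minus data still in flight
   * (distance from cons to prod)
   */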
  912. smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
  913. smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
  914. /* determine rx_buf space */
  915. space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
  916. return space;
  917. }
  918. static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
  919. struct smc_wr_buf *wr_buf)
  920. {
  921. struct smc_connection *conn = &smc->conn;
  922. union smc_host_cursor cons, fin;
  923. int rc = 0;
  924. int diff;
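  /* rewind the sent cursor to the last confirmed position; tx_rdma_writes()
   * then re-sends the unconfirmed data on the new link
   */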
  925. smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
  926. smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
  927. /* set prod cursor to old state, enforce tx_rdma_writes() */
  928. smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
  929. smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
  930. if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
  931. /* cons cursor advanced more than fin, and prod was set
  932. * to fin above, so now prod is smaller than cons. Fix that.
  933. */
  934. diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
  935. smc_curs_add(conn->sndbuf_desc->len,
  936. &conn->tx_curs_sent, diff);
  937. smc_curs_add(conn->sndbuf_desc->len,
  938. &conn->tx_curs_fin, diff);
  939. smp_mb__before_atomic();
  940. atomic_add(diff, &conn->sndbuf_space);
  941. smp_mb__after_atomic();
  942. smc_curs_add(conn->peer_rmbe_size,
  943. &conn->local_tx_ctrl.prod, diff);
  944. smc_curs_add(conn->peer_rmbe_size,
  945. &conn->local_tx_ctrl_fin, diff);
  946. }
  947. /* recalculate, value is used by tx_rdma_writes() */
  948. atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
  949. if (smc->sk.sk_state != SMC_INIT &&
  950. smc->sk.sk_state != SMC_CLOSED) {
  951. rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
  952. if (!rc) {
  953. queue_delayed_work(conn->lgr->tx_wq, &conn->tx_work, 0);
  954. smc->sk.sk_data_ready(&smc->sk);
  955. }
  956. } else {
  957. smc_wr_tx_put_slot(conn->lnk,
  958. (struct smc_wr_tx_pend_priv *)pend);
  959. }
  960. return rc;
  961. }
  962. void smc_switch_link_and_count(struct smc_connection *conn,
  963. struct smc_link *to_lnk)
  964. {
  965. atomic_dec(&conn->lnk->conn_cnt);
  966. /* link_hold in smc_conn_create() */
  967. smcr_link_put(conn->lnk);
  968. conn->lnk = to_lnk;
  969. atomic_inc(&conn->lnk->conn_cnt);
  970. /* link_put in smc_conn_free() */
  971. smcr_link_hold(conn->lnk);
  972. }
  973. struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
  974. struct smc_link *from_lnk, bool is_dev_err)
  975. {
  976. struct smc_link *to_lnk = NULL;
  977. struct smc_cdc_tx_pend *pend;
  978. struct smc_connection *conn;
  979. struct smc_wr_buf *wr_buf;
  980. struct smc_sock *smc;
  981. struct rb_node *node;
  982. int i, rc = 0;
  983. /* link is inactive, wake up tx waiters */
  984. smc_wr_wakeup_tx_wait(from_lnk);
  985. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  986. if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx)
  987. continue;
  988. if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
  989. from_lnk->ibport == lgr->lnk[i].ibport) {
  990. continue;
  991. }
  992. to_lnk = &lgr->lnk[i];
  993. break;
  994. }
  995. if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) {
  996. smc_lgr_terminate_sched(lgr);
  997. return NULL;
  998. }
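  /* walk all connections of the old link; restart after each switched
   * connection because conns_lock is dropped while the CDC send slot
   * is reserved and the cursors are switched
   */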
  999. again:
  1000. read_lock_bh(&lgr->conns_lock);
  1001. for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
  1002. conn = rb_entry(node, struct smc_connection, alert_node);
  1003. if (conn->lnk != from_lnk)
  1004. continue;
  1005. smc = container_of(conn, struct smc_sock, conn);
  1006. /* conn->lnk not yet set in SMC_INIT state */
  1007. if (smc->sk.sk_state == SMC_INIT)
  1008. continue;
  1009. if (smc->sk.sk_state == SMC_CLOSED ||
  1010. smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
  1011. smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
  1012. smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
  1013. smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
  1014. smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
  1015. smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
  1016. smc->sk.sk_state == SMC_PEERABORTWAIT ||
  1017. smc->sk.sk_state == SMC_PROCESSABORT) {
  1018. spin_lock_bh(&conn->send_lock);
  1019. smc_switch_link_and_count(conn, to_lnk);
  1020. spin_unlock_bh(&conn->send_lock);
  1021. continue;
  1022. }
  1023. sock_hold(&smc->sk);
  1024. read_unlock_bh(&lgr->conns_lock);
  1025. /* pre-fetch buffer outside of send_lock, might sleep */
  1026. rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
  1027. if (rc)
  1028. goto err_out;
  1029. /* avoid race with smcr_tx_sndbuf_nonempty() */
  1030. spin_lock_bh(&conn->send_lock);
  1031. smc_switch_link_and_count(conn, to_lnk);
  1032. rc = smc_switch_cursor(smc, pend, wr_buf);
  1033. spin_unlock_bh(&conn->send_lock);
  1034. sock_put(&smc->sk);
  1035. if (rc)
  1036. goto err_out;
  1037. goto again;
  1038. }
  1039. read_unlock_bh(&lgr->conns_lock);
  1040. smc_wr_tx_link_put(to_lnk);
  1041. return to_lnk;
  1042. err_out:
  1043. smcr_link_down_cond_sched(to_lnk);
  1044. smc_wr_tx_link_put(to_lnk);
  1045. return NULL;
  1046. }
  1047. static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
  1048. struct smc_link_group *lgr)
  1049. {
  1050. struct rw_semaphore *lock; /* lock buffer list */
  1051. int rc;
  1052. if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
  1053. /* unregister rmb with peer */
  1054. rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
  1055. if (!rc) {
  1056. /* protect against smc_llc_cli_rkey_exchange() */
  1057. down_read(&lgr->llc_conf_mutex);
  1058. smc_llc_do_delete_rkey(lgr, buf_desc);
  1059. buf_desc->is_conf_rkey = false;
  1060. up_read(&lgr->llc_conf_mutex);
  1061. smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
  1062. }
  1063. }
  1064. if (buf_desc->is_reg_err) {
  1065. /* buf registration failed, reuse not possible */
  1066. lock = is_rmb ? &lgr->rmbs_lock :
  1067. &lgr->sndbufs_lock;
  1068. down_write(lock);
  1069. smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
  1070. up_write(lock);
  1071. smc_buf_free(lgr, is_rmb, buf_desc);
  1072. } else {
  1073. /* memzero_explicit provides potential memory barrier semantics */
  1074. memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
  1075. WRITE_ONCE(buf_desc->used, 0);
  1076. }
  1077. }
  1078. static void smcd_buf_detach(struct smc_connection *conn)
  1079. {
  1080. struct smcd_dev *smcd = conn->lgr->smcd;
  1081. u64 peer_token = conn->peer_token;
  1082. if (!conn->sndbuf_desc)
  1083. return;
  1084. smc_ism_detach_dmb(smcd, peer_token);
  1085. kfree(conn->sndbuf_desc);
  1086. conn->sndbuf_desc = NULL;
  1087. }
  1088. static void smc_buf_unuse(struct smc_connection *conn,
  1089. struct smc_link_group *lgr)
  1090. {
  1091. struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
  1092. bool is_smcd = lgr->is_smcd;
  1093. int bufsize;
  1094. if (conn->sndbuf_desc) {
  1095. bufsize = conn->sndbuf_desc->len;
  1096. if (!is_smcd && conn->sndbuf_desc->is_vm) {
  1097. smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
  1098. } else {
  1099. memzero_explicit(conn->sndbuf_desc->cpu_addr, bufsize);
  1100. WRITE_ONCE(conn->sndbuf_desc->used, 0);
  1101. }
  1102. SMC_STAT_RMB_SIZE(smc, is_smcd, false, false, bufsize);
  1103. }
  1104. if (conn->rmb_desc) {
  1105. bufsize = conn->rmb_desc->len;
  1106. if (!is_smcd) {
  1107. smcr_buf_unuse(conn->rmb_desc, true, lgr);
  1108. } else {
  1109. bufsize += sizeof(struct smcd_cdc_msg);
  1110. memzero_explicit(conn->rmb_desc->cpu_addr, bufsize);
  1111. WRITE_ONCE(conn->rmb_desc->used, 0);
  1112. }
  1113. SMC_STAT_RMB_SIZE(smc, is_smcd, true, false, bufsize);
  1114. }
  1115. }
  1116. /* remove a finished connection from its link group */
  1117. void smc_conn_free(struct smc_connection *conn)
  1118. {
  1119. struct smc_link_group *lgr = conn->lgr;
  1120. if (!lgr || conn->freed)
  1121. /* Connection has never been registered in a
  1122. * link group, or has already been freed.
  1123. */
  1124. return;
  1125. conn->freed = 1;
  1126. if (!smc_conn_lgr_valid(conn))
  1127. /* Connection has already unregistered from
  1128. * link group.
  1129. */
  1130. goto lgr_put;
  1131. if (lgr->is_smcd) {
  1132. if (!list_empty(&lgr->list))
  1133. smc_ism_unset_conn(conn);
  1134. if (smc_ism_support_dmb_nocopy(lgr->smcd))
  1135. smcd_buf_detach(conn);
  1136. tasklet_kill(&conn->rx_tsklet);
  1137. } else {
  1138. smc_cdc_wait_pend_tx_wr(conn);
  1139. if (current_work() != &conn->abort_work)
  1140. cancel_work_sync(&conn->abort_work);
  1141. }
  1142. if (!list_empty(&lgr->list)) {
  1143. smc_buf_unuse(conn, lgr); /* allow buffer reuse */
  1144. smc_lgr_unregister_conn(conn);
  1145. }
  1146. if (!lgr->conns_num)
  1147. smc_lgr_schedule_free_work(lgr);
  1148. lgr_put:
  1149. if (!lgr->is_smcd)
  1150. smcr_link_put(conn->lnk); /* link_hold in smc_conn_create() */
  1151. smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */
  1152. }
  1153. /* unregister a link from a buf_desc */
  1154. static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
  1155. struct smc_link *lnk)
  1156. {
  1157. if (is_rmb || buf_desc->is_vm)
  1158. buf_desc->is_reg_mr[lnk->link_idx] = false;
  1159. if (!buf_desc->is_map_ib[lnk->link_idx])
  1160. return;
  1161. if ((is_rmb || buf_desc->is_vm) &&
  1162. buf_desc->mr[lnk->link_idx]) {
  1163. smc_ib_put_memory_region(buf_desc->mr[lnk->link_idx]);
  1164. buf_desc->mr[lnk->link_idx] = NULL;
  1165. }
  1166. if (is_rmb)
  1167. smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
  1168. else
  1169. smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
  1170. sg_free_table(&buf_desc->sgt[lnk->link_idx]);
  1171. buf_desc->is_map_ib[lnk->link_idx] = false;
  1172. }
  1173. /* unmap all buffers of lgr for a deleted link */
  1174. static void smcr_buf_unmap_lgr(struct smc_link *lnk)
  1175. {
  1176. struct smc_link_group *lgr = lnk->lgr;
  1177. struct smc_buf_desc *buf_desc, *bf;
  1178. int i;
  1179. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  1180. down_write(&lgr->rmbs_lock);
  1181. list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
  1182. smcr_buf_unmap_link(buf_desc, true, lnk);
  1183. up_write(&lgr->rmbs_lock);
  1184. down_write(&lgr->sndbufs_lock);
  1185. list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
  1186. list)
  1187. smcr_buf_unmap_link(buf_desc, false, lnk);
  1188. up_write(&lgr->sndbufs_lock);
  1189. }
  1190. }
  1191. static void smcr_rtoken_clear_link(struct smc_link *lnk)
  1192. {
  1193. struct smc_link_group *lgr = lnk->lgr;
  1194. int i;
  1195. for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
  1196. lgr->rtokens[i][lnk->link_idx].rkey = 0;
  1197. lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
  1198. }
  1199. }
  1200. static void __smcr_link_clear(struct smc_link *lnk)
  1201. {
  1202. struct smc_link_group *lgr = lnk->lgr;
  1203. struct smc_ib_device *smcibdev;
  1204. smc_wr_free_link_mem(lnk);
  1205. smc_ibdev_cnt_dec(lnk);
  1206. put_device(&lnk->smcibdev->ibdev->dev);
  1207. smcibdev = lnk->smcibdev;
  1208. memset(lnk, 0, sizeof(struct smc_link));
  1209. lnk->state = SMC_LNK_UNUSED;
  1210. if (!atomic_dec_return(&smcibdev->lnk_cnt))
  1211. wake_up(&smcibdev->lnks_deleted);
  1212. smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */
  1213. }
  1214. /* must be called under lgr->llc_conf_mutex lock */
  1215. void smcr_link_clear(struct smc_link *lnk, bool log)
  1216. {
  1217. if (!lnk->lgr || lnk->clearing ||
  1218. lnk->state == SMC_LNK_UNUSED)
  1219. return;
  1220. lnk->clearing = 1;
  1221. lnk->peer_qpn = 0;
  1222. smc_llc_link_clear(lnk, log);
  1223. smcr_buf_unmap_lgr(lnk);
  1224. smcr_rtoken_clear_link(lnk);
  1225. smc_ib_modify_qp_error(lnk);
  1226. smc_wr_free_link(lnk);
  1227. smc_ib_destroy_queue_pair(lnk);
  1228. smc_ib_dealloc_protection_domain(lnk);
  1229. smcr_link_put(lnk); /* theoretically last link_put */
  1230. }
  1231. void smcr_link_hold(struct smc_link *lnk)
  1232. {
  1233. refcount_inc(&lnk->refcnt);
  1234. }
  1235. void smcr_link_put(struct smc_link *lnk)
  1236. {
  1237. if (refcount_dec_and_test(&lnk->refcnt))
  1238. __smcr_link_clear(lnk);
  1239. }
  1240. static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
  1241. struct smc_buf_desc *buf_desc)
  1242. {
  1243. int i;
  1244. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
  1245. smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
  1246. if (!buf_desc->is_vm && buf_desc->pages)
  1247. __free_pages(buf_desc->pages, buf_desc->order);
  1248. else if (buf_desc->is_vm && buf_desc->cpu_addr)
  1249. vfree(buf_desc->cpu_addr);
  1250. kfree(buf_desc);
  1251. }
  1252. static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb,
  1253. struct smc_buf_desc *buf_desc)
  1254. {
  1255. if (is_dmb) {
  1256. /* restore original buf len */
  1257. buf_desc->len += sizeof(struct smcd_cdc_msg);
  1258. smc_ism_unregister_dmb(lgr->smcd, buf_desc);
  1259. } else {
  1260. kfree(buf_desc->cpu_addr);
  1261. }
  1262. kfree(buf_desc);
  1263. }
  1264. static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
  1265. struct smc_buf_desc *buf_desc)
  1266. {
  1267. if (lgr->is_smcd)
  1268. smcd_buf_free(lgr, is_rmb, buf_desc);
  1269. else
  1270. smcr_buf_free(lgr, is_rmb, buf_desc);
  1271. }
  1272. static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb)
  1273. {
  1274. struct smc_buf_desc *buf_desc, *bf_desc;
  1275. struct list_head *buf_list;
  1276. int i;
  1277. for (i = 0; i < SMC_RMBE_SIZES; i++) {
  1278. if (is_rmb)
  1279. buf_list = &lgr->rmbs[i];
  1280. else
  1281. buf_list = &lgr->sndbufs[i];
  1282. list_for_each_entry_safe(buf_desc, bf_desc, buf_list,
  1283. list) {
  1284. smc_lgr_buf_list_del(lgr, is_rmb, buf_desc);
  1285. smc_buf_free(lgr, is_rmb, buf_desc);
  1286. }
  1287. }
  1288. }
  1289. static void smc_lgr_free_bufs(struct smc_link_group *lgr)
  1290. {
  1291. /* free send buffers */
  1292. __smc_lgr_free_bufs(lgr, false);
  1293. /* free rmbs */
  1294. __smc_lgr_free_bufs(lgr, true);
  1295. }
  1296. /* won't be freed until no one accesses lgr anymore */
  1297. static void __smc_lgr_free(struct smc_link_group *lgr)
  1298. {
  1299. smc_lgr_free_bufs(lgr);
  1300. if (lgr->is_smcd) {
  1301. if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
  1302. wake_up(&lgr->smcd->lgrs_deleted);
  1303. } else {
  1304. smc_wr_free_lgr_mem(lgr);
  1305. if (!atomic_dec_return(&lgr_cnt))
  1306. wake_up(&lgrs_deleted);
  1307. }
  1308. kfree(lgr);
  1309. }
  1310. /* remove a link group */
  1311. static void smc_lgr_free(struct smc_link_group *lgr)
  1312. {
  1313. int i;
  1314. if (!lgr->is_smcd) {
  1315. down_write(&lgr->llc_conf_mutex);
  1316. for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
  1317. if (lgr->lnk[i].state != SMC_LNK_UNUSED)
  1318. smcr_link_clear(&lgr->lnk[i], false);
  1319. }
  1320. up_write(&lgr->llc_conf_mutex);
  1321. smc_llc_lgr_clear(lgr);
  1322. }
  1323. destroy_workqueue(lgr->tx_wq);
  1324. if (lgr->is_smcd) {
  1325. smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
  1326. put_device(lgr->smcd->ops->get_dev(lgr->smcd));
  1327. }
  1328. smc_lgr_put(lgr); /* theoretically last lgr_put */
  1329. }
void smc_lgr_hold(struct smc_link_group *lgr)
{
	refcount_inc(&lgr->refcnt);
}

void smc_lgr_put(struct smc_link_group *lgr)
{
	if (refcount_dec_and_test(&lgr->refcnt))
		__smc_lgr_free(lgr);
}

static void smc_sk_wake_ups(struct smc_sock *smc)
{
	smc->sk.sk_write_space(&smc->sk);
	smc->sk.sk_data_ready(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

/* kill a connection */
static void smc_conn_kill(struct smc_connection *conn, bool soft)
{
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	if (conn->lgr->is_smcd && conn->lgr->peer_shutdown)
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		smc_close_abort(conn);
	conn->killed = 1;
	smc->sk.sk_err = ECONNABORTED;
	smc_sk_wake_ups(smc);
	if (conn->lgr->is_smcd) {
		smc_ism_unset_conn(conn);
		if (smc_ism_support_dmb_nocopy(conn->lgr->smcd))
			smcd_buf_detach(conn);
		if (soft)
			tasklet_kill(&conn->rx_tsklet);
		else
			tasklet_unlock_wait(&conn->rx_tsklet);
	} else {
		smc_cdc_wait_pend_tx_wr(conn);
	}
	smc_lgr_unregister_conn(conn);
	smc_close_active_abort(smc);
}

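/* notify the peer that this link group is going away: signal shutdown for
 * SMC-D, send a DELETE LINK (all) message and deactivate the links for SMC-R
 */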
static void smc_lgr_cleanup(struct smc_link_group *lgr)
{
	if (lgr->is_smcd) {
		smc_ism_signal_shutdown(lgr);
	} else {
		u32 rsn = lgr->llc_termination_rsn;

		if (!rsn)
			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
		smc_llc_send_link_delete_all(lgr, false, rsn);
		smcr_lgr_link_deactivate_all(lgr);
	}
}

/* terminate link group
 * @soft: true if link group shutdown can take its time
 *	  false if immediate link group shutdown is required
 */
static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
{
	struct smc_connection *conn;
	struct smc_sock *smc;
	struct rb_node *node;

	if (lgr->terminating)
		return;	/* lgr already terminating */
	/* cancel free_work sync, will terminate when lgr->freeing is set */
	cancel_delayed_work(&lgr->free_work);
	lgr->terminating = 1;

	/* kill remaining link group connections */
	read_lock_bh(&lgr->conns_lock);
	node = rb_first(&lgr->conns_all);
	while (node) {
		read_unlock_bh(&lgr->conns_lock);
		conn = rb_entry(node, struct smc_connection, alert_node);
		smc = container_of(conn, struct smc_sock, conn);
		sock_hold(&smc->sk); /* sock_put below */
		lock_sock(&smc->sk);
		smc_conn_kill(conn, soft);
		release_sock(&smc->sk);
		sock_put(&smc->sk); /* sock_hold above */
		read_lock_bh(&lgr->conns_lock);
		node = rb_first(&lgr->conns_all);
	}
	read_unlock_bh(&lgr->conns_lock);
	smc_lgr_cleanup(lgr);
	smc_lgr_free(lgr);
}

/* unlink link group and schedule termination */
void smc_lgr_terminate_sched(struct smc_link_group *lgr)
{
	spinlock_t *lgr_lock;

	smc_lgr_list_head(lgr, &lgr_lock);
	spin_lock_bh(lgr_lock);
	if (list_empty(&lgr->list) || lgr->terminating || lgr->freeing) {
		spin_unlock_bh(lgr_lock);
		return;	/* lgr already terminating */
	}
	list_del_init(&lgr->list);
	lgr->freeing = 1;
	spin_unlock_bh(lgr_lock);
	schedule_work(&lgr->terminate_work);
}

/* Called when peer lgr shutdown (regularly or abnormally) is received */
void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
			unsigned short vlan)
{
	struct smc_link_group *lgr, *l;
	LIST_HEAD(lgr_free_list);

	/* run common cleanup function and build free list */
	spin_lock_bh(&dev->lgr_lock);
	list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
		if ((!peer_gid->gid ||
		     (lgr->peer_gid.gid == peer_gid->gid &&
		      !smc_ism_is_emulated(dev) ? 1 :
		      lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
		    (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
			if (peer_gid->gid) /* peer triggered termination */
				lgr->peer_shutdown = 1;
			list_move(&lgr->list, &lgr_free_list);
			lgr->freeing = 1;
		}
	}
	spin_unlock_bh(&dev->lgr_lock);

	/* cancel the regular free workers and actually free lgrs */
	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		schedule_work(&lgr->terminate_work);
	}
}

/* Called when an SMCD device is removed or the smc module is unloaded */
void smc_smcd_terminate_all(struct smcd_dev *smcd)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);

	spin_lock_bh(&smcd->lgr_lock);
	list_splice_init(&smcd->lgr_list, &lgr_free_list);
	list_for_each_entry(lgr, &lgr_free_list, list)
		lgr->freeing = 1;
	spin_unlock_bh(&smcd->lgr_lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		__smc_lgr_terminate(lgr, false);
	}

	if (atomic_read(&smcd->lgr_cnt))
		wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt));
}

/* Called when an SMCR device is removed or the smc module is unloaded.
 * If smcibdev is given, all SMCR link groups using this device are terminated.
 * If smcibdev is NULL, all SMCR link groups are terminated.
 */
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
{
	struct smc_link_group *lgr, *lg;
	LIST_HEAD(lgr_free_list);
	int i;

	spin_lock_bh(&smc_lgr_list.lock);
	if (!smcibdev) {
		list_splice_init(&smc_lgr_list.list, &lgr_free_list);
		list_for_each_entry(lgr, &lgr_free_list, list)
			lgr->freeing = 1;
	} else {
		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
				if (lgr->lnk[i].smcibdev == smcibdev)
					smcr_link_down_cond_sched(&lgr->lnk[i]);
			}
		}
	}
	spin_unlock_bh(&smc_lgr_list.lock);

	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
		list_del_init(&lgr->list);
		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
		__smc_lgr_terminate(lgr, false);
	}

	if (smcibdev) {
		if (atomic_read(&smcibdev->lnk_cnt))
			wait_event(smcibdev->lnks_deleted,
				   !atomic_read(&smcibdev->lnk_cnt));
	} else {
		if (atomic_read(&lgr_cnt))
			wait_event(lgrs_deleted, !atomic_read(&lgr_cnt));
	}
}

/* set new lgr type and clear all asymmetric link tagging */
void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
{
	char *lgr_type = "";
	int i;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
		if (smc_link_usable(&lgr->lnk[i]))
			lgr->lnk[i].link_is_asym = false;
	if (lgr->type == new_type)
		return;
	lgr->type = new_type;
	switch (lgr->type) {
	case SMC_LGR_NONE:
		lgr_type = "NONE";
		break;
	case SMC_LGR_SINGLE:
		lgr_type = "SINGLE";
		break;
	case SMC_LGR_SYMMETRIC:
		lgr_type = "SYMMETRIC";
		break;
	case SMC_LGR_ASYMMETRIC_PEER:
		lgr_type = "ASYMMETRIC_PEER";
		break;
	case SMC_LGR_ASYMMETRIC_LOCAL:
		lgr_type = "ASYMMETRIC_LOCAL";
		break;
	}
	pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: "
			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
			    lgr->net->net_cookie, lgr_type, lgr->pnet_id);
}

/* set new lgr type and tag a link as asymmetric */
void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
			    enum smc_lgr_type new_type, int asym_lnk_idx)
{
	smcr_lgr_set_type(lgr, new_type);
	lgr->lnk[asym_lnk_idx].link_is_asym = true;
}

/* abort connection, abort_work scheduled from tasklet context */
static void smc_conn_abort_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   abort_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);

	lock_sock(&smc->sk);
	smc_conn_kill(conn, true);
	release_sock(&smc->sk);
	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}

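/* an IB port became usable - try to add another link on this port to all
 * link groups with a matching pnetid that could still use an additional link
 */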
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;

	spin_lock_bh(&smc_lgr_list.lock);
	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		struct smc_link *link;

		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN) ||
		    lgr->type == SMC_LGR_SYMMETRIC ||
		    lgr->type == SMC_LGR_ASYMMETRIC_PEER ||
		    !rdma_dev_access_netns(smcibdev->ibdev, lgr->net))
			continue;

		if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1)
			continue;

		/* trigger local add link processing */
		link = smc_llc_usable_link(lgr);
		if (link)
			smc_llc_add_link_local(link);
	}
	spin_unlock_bh(&smc_lgr_list.lock);
}

/* link is down - switch connections to alternate link,
 * must be called under lgr->llc_conf_mutex lock
 */
static void smcr_link_down(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_link *to_lnk;
	int del_link_id;

	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
		return;

	to_lnk = smc_switch_conns(lgr, lnk, true);
	if (!to_lnk) { /* no backup link available */
		smcr_link_clear(lnk, true);
		return;
	}
	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
	del_link_id = lnk->link_id;

	if (lgr->role == SMC_SERV) {
		/* trigger local delete link processing */
		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
	} else {
		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
			/* another llc task is ongoing */
			up_write(&lgr->llc_conf_mutex);
			wait_event_timeout(lgr->llc_flow_waiter,
					   (list_empty(&lgr->list) ||
					    lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
					   SMC_LLC_WAIT_TIME);
			down_write(&lgr->llc_conf_mutex);
		}
		if (!list_empty(&lgr->list)) {
			smc_llc_send_delete_link(to_lnk, del_link_id,
						 SMC_LLC_REQ, true,
						 SMC_LLC_DEL_LOST_PATH);
			smcr_link_clear(lnk, true);
		}
		wake_up(&lgr->llc_flow_waiter);	/* wake up next waiter */
	}
}

/* must be called under lgr->llc_conf_mutex lock */
void smcr_link_down_cond(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		smcr_link_down(lnk);
	}
}

/* will get the lgr->llc_conf_mutex lock */
void smcr_link_down_cond_sched(struct smc_link *lnk)
{
	if (smc_link_downing(&lnk->state)) {
		trace_smcr_link_down(lnk, __builtin_return_address(0));
		smcr_link_hold(lnk); /* smcr_link_put in link_down_wrk */
		if (!schedule_work(&lnk->link_down_wrk))
			smcr_link_put(lnk);
	}
}

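/* an IB port failed - bring down all links that are using this port */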
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
{
	struct smc_link_group *lgr, *n;
	int i;

	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
			    SMC_MAX_PNETID_LEN))
			continue; /* lgr is not affected */
		if (list_empty(&lgr->list))
			continue;
		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
			struct smc_link *lnk = &lgr->lnk[i];

			if (smc_link_usable(lnk) &&
			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
				smcr_link_down_cond_sched(lnk);
		}
	}
}

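/* worker scheduled by smcr_link_down_cond_sched(); takes the llc_conf_mutex
 * and brings the link down
 */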
static void smc_link_down_work(struct work_struct *work)
{
	struct smc_link *link = container_of(work, struct smc_link,
					     link_down_wrk);
	struct smc_link_group *lgr = link->lgr;

	if (list_empty(&lgr->list))
		goto out;
	wake_up_all(&lgr->llc_msg_waiter);
	down_write(&lgr->llc_conf_mutex);
	smcr_link_down(link);
	up_write(&lgr->llc_conf_mutex);

out:
	smcr_link_put(link); /* smcr_link_hold by schedulers of link_down_work */
}

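/* netdev_walk_all_lower_dev() callback: report the vlan id of the first
 * vlan device found below the given device
 */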
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
				  struct netdev_nested_priv *priv)
{
	unsigned short *vlan_id = (unsigned short *)priv->data;

	if (is_vlan_dev(lower_dev)) {
		*vlan_id = vlan_dev_vlan_id(lower_dev);
		return 1;
	}

	return 0;
}

/* Determine vlan of internal TCP socket. */
int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini)
{
	struct dst_entry *dst = sk_dst_get(clcsock->sk);
	struct netdev_nested_priv priv;
	struct net_device *ndev;
	int rc = 0;

	ini->vlan_id = 0;
	if (!dst) {
		rc = -ENOTCONN;
		goto out;
	}
	if (!dst->dev) {
		rc = -ENODEV;
		goto out_rel;
	}

	ndev = dst->dev;
	if (is_vlan_dev(ndev)) {
		ini->vlan_id = vlan_dev_vlan_id(ndev);
		goto out_rel;
	}

	priv.data = (void *)&ini->vlan_id;
	rtnl_lock();
	netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv);
	rtnl_unlock();

out_rel:
	dst_release(dst);
out:
	return rc;
}

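/* check whether an existing SMC-R link group can be reused for a connection
 * to the given peer
 */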
static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
			   u8 peer_systemid[],
			   u8 peer_gid[],
			   u8 peer_mac_v1[],
			   enum smc_lgr_role role, u32 clcqpn,
			   struct net *net)
{
	struct smc_link *lnk;
	int i;

	if (memcmp(lgr->peer_systemid, peer_systemid, SMC_SYSTEMID_LEN) ||
	    lgr->role != role)
		return false;

	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		lnk = &lgr->lnk[i];

		if (!smc_link_active(lnk))
			continue;
		/* use verbs API to check netns, instead of lgr->net */
		if (!rdma_dev_access_netns(lnk->smcibdev->ibdev, net))
			return false;
		if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) &&
		    !memcmp(lnk->peer_gid, peer_gid, SMC_GID_SIZE) &&
		    (smcr_version == SMC_V2 ||
		     !memcmp(lnk->peer_mac, peer_mac_v1, ETH_ALEN)))
			return true;
	}
	return false;
}

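/* check whether an existing SMC-D link group matches the given device and
 * peer GID
 */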
static bool smcd_lgr_match(struct smc_link_group *lgr,
			   struct smcd_dev *smcismdev,
			   struct smcd_gid *peer_gid)
{
	if (lgr->peer_gid.gid != peer_gid->gid ||
	    lgr->smcd != smcismdev)
		return false;

	if (smc_ism_is_emulated(smcismdev) &&
	    lgr->peer_gid.gid_ext != peer_gid->gid_ext)
		return false;

	return true;
}

/* create a new SMC connection (and a new link group if necessary) */
int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
{
	struct smc_connection *conn = &smc->conn;
	struct net *net = sock_net(&smc->sk);
	struct list_head *lgr_list;
	struct smc_link_group *lgr;
	enum smc_lgr_role role;
	spinlock_t *lgr_lock;
	int rc = 0;

	lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list :
				  &smc_lgr_list.list;
	lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock :
				  &smc_lgr_list.lock;
	ini->first_contact_local = 1;
	role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
	if (role == SMC_CLNT && ini->first_contact_peer)
		/* create new link group as well */
		goto create;

	/* determine if an existing link group can be reused */
	spin_lock_bh(lgr_lock);
	list_for_each_entry(lgr, lgr_list, list) {
		write_lock_bh(&lgr->conns_lock);
		if ((ini->is_smcd ?
		     smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
				    &ini->ism_peer_gid[ini->ism_selected]) :
		     smcr_lgr_match(lgr, ini->smcr_version,
				    ini->peer_systemid,
				    ini->peer_gid, ini->peer_mac, role,
				    ini->ib_clcqpn, net)) &&
		    !lgr->sync_err &&
		    (ini->smcd_version == SMC_V2 ||
		     lgr->vlan_id == ini->vlan_id) &&
		    (role == SMC_CLNT || ini->is_smcd ||
		     (lgr->conns_num < lgr->max_conns &&
		      !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
			/* link group found */
			ini->first_contact_local = 0;
			conn->lgr = lgr;
			rc = smc_lgr_register_conn(conn, false);
			write_unlock_bh(&lgr->conns_lock);
			if (!rc && delayed_work_pending(&lgr->free_work))
				cancel_delayed_work(&lgr->free_work);
			break;
		}
		write_unlock_bh(&lgr->conns_lock);
	}
	spin_unlock_bh(lgr_lock);
	if (rc)
		return rc;

	if (role == SMC_CLNT && !ini->first_contact_peer &&
	    ini->first_contact_local) {
		/* Server reuses a link group, but Client wants to start
		 * a new one
		 * send out_of_sync decline, reason synchr. error
		 */
		return SMC_CLC_DECL_SYNCERR;
	}

create:
	if (ini->first_contact_local) {
		rc = smc_lgr_create(smc, ini);
		if (rc)
			goto out;
		lgr = conn->lgr;
		write_lock_bh(&lgr->conns_lock);
		rc = smc_lgr_register_conn(conn, true);
		write_unlock_bh(&lgr->conns_lock);
		if (rc) {
			smc_lgr_cleanup_early(lgr);
			goto out;
		}
	}
	smc_lgr_hold(conn->lgr); /* lgr_put in smc_conn_free() */
	if (!conn->lgr->is_smcd)
		smcr_link_hold(conn->lnk); /* link_put in smc_conn_free() */
	conn->freed = 0;
	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
	conn->urg_state = SMC_URG_READ;
	init_waitqueue_head(&conn->cdc_pend_tx_wq);
	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
	if (ini->is_smcd) {
		conn->rx_off = sizeof(struct smcd_cdc_msg);
		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
	} else {
		conn->rx_off = 0;
	}
#ifndef KERNEL_HAS_ATOMIC64
	spin_lock_init(&conn->acurs_lock);
#endif

out:
	return rc;
}

#define SMCD_DMBE_SIZES		6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
#define SMCR_RMBE_SIZES		15 /* 0 -> 16KB, 1 -> 32KB, .. 15 -> 512MB */

/* convert the RMB size into the compressed notation (minimum 16K, see
 * SMCD/R_DMBE_SIZES).
 * In contrast to plain ilog2, this rounds towards the next power of 2,
 * so the socket application gets at least its desired sndbuf / rcvbuf size.
 */
static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb)
{
	u8 compressed;

	if (size <= SMC_BUF_MIN_SIZE)
		return 0;

	size = (size - 1) >> 14;  /* convert to 16K multiple */
	compressed = min_t(u8, ilog2(size) + 1,
			   is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES);

#ifdef CONFIG_ARCH_NO_SG_CHAIN
	if (!is_smcd && is_rmb)
		/* RMBs are backed by & limited to max size of scatterlists */
		compressed = min_t(u8, compressed, ilog2((SG_MAX_SINGLE_ALLOC * PAGE_SIZE) >> 14));
#endif

	return compressed;
}

/* convert the RMB size from compressed notation into integer */
int smc_uncompress_bufsize(u8 compressed)
{
	u32 size;

	size = 0x00000001 << (((int)compressed) + 14);
	return (int)size;
}

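/* Example: a requested size of 100000 bytes compresses to 3
 * (ilog2((100000 - 1) >> 14) + 1), and smc_uncompress_bufsize(3) yields
 * 131072 (128KB), so the application gets at least the size it asked for.
 */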
/* try to reuse a sndbuf or rmb description slot for a certain
 * buffer size; if not available, return NULL
 */
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
					     struct rw_semaphore *lock,
					     struct list_head *buf_list)
{
	struct smc_buf_desc *buf_slot;

	down_read(lock);
	list_for_each_entry(buf_slot, buf_list, list) {
		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
			up_read(lock);
			return buf_slot;
		}
	}
	up_read(lock);
	return NULL;
}

/* one of the conditions for announcing a receiver's current window size is
 * that it "results in a minimum increase in the window size of 10% of the
 * receive buffer space" [RFC7609]
 */
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
	return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}

/* map a buf to a link */
static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
			     struct smc_link *lnk)
{
	int rc, i, nents, offset, buf_size, size, access_flags;
	struct scatterlist *sg;
	void *buf;

	if (buf_desc->is_map_ib[lnk->link_idx])
		return 0;

	if (buf_desc->is_vm) {
		buf = buf_desc->cpu_addr;
		buf_size = buf_desc->len;
		offset = offset_in_page(buf_desc->cpu_addr);
		nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE;
	} else {
		nents = 1;
	}

	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL);
	if (rc)
		return rc;

	if (buf_desc->is_vm) {
		/* virtually contiguous buffer */
		for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
			size = min_t(int, PAGE_SIZE - offset, buf_size);
			sg_set_page(sg, vmalloc_to_page(buf), size, offset);
			buf += size / sizeof(*buf);
			buf_size -= size;
			offset = 0;
		}
	} else {
		/* physically contiguous buffer */
		sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
			   buf_desc->cpu_addr, buf_desc->len);
	}

	/* map sg table to DMA address */
	rc = smc_ib_buf_map_sg(lnk, buf_desc,
			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	/* SMC protocol depends on mapping to one DMA address only */
	if (rc != nents) {
		rc = -EAGAIN;
		goto free_table;
	}

	buf_desc->is_dma_need_sync |=
		smc_ib_is_sg_need_sync(lnk, buf_desc) << lnk->link_idx;

	if (is_rmb || buf_desc->is_vm) {
		/* create a new memory region for the RMB or vzalloced sndbuf */
		access_flags = is_rmb ?
			       IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
			       IB_ACCESS_LOCAL_WRITE;

		rc = smc_ib_get_memory_region(lnk->roce_pd, access_flags,
					      buf_desc, lnk->link_idx);
		if (rc)
			goto buf_unmap;
		smc_ib_sync_sg_for_device(lnk, buf_desc,
					  is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	}
	buf_desc->is_map_ib[lnk->link_idx] = true;
	return 0;

buf_unmap:
	smc_ib_buf_unmap_sg(lnk, buf_desc,
			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
free_table:
	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
	return rc;
}

/* register a new buf on IB device, rmb or vzalloced sndbuf
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
{
	if (list_empty(&link->lgr->list))
		return -ENOLINK;
	if (!buf_desc->is_reg_mr[link->link_idx]) {
		/* register memory region for new buf */
		if (buf_desc->is_vm)
			buf_desc->mr[link->link_idx]->iova =
				(uintptr_t)buf_desc->cpu_addr;
		if (smc_wr_reg_send(link, buf_desc->mr[link->link_idx])) {
			buf_desc->is_reg_err = true;
			return -EFAULT;
		}
		buf_desc->is_reg_mr[link->link_idx] = true;
	}
	return 0;
}

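/* map all used buffers of one size class of the link group to a link */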
static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
			     struct list_head *lst, bool is_rmb)
{
	struct smc_buf_desc *buf_desc, *bf;
	int rc = 0;

	down_write(lock);
	list_for_each_entry_safe(buf_desc, bf, lst, list) {
		if (!buf_desc->used)
			continue;
		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
		if (rc)
			goto out;
	}
out:
	up_write(lock);
	return rc;
}

/* map all used buffers of lgr for a new link */
int smcr_buf_map_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	int i, rc = 0;

	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
				       &lgr->rmbs[i], true);
		if (rc)
			return rc;
		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
				       &lgr->sndbufs[i], false);
		if (rc)
			return rc;
	}
	return 0;
}

/* register all used buffers of lgr for a new link,
 * must be called under lgr->llc_conf_mutex lock
 */
int smcr_buf_reg_lgr(struct smc_link *lnk)
{
	struct smc_link_group *lgr = lnk->lgr;
	struct smc_buf_desc *buf_desc, *bf;
	int i, rc = 0;

	/* reg all RMBs for a new link */
	down_write(&lgr->rmbs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
			if (!buf_desc->used)
				continue;
			rc = smcr_link_reg_buf(lnk, buf_desc);
			if (rc) {
				up_write(&lgr->rmbs_lock);
				return rc;
			}
		}
	}
	up_write(&lgr->rmbs_lock);

	if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
		return rc;

	/* reg all vzalloced sndbufs for a new link */
	down_write(&lgr->sndbufs_lock);
	for (i = 0; i < SMC_RMBE_SIZES; i++) {
		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
			if (!buf_desc->used || !buf_desc->is_vm)
				continue;
			rc = smcr_link_reg_buf(lnk, buf_desc);
			if (rc) {
				up_write(&lgr->sndbufs_lock);
				return rc;
			}
		}
	}
	up_write(&lgr->sndbufs_lock);
	return rc;
}

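/* allocate a new SMC-R buffer; depending on lgr->buf_type try physically
 * contiguous pages first and/or fall back to a virtually contiguous
 * (vzalloc'ed) buffer
 */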
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
						int bufsize)
{
	struct smc_buf_desc *buf_desc;

	/* try to alloc a new buffer */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);

	switch (lgr->buf_type) {
	case SMCR_PHYS_CONT_BUFS:
	case SMCR_MIXED_BUFS:
		buf_desc->order = get_order(bufsize);
		buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN |
					      __GFP_NOMEMALLOC | __GFP_COMP |
					      __GFP_NORETRY | __GFP_ZERO,
					      buf_desc->order);
		if (buf_desc->pages) {
			buf_desc->cpu_addr =
				(void *)page_address(buf_desc->pages);
			buf_desc->len = bufsize;
			buf_desc->is_vm = false;
			break;
		}
		if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
			goto out;
		fallthrough;	// try virtually contiguous buf
	case SMCR_VIRT_CONT_BUFS:
		buf_desc->order = get_order(bufsize);
		buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order);
		if (!buf_desc->cpu_addr)
			goto out;
		buf_desc->pages = NULL;
		buf_desc->len = bufsize;
		buf_desc->is_vm = true;
		break;
	}
	return buf_desc;

out:
	kfree(buf_desc);
	return ERR_PTR(-EAGAIN);
}

/* map buf_desc on all usable links,
 * unused buffers stay mapped as long as the link is up
 */
static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
				     struct smc_buf_desc *buf_desc, bool is_rmb)
{
	int i, rc = 0, cnt = 0;

	/* protect against parallel link reconfiguration */
	down_read(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		struct smc_link *lnk = &lgr->lnk[i];

		if (!smc_link_usable(lnk))
			continue;
		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
			rc = -ENOMEM;
			goto out;
		}
		cnt++;
	}
out:
	up_read(&lgr->llc_conf_mutex);
	if (!rc && !cnt)
		rc = -EINVAL;
	return rc;
}

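/* allocate a new SMC-D buffer; DMBs are registered with the ISM device,
 * plain send buffers are simply kzalloc'ed
 */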
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
						bool is_dmb, int bufsize)
{
	struct smc_buf_desc *buf_desc;
	int rc;

	/* try to alloc a new DMB */
	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return ERR_PTR(-ENOMEM);
	if (is_dmb) {
		rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
		if (rc) {
			kfree(buf_desc);
			if (rc == -ENOMEM)
				return ERR_PTR(-EAGAIN);
			if (rc == -ENOSPC)
				return ERR_PTR(-ENOSPC);
			return ERR_PTR(-EIO);
		}
		buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
		/* CDC header stored in buf. So, pretend it was smaller */
		buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg);
	} else {
		buf_desc->cpu_addr = kzalloc(bufsize, GFP_KERNEL |
					     __GFP_NOWARN | __GFP_NORETRY |
					     __GFP_NOMEMALLOC);
		if (!buf_desc->cpu_addr) {
			kfree(buf_desc);
			return ERR_PTR(-EAGAIN);
		}
		buf_desc->len = bufsize;
	}
	return buf_desc;
}

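/* create or reuse a send buffer or RMB for a connection; start from the
 * socket buffer size and fall back to smaller sizes if allocation fails
 */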
static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
{
	struct smc_buf_desc *buf_desc = ERR_PTR(-ENOMEM);
	struct smc_connection *conn = &smc->conn;
	struct smc_link_group *lgr = conn->lgr;
	struct list_head *buf_list;
	int bufsize, bufsize_comp;
	struct rw_semaphore *lock;	/* lock buffer list */
	bool is_dgraded = false;

	if (is_rmb)
		/* use socket recv buffer size (w/o overhead) as start value */
		bufsize = smc->sk.sk_rcvbuf / 2;
	else
		/* use socket send buffer size (w/o overhead) as start value */
		bufsize = smc->sk.sk_sndbuf / 2;

	for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
	     bufsize_comp >= 0; bufsize_comp--) {
		if (is_rmb) {
			lock = &lgr->rmbs_lock;
			buf_list = &lgr->rmbs[bufsize_comp];
		} else {
			lock = &lgr->sndbufs_lock;
			buf_list = &lgr->sndbufs[bufsize_comp];
		}
		bufsize = smc_uncompress_bufsize(bufsize_comp);

		/* check for reusable slot in the link group */
		buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
		if (buf_desc) {
			buf_desc->is_dma_need_sync = 0;
			SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
			SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb);
			break; /* found reusable slot */
		}

		if (is_smcd)
			buf_desc = smcd_new_buf_create(lgr, is_rmb, bufsize);
		else
			buf_desc = smcr_new_buf_create(lgr, bufsize);

		if (PTR_ERR(buf_desc) == -ENOMEM)
			break;
		if (IS_ERR(buf_desc)) {
			if (!is_dgraded) {
				is_dgraded = true;
				SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb);
			}
			continue;
		}

		SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
		SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, true, bufsize);
		buf_desc->used = 1;
		down_write(lock);
		smc_lgr_buf_list_add(lgr, is_rmb, buf_list, buf_desc);
		up_write(lock);
		break; /* found */
	}

	if (IS_ERR(buf_desc))
		return PTR_ERR(buf_desc);

	if (!is_smcd) {
		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
			smcr_buf_unuse(buf_desc, is_rmb, lgr);
			return -ENOMEM;
		}
	}

	if (is_rmb) {
		conn->rmb_desc = buf_desc;
		conn->rmbe_size_comp = bufsize_comp;
		smc->sk.sk_rcvbuf = bufsize * 2;
		atomic_set(&conn->bytes_to_rcv, 0);
		conn->rmbe_update_limit =
			smc_rmb_wnd_update_limit(buf_desc->len);
		if (is_smcd)
			smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
	} else {
		conn->sndbuf_desc = buf_desc;
		smc->sk.sk_sndbuf = bufsize * 2;
		atomic_set(&conn->sndbuf_space, bufsize);
	}
	return 0;
}

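/* sync the sndbuf for device access, if DMA sync is required for this buffer */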
void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
{
	if (!conn->sndbuf_desc->is_dma_need_sync)
		return;
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd ||
	    !smc_link_active(conn->lnk))
		return;
	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
}

void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
{
	int i;

	if (!conn->rmb_desc->is_dma_need_sync)
		return;
	if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd)
		return;
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&conn->lgr->lnk[i]))
			continue;
		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
				       DMA_FROM_DEVICE);
	}
}

/* create the send and receive buffer for an SMC socket;
 * receive buffers are called RMBs;
 * (even though the SMC protocol allows more than one RMB-element per RMB,
 * the Linux implementation uses just one RMB-element per RMB, i.e. uses an
 * extra RMB for every connection in a link group)
 */
int smc_buf_create(struct smc_sock *smc, bool is_smcd)
{
	int rc;

	/* create send buffer */
	if (is_smcd &&
	    smc_ism_support_dmb_nocopy(smc->conn.lgr->smcd))
		goto create_rmb;

	rc = __smc_buf_create(smc, is_smcd, false);
	if (rc)
		return rc;

create_rmb:
	/* create rmb */
	rc = __smc_buf_create(smc, is_smcd, true);
	if (rc && smc->conn.sndbuf_desc) {
		down_write(&smc->conn.lgr->sndbufs_lock);
		smc_lgr_buf_list_del(smc->conn.lgr, false,
				     smc->conn.sndbuf_desc);
		up_write(&smc->conn.lgr->sndbufs_lock);
		smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
		smc->conn.sndbuf_desc = NULL;
	}
	return rc;
}

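/* SMC-D with DMB nocopy support: attach the connection to the peer's DMB
 * instead of allocating a local send buffer
 */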
int smcd_buf_attach(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct smcd_dev *smcd = conn->lgr->smcd;
	u64 peer_token = conn->peer_token;
	struct smc_buf_desc *buf_desc;
	int rc;

	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
	if (!buf_desc)
		return -ENOMEM;

	/* The ghost sndbuf_desc describes the same memory region as
	 * peer RMB. Its lifecycle is consistent with the connection's
	 * and it will be freed with the connections instead of the
	 * link group.
	 */
	rc = smc_ism_attach_dmb(smcd, peer_token, buf_desc);
	if (rc)
		goto free;

	smc->sk.sk_sndbuf = buf_desc->len;
	buf_desc->cpu_addr =
		(u8 *)buf_desc->cpu_addr + sizeof(struct smcd_cdc_msg);
	buf_desc->len -= sizeof(struct smcd_cdc_msg);
	conn->sndbuf_desc = buf_desc;
	conn->sndbuf_desc->used = 1;
	atomic_set(&conn->sndbuf_space, conn->sndbuf_desc->len);
	return 0;

free:
	kfree(buf_desc);
	return rc;
}

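/* reserve a free rtoken slot in the link group's rtokens array */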
static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
{
	int i;

	for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) {
		if (!test_and_set_bit(i, lgr->rtokens_used_mask))
			return i;
	}
	return -ENOSPC;
}

static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
				   u32 rkey)
{
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (test_bit(i, lgr->rtokens_used_mask) &&
		    lgr->rtokens[i][lnk_idx].rkey == rkey)
			return i;
	}
	return -ENOENT;
}

/* set rtoken for a new link to an existing rmb */
void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
{
	int rtok_idx;

	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
	if (rtok_idx == -ENOENT)
		return;
	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
}

/* set rtoken for a new link whose link_id is given */
void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
		     __be64 nw_vaddr, __be32 nw_rkey)
{
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	bool found = false;
	int link_idx;

	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
		if (lgr->lnk[link_idx].link_id == link_id) {
			found = true;
			break;
		}
	}
	if (!found)
		return;
	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
}

/* add a new rtoken from peer */
int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u64 dma_addr = be64_to_cpu(nw_vaddr);
	u32 rkey = ntohl(nw_rkey);
	int i;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			/* already in list */
			return i;
		}
	}
	i = smc_rmb_reserve_rtoken_idx(lgr);
	if (i < 0)
		return i;
	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
	return i;
}

/* delete an rtoken from all links */
int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
{
	struct smc_link_group *lgr = smc_get_lgr(lnk);
	u32 rkey = ntohl(nw_rkey);
	int i, j;

	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
		    test_bit(i, lgr->rtokens_used_mask)) {
			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
				lgr->rtokens[i][j].rkey = 0;
				lgr->rtokens[i][j].dma_addr = 0;
			}
			clear_bit(i, lgr->rtokens_used_mask);
			return 0;
		}
	}
	return -ENOENT;
}

/* save rkey and dma_addr received from peer during clc handshake */
int smc_rmb_rtoken_handling(struct smc_connection *conn,
			    struct smc_link *lnk,
			    struct smc_clc_msg_accept_confirm *clc)
{
	conn->rtoken_idx = smc_rtoken_add(lnk, clc->r0.rmb_dma_addr,
					  clc->r0.rmb_rkey);
	if (conn->rtoken_idx < 0)
		return conn->rtoken_idx;
	return 0;
}

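/* mark all IB ports and SMC-D devices as going away */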
static void smc_core_going_away(void)
{
	struct smc_ib_device *smcibdev;
	struct smcd_dev *smcd;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(smcibdev, &smc_ib_devices.list, list) {
		int i;

		for (i = 0; i < SMC_MAX_PORTS; i++)
			set_bit(i, smcibdev->ports_going_away);
	}
	mutex_unlock(&smc_ib_devices.mutex);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		smcd->going_away = 1;
	}
	mutex_unlock(&smcd_dev_list.mutex);
}

/* Clean up all SMC link groups */
static void smc_lgrs_shutdown(void)
{
	struct smcd_dev *smcd;

	smc_core_going_away();

	smc_smcr_terminate_all(NULL);

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list)
		smc_smcd_terminate_all(smcd);
	mutex_unlock(&smcd_dev_list.mutex);
}

static int smc_core_reboot_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	smc_lgrs_shutdown();
	smc_ib_unregister_client();
	smc_ism_exit();
	return 0;
}

static struct notifier_block smc_reboot_notifier = {
	.notifier_call	= smc_core_reboot_event,
};

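/* register the reboot notifier so that all link groups are shut down on
 * reboot
 */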
int __init smc_core_init(void)
{
	return register_reboot_notifier(&smc_reboot_notifier);
}

/* Called (from smc_exit) when module is removed */
void smc_core_exit(void)
{
	unregister_reboot_notifier(&smc_reboot_notifier);
	smc_lgrs_shutdown();
}