svc_rdma_transport.c

// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2015-2018 Oracle. All rights reserved.
 * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
 * Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * Neither the name of the Network Appliance, Inc. nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Author: Tom Tucker <tom@opengridcomputing.com>
 */

#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/export.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/debug.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#define RPCDBG_FACILITY RPCDBG_SVCXPRT

static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
                                                 struct net *net);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
                                        struct net *net,
                                        struct sockaddr *sa, int salen,
                                        int flags);
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);

static const struct svc_xprt_ops svc_rdma_ops = {
        .xpo_create = svc_rdma_create,
        .xpo_recvfrom = svc_rdma_recvfrom,
        .xpo_sendto = svc_rdma_sendto,
        .xpo_release_rqst = svc_rdma_release_rqst,
        .xpo_detach = svc_rdma_detach,
        .xpo_free = svc_rdma_free,
        .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
        .xpo_has_wspace = svc_rdma_has_wspace,
        .xpo_accept = svc_rdma_accept,
        .xpo_secure_port = svc_rdma_secure_port,
        .xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};

struct svc_xprt_class svc_rdma_class = {
        .xcl_name = "rdma",
        .xcl_owner = THIS_MODULE,
        .xcl_ops = &svc_rdma_ops,
        .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
        .xcl_ident = XPRT_TRANSPORT_RDMA,
};

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
                                           struct sockaddr *, int, int);
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);

static const struct svc_xprt_ops svc_rdma_bc_ops = {
        .xpo_create = svc_rdma_bc_create,
        .xpo_detach = svc_rdma_bc_detach,
        .xpo_free = svc_rdma_bc_free,
        .xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
        .xpo_secure_port = svc_rdma_secure_port,
};

struct svc_xprt_class svc_rdma_bc_class = {
        .xcl_name = "rdma-bc",
        .xcl_owner = THIS_MODULE,
        .xcl_ops = &svc_rdma_bc_ops,
        .xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
};
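/* Create a pseudo svc_xprt for the NFSv4.1 backchannel service. No
 * cm_id or QP is set up here; backchannel traffic is carried on the
 * forward channel's connection.
 */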
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
                                           struct net *net,
                                           struct sockaddr *sa, int salen,
                                           int flags)
{
        struct svcxprt_rdma *cma_xprt;
        struct svc_xprt *xprt;

        cma_xprt = svc_rdma_create_xprt(serv, net);
        if (!cma_xprt)
                return ERR_PTR(-ENOMEM);
        xprt = &cma_xprt->sc_xprt;

        svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
        set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
        serv->sv_bc_xprt = xprt;

        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
        return xprt;
}

static void svc_rdma_bc_detach(struct svc_xprt *xprt)
{
        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
}

static void svc_rdma_bc_free(struct svc_xprt *xprt)
{
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);

        dprintk("svcrdma: %s(%p)\n", __func__, xprt);
        if (xprt)
                kfree(rdma);
}
#endif  /* CONFIG_SUNRPC_BACKCHANNEL */
/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
        struct svc_xprt *xprt = context;

        trace_svcrdma_qp_error(event, (struct sockaddr *)&xprt->xpt_remote);
        switch (event->event) {
        /* These are considered benign events */
        case IB_EVENT_PATH_MIG:
        case IB_EVENT_COMM_EST:
        case IB_EVENT_SQ_DRAINED:
        case IB_EVENT_QP_LAST_WQE_REACHED:
                break;

        /* These are considered fatal events */
        case IB_EVENT_PATH_MIG_ERR:
        case IB_EVENT_QP_FATAL:
        case IB_EVENT_QP_REQ_ERR:
        case IB_EVENT_QP_ACCESS_ERR:
        case IB_EVENT_DEVICE_FATAL:
        default:
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                svc_xprt_enqueue(xprt);
                break;
        }
}
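/* Allocate an svcxprt_rdma and initialize its lists, locks, and send
 * wait queue. Returns NULL if the allocation fails.
 */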
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
                                                 struct net *net)
{
        struct svcxprt_rdma *cma_xprt = kzalloc(sizeof *cma_xprt, GFP_KERNEL);

        if (!cma_xprt) {
                dprintk("svcrdma: failed to create new transport\n");
                return NULL;
        }
        svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
        INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_recv_ctxts);
        INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
        init_waitqueue_head(&cma_xprt->sc_send_wait);

        spin_lock_init(&cma_xprt->sc_lock);
        spin_lock_init(&cma_xprt->sc_rq_dto_lock);
        spin_lock_init(&cma_xprt->sc_send_lock);
        spin_lock_init(&cma_xprt->sc_recv_lock);
        spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);

        /*
         * Note that this implies that the underlying transport support
         * has some form of congestion control (see RFC 7530 section 3.1
         * paragraph 2). For now, we assume that all supported RDMA
         * transports are suitable here.
         */
        set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);

        return cma_xprt;
}
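/* Examine the RDMA-CM private data that arrived with the connect
 * request. If it carries a valid RPC-over-RDMA connect message, record
 * whether the client accepts Send With Invalidate.
 */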
static void
svc_rdma_parse_connect_private(struct svcxprt_rdma *newxprt,
                               struct rdma_conn_param *param)
{
        const struct rpcrdma_connect_private *pmsg = param->private_data;

        if (pmsg &&
            pmsg->cp_magic == rpcrdma_cmp_magic &&
            pmsg->cp_version == RPCRDMA_CMP_VERSION) {
                newxprt->sc_snd_w_inv = pmsg->cp_flags &
                                        RPCRDMA_CMP_F_SND_W_INV_OK;
                dprintk("svcrdma: client send_size %u, recv_size %u "
                        "remote inv %ssupported\n",
                        rpcrdma_decode_buffer_size(pmsg->cp_send_size),
                        rpcrdma_decode_buffer_size(pmsg->cp_recv_size),
                        newxprt->sc_snd_w_inv ? "" : "un");
        }
}
/*
 * This function handles the CONNECT_REQUEST event on a listening
 * endpoint. It is passed the cma_id for the _new_ connection. The context in
 * this cma_id is inherited from the listening cma_id and is the svc_xprt
 * structure for the listening endpoint.
 *
 * This function creates a new xprt for the new connection and enqueues it on
 * the accept queue for the listening xprt. When the listen thread is kicked,
 * it will call the recvfrom method on the listen xprt which will accept the
 * new connection.
 */
static void handle_connect_req(struct rdma_cm_id *new_cma_id,
                               struct rdma_conn_param *param)
{
        struct svcxprt_rdma *listen_xprt = new_cma_id->context;
        struct svcxprt_rdma *newxprt;
        struct sockaddr *sa;

        /* Create a new transport */
        newxprt = svc_rdma_create_xprt(listen_xprt->sc_xprt.xpt_server,
                                       listen_xprt->sc_xprt.xpt_net);
        if (!newxprt)
                return;
        newxprt->sc_cm_id = new_cma_id;
        new_cma_id->context = newxprt;
        svc_rdma_parse_connect_private(newxprt, param);

        /* Save client advertised inbound read limit for use later in accept. */
        newxprt->sc_ord = param->initiator_depth;

        sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
        svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
        /* The remote port is arbitrary and not under the control of the
         * client ULP. Set it to a fixed value so that the DRC continues
         * to be effective after a reconnect.
         */
        rpc_set_port((struct sockaddr *)&newxprt->sc_xprt.xpt_remote, 0);

        sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
        svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));

        /*
         * Enqueue the new transport on the accept queue of the listening
         * transport
         */
        spin_lock_bh(&listen_xprt->sc_lock);
        list_add_tail(&newxprt->sc_accept_q, &listen_xprt->sc_accept_q);
        spin_unlock_bh(&listen_xprt->sc_lock);

        set_bit(XPT_CONN, &listen_xprt->sc_xprt.xpt_flags);
        svc_xprt_enqueue(&listen_xprt->sc_xprt);
}
/*
 * Handles events generated on the listening endpoint. These events will be
 * either incoming connect requests or adapter removal events.
 */
static int rdma_listen_handler(struct rdma_cm_id *cma_id,
                               struct rdma_cm_event *event)
{
        struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.src_addr;

        trace_svcrdma_cm_event(event, sap);

        switch (event->event) {
        case RDMA_CM_EVENT_CONNECT_REQUEST:
                dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, "
                        "event = %s (%d)\n", cma_id, cma_id->context,
                        rdma_event_msg(event->event), event->event);
                handle_connect_req(cma_id, &event->param.conn);
                break;
        default:
                /* NB: No device removal upcall for INADDR_ANY listeners */
                dprintk("svcrdma: Unexpected event on listening endpoint %p, "
                        "event = %s (%d)\n", cma_id,
                        rdma_event_msg(event->event), event->event);
                break;
        }

        return 0;
}
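/*
 * Handles events generated on a connected (data transfer) endpoint,
 * such as connection established, disconnect, and device removal.
 */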
static int rdma_cma_handler(struct rdma_cm_id *cma_id,
                            struct rdma_cm_event *event)
{
        struct sockaddr *sap = (struct sockaddr *)&cma_id->route.addr.dst_addr;
        struct svcxprt_rdma *rdma = cma_id->context;
        struct svc_xprt *xprt = &rdma->sc_xprt;

        trace_svcrdma_cm_event(event, sap);

        switch (event->event) {
        case RDMA_CM_EVENT_ESTABLISHED:
                /* Accept complete */
                svc_xprt_get(xprt);
                dprintk("svcrdma: Connection completed on DTO xprt=%p, "
                        "cm_id=%p\n", xprt, cma_id);
                clear_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags);
                svc_xprt_enqueue(xprt);
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
                dprintk("svcrdma: Disconnect on DTO xprt=%p, cm_id=%p\n",
                        xprt, cma_id);
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                svc_xprt_enqueue(xprt);
                svc_xprt_put(xprt);
                break;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                dprintk("svcrdma: Device removal cma_id=%p, xprt = %p, "
                        "event = %s (%d)\n", cma_id, xprt,
                        rdma_event_msg(event->event), event->event);
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                svc_xprt_enqueue(xprt);
                svc_xprt_put(xprt);
                break;
        default:
                dprintk("svcrdma: Unexpected event on DTO endpoint %p, "
                        "event = %s (%d)\n", cma_id,
                        rdma_event_msg(event->event), event->event);
                break;
        }

        return 0;
}
/*
 * Create a listening RDMA service endpoint.
 */
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
                                        struct net *net,
                                        struct sockaddr *sa, int salen,
                                        int flags)
{
        struct rdma_cm_id *listen_id;
        struct svcxprt_rdma *cma_xprt;
        int ret;

        dprintk("svcrdma: Creating RDMA listener\n");
        if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) {
                dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family);
                return ERR_PTR(-EAFNOSUPPORT);
        }
        cma_xprt = svc_rdma_create_xprt(serv, net);
        if (!cma_xprt)
                return ERR_PTR(-ENOMEM);
        set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
        strcpy(cma_xprt->sc_xprt.xpt_remotebuf, "listener");

        listen_id = rdma_create_id(net, rdma_listen_handler, cma_xprt,
                                   RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(listen_id)) {
                ret = PTR_ERR(listen_id);
                dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
                goto err0;
        }

        /* Allow both IPv4 and IPv6 sockets to bind a single port
         * at the same time.
         */
#if IS_ENABLED(CONFIG_IPV6)
        ret = rdma_set_afonly(listen_id, 1);
        if (ret) {
                dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret);
                goto err1;
        }
#endif
        ret = rdma_bind_addr(listen_id, sa);
        if (ret) {
                dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
                goto err1;
        }
        cma_xprt->sc_cm_id = listen_id;

        ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
        if (ret) {
                dprintk("svcrdma: rdma_listen failed = %d\n", ret);
                goto err1;
        }

        /*
         * We need to use the address from the cm_id in case the
         * caller specified 0 for the port number.
         */
        sa = (struct sockaddr *)&cma_xprt->sc_cm_id->route.addr.src_addr;
        svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);

        return &cma_xprt->sc_xprt;

 err1:
        rdma_destroy_id(listen_id);
 err0:
        kfree(cma_xprt);
        return ERR_PTR(ret);
}
/*
 * This is the xpo_recvfrom function for listening endpoints. Its
 * purpose is to accept incoming connections. The CMA callback handler
 * has already created a new transport and attached it to the new CMA
 * ID.
 *
 * There is a queue of pending connections hung on the listening
 * transport. This queue contains the new svc_xprt structure. This
 * function takes svc_xprt structures off the accept_q and completes
 * the connection.
 */
static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
{
        struct svcxprt_rdma *listen_rdma;
        struct svcxprt_rdma *newxprt = NULL;
        struct rdma_conn_param conn_param;
        struct rpcrdma_connect_private pmsg;
        struct ib_qp_init_attr qp_attr;
        unsigned int ctxts, rq_depth;
        struct ib_device *dev;
        struct sockaddr *sap;
        int ret = 0;

        listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
        clear_bit(XPT_CONN, &xprt->xpt_flags);
        /* Get the next entry off the accept list */
        spin_lock_bh(&listen_rdma->sc_lock);
        if (!list_empty(&listen_rdma->sc_accept_q)) {
                newxprt = list_entry(listen_rdma->sc_accept_q.next,
                                     struct svcxprt_rdma, sc_accept_q);
                list_del_init(&newxprt->sc_accept_q);
        }
        if (!list_empty(&listen_rdma->sc_accept_q))
                set_bit(XPT_CONN, &listen_rdma->sc_xprt.xpt_flags);
        spin_unlock_bh(&listen_rdma->sc_lock);
        if (!newxprt)
                return NULL;

        dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n",
                newxprt, newxprt->sc_cm_id);

        dev = newxprt->sc_cm_id->device;
        newxprt->sc_port_num = newxprt->sc_cm_id->port_num;

        /* Qualify the transport resource defaults with the
         * capabilities of this particular device */
        /* Transport header, head iovec, tail iovec */
        newxprt->sc_max_send_sges = 3;
        /* Add one SGE per page list entry */
        newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
        if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
                newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
        newxprt->sc_max_req_size = svcrdma_max_req_size;
        newxprt->sc_max_requests = svcrdma_max_requests;
        newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
        rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests;
        if (rq_depth > dev->attrs.max_qp_wr) {
                pr_warn("svcrdma: reducing receive depth to %d\n",
                        dev->attrs.max_qp_wr);
                rq_depth = dev->attrs.max_qp_wr;
                newxprt->sc_max_requests = rq_depth - 2;
                newxprt->sc_max_bc_requests = 2;
        }
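        /* Size the Send Queue: one slot per receive credit plus the
         * rdma_rw contexts used for RDMA Read and Write chunks, capped
         * by the device's max_qp_wr.
         */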
        newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
        ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
        ctxts *= newxprt->sc_max_requests;
        newxprt->sc_sq_depth = rq_depth + ctxts;
        if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
                pr_warn("svcrdma: reducing send depth to %d\n",
                        dev->attrs.max_qp_wr);
                newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
        }
        atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);

        newxprt->sc_pd = ib_alloc_pd(dev, 0);
        if (IS_ERR(newxprt->sc_pd)) {
                dprintk("svcrdma: error creating PD for connect request\n");
                goto errout;
        }
        newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
                                        0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_sq_cq)) {
                dprintk("svcrdma: error creating SQ CQ for connect request\n");
                goto errout;
        }
        newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, rq_depth,
                                        0, IB_POLL_WORKQUEUE);
        if (IS_ERR(newxprt->sc_rq_cq)) {
                dprintk("svcrdma: error creating RQ CQ for connect request\n");
                goto errout;
        }
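        /* Build the QP attributes from the limits computed above and
         * ask the RDMA core to create a queue pair on the new cm_id.
         */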
        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.event_handler = qp_event_handler;
        qp_attr.qp_context = &newxprt->sc_xprt;
        qp_attr.port_num = newxprt->sc_port_num;
        qp_attr.cap.max_rdma_ctxs = ctxts;
        qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
        qp_attr.cap.max_recv_wr = rq_depth;
        qp_attr.cap.max_send_sge = newxprt->sc_max_send_sges;
        qp_attr.cap.max_recv_sge = 1;
        qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_attr.qp_type = IB_QPT_RC;
        qp_attr.send_cq = newxprt->sc_sq_cq;
        qp_attr.recv_cq = newxprt->sc_rq_cq;
        dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
                newxprt->sc_cm_id, newxprt->sc_pd);
        dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
                qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
        dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
                qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);

        ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
        if (ret) {
                dprintk("svcrdma: failed to create QP, ret=%d\n", ret);
                goto errout;
        }
        newxprt->sc_qp = newxprt->sc_cm_id->qp;
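        /* Send With Invalidate depends on the device's memory
         * management extensions; without them, fall back to plain
         * Send. Only iWARP and IB/RoCE devices are supported.
         */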
        if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
                newxprt->sc_snd_w_inv = false;
        if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) &&
            !rdma_ib_or_roce(dev, newxprt->sc_port_num))
                goto errout;

        if (!svc_rdma_post_recvs(newxprt))
                goto errout;

        /* Swap out the handler */
        newxprt->sc_cm_id->event_handler = rdma_cma_handler;

        /* Construct RDMA-CM private message */
        pmsg.cp_magic = rpcrdma_cmp_magic;
        pmsg.cp_version = RPCRDMA_CMP_VERSION;
        pmsg.cp_flags = 0;
        pmsg.cp_send_size = pmsg.cp_recv_size =
                rpcrdma_encode_buffer_size(newxprt->sc_max_req_size);

        /* Accept Connection */
        set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
        memset(&conn_param, 0, sizeof conn_param);
        conn_param.responder_resources = 0;
        conn_param.initiator_depth = min_t(int, newxprt->sc_ord,
                                           dev->attrs.max_qp_init_rd_atom);
        if (!conn_param.initiator_depth) {
                dprintk("svcrdma: invalid ORD setting\n");
                ret = -EINVAL;
                goto errout;
        }
        conn_param.private_data = &pmsg;
        conn_param.private_data_len = sizeof(pmsg);
        ret = rdma_accept(newxprt->sc_cm_id, &conn_param);
        if (ret)
                goto errout;

        dprintk("svcrdma: new connection %p accepted:\n", newxprt);
        sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
        dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
        sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
        dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
        dprintk(" max_sge : %d\n", newxprt->sc_max_send_sges);
        dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
        dprintk(" rdma_rw_ctxs : %d\n", ctxts);
        dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
        dprintk(" ord : %d\n", conn_param.initiator_depth);

        trace_svcrdma_xprt_accept(&newxprt->sc_xprt);
        return &newxprt->sc_xprt;

 errout:
        dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
        trace_svcrdma_xprt_fail(&newxprt->sc_xprt);
        /* Take a reference in case the DTO handler runs */
        svc_xprt_get(&newxprt->sc_xprt);
        if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
                ib_destroy_qp(newxprt->sc_qp);
        rdma_destroy_id(newxprt->sc_cm_id);
        /* This call to put will destroy the transport */
        svc_xprt_put(&newxprt->sc_xprt);
        return NULL;
}
/*
 * When connected, an svc_xprt has at least two references:
 *
 * - A reference held by the cm_id between the ESTABLISHED and
 *   DISCONNECTED events. If the remote peer disconnected first, this
 *   reference could be gone.
 *
 * - A reference held by the svc_recv code that called this function
 *   as part of close processing.
 *
 * At a minimum one reference should still be held.
 */
static void svc_rdma_detach(struct svc_xprt *xprt)
{
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);

        /* Disconnect and flush posted WQE */
        rdma_disconnect(rdma->sc_cm_id);
}
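/* Work item that tears down the transport: drain and destroy the QP,
 * free the CQs and PD, release the per-connection contexts, destroy
 * the cm_id, and finally free the svcxprt_rdma itself.
 */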
static void __svc_rdma_free(struct work_struct *work)
{
        struct svcxprt_rdma *rdma =
                container_of(work, struct svcxprt_rdma, sc_work);
        struct svc_xprt *xprt = &rdma->sc_xprt;

        trace_svcrdma_xprt_free(xprt);

        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
                ib_drain_qp(rdma->sc_qp);

        /* We should only be called from kref_put */
        if (kref_read(&xprt->xpt_ref) != 0)
                pr_err("svcrdma: sc_xprt still in use? (%d)\n",
                       kref_read(&xprt->xpt_ref));

        svc_rdma_flush_recv_queues(rdma);

        /* Final put of backchannel client transport */
        if (xprt->xpt_bc_xprt) {
                xprt_put(xprt->xpt_bc_xprt);
                xprt->xpt_bc_xprt = NULL;
        }

        svc_rdma_destroy_rw_ctxts(rdma);
        svc_rdma_send_ctxts_destroy(rdma);
        svc_rdma_recv_ctxts_destroy(rdma);

        /* Destroy the QP if present (not a listener) */
        if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
                ib_destroy_qp(rdma->sc_qp);

        if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
                ib_free_cq(rdma->sc_sq_cq);

        if (rdma->sc_rq_cq && !IS_ERR(rdma->sc_rq_cq))
                ib_free_cq(rdma->sc_rq_cq);

        if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
                ib_dealloc_pd(rdma->sc_pd);

        /* Destroy the CM ID */
        rdma_destroy_id(rdma->sc_cm_id);

        kfree(rdma);
}
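/* xpo_free entry point: defer teardown to a work item so that the
 * resource destruction in __svc_rdma_free, which can sleep, runs in
 * workqueue context.
 */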
static void svc_rdma_free(struct svc_xprt *xprt)
{
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);

        INIT_WORK(&rdma->sc_work, __svc_rdma_free);
        queue_work(svc_rdma_wq, &rdma->sc_work);
}
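/* xpo_has_wspace: senders that find the Send Queue full sleep on
 * sc_send_wait, so an active wait queue means there is currently no
 * space for another reply.
 */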
static int svc_rdma_has_wspace(struct svc_xprt *xprt)
{
        struct svcxprt_rdma *rdma =
                container_of(xprt, struct svcxprt_rdma, sc_xprt);

        /*
         * If there are already waiters on the SQ,
         * return false.
         */
        if (waitqueue_active(&rdma->sc_send_wait))
                return 0;

        /* Otherwise return true. */
        return 1;
}
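/* Mark the request as having arrived on a "secure" port so that
 * exports requiring a secure port will accept RPC-over-RDMA requests.
 */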
static void svc_rdma_secure_port(struct svc_rqst *rqstp)
{
        set_bit(RQ_SECURE, &rqstp->rq_flags);
}
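/* svcrdma needs no transport-specific action when a temporary
 * transport is killed, so this method is a no-op.
 */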
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}