  1. /*
  2. * Copyright (c) 2005-2012 Intel Corporation. All rights reserved.
  3. *
  4. * This software is available to you under a choice of one of two
  5. * licenses. You may choose to be licensed under the terms of the GNU
  6. * General Public License (GPL) Version 2, available from the file
  7. * COPYING in the main directory of this source tree, or the
  8. * OpenIB.org BSD license below:
  9. *
  10. * Redistribution and use in source and binary forms, with or
  11. * without modification, are permitted provided that the following
  12. * conditions are met:
  13. *
  14. * - Redistributions of source code must retain the above
  15. * copyright notice, this list of conditions and the following
  16. * disclaimer.
  17. *
  18. * - Redistributions in binary form must reproduce the above
  19. * copyright notice, this list of conditions and the following
  20. * disclaimer in the documentation and/or other materials
  21. * provided with the distribution.
  22. *
  23. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  24. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  25. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  26. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  27. * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  28. * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  29. * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  30. * SOFTWARE.
  31. */
  32.  
  33. #if HAVE_CONFIG_H
  34. # include <config.h>
  35. #endif /* HAVE_CONFIG_H */
  36.  
  37. #include <user_global.h>
  38.  
  39. #include <stdlib.h>
  40. #include <string.h>
  41. //#include <user_global.h>      /* duplicate include */
  42. #include <stdio.h>
  43. #include <fcntl.h>
  44. #include <errno.h>
  45. #include <stdint.h>
  46. //#include <poll.h>
  47. //#include <unistd.h>
  48. #include <pthread.h>
  49. //#include <endian.h>
  50. //#include <byteswap.h>
  51. #include <stddef.h>
  52. #include <netdb.h>
  53. #include <stats.h>
  54.  
  55. #include "cma.h"
  56. #include <infiniband/driver.h>
  57. #include <infiniband/marshall.h>
  58. #include <rdma/rdma_cma.h>
  59. #include <rdma/rdma_cma_abi.h>
  60. #include <rdma/rdma_verbs.h>
  61. #include <infiniband/ib.h>
  62. #include <ns_file.h>
  63. #include <infiniband/arch.h>
  64. //#include <user_global.h>      /* duplicate include */
  65. #include <trace.h>
  66.  
  67. extern int nsadi_enabled;
  68. extern int NSKVCore_getModel(void) ;
  69. extern int NSKVCore_modelToSystemClass_(int a) ;
  70.  
  71. #define CMA_INIT_CMD(req, req_size, op) \
  72. do { \
  73. memset(req, 0, req_size); \
  74. (req)->cmd = UCMA_CMD_##op; \
  75. (req)->in = req_size - sizeof(struct ucma_abi_cmd_hdr); \
  76. } while (0)
  77.  
  78. #define CMA_INIT_CMD_RESP(req, req_size, op, resp, resp_size) \
  79. do { \
  80. CMA_INIT_CMD(req, req_size, op); \
  81. (req)->out = resp_size; \
  82. (req)->response = (uintptr_t) (resp); \
  83. } while (0)
  84.  
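/*
 * The two macros above prepare the command header that is written to the
 * rdma_cm character device.  A minimal sketch of how they expand, using the
 * CREATE_ID request issued later in rdma_create_id2() (variable names mirror
 * that function):
 *
 *	struct ucma_abi_create_id cmd;
 *	struct ucma_abi_create_id_resp resp;
 *
 *	CMA_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_ID, &resp, sizeof resp);
 *	// is equivalent to:
 *	//	memset(&cmd, 0, sizeof cmd);
 *	//	cmd.cmd      = UCMA_CMD_CREATE_ID;
 *	//	cmd.in       = sizeof cmd - sizeof(struct ucma_abi_cmd_hdr);
 *	//	cmd.out      = sizeof resp;
 *	//	cmd.response = (uintptr_t) &resp;
 */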
  85. struct cma_device {
  86. struct ibv_context *verbs;
  87. struct ibv_pd *pd;
  88. uint64_t guid;
  89. int port_cnt;
  90. int refcnt;
  91. int max_qpsize;
  92. uint8_t max_initiator_depth;
  93. uint8_t max_responder_resources;
  94. };
  95.  
  96. struct cma_id_private {
  97. struct rdma_cm_id id;
  98. struct cma_device *cma_dev;
  99. void *connect;
  100. size_t connect_len;
  101. int events_completed;
  102. int connect_error;
  103. int sync;
  104. pthread_cond_t cond;
  105. pthread_mutex_t mut;
  106. uint32_t handle;
  107. struct cma_multicast *mc_list;
  108. struct ibv_qp_init_attr *qp_init_attr;
  109. uint8_t initiator_depth;
  110. uint8_t responder_resources;
  111. };
  112.  
  113. struct cma_multicast {
  114. struct cma_multicast *next;
  115. struct cma_id_private *id_priv;
  116. void *context;
  117. int events_completed;
  118. pthread_cond_t cond;
  119. uint32_t handle;
  120. union ibv_gid mgid;
  121. uint16_t mlid;
  122. struct sockaddr_storage addr;
  123. };
  124.  
  125. struct cma_event {
  126. struct rdma_cm_event event;
  127. uint8_t private_data[RDMA_MAX_PRIVATE_DATA];
  128. struct cma_id_private *id_priv;
  129. struct cma_multicast *mc;
  130. };
  131.  
  132. static struct cma_device *cma_dev_array;
  133. static int cma_dev_cnt;
  134. static pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
  135. static int abi_ver = RDMA_USER_CM_MAX_ABI_VERSION;
  136. int af_ib_support;
  137.  
  138. static void ucma_cleanup(void)
  139. {
  140. ucma_ib_cleanup();
  141.  
  142. if (cma_dev_cnt) {
  143. while (cma_dev_cnt--) {
  144. if (cma_dev_array[cma_dev_cnt].refcnt)
  145. ibv_dealloc_pd(cma_dev_array[cma_dev_cnt].pd);
  146. ibv_close_device(cma_dev_array[cma_dev_cnt].verbs);
  147. }
  148.  
  149. free(cma_dev_array);
  150. cma_dev_cnt = 0;
  151. }
  152. }
  153.  
  154. static int check_abi_version(void)
  155. {
  156. char value[8];
  157.  
  158. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(CHECK_ABI_VER);
  159.  
  160. if ((ibv_read_sysfs_file(ibv_get_sysfs_path(),
  161. "misc/rdma_cm/abi_version",
  162. value, sizeof value) <= 0) &&
  163. (ibv_read_sysfs_file(ibv_get_sysfs_path(),
  164. "infiniband_ucma/abi_version",
  165. value, sizeof value) <= 0)) {
  166. /*
  167. * Older versions of Linux do not have class/misc. To support
  168. * backports, assume the most recent version of the ABI. If
  169. * we're wrong, we'll simply fail later when calling the ABI.
  170. */
  171. //fprintf(stderr, PFX "Warning: couldn't read ABI version.\n");
  172. //fprintf(stderr, PFX "Warning: assuming: %d\n", abi_ver);
  173. uib_trace(CHECK_ABI_VER, READ_ABI_VERSION, TRACE_LEVEL_ONE, UIB_NO_QPN,
  174. -1, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  175. return 0;
  176. }
  177.  
  178. abi_ver = strtol(value, NULL, 10);
  179. if (abi_ver < RDMA_USER_CM_MIN_ABI_VERSION ||
  180. abi_ver > RDMA_USER_CM_MAX_ABI_VERSION) {
  181. //fprintf(stderr, PFX "Fatal: kernel ABI version %d "
  182. // "doesn't match library version %d.\n",
  183. // abi_ver, RDMA_USER_CM_MAX_ABI_VERSION);
  184. uib_trace(CHECK_ABI_VER, ABI_VERSION, TRACE_LEVEL_ONE, UIB_NO_QPN,
  185. -1, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  186. return -1;
  187. }
  188. return 0;
  189. }
  190.  
  191. /*
  192. * This function is called holding the mutex lock.
  193. * cma_dev_cnt must be set before calling this function to
  194. * ensure that the lock is not acquired recursively. AF_IB support is detected by attempting to bind a wildcard AF_IB address; af_ib_support stays set only if that bind succeeds.
  195. */
  196. static void ucma_set_af_ib_support(void)
  197. {
  198. struct rdma_cm_id *id;
  199. struct sockaddr_ib sib;
  200. int ret;
  201.  
  202. ret = rdma_create_id(NULL, &id, NULL, RDMA_PS_IB);
  203. if (ret)
  204. return;
  205.  
  206. memset(&sib, 0, sizeof sib);
  207. sib.sib_family = AF_IB;
  208. sib.sib_sid = htonll(RDMA_IB_IP_PS_TCP);
  209. sib.sib_sid_mask = htonll(RDMA_IB_IP_PS_MASK);
  210. af_ib_support = 1;
  211. ret = rdma_bind_addr(id, (struct sockaddr *) &sib);
  212. af_ib_support = !ret;
  213.  
  214. rdma_destroy_id(id);
  215. }
  216.  
  217. int ucma_init(void)
  218. {
  219. struct ibv_device **dev_list = NULL;
  220. struct cma_device *cma_dev;
  221. struct ibv_device_attr attr;
  222. int i, ret, dev_cnt;
  223.  
  224. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(UCMA_INIT);
  225.  
  226.  
  227. /* Quick check without lock to see if we're already initialized */
  228. if (cma_dev_cnt)
  229. return 0;
  230.  
  231. pthread_mutex_lock(&mut);
  232. if (cma_dev_cnt) {
  233. pthread_mutex_unlock(&mut);
  234. return 0;
  235. }
  236.  
  237. ret = check_abi_version();
  238. if (ret) {
  239. uib_trace(UCMA_INIT, SOCKET_CREATE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  240. ret, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  241. goto err1;
  242. }
  243.  
  244. dev_list = ibv_get_device_list(&dev_cnt);
  245. if (!dev_list) {
  246. // fprintf(stderr, PFX "Fatal: unable to get RDMA device list\n");
  247. ret = ERR(ENODEV);
  248. uib_trace(UCMA_INIT, NO_DEVICES, TRACE_LEVEL_ONE, UIB_NO_QPN,
  249. -1, errno, TRACE_ERROR, 1, UIB_NO_TRACE_DATA, NULL);
  250. goto err1;
  251. }
  252.  
  253. if (!dev_cnt) {
  254. //fprintf(stderr, PFX "Fatal: no RDMA devices found\n");
  255. ret = ERR(ENODEV);
  256. uib_trace(UCMA_INIT, NO_DEVICES, TRACE_LEVEL_ONE, UIB_NO_QPN,
  257. -1, errno, TRACE_ERROR, 2, UIB_NO_TRACE_DATA, NULL);
  258. goto err2;
  259. }
  260.  
  261. cma_dev_array = calloc(dev_cnt, sizeof *cma_dev);
  262. if (!cma_dev_array) {
  263. ret = ERR(ENOMEM);
  264. uib_trace(UCMA_INIT, ALLOC_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  265. -1, errno, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  266. goto err2;
  267. }
  268.  
  269. for (i = 0; dev_list[i];) {
  270. cma_dev = &cma_dev_array[i];
  271.  
  272. cma_dev->guid = ibv_get_device_guid(dev_list[i]);
  273. cma_dev->verbs = ibv_open_device(dev_list[i]);
  274. if (!cma_dev->verbs) {
  275. //fprintf(stderr, PFX "Fatal: unable to open RDMA device\n");
  276. ret = ERR(ENODEV);
  277. uib_trace(UCMA_INIT, DEVICE_OPEN_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  278. -1, errno, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  279. goto err3;
  280. }
  281.  
  282. i++;
  283. ret = ibv_query_device(cma_dev->verbs, &attr);
  284. if (ret) {
  285. //fprintf(stderr, PFX "Fatal: unable to query RDMA device\n");
  286. ret = ERR(ret);
  287. uib_trace(UCMA_INIT, QUERY_DEVICE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  288. -1, errno, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  289. goto err3;
  290. }
  291.  
  292. cma_dev->port_cnt = attr.phys_port_cnt;
  293. cma_dev->max_qpsize = attr.max_qp_wr;
  294. cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;
  295. cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;
  296. }
  297.  
  298. cma_dev_cnt = dev_cnt;
  299. ucma_set_af_ib_support();
  300. pthread_mutex_unlock(&mut);
  301. ibv_free_device_list(dev_list);
  302. return 0;
  303.  
  304. err3:
  305. while (i--)
  306. ibv_close_device(cma_dev_array[i].verbs);
  307. free(cma_dev_array);
  308. err2:
  309. ibv_free_device_list(dev_list);
  310. err1:
  311. pthread_mutex_unlock(&mut);
  312. return ret;
  313. }
  314.  
  315. struct ibv_context **rdma_get_devices(int *num_devices)
  316. {
  317. struct ibv_context **devs = NULL;
  318. int i;
  319.  
  320. if (ucma_init())
  321. goto out;
  322.  
  323. devs = malloc(sizeof *devs * (cma_dev_cnt + 1));
  324. if (!devs)
  325. goto out;
  326.  
  327. for (i = 0; i < cma_dev_cnt; i++)
  328. devs[i] = cma_dev_array[i].verbs;
  329. devs[i] = NULL;
  330. out:
  331. if (num_devices)
  332. *num_devices = devs ? cma_dev_cnt : 0;
  333. return devs;
  334. }
  335.  
  336. void rdma_free_devices(struct ibv_context **list)
  337. {
  338. free(list);
  339. }
  340.  
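/*
 * Usage sketch for rdma_get_devices()/rdma_free_devices() above
 * (illustrative only; error handling trimmed):
 *
 *	struct ibv_context **list;
 *	int i, num;
 *
 *	list = rdma_get_devices(&num);
 *	if (!list)
 *		return;
 *	for (i = 0; i < num; i++)
 *		printf("device %d: %s\n", i,
 *		       ibv_get_device_name(list[i]->device));
 *	rdma_free_devices(list);
 */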
  341. void /*__attribute__((destructor))*/ rdma_cma_fini(void)
  342. {
  343. ucma_cleanup();
  344. }
  345.  
  346. struct rdma_event_channel *rdma_create_event_channel(void)
  347. {
  348. struct rdma_event_channel *channel;
  349.  
  350. if (ucma_init())
  351. return NULL;
  352.  
  353. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(CREATE_EVENT_CHAN);
  354. channel = malloc(sizeof *channel);
  355. if (!channel)
  356. return NULL;
  357.  
  358. channel->fd = ns_open("/dev/infiniband/rdma_cm",strlen("/dev/infiniband/rdma_cm"), 0xdeadbeef);
  359. if (channel->fd < 0) {
  360. //fprintf(stderr, PFX "Fatal: unable to open /dev/infiniband/rdma_cm\n");
  361. uib_trace(CREATE_EVENT_CHAN, OPEN_RDMA_CM, TRACE_LEVEL_ONE, UIB_NO_QPN,
  362. UIB_NO_ECODE, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  363. goto err;
  364. }
  365. return channel;
  366. err:
  367. free(channel);
  368. return NULL;
  369. }
  370.  
  371. void rdma_destroy_event_channel(struct rdma_event_channel *channel)
  372. {
  373. put_unregFile(channel->fd) ;
  374. ns_close(channel->fd);
  375. free(channel);
  376. }
  377.  
  378. static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)
  379. {
  380. struct cma_device *cma_dev;
  381. int i, ret = 0;
  382.  
  383. for (i = 0; i < cma_dev_cnt; i++) {
  384. cma_dev = &cma_dev_array[i];
  385. if (cma_dev->guid == guid)
  386. goto match;
  387. }
  388.  
  389. return ERR(ENODEV);
  390. match:
  391. pthread_mutex_lock(&mut);
  392. if (!cma_dev->refcnt++) {
  393. cma_dev->pd = ibv_alloc_pd(cma_dev_array[i].verbs);
  394. if (!cma_dev->pd) {
  395. cma_dev->refcnt--;
  396. ret = ERR(ENOMEM);
  397. goto out;
  398. }
  399. }
  400. id_priv->cma_dev = cma_dev;
  401. id_priv->id.verbs = cma_dev->verbs;
  402. id_priv->id.pd = cma_dev->pd;
  403. out:
  404. pthread_mutex_unlock(&mut);
  405. return ret;
  406. }
  407.  
  408. static void ucma_put_device(struct cma_device *cma_dev)
  409. {
  410. pthread_mutex_lock(&mut);
  411. if (!--cma_dev->refcnt)
  412. ibv_dealloc_pd(cma_dev->pd);
  413. pthread_mutex_unlock(&mut);
  414. }
  415.  
  416. static void ucma_free_id(struct cma_id_private *id_priv)
  417. {
  418. if (id_priv->cma_dev)
  419. ucma_put_device(id_priv->cma_dev);
  420. pthread_cond_destroy(&id_priv->cond);
  421. pthread_mutex_destroy(&id_priv->mut);
  422. if (id_priv->id.route.path_rec)
  423. free(id_priv->id.route.path_rec);
  424.  
  425. if (id_priv->sync)
  426. rdma_destroy_event_channel(id_priv->id.channel);
  427. if (id_priv->connect_len)
  428. free(id_priv->connect);
  429. free(id_priv);
  430. }
  431.  
  432. static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel,
  433. void *context,
  434. enum rdma_port_space ps,
  435. enum ibv_qp_type qp_type)
  436. {
  437. struct cma_id_private *id_priv;
  438.  
  439. id_priv = calloc(1, sizeof *id_priv);
  440. if (!id_priv)
  441. return NULL;
  442.  
  443. id_priv->id.context = context;
  444. id_priv->id.ps = ps;
  445. id_priv->id.qp_type = qp_type;
  446.  
  447. if (!channel) {
  448. id_priv->id.channel = rdma_create_event_channel();
  449. if (!id_priv->id.channel)
  450. goto err;
  451. id_priv->sync = 1;
  452. } else {
  453. id_priv->id.channel = channel;
  454. }
  455.  
  456. pthread_mutex_init(&id_priv->mut, NULL);
  457. if (pthread_cond_init(&id_priv->cond, NULL))
  458. goto err;
  459.  
  460. return id_priv;
  461.  
  462. err: ucma_free_id(id_priv);
  463. return NULL;
  464. }
  465.  
  466. static int rdma_create_id2(struct rdma_event_channel *channel,
  467. struct rdma_cm_id **id, void *context,
  468. enum rdma_port_space ps, enum ibv_qp_type qp_type)
  469. {
  470. struct ucma_abi_create_id_resp resp;
  471. struct ucma_abi_create_id cmd;
  472. struct cma_id_private *id_priv;
  473. int ret;
  474.  
  475. ret = ucma_init();
  476. if (ret)
  477. return ret;
  478.  
  479. id_priv = ucma_alloc_id(channel, context, ps, qp_type);
  480. if (!id_priv)
  481. return ERR(ENOMEM);
  482.  
  483. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, CREATE_ID, &resp, sizeof resp);
  484. cmd.uid = (uintptr_t) id_priv;
  485. cmd.ps = ps;
  486. cmd.qp_type = qp_type;
  487.  
  488. ret = ns_write(id_priv->id.channel->fd, &cmd, sizeof cmd);
  489. if (ret != sizeof cmd)
  490. goto err;
  491.  
  492. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  493.  
  494. id_priv->handle = resp.id;
  495. *id = &id_priv->id;
  496. return 0;
  497.  
  498. err: ucma_free_id(id_priv);
  499. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  500. }
  501.  
  502. int rdma_create_id(struct rdma_event_channel *channel,
  503. struct rdma_cm_id **id, void *context,
  504. enum rdma_port_space ps)
  505. {
  506. enum ibv_qp_type qp_type;
  507.  
  508. qp_type = (ps == RDMA_PS_IPOIB || ps == RDMA_PS_UDP) ?
  509. IBV_QPT_UD : IBV_QPT_RC;
  510. return rdma_create_id2(channel, id, context, ps, qp_type);
  511. }
  512.  
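/*
 * Usage sketch for the creation path above (a hedged example, not part of
 * this file's build): an application either supplies its own event channel
 * or passes NULL to get the synchronous behavior set up in ucma_alloc_id().
 *
 *	struct rdma_event_channel *ec;
 *	struct rdma_cm_id *id;
 *
 *	ec = rdma_create_event_channel();
 *	if (!ec)
 *		return -1;
 *	if (rdma_create_id(ec, &id, NULL, RDMA_PS_TCP)) {
 *		rdma_destroy_event_channel(ec);
 *		return -1;
 *	}
 *	...
 *	rdma_destroy_id(id);
 *	rdma_destroy_event_channel(ec);
 */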
  513. static int ucma_destroy_kern_id(int fd, uint32_t handle)
  514. {
  515. struct ucma_abi_destroy_id_resp resp;
  516. struct ucma_abi_destroy_id cmd;
  517. int ret;
  518.  
  519. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, DESTROY_ID, &resp, sizeof resp);
  520. cmd.id = handle;
  521.  
  522. ret = ns_write(fd, &cmd, sizeof cmd);
  523. if (ret != sizeof cmd){
  524. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  525. }
  526.  
  527. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  528.  
  529. return resp.events_reported;
  530. }
  531.  
  532. int rdma_destroy_id(struct rdma_cm_id *id)
  533. {
  534. struct cma_id_private *id_priv;
  535. int ret;
  536.  
  537. id_priv = container_of(id, struct cma_id_private, id);
  538. ret = ucma_destroy_kern_id(id->channel->fd, id_priv->handle);
  539. if (ret < 0)
  540. return ret;
  541.  
  542. if (id_priv->id.event)
  543. rdma_ack_cm_event(id_priv->id.event);
  544.  
  545. pthread_mutex_lock(&id_priv->mut);
  546. while (id_priv->events_completed < ret)
  547. pthread_cond_wait(&id_priv->cond, &id_priv->mut);
  548. pthread_mutex_unlock(&id_priv->mut);
  549.  
  550. ucma_free_id(id_priv);
  551. return 0;
  552. }
  553.  
  554. int ucma_addrlen(struct sockaddr *addr)
  555. {
  556. if (!addr)
  557. return 0;
  558.  
  559. switch (addr->sa_family) {
  560. case PF_INET:
  561. return sizeof(struct sockaddr_in);
  562. case PF_INET6:
  563. return sizeof(struct sockaddr_in6);
  564. case PF_IB:
  565. return af_ib_support ? sizeof(struct sockaddr_ib) : 0;
  566. default:
  567. return 0;
  568. }
  569. }
  570.  
  571. static int ucma_query_addr(struct rdma_cm_id *id)
  572. {
  573. struct ucma_abi_query_addr_resp resp;
  574. struct ucma_abi_query cmd;
  575. struct cma_id_private *id_priv;
  576. int ret;
  577.  
  578. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, &resp, sizeof resp);
  579. id_priv = container_of(id, struct cma_id_private, id);
  580. cmd.id = id_priv->handle;
  581. cmd.option = UCMA_QUERY_ADDR;
  582.  
  583. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  584. if (ret != sizeof cmd)
  585. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  586.  
  587. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  588.  
  589. memcpy(&id->route.addr.p1.src_addr, &resp.src_addr, resp.src_size);
  590. memcpy(&id->route.addr.p2.dst_addr, &resp.dst_addr, resp.dst_size);
  591.  
  592. if (!id_priv->cma_dev && resp.node_guid) {
  593. ret = ucma_get_device(id_priv, resp.node_guid);
  594. if (ret)
  595. return ret;
  596. id->port_num = resp.port_num;
  597. id->route.addr.addr.ibaddr.pkey = resp.pkey;
  598. }
  599.  
  600. return 0;
  601. }
  602.  
  603. static int ucma_query_gid(struct rdma_cm_id *id)
  604. {
  605. struct ucma_abi_query_addr_resp resp;
  606. struct ucma_abi_query cmd;
  607. struct cma_id_private *id_priv;
  608. struct sockaddr_ib *sib;
  609. int ret;
  610.  
  611. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, &resp, sizeof resp);
  612. id_priv = container_of(id, struct cma_id_private, id);
  613. cmd.id = id_priv->handle;
  614. cmd.option = UCMA_QUERY_GID;
  615.  
  616. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  617. if (ret != sizeof cmd)
  618. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  619.  
  620. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  621.  
  622. sib = (struct sockaddr_ib *) &resp.src_addr;
  623. memcpy(id->route.addr.addr.ibaddr.sgid.raw, sib->sib_addr.sib_raw,
  624. sizeof id->route.addr.addr.ibaddr.sgid);
  625.  
  626. sib = (struct sockaddr_ib *) &resp.dst_addr;
  627. memcpy(id->route.addr.addr.ibaddr.dgid.raw, sib->sib_addr.sib_raw,
  628. sizeof id->route.addr.addr.ibaddr.dgid);
  629.  
  630. return 0;
  631. }
  632.  
  633. static void ucma_convert_path(struct ibv_path_data *path_data,
  634. struct ibv_sa_path_rec *sa_path)
  635. {
  636. uint32_t fl_hop;
  637.  
  638. sa_path->dgid = path_data->path.dgid;
  639. sa_path->sgid = path_data->path.sgid;
  640. sa_path->dlid = path_data->path.dlid;
  641. sa_path->slid = path_data->path.slid;
  642. sa_path->raw_traffic = 0;
  643.  
  644. fl_hop = ntohl(path_data->path.flowlabel_hoplimit); /* flow label in the upper bits, hop limit in the low byte */
  645. sa_path->flow_label = htonl(fl_hop >> 8);
  646. sa_path->hop_limit = (uint8_t) fl_hop;
  647.  
  648. sa_path->traffic_class = path_data->path.tclass;
  649. sa_path->reversible = path_data->path.reversible_numpath >> 7;
  650. sa_path->numb_path = 1;
  651. sa_path->pkey = path_data->path.pkey;
  652. sa_path->sl = ntohs(path_data->path.qosclass_sl) & 0xF;
  653. sa_path->mtu_selector = 1;
  654. sa_path->mtu = path_data->path.mtu & 0x1F;
  655. sa_path->rate_selector = 1;
  656. sa_path->rate = path_data->path.rate & 0x1F;
  657. sa_path->packet_life_time_selector = 1;
  658. sa_path->packet_life_time = path_data->path.packetlifetime & 0x1F;
  659.  
  660. sa_path->preference = (uint8_t) path_data->flags;
  661. }
  662.  
  663. static int ucma_query_path(struct rdma_cm_id *id)
  664. {
  665. struct ucma_abi_query_path_resp *resp;
  666. struct ucma_abi_query cmd;
  667. struct cma_id_private *id_priv;
  668. int ret, i, size;
  669.  
  670. size = sizeof(*resp) + sizeof(struct ibv_path_data) * 6;
  671. resp = alloca(size);
  672. if (!resp)
  673. return ERR(ENOMEM);
  674.  
  675. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY, resp, size);
  676. id_priv = container_of(id, struct cma_id_private, id);
  677. cmd.id = id_priv->handle;
  678. cmd.option = UCMA_QUERY_PATH;
  679.  
  680. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  681. if (ret != sizeof cmd)
  682. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  683.  
  684. VALGRIND_MAKE_MEM_DEFINED(resp, size);
  685.  
  686. if (resp->num_paths) {
  687. id->route.path_rec = malloc(sizeof(*id->route.path_rec) *
  688. resp->num_paths);
  689. if (!id->route.path_rec)
  690. return ERR(ENOMEM);
  691.  
  692. id->route.num_paths = resp->num_paths;
  693. for (i = 0; i < resp->num_paths; i++)
  694. ucma_convert_path(&resp->path_data[i], &id->route.path_rec[i]);
  695. }
  696.  
  697. return 0;
  698. }
  699.  
  700. static int ucma_query_route(struct rdma_cm_id *id)
  701. {
  702. struct ucma_abi_query_route_resp resp;
  703. struct ucma_abi_query cmd;
  704. struct cma_id_private *id_priv;
  705. int ret, i;
  706.  
  707. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, QUERY_ROUTE, &resp, sizeof resp);
  708. id_priv = container_of(id, struct cma_id_private, id);
  709. cmd.id = id_priv->handle;
  710.  
  711. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  712. if (ret != sizeof cmd)
  713. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  714.  
  715. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  716.  
  717. if (resp.num_paths) {
  718. id->route.path_rec = malloc(sizeof *id->route.path_rec *
  719. resp.num_paths);
  720. if (!id->route.path_rec)
  721. return ERR(ENOMEM);
  722.  
  723. id->route.num_paths = resp.num_paths;
  724. for (i = 0; i < resp.num_paths; i++)
  725. ibv_copy_path_rec_from_kern(&id->route.path_rec[i],
  726. &resp.ib_route[i]);
  727. }
  728.  
  729. memcpy(id->route.addr.addr.ibaddr.sgid.raw, resp.ib_route[0].sgid,
  730. sizeof id->route.addr.addr.ibaddr.sgid);
  731. memcpy(id->route.addr.addr.ibaddr.dgid.raw, resp.ib_route[0].dgid,
  732. sizeof id->route.addr.addr.ibaddr.dgid);
  733. id->route.addr.addr.ibaddr.pkey = resp.ib_route[0].pkey;
  734. memcpy(&id->route.addr.p1.src_addr, &resp.src_addr,
  735. sizeof resp.src_addr);
  736. memcpy(&id->route.addr.p2.dst_addr, &resp.dst_addr,
  737. sizeof resp.dst_addr);
  738.  
  739. if (!id_priv->cma_dev && resp.node_guid) {
  740. ret = ucma_get_device(id_priv, resp.node_guid);
  741. if (ret)
  742. return ret;
  743. id_priv->id.port_num = resp.port_num;
  744. }
  745.  
  746. return 0;
  747. }
  748.  
  749. static int rdma_bind_addr2(struct rdma_cm_id *id, struct sockaddr *addr,
  750. socklen_t addrlen)
  751. {
  752. struct ucma_abi_bind cmd;
  753. struct cma_id_private *id_priv;
  754. int ret;
  755.  
  756. CMA_INIT_CMD(&cmd, sizeof cmd, BIND);
  757. id_priv = container_of(id, struct cma_id_private, id);
  758. cmd.id = id_priv->handle;
  759. cmd.addr_size = addrlen;
  760. memcpy(&cmd.addr, addr, addrlen);
  761.  
  762. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  763. if (ret != sizeof cmd)
  764. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  765.  
  766. return ucma_query_addr(id);
  767. }
  768.  
  769. int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
  770. {
  771. struct ucma_abi_bind_ip cmd;
  772. struct cma_id_private *id_priv;
  773. int ret, addrlen;
  774.  
  775. addrlen = ucma_addrlen(addr);
  776. if (!addrlen)
  777. return ERR(EINVAL);
  778.  
  779. if (af_ib_support)
  780. return rdma_bind_addr2(id, addr, addrlen);
  781.  
  782. CMA_INIT_CMD(&cmd, sizeof cmd, BIND_IP);
  783. id_priv = container_of(id, struct cma_id_private, id);
  784. cmd.id = id_priv->handle;
  785. memcpy(&cmd.addr, addr, addrlen);
  786.  
  787. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  788. if (ret != sizeof cmd) {
  789. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  790. }
  791.  
  792. return ucma_query_route(id);
  793. }
  794.  
  795. int ucma_complete(struct rdma_cm_id *id)
  796. {
  797. struct cma_id_private *id_priv;
  798. int ret;
  799.  
  800. id_priv = container_of(id, struct cma_id_private, id);
  801. if (!id_priv->sync)
  802. return 0;
  803.  
  804. if (id_priv->id.event) {
  805. rdma_ack_cm_event(id_priv->id.event);
  806. id_priv->id.event = NULL;
  807. }
  808.  
  809. ret = rdma_get_cm_event(id_priv->id.channel, &id_priv->id.event);
  810. if (ret)
  811. return ret;
  812.  
  813. if (id_priv->id.event->status) {
  814. if (id_priv->id.event->event == RDMA_CM_EVENT_REJECTED)
  815. ret = ERR(ECONNREFUSED);
  816. else if (id_priv->id.event->status < 0)
  817. ret = ERR(id_priv->id.event->status); // NonStop change, errors are positive
  818. else
  819. ret = ERR(id_priv->id.event->status); // same as above
  820. }
  821. return ret;
  822. }
  823.  
  824. static int rdma_resolve_addr2(struct rdma_cm_id *id, struct sockaddr *src_addr,
  825. socklen_t src_len, struct sockaddr *dst_addr,
  826. socklen_t dst_len, int timeout_ms)
  827. {
  828. struct ucma_abi_resolve_addr cmd;
  829. struct cma_id_private *id_priv;
  830. int ret;
  831.  
  832. CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_ADDR);
  833. id_priv = container_of(id, struct cma_id_private, id);
  834. cmd.id = id_priv->handle;
  835. if ((cmd.src_size = src_len))
  836. memcpy(&cmd.src_addr, src_addr, src_len);
  837. memcpy(&cmd.dst_addr, dst_addr, dst_len);
  838. cmd.dst_size = dst_len;
  839. cmd.timeout_ms = timeout_ms;
  840.  
  841. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  842. if (ret != sizeof cmd)
  843. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  844.  
  845. memcpy(&id->route.addr.p2.dst_addr, dst_addr, dst_len);
  846. return ucma_complete(id);
  847. }
  848.  
  849. int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
  850. struct sockaddr *dst_addr, int timeout_ms)
  851. {
  852. struct ucma_abi_resolve_ip cmd;
  853. struct cma_id_private *id_priv;
  854. int ret, dst_len, src_len;
  855.  
  856. dst_len = ucma_addrlen(dst_addr);
  857. if (!dst_len)
  858. return ERR(EINVAL);
  859.  
  860. src_len = ucma_addrlen(src_addr);
  861. if (src_addr && !src_len)
  862. return ERR(EINVAL);
  863.  
  864. if (af_ib_support)
  865. return rdma_resolve_addr2(id, src_addr, src_len, dst_addr,
  866. dst_len, timeout_ms);
  867.  
  868. CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_IP);
  869. id_priv = container_of(id, struct cma_id_private, id);
  870. cmd.id = id_priv->handle;
  871. if (src_addr)
  872. memcpy(&cmd.src_addr, src_addr, src_len);
  873. memcpy(&cmd.dst_addr, dst_addr, dst_len);
  874. cmd.timeout_ms = timeout_ms;
  875.  
  876. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  877. if (ret != sizeof cmd)
  878. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  879.  
  880. memcpy(&id->route.addr.p2.dst_addr, dst_addr, dst_len);
  881. return ucma_complete(id);
  882. }
  883.  
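/*
 * Typical active-side sequence built from the calls in this file (a sketch
 * of the synchronous pattern, i.e. the id was created with a NULL event
 * channel; "dst" is assumed to have been filled in by the caller, the
 * 2000 ms timeouts are arbitrary, and "init_attr" is a populated
 * struct ibv_qp_init_attr):
 *
 *	struct rdma_conn_param param;
 *
 *	if (rdma_resolve_addr(id, NULL, dst, 2000))
 *		return -1;
 *	if (rdma_resolve_route(id, 2000))
 *		return -1;
 *	if (rdma_create_qp(id, NULL, &init_attr))
 *		return -1;
 *
 *	memset(&param, 0, sizeof param);
 *	param.responder_resources = 1;
 *	param.initiator_depth = 1;
 *	param.retry_count = 7;
 *	if (rdma_connect(id, &param))
 *		return -1;
 */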
  884. static int ucma_set_ib_route(struct rdma_cm_id *id)
  885. {
  886. struct rdma_addrinfo hint, *rai;
  887. int ret;
  888.  
  889. memset(&hint, 0, sizeof hint);
  890. hint.ai_flags = RAI_ROUTEONLY;
  891. hint.ai_family = id->route.addr.p1.src_addr.sa_family;
  892. hint.ai_src_len = ucma_addrlen((struct sockaddr *) &id->route.addr.p1.src_addr);
  893. hint.ai_src_addr = &id->route.addr.p1.src_addr;
  894. hint.ai_dst_len = ucma_addrlen((struct sockaddr *) &id->route.addr.p2.dst_addr);
  895. hint.ai_dst_addr = &id->route.addr.p2.dst_addr;
  896.  
  897. ret = rdma_getaddrinfo(NULL, NULL, &hint, &rai);
  898. if (ret)
  899. return ret;
  900.  
  901. if (rai->ai_route_len)
  902. ret = rdma_set_option(id, RDMA_OPTION_IB, RDMA_OPTION_IB_PATH,
  903. rai->ai_route, rai->ai_route_len);
  904. else
  905. ret = -1;
  906.  
  907. rdma_freeaddrinfo(rai);
  908. return ret;
  909. }
  910.  
  911. int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
  912. {
  913. struct ucma_abi_resolve_route cmd;
  914. struct cma_id_private *id_priv;
  915. int ret;
  916.  
  917. id_priv = container_of(id, struct cma_id_private, id);
  918. if (id->verbs->device->transport_type == IBV_TRANSPORT_IB) {
  919. ret = ucma_set_ib_route(id);
  920. if (!ret)
  921. goto out;
  922. }
  923.  
  924. CMA_INIT_CMD(&cmd, sizeof cmd, RESOLVE_ROUTE);
  925. cmd.id = id_priv->handle;
  926. cmd.timeout_ms = timeout_ms;
  927.  
  928. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  929. if (ret != sizeof cmd)
  930. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  931.  
  932. out:
  933. return ucma_complete(id);
  934. }
  935.  
  936. static int ucma_is_ud_qp(enum ibv_qp_type qp_type)
  937. {
  938. return (qp_type == IBV_QPT_UD);
  939. }
  940.  
  941. static int rdma_init_qp_attr(struct rdma_cm_id *id, struct ibv_qp_attr *qp_attr,
  942. int *qp_attr_mask)
  943. {
  944. struct ucma_abi_init_qp_attr cmd;
  945. struct ibv_kern_qp_attr resp;
  946. struct cma_id_private *id_priv;
  947. int ret;
  948.  
  949. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, INIT_QP_ATTR, &resp, sizeof resp);
  950. id_priv = container_of(id, struct cma_id_private, id);
  951. cmd.id = id_priv->handle;
  952. cmd.qp_state = qp_attr->qp_state;
  953.  
  954. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  955. if (ret != sizeof cmd)
  956. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  957.  
  958. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  959.  
  960. ibv_copy_qp_attr_from_kern(qp_attr, &resp);
  961. *qp_attr_mask = resp.qp_attr_mask;
  962. return 0;
  963. }
  964.  
  965. static int ucma_modify_qp_rtr(struct rdma_cm_id *id, uint8_t resp_res)
  966. {
  967. struct ibv_qp_attr qp_attr;
  968. int qp_attr_mask, ret;
  969.  
  970. if (!id->qp)
  971. return ERR(EINVAL);
  972.  
  973. /* Need to update QP attributes from default values. */
  974. qp_attr.qp_state = IBV_QPS_INIT;
  975. ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
  976. if (ret)
  977. return ret;
  978.  
  979. ret = ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask);
  980. if (ret)
  981. return ERR(ret);
  982.  
  983. qp_attr.qp_state = IBV_QPS_RTR;
  984. ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
  985. if (ret)
  986. return ret;
  987.  
  988. if (resp_res != RDMA_MAX_RESP_RES)
  989. qp_attr.max_dest_rd_atomic = resp_res;
  990. return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask));
  991. }
  992.  
  993. static int ucma_modify_qp_rts(struct rdma_cm_id *id, uint8_t init_depth)
  994. {
  995. struct ibv_qp_attr qp_attr;
  996. int qp_attr_mask, ret;
  997.  
  998. qp_attr.qp_state = IBV_QPS_RTS;
  999. ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
  1000. if (ret)
  1001. return ret;
  1002.  
  1003. if (init_depth != RDMA_MAX_INIT_DEPTH)
  1004. qp_attr.max_rd_atomic = init_depth;
  1005. return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, qp_attr_mask));
  1006. }
  1007.  
  1008. static int ucma_modify_qp_sqd(struct rdma_cm_id *id)
  1009. {
  1010. struct ibv_qp_attr qp_attr;
  1011.  
  1012. if (!id->qp)
  1013. return 0;
  1014.  
  1015. qp_attr.qp_state = IBV_QPS_SQD;
  1016. return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE));
  1017. }
  1018.  
  1019. static int ucma_modify_qp_err(struct rdma_cm_id *id)
  1020. {
  1021. struct ibv_qp_attr qp_attr;
  1022.  
  1023. if (!id->qp)
  1024. return 0;
  1025.  
  1026. VALGRIND_MAKE_MEM_DEFINED(&qp_attr, sizeof(qp_attr));
  1027.  
  1028. qp_attr.qp_state = IBV_QPS_ERR;
  1029. return rdma_seterrno(ibv_modify_qp(id->qp, &qp_attr, IBV_QP_STATE));
  1030. }
  1031.  
  1032. static int ucma_find_pkey(struct cma_device *cma_dev, uint8_t port_num,
  1033. uint16_t pkey, uint16_t *pkey_index)
  1034. {
  1035. int ret, i;
  1036. uint16_t chk_pkey;
  1037.  
  1038. for (i = 0, ret = 0; !ret; i++) {
  1039. ret = ibv_query_pkey(cma_dev->verbs, port_num, i, &chk_pkey);
  1040. if (!ret && pkey == chk_pkey) {
  1041. *pkey_index = (uint16_t) i;
  1042. return 0;
  1043. }
  1044. }
  1045. return ERR(EINVAL);
  1046. }
  1047.  
  1048. static int ucma_init_conn_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
  1049. {
  1050. struct ibv_qp_attr qp_attr;
  1051. int ret;
  1052.  
  1053. ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
  1054. id_priv->id.route.addr.addr.ibaddr.pkey,
  1055. &qp_attr.pkey_index);
  1056. if (ret)
  1057. return ret;
  1058.  
  1059. qp_attr.port_num = id_priv->id.port_num;
  1060. qp_attr.qp_state = IBV_QPS_INIT;
  1061. qp_attr.qp_access_flags = 0;
  1062.  
  1063. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_ACCESS_FLAGS |
  1064. IBV_QP_PKEY_INDEX | IBV_QP_PORT);
  1065. return rdma_seterrno(ret);
  1066. }
  1067.  
  1068. static int ucma_init_conn_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
  1069. {
  1070. struct ibv_qp_attr qp_attr;
  1071. int qp_attr_mask, ret;
  1072.  
  1073. if (abi_ver == 3)
  1074. return ucma_init_conn_qp3(id_priv, qp);
  1075.  
  1076. qp_attr.qp_state = IBV_QPS_INIT;
  1077. ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
  1078. if (ret)
  1079. return ret;
  1080.  
  1081. return rdma_seterrno(ibv_modify_qp(qp, &qp_attr, qp_attr_mask));
  1082. }
  1083.  
  1084. static int ucma_init_ud_qp3(struct cma_id_private *id_priv, struct ibv_qp *qp)
  1085. {
  1086. struct ibv_qp_attr qp_attr;
  1087. int ret;
  1088.  
  1089. ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
  1090. id_priv->id.route.addr.addr.ibaddr.pkey,
  1091. &qp_attr.pkey_index);
  1092. if (ret)
  1093. return ret;
  1094.  
  1095. qp_attr.port_num = id_priv->id.port_num;
  1096. qp_attr.qp_state = IBV_QPS_INIT;
  1097. qp_attr.qkey = RDMA_UDP_QKEY;
  1098.  
  1099. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_QKEY |
  1100. IBV_QP_PKEY_INDEX | IBV_QP_PORT);
  1101. if (ret)
  1102. return ERR(ret);
  1103.  
  1104. qp_attr.qp_state = IBV_QPS_RTR;
  1105. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
  1106. if (ret)
  1107. return ERR(ret);
  1108.  
  1109. qp_attr.qp_state = IBV_QPS_RTS;
  1110. qp_attr.sq_psn = 0;
  1111. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
  1112. return rdma_seterrno(ret);
  1113. }
  1114.  
  1115. static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
  1116. {
  1117. struct ibv_qp_attr qp_attr;
  1118. int qp_attr_mask, ret;
  1119.  
  1120. if (abi_ver == 3)
  1121. return ucma_init_ud_qp3(id_priv, qp);
  1122.  
  1123. qp_attr.qp_state = IBV_QPS_INIT;
  1124. ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
  1125. if (ret)
  1126. return ret;
  1127.  
  1128. ret = ibv_modify_qp(qp, &qp_attr, qp_attr_mask);
  1129. if (ret)
  1130. return ERR(ret);
  1131.  
  1132. qp_attr.qp_state = IBV_QPS_RTR;
  1133. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
  1134. if (ret)
  1135. return ERR(ret);
  1136.  
  1137. qp_attr.qp_state = IBV_QPS_RTS;
  1138. qp_attr.sq_psn = 0;
  1139. ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
  1140. return rdma_seterrno(ret);
  1141. }
  1142.  
  1143. static void ucma_destroy_cqs(struct rdma_cm_id *id)
  1144. {
  1145. if (id->recv_cq)
  1146. ibv_destroy_cq(id->recv_cq);
  1147.  
  1148. if (id->recv_cq_channel)
  1149. ibv_destroy_comp_channel(id->recv_cq_channel);
  1150.  
  1151. if (id->send_cq && (id->send_cq != id->recv_cq))
  1152. ibv_destroy_cq(id->send_cq);
  1153.  
  1154. if (id->send_cq_channel && (id->send_cq_channel != id->recv_cq_channel))
  1155. ibv_destroy_comp_channel(id->send_cq_channel);
  1156. }
  1157.  
  1158. static int ucma_create_cqs(struct rdma_cm_id *id, uint32_t send_size, uint32_t recv_size)
  1159. {
  1160. if (recv_size) {
  1161. id->recv_cq_channel = ibv_create_comp_channel(id->verbs);
  1162. if (!id->recv_cq_channel)
  1163. goto err;
  1164.  
  1165. id->recv_cq = ibv_create_cq(id->verbs, recv_size,
  1166. id, id->recv_cq_channel, 0);
  1167. if (!id->recv_cq)
  1168. goto err;
  1169. }
  1170.  
  1171. if (send_size) {
  1172. id->send_cq_channel = ibv_create_comp_channel(id->verbs);
  1173. if (!id->send_cq_channel)
  1174. goto err;
  1175.  
  1176. id->send_cq = ibv_create_cq(id->verbs, send_size,
  1177. id, id->send_cq_channel, 0);
  1178. if (!id->send_cq)
  1179. goto err;
  1180. }
  1181.  
  1182. return 0;
  1183. err:
  1184. ucma_destroy_cqs(id);
  1185. return ERR(ENOMEM);
  1186. }
  1187.  
  1188. int rdma_create_srq(struct rdma_cm_id *id, struct ibv_pd *pd,
  1189. struct ibv_srq_init_attr *attr)
  1190. {
  1191. //struct cma_id_private *id_priv;
  1192. struct ibv_srq *srq;
  1193. int ret;
  1194.  
  1195. //id_priv = container_of(id, struct cma_id_private, id);
  1196. if (!pd)
  1197. pd = id->pd;
  1198.  
  1199. #ifdef IBV_XRC_OPS
  1200. if (attr->srq_type == IBV_SRQT_XRC) {
  1201. if (!attr->ext.xrc.cq) {
  1202. ret = ucma_create_cqs(id, 0, attr->attr.max_wr);
  1203. if (ret)
  1204. return ret;
  1205.  
  1206. attr->ext.xrc.cq = id->recv_cq;
  1207. }
  1208. }
  1209.  
  1210. srq = ibv_create_xsrq(pd, attr);
  1211. #else
  1212. srq = ibv_create_srq(pd, attr);
  1213. #endif
  1214. if (!srq) {
  1215. ret = -1;
  1216. goto err;
  1217. }
  1218.  
  1219. id->pd = pd;
  1220. id->srq = srq;
  1221. return 0;
  1222. err:
  1223. ucma_destroy_cqs(id);
  1224. return ret;
  1225. }
  1226.  
  1227. void rdma_destroy_srq(struct rdma_cm_id *id)
  1228. {
  1229. ibv_destroy_srq(id->srq);
  1230. if (!id->qp)
  1231. ucma_destroy_cqs(id);
  1232. id->srq = NULL;
  1233. }
  1234.  
  1235. int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
  1236. struct ibv_qp_init_attr *qp_init_attr)
  1237. {
  1238. struct cma_id_private *id_priv;
  1239. struct ibv_qp *qp;
  1240. int ret;
  1241.  
  1242. if (id->qp)
  1243. return ERR(EINVAL);
  1244.  
  1245. id_priv = container_of(id, struct cma_id_private, id);
  1246. if (!pd)
  1247. pd = id->pd;
  1248. else if (id->verbs != pd->context)
  1249. return ERR(EINVAL);
  1250.  
  1251. ret = ucma_create_cqs(id, qp_init_attr->send_cq ? 0 : qp_init_attr->cap.max_send_wr,
  1252. qp_init_attr->recv_cq ? 0 : qp_init_attr->cap.max_recv_wr);
  1253. if (ret)
  1254. return ret;
  1255.  
  1256. if (!qp_init_attr->send_cq)
  1257. qp_init_attr->send_cq = id->send_cq;
  1258. if (!qp_init_attr->recv_cq)
  1259. qp_init_attr->recv_cq = id->recv_cq;
  1260. qp = ibv_create_qp(pd, qp_init_attr);
  1261. if (!qp) {
  1262. ret = ERR(ENOMEM);
  1263. goto err1;
  1264. }
  1265.  
  1266. if (ucma_is_ud_qp(id->qp_type))
  1267. ret = ucma_init_ud_qp(id_priv, qp);
  1268. else
  1269. ret = ucma_init_conn_qp(id_priv, qp);
  1270. if (ret)
  1271. goto err2;
  1272.  
  1273. id->pd = pd;
  1274. id->qp = qp;
  1275.  
  1276. //
  1277. // NonStop addition: save the port number (fabric) in the IB QP;
  1278. // used for stats.
  1279. //
  1280. {
  1281. struct ibv_qp_attr attr1 ;
  1282. struct ibv_qp_init_attr init_attr ;
  1283. struct verbs_qp *v_qp = container_of(qp, struct verbs_qp, qp);
  1284. struct process_qp_info *info = v_qp->ns_qp_info; // get pointer to the qp stats
  1285.  
  1286. if(info != NULL) {
  1287. if (ibv_query_qp(qp,&attr1,IBV_QP_PORT,&init_attr) == 0) {
  1288. info->port = attr1.port_num - 1 ;
  1289. } else {
  1290. info->port = 0 ;
  1291. }
  1292. } // End if info
  1293. }
  1294.  
  1295. return 0;
  1296. err2:
  1297. ibv_destroy_qp(qp);
  1298. err1:
  1299. ucma_destroy_cqs(id);
  1300. return ret;
  1301. }
  1302.  
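/*
 * Sketch of the init-attr setup a caller typically performs before
 * rdma_create_qp() above (the capacity values are placeholders):
 *
 *	struct ibv_qp_init_attr attr;
 *
 *	memset(&attr, 0, sizeof attr);
 *	attr.cap.max_send_wr = 16;
 *	attr.cap.max_recv_wr = 16;
 *	attr.cap.max_send_sge = 1;
 *	attr.cap.max_recv_sge = 1;
 *	attr.qp_type = IBV_QPT_RC;
 *	attr.sq_sig_all = 1;
 *
 *	if (rdma_create_qp(id, NULL, &attr))
 *		return -1;
 *	// send_cq/recv_cq were left NULL, so ucma_create_cqs() above
 *	// created the completion queues and channels on the id.
 */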
  1303. void rdma_destroy_qp(struct rdma_cm_id *id)
  1304. {
  1305. ibv_destroy_qp(id->qp);
  1306. ucma_destroy_cqs(id);
  1307. id->qp = NULL;
  1308. }
  1309.  
  1310. static int ucma_valid_param(struct cma_id_private *id_priv,
  1311. struct rdma_conn_param *param)
  1312. {
  1313. if (id_priv->id.ps != RDMA_PS_TCP)
  1314. return 0;
  1315.  
  1316. if (!id_priv->id.qp && !param)
  1317. goto err;
  1318.  
  1319. if (!param)
  1320. return 0;
  1321.  
  1322. if ((param->responder_resources != RDMA_MAX_RESP_RES) &&
  1323. (param->responder_resources > id_priv->cma_dev->max_responder_resources))
  1324. goto err;
  1325.  
  1326. if ((param->initiator_depth != RDMA_MAX_INIT_DEPTH) &&
  1327. (param->initiator_depth > id_priv->cma_dev->max_initiator_depth))
  1328. goto err;
  1329.  
  1330. return 0;
  1331. err:
  1332. return ERR(EINVAL);
  1333. }
  1334.  
  1335. static void ucma_copy_conn_param_to_kern(struct cma_id_private *id_priv,
  1336. struct ucma_abi_conn_param *dst,
  1337. struct rdma_conn_param *src,
  1338. uint32_t qp_num, uint8_t srq)
  1339. {
  1340. dst->qp_num = qp_num;
  1341. dst->srq = srq;
  1342. dst->responder_resources = id_priv->responder_resources;
  1343. dst->initiator_depth = id_priv->initiator_depth;
  1344. dst->valid = 1;
  1345.  
  1346. if (id_priv->connect_len) {
  1347. memcpy(dst->private_data, id_priv->connect, id_priv->connect_len);
  1348. dst->private_data_len = id_priv->connect_len;
  1349. }
  1350.  
  1351. if (src) {
  1352. dst->flow_control = src->flow_control;
  1353. dst->retry_count = src->retry_count;
  1354. dst->rnr_retry_count = src->rnr_retry_count;
  1355.  
  1356. if (src->private_data && src->private_data_len) {
  1357. memcpy(dst->private_data + dst->private_data_len,
  1358. src->private_data, src->private_data_len);
  1359. dst->private_data_len += src->private_data_len;
  1360. }
  1361. } else {
  1362. dst->retry_count = 7;
  1363. dst->rnr_retry_count = 7;
  1364. }
  1365. }
  1366.  
  1367. int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
  1368. {
  1369. struct ucma_abi_connect cmd;
  1370. struct cma_id_private *id_priv;
  1371. int ret;
  1372.  
  1373. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(RDMA_CONN);
  1374.  
  1375.  
  1376. id_priv = container_of(id, struct cma_id_private, id);
  1377. ret = ucma_valid_param(id_priv, conn_param);
  1378. if (ret)
  1379. return ret;
  1380.  
  1381. if((!nsadi_enabled)&&
  1382. (((id->route.addr.p2.dst_addr.sa_data[3] & 0x7f)!= 1)||
  1383. (id->route.addr.p2.dst_addr.sa_data[4]
  1384. != id->route.addr.p1.src_addr.sa_data[4]))){
  1385. //fprintf(stderr, PFX "Error: NSADI License not enabled\n");
  1386. uib_trace(RDMA_CONN, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1387. -1, ENOTSUP, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  1388. return ERR(ENOTSUP);
  1389. }
  1390.  
  1391. if (conn_param && conn_param->initiator_depth != RDMA_MAX_INIT_DEPTH)
  1392. id_priv->initiator_depth = conn_param->initiator_depth;
  1393. else
  1394. id_priv->initiator_depth = id_priv->cma_dev->max_initiator_depth;
  1395. if (conn_param && conn_param->responder_resources != RDMA_MAX_RESP_RES)
  1396. id_priv->responder_resources = conn_param->responder_resources;
  1397. else
  1398. id_priv->responder_resources = id_priv->cma_dev->max_responder_resources;
  1399.  
  1400. CMA_INIT_CMD(&cmd, sizeof cmd, CONNECT);
  1401. cmd.id = id_priv->handle;
  1402. if (id->qp) {
  1403. ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param,
  1404. conn_param, id->qp->qp_num,
  1405. (id->qp->srq != NULL));
  1406. } else if (conn_param) {
  1407. ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param,
  1408. conn_param, conn_param->qp_num,
  1409. conn_param->srq);
  1410. } else {
  1411. ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param,
  1412. conn_param, 0, 0);
  1413. }
  1414.  
  1415. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1416. if (ret != sizeof cmd)
  1417. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1418.  
  1419. if (id_priv->connect_len) {
  1420. free(id_priv->connect);
  1421. id_priv->connect_len = 0;
  1422. }
  1423.  
  1424. return ucma_complete(id);
  1425. }
  1426.  
  1427. int rdma_listen(struct rdma_cm_id *id, int backlog)
  1428. {
  1429. struct ucma_abi_listen cmd;
  1430. struct cma_id_private *id_priv;
  1431. int ret;
  1432.  
  1433. CMA_INIT_CMD(&cmd, sizeof cmd, LISTEN);
  1434. id_priv = container_of(id, struct cma_id_private, id);
  1435. cmd.id = id_priv->handle;
  1436. cmd.backlog = backlog;
  1437.  
  1438. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1439. if (ret != sizeof cmd)
  1440. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1441.  
  1442. if (af_ib_support)
  1443. return ucma_query_addr(id);
  1444. else
  1445. return ucma_query_route(id);
  1446. }
  1447.  
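/*
 * Typical passive-side sequence using rdma_listen() together with the
 * synchronous rdma_get_request()/rdma_accept() calls below (a sketch;
 * "listen_id" is assumed to be already bound with rdma_bind_addr() and to
 * have been created with a NULL event channel, and "init_attr" is a
 * populated struct ibv_qp_init_attr):
 *
 *	struct rdma_cm_id *conn_id;
 *
 *	if (rdma_listen(listen_id, 8))
 *		return -1;
 *	if (rdma_get_request(listen_id, &conn_id))
 *		return -1;
 *	if (rdma_create_qp(conn_id, NULL, &init_attr))
 *		return -1;
 *	if (rdma_accept(conn_id, NULL))
 *		return -1;
 */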
  1448. int rdma_get_request(struct rdma_cm_id *listen, struct rdma_cm_id **id)
  1449. {
  1450. struct cma_id_private *id_priv;
  1451. struct rdma_cm_event *event;
  1452. int ret;
  1453.  
  1454. id_priv = container_of(listen, struct cma_id_private, id);
  1455. if (!id_priv->sync)
  1456. return ERR(EINVAL);
  1457.  
  1458. if (listen->event) {
  1459. rdma_ack_cm_event(listen->event);
  1460. listen->event = NULL;
  1461. }
  1462.  
  1463. ret = rdma_get_cm_event(listen->channel, &event);
  1464. if (ret) {
  1465. return ret;
  1466. }
  1467.  
  1468. if (event->status) {
  1469. ret = ERR(event->status);
  1470. goto err;
  1471. }
  1472.  
  1473. if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
  1474. ret = ERR(EINVAL);
  1475. goto err;
  1476. }
  1477.  
  1478. if (id_priv->qp_init_attr) {
  1479. struct ibv_qp_init_attr attr;
  1480.  
  1481. attr = *id_priv->qp_init_attr;
  1482. ret = rdma_create_qp(event->id, listen->pd, &attr);
  1483. if (ret)
  1484. goto err;
  1485. }
  1486.  
  1487. *id = event->id;
  1488. (*id)->event = event;
  1489. return 0;
  1490.  
  1491. err:
  1492. listen->event = event;
  1493. return ret;
  1494. }
  1495.  
  1496. int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
  1497. {
  1498. struct ucma_abi_accept cmd;
  1499. struct cma_id_private *id_priv;
  1500. int ret;
  1501.  
  1502. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(RDMA_ACCEPT);
  1503.  
  1504. id_priv = container_of(id, struct cma_id_private, id);
  1505. ret = ucma_valid_param(id_priv, conn_param);
  1506. if (ret)
  1507. return ret;
  1508.  
  1509. if((!nsadi_enabled)&&
  1510. (((id->route.addr.p2.dst_addr.sa_data[3] & 0x7f)!= 1)||
  1511. (id->route.addr.p2.dst_addr.sa_data[4]
  1512. != id->route.addr.p1.src_addr.sa_data[4]))){
  1513. // fprintf(stderr, PFX "Error: NSADI License not enabled\n");
  1514. uib_trace(RDMA_ACCEPT, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1515. -1, ENOTSUP, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  1516. return ERR(ENOTSUP);
  1517. }
  1518.  
  1519. if (!conn_param || conn_param->initiator_depth == RDMA_MAX_INIT_DEPTH) {
  1520. id_priv->initiator_depth = min(id_priv->initiator_depth,
  1521. id_priv->cma_dev->max_initiator_depth);
  1522. } else {
  1523. id_priv->initiator_depth = conn_param->initiator_depth;
  1524. }
  1525. if (!conn_param || conn_param->responder_resources == RDMA_MAX_RESP_RES) {
  1526. id_priv->responder_resources = min(id_priv->responder_resources,
  1527. id_priv->cma_dev->max_responder_resources);
  1528. } else {
  1529. id_priv->responder_resources = conn_param->responder_resources;
  1530. }
  1531.  
  1532. if (!ucma_is_ud_qp(id->qp_type)) {
  1533. ret = ucma_modify_qp_rtr(id, id_priv->responder_resources);
  1534. if (ret)
  1535. return ret;
  1536.  
  1537. ret = ucma_modify_qp_rts(id, id_priv->initiator_depth);
  1538. if (ret)
  1539. return ret;
  1540. }
  1541.  
  1542. CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT);
  1543. cmd.id = id_priv->handle;
  1544. cmd.uid = (uintptr_t) id_priv;
  1545. if (id->qp)
  1546. ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param,
  1547. conn_param, id->qp->qp_num,
  1548. (id->qp->srq != NULL));
  1549. else
  1550. ucma_copy_conn_param_to_kern(id_priv, &cmd.conn_param,
  1551. conn_param, conn_param->qp_num,
  1552. conn_param->srq);
  1553.  
  1554. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1555. if (ret != sizeof cmd) {
  1556. ucma_modify_qp_err(id);
  1557. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1558. }
  1559.  
  1560. if (ucma_is_ud_qp(id->qp_type))
  1561. return 0;
  1562.  
  1563. return ucma_complete(id);
  1564. }
  1565.  
  1566. int rdma_reject(struct rdma_cm_id *id, const void *private_data,
  1567. uint8_t private_data_len)
  1568. {
  1569. struct ucma_abi_reject cmd;
  1570. struct cma_id_private *id_priv;
  1571. int ret;
  1572.  
  1573. CMA_INIT_CMD(&cmd, sizeof cmd, REJECT);
  1574.  
  1575. id_priv = container_of(id, struct cma_id_private, id);
  1576. cmd.id = id_priv->handle;
  1577. if (private_data && private_data_len) {
  1578. memcpy(cmd.private_data, private_data, private_data_len);
  1579. cmd.private_data_len = private_data_len;
  1580. }
  1581.  
  1582. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1583. if (ret != sizeof cmd)
  1584. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1585.  
  1586. return 0;
  1587. }
  1588.  
  1589. int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event)
  1590. {
  1591. struct ucma_abi_notify cmd;
  1592. struct cma_id_private *id_priv;
  1593. int ret;
  1594.  
  1595. CMA_INIT_CMD(&cmd, sizeof cmd, NOTIFY);
  1596.  
  1597. id_priv = container_of(id, struct cma_id_private, id);
  1598. cmd.id = id_priv->handle;
  1599. cmd.event = event;
  1600. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1601. if (ret != sizeof cmd)
  1602. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1603.  
  1604. return 0;
  1605. }
  1606.  
  1607. int rdma_disconnect(struct rdma_cm_id *id)
  1608. {
  1609. struct ucma_abi_disconnect cmd;
  1610. struct cma_id_private *id_priv;
  1611. int ret;
  1612.  
  1613. switch (id->verbs->device->transport_type) {
  1614. case IBV_TRANSPORT_IB:
  1615. ret = ucma_modify_qp_err(id);
  1616. break;
  1617. case IBV_TRANSPORT_IWARP:
  1618. ret = ucma_modify_qp_sqd(id);
  1619. break;
  1620. default:
  1621. ret = ERR(EINVAL);
  1622. }
  1623. if (ret)
  1624. return ret;
  1625.  
  1626. CMA_INIT_CMD(&cmd, sizeof cmd, DISCONNECT);
  1627. id_priv = container_of(id, struct cma_id_private, id);
  1628. cmd.id = id_priv->handle;
  1629.  
  1630. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1631. if (ret != sizeof cmd)
  1632. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1633.  
  1634. return ucma_complete(id);
  1635. }
  1636.  
  1637. #ifdef dddd
  1638. static int rdma_join_multicast2(struct rdma_cm_id *id, struct sockaddr *addr,
  1639. socklen_t addrlen, void *context)
  1640. {
  1641. struct ucma_abi_create_id_resp resp;
  1642. struct cma_id_private *id_priv;
  1643. struct cma_multicast *mc, **pos;
  1644. int ret;
  1645.  
  1646. id_priv = container_of(id, struct cma_id_private, id);
  1647. mc = calloc(1, sizeof *mc);
  1648. if (!mc)
  1649. return ERR(ENOMEM);
  1650.  
  1651. mc->context = context;
  1652. mc->id_priv = id_priv;
  1653. memcpy(&mc->addr, addr, addrlen);
  1654. if (pthread_cond_init(&mc->cond, NULL)) {
  1655. ret = -1;
  1656. goto err1;
  1657. }
  1658.  
  1659. pthread_mutex_lock(&id_priv->mut);
  1660. mc->next = id_priv->mc_list;
  1661. id_priv->mc_list = mc;
  1662. pthread_mutex_unlock(&id_priv->mut);
  1663.  
  1664. if (af_ib_support) {
  1665. struct ucma_abi_join_mcast cmd;
  1666.  
  1667. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, JOIN_MCAST, &resp, sizeof resp);
  1668. cmd.id = id_priv->handle;
  1669. memcpy(&cmd.addr, addr, addrlen);
  1670. cmd.addr_size = addrlen;
  1671. cmd.uid = (uintptr_t) mc;
  1672. cmd.reserved = 0;
  1673.  
  1674. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1675. if (ret != sizeof cmd) {
  1676. ret = (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1677. goto err2;
  1678. }
  1679. } else {
  1680. struct ucma_abi_join_ip_mcast cmd;
  1681.  
  1682. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, JOIN_IP_MCAST, &resp, sizeof resp);
  1683. cmd.id = id_priv->handle;
  1684. memcpy(&cmd.addr, addr, addrlen);
  1685. cmd.uid = (uintptr_t) mc;
  1686.  
  1687. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1688. if (ret != sizeof cmd) {
  1689. ret = (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1690. goto err2;
  1691. }
  1692. }
  1693.  
  1694. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  1695.  
  1696. mc->handle = resp.id;
  1697. return ucma_complete(id);
  1698.  
  1699. err2:
  1700. pthread_mutex_lock(&id_priv->mut);
  1701. for (pos = &id_priv->mc_list; *pos != mc; pos = &(*pos)->next)
  1702. ;
  1703. *pos = mc->next;
  1704. pthread_mutex_unlock(&id_priv->mut);
  1705. err1:
  1706. free(mc);
  1707. return ret;
  1708. }
  1709. #endif /* dddd */
  1710.  
  1711. int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
  1712. void *context)
  1713. {
  1714. #ifdef dddd
  1715. int addrlen;
  1716.  
  1717. addrlen = ucma_addrlen(addr);
  1718. if (!addrlen)
  1719. return ERR(EINVAL);
  1720.  
  1721. return rdma_join_multicast2(id, addr, addrlen, context);
  1722. #endif /* dddd */
  1723. return ERR(ENOSYS);
  1724. }
  1725.  
  1726. int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
  1727. {
  1728. #ifdef dddd
  1729. struct ucma_abi_destroy_id cmd;
  1730. struct ucma_abi_destroy_id_resp resp;
  1731. struct cma_id_private *id_priv;
  1732. struct cma_multicast *mc, **pos;
  1733. int ret, addrlen;
  1734.  
  1735. addrlen = ucma_addrlen(addr);
  1736. if (!addrlen)
  1737. return ERR(EINVAL);
  1738.  
  1739. id_priv = container_of(id, struct cma_id_private, id);
  1740. pthread_mutex_lock(&id_priv->mut);
  1741. for (pos = &id_priv->mc_list; *pos; pos = &(*pos)->next)
  1742. if (!memcmp(&(*pos)->addr, addr, addrlen))
  1743. break;
  1744.  
  1745. mc = *pos;
  1746. if (*pos)
  1747. *pos = mc->next;
  1748. pthread_mutex_unlock(&id_priv->mut);
  1749. if (!mc)
  1750. return ERR(EADDRNOTAVAIL);
  1751.  
  1752. if (id->qp)
  1753. ibv_detach_mcast(id->qp, &mc->mgid, mc->mlid);
  1754.  
  1755. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, LEAVE_MCAST, &resp, sizeof resp);
  1756. cmd.id = mc->handle;
  1757.  
  1758. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  1759. if (ret != sizeof cmd) {
  1760. ret = (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  1761. goto free;
  1762. }
  1763.  
  1764. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  1765.  
  1766. pthread_mutex_lock(&id_priv->mut);
  1767. while (mc->events_completed < resp.events_reported)
  1768. pthread_cond_wait(&mc->cond, &id_priv->mut);
  1769. pthread_mutex_unlock(&id_priv->mut);
  1770.  
  1771. ret = 0;
  1772. free:
  1773. free(mc);
  1774. return ret;
  1775. #endif /* dddd */
  1776. return ERR(ENOSYS);
  1777. }
  1778.  
  1779. static void ucma_complete_event(struct cma_id_private *id_priv)
  1780. {
  1781. pthread_mutex_lock(&id_priv->mut);
  1782. id_priv->events_completed++;
  1783. pthread_cond_signal(&id_priv->cond);
  1784. pthread_mutex_unlock(&id_priv->mut);
  1785. }
  1786.  
  1787. static void ucma_complete_mc_event(struct cma_multicast *mc)
  1788. {
  1789. pthread_mutex_lock(&mc->id_priv->mut);
  1790. mc->events_completed++;
  1791. pthread_cond_signal(&mc->cond);
  1792. mc->id_priv->events_completed++;
  1793. pthread_cond_signal(&mc->id_priv->cond);
  1794. pthread_mutex_unlock(&mc->id_priv->mut);
  1795. }
  1796.  
  1797. int rdma_ack_cm_event(struct rdma_cm_event *event)
  1798. {
  1799. struct cma_event *evt;
  1800.  
  1801. if (!event)
  1802. return ERR(EINVAL);
  1803.  
  1804. evt = container_of(event, struct cma_event, event);
  1805.  
  1806. if (evt->mc)
  1807. ucma_complete_mc_event(evt->mc);
  1808. else
  1809. ucma_complete_event(evt->id_priv);
  1810. free(evt);
  1811. return 0;
  1812. }
  1813.  
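/*
 * Once the kernel reports address/route resolution, re-query it for the
 * updated address, GID, and path information; a query failure downgrades the
 * event to the corresponding ADDR_ERROR/ROUTE_ERROR event.
 */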
  1814. static void ucma_process_addr_resolved(struct cma_event *evt)
  1815. {
  1816. if (af_ib_support) {
  1817. evt->event.status = ucma_query_addr(&evt->id_priv->id);
  1818. if (!evt->event.status &&
  1819. evt->id_priv->id.verbs->device->transport_type == IBV_TRANSPORT_IB)
  1820. evt->event.status = ucma_query_gid(&evt->id_priv->id);
  1821. } else {
  1822. evt->event.status = ucma_query_route(&evt->id_priv->id);
  1823. }
  1824.  
  1825. if (evt->event.status)
  1826. evt->event.event = RDMA_CM_EVENT_ADDR_ERROR;
  1827. }
  1828.  
  1829. static void ucma_process_route_resolved(struct cma_event *evt)
  1830. {
  1831. if (evt->id_priv->id.verbs->device->transport_type != IBV_TRANSPORT_IB)
  1832. return;
  1833.  
  1834. if (af_ib_support)
  1835. evt->event.status = ucma_query_path(&evt->id_priv->id);
  1836. else
  1837. evt->event.status = ucma_query_route(&evt->id_priv->id);
  1838.  
  1839. if (evt->event.status)
  1840. evt->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
  1841. }
  1842.  
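/*
 * For an incoming connect request: with AF_IB support the address, GID, and
 * path records are queried individually, otherwise fall back to the single
 * route query.
 */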
  1843. static int ucma_query_req_info(struct rdma_cm_id *id)
  1844. {
  1845. int ret;
  1846.  
  1847. if (!af_ib_support)
  1848. return ucma_query_route(id);
  1849.  
  1850. ret = ucma_query_addr(id);
  1851. if (ret)
  1852. return ret;
  1853.  
  1854. ret = ucma_query_gid(id);
  1855. if (ret)
  1856. return ret;
  1857.  
  1858. ret = ucma_query_path(id);
  1859. if (ret)
  1860. return ret;
  1861.  
  1862. return 0;
  1863. }
  1864.  
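/*
 * ns_license_data_get() scans the NUL-terminated feature string src_str for
 * search_str, advances past the next ':' and converts the decimal value that
 * follows (str_len is currently unused).  A hypothetical entry would look
 * like "<feature>:<decimal>".  Returns the converted value, -1 if the scan
 * failed, or -2 if the conversion failed.
 */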
  1865. int ns_license_data_get(char *src_str, int str_len, const char *search_str)
  1866. {
  1867. int license_data = -1;
  1868. int l_data = -1;
  1869. char scan_str[256] = "";
  1870. char *p_scan_str = scan_str;
  1871.  
  1872. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(LIC_DATA_GET);
  1873.  
  1874. /* Scan the cached global feature string for the search string */
  1875. if ((p_scan_str = strstr(src_str, search_str)) != NULL) {
  1876. /* Advance to the next colon char */
  1877. if ((p_scan_str = strstr(p_scan_str,":")) != NULL) {
  1878. /* Advance to the char after the colon */
  1879. ++p_scan_str;
  1880. /* Convert the feature data string to an integer */
  1881. license_data = -2;
  1882. errno = 0;
  1883. l_data = strtol(p_scan_str,NULL,10);
  1884. if (errno == 0) {
  1885. /* Success! Return the integer data */
  1886. license_data = l_data;
  1887. //fprintf(stderr, "ns_license_data_get: Data found %s:%d\n", search_str, license_data);
  1888. } else {
  1889. //fprintf(stderr, "ns_license_data_get: Data conversion err=%d str=%s,data=%d\n", errno, scan_str, l_data);
  1890. uib_trace(LIC_DATA_GET, DATA_CONVERSION, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1891. license_data, errno, TRACE_ERROR, UIB_NO_INSTANCE, 1, (int64_t *)&license_data);
  1892. }
  1893. } else {
  1894. //fprintf(stderr, "ns_license_data_get: No colon after %s in %s\n", search_str, src_str);
  1895. uib_trace(LIC_DATA_GET, NO_COLON, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1896. license_data, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  1897. }
  1898. } else {
  1899. //fprintf(stderr, "ns_license_data_get: No %s data in %s\n", search_str, src_str);
  1900. uib_trace(LIC_DATA_GET, MISSING_DATA, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1901. license_data, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  1902. }
  1903.  
  1904. /*
  1905. * Return one of the following:
  1906. * Requested license data,
  1907. * -1 if string scan failed, or
  1908. * -2 if data conversion failed
  1909. */
  1910. return (license_data);
  1911. }
  1912.  
  1913.  
  1914.  
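/*
 * ns_ucma_validate_lic() validates the NSADI license for the peer identified
 * by the destination GUID in the resolved path record; dir distinguishes an
 * incoming connect request (0) from an outgoing connect response (1).  The
 * local external-server limit is 2, or 8 on high-end system classes.
 * Returns 0 on success or a NonStop errno value (also stored in errno).
 */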
  1915. static int ns_ucma_validate_lic (struct cma_id_private *id_priv, int dir)
  1916. {
  1917. int ret = ENOTSUP ;
  1918. int max = 2 ;
  1919. char li[NS_LICENSE_USER_MAX_STR_LEN] = "";
  1920. int li_len = sizeof(li);
  1921. int pos = -1;
  1922. int num_local_servers = -1;
  1923.  
  1924. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(VALIDATE_LIC);
  1925.  
  1926.  
  1927. if ((NSKVCore_modelToSystemClass_(NSKVCore_getModel())) == NS_LICENSE_SYS_CLASS_HIGH_END) {
  1928. max = 8 ;
  1929. }
  1930.  
  1931. ret = ns_utguid_search (id_priv->id.route.path_rec->dgid.global.interface_id, li, &li_len);
  1932. if (ret == 0) {
  1933. pos = ns_license_data_get(li, li_len, NS_LICENSE_USER_POSITION);
  1934. switch (pos) {
  1935. case NS_LICENSE_USER_POS_RESOURCE:
  1936. break;
  1937. case NS_LICENSE_USER_POS_LOCAL_EXT_SRVR:
  1938. num_local_servers = ns_license_data_get(li, li_len, NS_LICENSE_USER_NUM_LOCAL_SRV);
  1939. if (num_local_servers > max) {
  1940. //fprintf(stderr, "ns_ucma_validate_lic: NSADI license does not support more than %d servers \n",max);
  1941. ret = ERANGE;
  1942. uib_trace(VALIDATE_LIC, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1943. ret, UIB_NO_ERRNO, TRACE_ERROR, 1, UIB_NO_TRACE_DATA, NULL);
  1944. } else {
  1945. ret = 0 ;
  1946. }
  1947. break;
  1948. case NS_LICENSE_USER_POS_REMOTE_EXT_SRVR:
  1949. //fprintf(stderr, "ns_ucma_validate_lic: NSADI cannot be targeted %s remote external servers\n", dir?"from":"by");
  1950. ret = ENOTSUP;
  1951. uib_trace(VALIDATE_LIC, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1952. ret, UIB_NO_ERRNO, TRACE_ERROR, 2, UIB_NO_TRACE_DATA, NULL);
  1953. break;
  1954. default:
  1955. //fprintf(stderr, "ns_ucma_validate_lic: Invalid license data, pos=%d num=%d\n",pos,num_local_servers);
  1956. ret = EBADDATA;
  1957. uib_trace(VALIDATE_LIC, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1958. ret, UIB_NO_ERRNO, TRACE_ERROR, 3, UIB_NO_TRACE_DATA, NULL);
  1959. break;
  1960. }
  1961. } else {
  1962. //fprintf(stderr, "ns_ucma_validate_lic: ns_utguid_search() err=%d\n",ret);
  1963. uib_trace(VALIDATE_LIC, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  1964. ret, UIB_NO_ERRNO, TRACE_ERROR, 4, UIB_NO_TRACE_DATA, NULL);
  1965. }
  1966.  
  1967. errno = ret ;
  1968. return (ret) ;
  1969. }
  1970.  
  1971.  
  1972.  
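/*
 * CONNECT_REQUEST handling: allocate a child id on the listener's channel,
 * record the kernel handle and initiator/responder limits, query the
 * request's address/route information, and validate the NSADI license for
 * the requesting peer.  On failure the child id is destroyed, the event is
 * completed, and the caller retries.
 */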
  1973. static int ucma_process_conn_req(struct cma_event *evt,
  1974. uint32_t handle)
  1975. {
  1976. struct cma_id_private *id_priv;
  1977. int ret;
  1978.  
  1979. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(PROC_CON_REQ);
  1980.  
  1981. id_priv = ucma_alloc_id(evt->id_priv->id.channel,
  1982. evt->id_priv->id.context, evt->id_priv->id.ps,
  1983. evt->id_priv->id.qp_type);
  1984. if (!id_priv) {
  1985. ucma_destroy_kern_id(evt->id_priv->id.channel->fd, handle);
  1986. ret = ERR(ENOMEM);
  1987. goto err1;
  1988. }
  1989.  
  1990. evt->event.listen_id = &evt->id_priv->id;
  1991. evt->event.id = &id_priv->id;
  1992. id_priv->handle = handle;
  1993. id_priv->initiator_depth = evt->event.param.conn.initiator_depth;
  1994. id_priv->responder_resources = evt->event.param.conn.responder_resources;
  1995.  
  1996. if (evt->id_priv->sync) {
  1997. ret = rdma_migrate_id(&id_priv->id, NULL);
  1998. if (ret)
  1999. goto err2;
  2000. }
  2001.  
  2002. ret = ucma_query_req_info(&id_priv->id);
  2003. if (ret)
  2004. goto err2;
  2005.  
  2006. if ((ret = ns_ucma_validate_lic(id_priv,0))) {
  2007. //fprintf(stderr, "ucma_process_conn_req: ns_ucma_validate_lic() err=%d\n",ret);
  2008. ERR(ret); /* TODO: (djc) this okay? */
  2009. uib_trace(PROC_CON_REQ, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  2010. ret, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  2011. goto err2 ;
  2012. }
  2013.  
  2014. return 0;
  2015.  
  2016. err2:
  2017. rdma_destroy_id(&id_priv->id);
  2018. err1:
  2019. ucma_complete_event(evt->id_priv);
  2020. return ret;
  2021. }
  2022.  
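/*
 * CONNECT_RESPONSE handling on the active side: validate the NSADI license
 * for the target, transition the QP through RTR and RTS, then write the
 * ACCEPT command to the kernel.  Any failure moves the QP to the error state
 * and is surfaced by the caller as a CONNECT_ERROR event.
 */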
  2023. static int ucma_process_conn_resp(struct cma_id_private *id_priv)
  2024. {
  2025. struct ucma_abi_accept cmd;
  2026. int ret;
  2027.  
  2028. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(PROC_CON_RESP);
  2029.  
  2030. if ((ret = ns_ucma_validate_lic(id_priv,1))) {
  2031. //fprintf(stderr, "ucma_process_conn_resp: ns_ucma_validate_lic() err=%d\n",ret);
  2032. ERR(ret); /* TODO: (djc) this okay? */
  2033. uib_trace(PROC_CON_RESP, LICENSE_ERROR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  2034. ret, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  2035. goto err ;
  2036. }
  2037.  
  2038. ret = ucma_modify_qp_rtr(&id_priv->id, RDMA_MAX_RESP_RES);
  2039. if (ret)
  2040. goto err;
  2041.  
  2042. ret = ucma_modify_qp_rts(&id_priv->id, RDMA_MAX_INIT_DEPTH);
  2043. if (ret)
  2044. goto err;
  2045.  
  2046. CMA_INIT_CMD(&cmd, sizeof cmd, ACCEPT);
  2047. cmd.id = id_priv->handle;
  2048.  
  2049. ret = ns_write(id_priv->id.channel->fd, &cmd, sizeof cmd);
  2050. if (ret != sizeof cmd) {
  2051. ret = (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  2052. goto err;
  2053. }
  2054.  
  2055. return 0;
  2056. err:
  2057. ucma_modify_qp_err(&id_priv->id);
  2058. return ret;
  2059. }
  2060.  
  2061. static int ucma_process_join(struct cma_event *evt)
  2062. {
  2063. evt->mc->mgid = evt->event.param.ud.ah_attr.grh.dgid;
  2064. evt->mc->mlid = evt->event.param.ud.ah_attr.dlid;
  2065.  
  2066. if (!evt->id_priv->id.qp)
  2067. return 0;
  2068.  
  2069. return rdma_seterrno(ibv_attach_mcast(evt->id_priv->id.qp,
  2070. &evt->mc->mgid, evt->mc->mlid));
  2071. }
  2072.  
  2073. static void ucma_copy_conn_event(struct cma_event *event,
  2074. struct ucma_abi_conn_param *src)
  2075. {
  2076. struct rdma_conn_param *dst = &event->event.param.conn;
  2077.  
  2078. dst->private_data_len = src->private_data_len;
  2079. if (src->private_data_len) {
  2080. dst->private_data = &event->private_data;
  2081. memcpy(&event->private_data, src->private_data,
  2082. src->private_data_len);
  2083. }
  2084.  
  2085. dst->responder_resources = src->responder_resources;
  2086. dst->initiator_depth = src->initiator_depth;
  2087. dst->flow_control = src->flow_control;
  2088. dst->retry_count = src->retry_count;
  2089. dst->rnr_retry_count = src->rnr_retry_count;
  2090. dst->srq = src->srq;
  2091. dst->qp_num = src->qp_num;
  2092. }
  2093.  
  2094. static void ucma_copy_ud_event(struct cma_event *event,
  2095. struct ucma_abi_ud_param *src)
  2096. {
  2097. struct rdma_ud_param *dst = &event->event.param.ud;
  2098.  
  2099. dst->private_data_len = src->private_data_len;
  2100. if (src->private_data_len) {
  2101. dst->private_data = &event->private_data;
  2102. memcpy(&event->private_data, src->private_data,
  2103. src->private_data_len);
  2104. }
  2105.  
  2106. ibv_copy_ah_attr_from_kern(&dst->ah_attr, &src->ah_attr);
  2107. dst->qp_num = src->qp_num;
  2108. dst->qkey = src->qkey;
  2109. }
  2110.  
  2111. int rdma_get_cm_event(struct rdma_event_channel *channel,
  2112. struct rdma_cm_event **event)
  2113. {
  2114. struct ucma_abi_event_resp resp;
  2115. struct ucma_abi_get_event cmd;
  2116. struct cma_event *evt;
  2117. int ret;
  2118. int count_xfer ;
  2119. int err ;
  2120. int data[10] ;
  2121.  
  2122. if(g_trace_level >= TRACE_LEVEL_FIVE) uib_trace_entry_point(GET_CM_EVENT);
  2123.  
  2124. ret = ucma_init();
  2125. if (ret)
  2126. return ret;
  2127.  
  2128. if (!event)
  2129. return ERR(EINVAL);
  2130.  
  2131. evt = malloc(sizeof *evt);
  2132. if (!evt)
  2133. return ERR(ENOMEM);
  2134.  
  2135. retry:
  2136. memset(evt, 0, sizeof *evt);
  2137. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, GET_EVENT, &resp, sizeof resp);
2138. //
2139. // Issue the write to get any pending events. The NSK kernel only implements
2140. // a non-blocking interface, so the write returns immediately with either the
2141. // size of the message (success: an event is pending) or an error indication
2142. // (no events at this time); the no-event case is awaited and retried below.
2143. //
  2144. ret = ns_write(channel->fd, &cmd, sizeof cmd);
  2145.  
  2146. if (ret != sizeof cmd) {
  2147. if (ret == -EAGAIN) {
2148. // TODO: NIBVL - Need to use LDONE for the tag instead of -1?
  2149. put_regFile(channel->fd) ;
  2150. if ((ret = put_awaitio(channel->fd,-1,-1,&count_xfer,&err,&data))) {
free(evt); /* the event buffer was never handed back to the caller */
2151. return (-1) ;
  2152. }
  2153. goto retry ;
  2154. } else {
  2155. free(evt);
  2156. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  2157. }
  2158. }
  2159.  
  2160. put_unregFile(channel->fd) ;
  2161. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  2162.  
  2163. evt->event.event = (enum rdma_cm_event_type)resp.event;
  2164. evt->id_priv = (void *) (uintptr_t) resp.uid;
  2165. evt->event.id = &evt->id_priv->id;
  2166.  
2167. //
2168. // NonStop: errors (-errnos) coming from the kernel use Linux numbering,
2169. // which differs from the NonStop error codes, so they have to be mapped
2170. // into NonStop space.
2171. //
  2172. resp.status = ns_map_errno_(resp.status) ;
  2173. evt->event.status = resp.status;
  2174.  
  2175. switch (resp.event) {
  2176. case RDMA_CM_EVENT_ADDR_RESOLVED:
  2177. ucma_process_addr_resolved(evt);
  2178. break;
  2179. case RDMA_CM_EVENT_ROUTE_RESOLVED:
  2180. ucma_process_route_resolved(evt);
  2181. break;
  2182. case RDMA_CM_EVENT_CONNECT_REQUEST:
  2183. evt->id_priv = (void *) (uintptr_t) resp.uid;
  2184. if (ucma_is_ud_qp(evt->id_priv->id.qp_type))
  2185. ucma_copy_ud_event(evt, &resp.param.ud);
  2186. else
  2187. ucma_copy_conn_event(evt, &resp.param.conn);
  2188.  
  2189. ret = ucma_process_conn_req(evt, resp.id);
  2190. if (ret) {
  2191. //fprintf(stderr, "rdma_get_cm_event: ucma_process_conn_req() err=%d\n",ret);
  2192. uib_trace(GET_CM_EVENT, CON_REQ_ERR, TRACE_LEVEL_ONE, UIB_NO_QPN,
  2193. ret, UIB_NO_ERRNO, TRACE_ERROR, UIB_NO_INSTANCE, UIB_NO_TRACE_DATA, NULL);
  2194. goto retry;
  2195. }
  2196. break;
  2197. case RDMA_CM_EVENT_CONNECT_RESPONSE:
  2198. ucma_copy_conn_event(evt, &resp.param.conn);
  2199. evt->event.status = ucma_process_conn_resp(evt->id_priv);
  2200. if (!evt->event.status)
  2201. evt->event.event = RDMA_CM_EVENT_ESTABLISHED;
  2202. else {
  2203. evt->event.event = RDMA_CM_EVENT_CONNECT_ERROR;
  2204. evt->id_priv->connect_error = 1;
  2205. }
  2206. break;
  2207. case RDMA_CM_EVENT_ESTABLISHED:
  2208. if (ucma_is_ud_qp(evt->id_priv->id.qp_type)) {
  2209. ucma_copy_ud_event(evt, &resp.param.ud);
  2210. break;
  2211. }
  2212.  
  2213. ucma_copy_conn_event(evt, &resp.param.conn);
  2214. break;
  2215. case RDMA_CM_EVENT_REJECTED:
  2216. if (evt->id_priv->connect_error) {
  2217. ucma_complete_event(evt->id_priv);
  2218. goto retry;
  2219. }
  2220. ucma_copy_conn_event(evt, &resp.param.conn);
  2221. ucma_modify_qp_err(evt->event.id);
  2222. break;
  2223. case RDMA_CM_EVENT_DISCONNECTED:
  2224. if (evt->id_priv->connect_error) {
  2225. ucma_complete_event(evt->id_priv);
  2226. goto retry;
  2227. }
  2228. ucma_copy_conn_event(evt, &resp.param.conn);
  2229. break;
  2230. case RDMA_CM_EVENT_MULTICAST_JOIN:
  2231. evt->mc = (void *) (uintptr_t) resp.uid;
  2232. evt->id_priv = evt->mc->id_priv;
  2233. evt->event.id = &evt->id_priv->id;
  2234. ucma_copy_ud_event(evt, &resp.param.ud);
  2235. evt->event.param.ud.private_data = evt->mc->context;
  2236. evt->event.status = ucma_process_join(evt);
  2237. if (evt->event.status)
  2238. evt->event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
  2239. break;
  2240. case RDMA_CM_EVENT_MULTICAST_ERROR:
  2241. evt->mc = (void *) (uintptr_t) resp.uid;
  2242. evt->id_priv = evt->mc->id_priv;
  2243. evt->event.id = &evt->id_priv->id;
  2244. evt->event.param.ud.private_data = evt->mc->context;
  2245. break;
  2246. default:
  2247. evt->id_priv = (void *) (uintptr_t) resp.uid;
  2248. evt->event.id = &evt->id_priv->id;
  2249. evt->event.status = resp.status;
  2250. if (ucma_is_ud_qp(evt->id_priv->id.qp_type))
  2251. ucma_copy_ud_event(evt, &resp.param.ud);
  2252. else
  2253. ucma_copy_conn_event(evt, &resp.param.conn);
  2254. break;
  2255. }
  2256.  
  2257. *event = &evt->event;
  2258. return 0;
  2259. }
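/*
 * Typical caller loop (illustrative sketch only; identifiers other than the
 * rdma_cm calls are placeholders):
 *
 *     struct rdma_cm_event *event;
 *
 *     while (!rdma_get_cm_event(channel, &event)) {
 *         switch (event->event) {
 *         case RDMA_CM_EVENT_CONNECT_REQUEST:
 *             // event->id is the new child id; accept or reject it
 *             break;
 *         default:
 *             break;
 *         }
 *         rdma_ack_cm_event(event);
 *     }
 */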
  2260.  
  2261. const char *rdma_event_str(enum rdma_cm_event_type event)
  2262. {
  2263. switch (event) {
  2264. case RDMA_CM_EVENT_ADDR_RESOLVED:
  2265. return "RDMA_CM_EVENT_ADDR_RESOLVED";
  2266. case RDMA_CM_EVENT_ADDR_ERROR:
  2267. return "RDMA_CM_EVENT_ADDR_ERROR";
  2268. case RDMA_CM_EVENT_ROUTE_RESOLVED:
  2269. return "RDMA_CM_EVENT_ROUTE_RESOLVED";
  2270. case RDMA_CM_EVENT_ROUTE_ERROR:
  2271. return "RDMA_CM_EVENT_ROUTE_ERROR";
  2272. case RDMA_CM_EVENT_CONNECT_REQUEST:
  2273. return "RDMA_CM_EVENT_CONNECT_REQUEST";
  2274. case RDMA_CM_EVENT_CONNECT_RESPONSE:
  2275. return "RDMA_CM_EVENT_CONNECT_RESPONSE";
  2276. case RDMA_CM_EVENT_CONNECT_ERROR:
  2277. return "RDMA_CM_EVENT_CONNECT_ERROR";
  2278. case RDMA_CM_EVENT_UNREACHABLE:
  2279. return "RDMA_CM_EVENT_UNREACHABLE";
  2280. case RDMA_CM_EVENT_REJECTED:
  2281. return "RDMA_CM_EVENT_REJECTED";
  2282. case RDMA_CM_EVENT_ESTABLISHED:
  2283. return "RDMA_CM_EVENT_ESTABLISHED";
  2284. case RDMA_CM_EVENT_DISCONNECTED:
  2285. return "RDMA_CM_EVENT_DISCONNECTED";
  2286. case RDMA_CM_EVENT_DEVICE_REMOVAL:
  2287. return "RDMA_CM_EVENT_DEVICE_REMOVAL";
  2288. case RDMA_CM_EVENT_MULTICAST_JOIN:
  2289. return "RDMA_CM_EVENT_MULTICAST_JOIN";
  2290. case RDMA_CM_EVENT_MULTICAST_ERROR:
  2291. return "RDMA_CM_EVENT_MULTICAST_ERROR";
  2292. case RDMA_CM_EVENT_ADDR_CHANGE:
  2293. return "RDMA_CM_EVENT_ADDR_CHANGE";
  2294. case RDMA_CM_EVENT_TIMEWAIT_EXIT:
  2295. return "RDMA_CM_EVENT_TIMEWAIT_EXIT";
  2296. default:
  2297. return "UNKNOWN EVENT";
  2298. }
  2299. }
  2300.  
  2301. int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
  2302. void *optval, size_t optlen)
  2303. {
  2304. struct ucma_abi_set_option cmd;
  2305. struct cma_id_private *id_priv;
  2306. int ret;
  2307.  
  2308. CMA_INIT_CMD(&cmd, sizeof cmd, SET_OPTION);
  2309. id_priv = container_of(id, struct cma_id_private, id);
  2310. cmd.id = id_priv->handle;
  2311. cmd.optval = (uintptr_t) optval;
  2312. cmd.level = level;
  2313. cmd.optname = optname;
  2314. cmd.optlen = optlen;
  2315.  
  2316. ret = ns_write(id->channel->fd, &cmd, sizeof cmd);
  2317. if (ret != sizeof cmd)
  2318. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  2319.  
  2320. return 0;
  2321. }
  2322.  
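/*
 * rdma_migrate_id() moves an id to a different event channel; passing a NULL
 * channel switches the id to synchronous mode on a private channel.  It does
 * not return until every event already reported for the id on the old
 * channel has been acknowledged.
 */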
  2323. int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)
  2324. {
  2325. struct ucma_abi_migrate_resp resp;
  2326. struct ucma_abi_migrate_id cmd;
  2327. struct cma_id_private *id_priv;
  2328. int ret, sync;
  2329.  
  2330. id_priv = container_of(id, struct cma_id_private, id);
  2331. if (id_priv->sync && !channel)
  2332. return ERR(EINVAL);
  2333.  
  2334. if ((sync = (channel == NULL))) {
  2335. channel = rdma_create_event_channel();
  2336. if (!channel)
  2337. return -1;
  2338. }
  2339.  
  2340. CMA_INIT_CMD_RESP(&cmd, sizeof cmd, MIGRATE_ID, &resp, sizeof resp);
  2341. cmd.id = id_priv->handle;
  2342. cmd.fd = id->channel->fd;
  2343.  
  2344. ret = ns_write(channel->fd, &cmd, sizeof cmd);
  2345. if (ret != sizeof cmd) {
  2346. if (sync)
  2347. rdma_destroy_event_channel(channel);
  2348. return (ret >= 0) ? ERR(ENODATA) : ERR(-ret);
  2349. }
  2350.  
  2351. VALGRIND_MAKE_MEM_DEFINED(&resp, sizeof resp);
  2352.  
  2353. if (id_priv->sync) {
  2354. if (id->event) {
  2355. rdma_ack_cm_event(id->event);
  2356. id->event = NULL;
  2357. }
  2358. rdma_destroy_event_channel(id->channel);
  2359. }
  2360.  
  2361. /*
  2362. * Eventually if we want to support migrating channels while events are
  2363. * being processed on the current channel, we need to block here while
  2364. * there are any outstanding events on the current channel for this id
  2365. * to prevent the user from processing events for this id on the old
  2366. * channel after this call returns.
  2367. */
  2368. pthread_mutex_lock(&id_priv->mut);
  2369. id_priv->sync = sync;
  2370. id->channel = channel;
  2371. while (id_priv->events_completed < resp.events_reported)
  2372. pthread_cond_wait(&id_priv->cond, &id_priv->mut);
  2373. pthread_mutex_unlock(&id_priv->mut);
  2374.  
  2375. return 0;
  2376. }
  2377.  
  2378. static int ucma_passive_ep(struct rdma_cm_id *id, struct rdma_addrinfo *res,
  2379. struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr)
  2380. {
  2381. struct cma_id_private *id_priv;
  2382. int ret;
  2383.  
  2384. if (af_ib_support)
  2385. ret = rdma_bind_addr2(id, res->ai_src_addr, res->ai_src_len);
  2386. else
  2387. ret = rdma_bind_addr(id, res->ai_src_addr);
  2388. if (ret)
  2389. return ret;
  2390.  
  2391. id_priv = container_of(id, struct cma_id_private, id);
  2392. if (pd)
  2393. id->pd = pd;
  2394.  
  2395. if (qp_init_attr) {
  2396. id_priv->qp_init_attr = malloc(sizeof *qp_init_attr);
  2397. if (!id_priv->qp_init_attr)
  2398. return ERR(ENOMEM);
  2399.  
  2400. *id_priv->qp_init_attr = *qp_init_attr;
  2401. id_priv->qp_init_attr->qp_type = (enum ibv_qp_type)res->ai_qp_type;
  2402. }
  2403.  
  2404. return 0;
  2405. }
  2406.  
  2407. int rdma_create_ep(struct rdma_cm_id **id, struct rdma_addrinfo *res,
  2408. struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr)
  2409. {
  2410. struct rdma_cm_id *cm_id;
  2411. struct cma_id_private *id_priv;
  2412. int ret;
  2413.  
  2414. ret = rdma_create_id2(NULL, &cm_id, NULL, (enum rdma_port_space)res->ai_port_space,
  2415. (enum ibv_qp_type)res->ai_qp_type);
  2416. if (ret)
  2417. return ret;
  2418.  
  2419. if (res->ai_flags & RAI_PASSIVE) {
  2420. ret = ucma_passive_ep(cm_id, res, pd, qp_init_attr);
  2421. if (ret)
  2422. goto err;
  2423. goto out;
  2424. }
  2425.  
  2426. if (af_ib_support)
  2427. ret = rdma_resolve_addr2(cm_id, res->ai_src_addr, res->ai_src_len,
  2428. res->ai_dst_addr, res->ai_dst_len, 2000);
  2429. else
  2430. ret = rdma_resolve_addr(cm_id, res->ai_src_addr, res->ai_dst_addr, 2000);
  2431. if (ret)
  2432. goto err;
  2433.  
  2434. if (res->ai_route_len) {
  2435. ret = rdma_set_option(cm_id, RDMA_OPTION_IB, RDMA_OPTION_IB_PATH,
  2436. res->ai_route, res->ai_route_len);
  2437. if (!ret)
  2438. ret = ucma_complete(cm_id);
  2439. } else {
  2440. ret = rdma_resolve_route(cm_id, 2000);
  2441. }
  2442. if (ret)
  2443. goto err;
  2444.  
  2445. if (qp_init_attr) {
  2446. qp_init_attr->qp_type = (enum ibv_qp_type)res->ai_qp_type;
  2447. ret = rdma_create_qp(cm_id, pd, qp_init_attr);
  2448. if (ret)
  2449. goto err;
  2450. }
  2451.  
  2452. if (res->ai_connect_len) {
  2453. id_priv = container_of(cm_id, struct cma_id_private, id);
  2454. id_priv->connect = malloc(res->ai_connect_len);
  2455. if (!id_priv->connect) {
  2456. ret = ERR(ENOMEM);
  2457. goto err;
  2458. }
  2459. memcpy(id_priv->connect, res->ai_connect, res->ai_connect_len);
  2460. id_priv->connect_len = res->ai_connect_len;
  2461. }
  2462.  
  2463. out:
  2464. *id = cm_id;
  2465. return 0;
  2466.  
  2467. err:
  2468. rdma_destroy_ep(cm_id);
  2469. return ret;
  2470. }
  2471.  
  2472. void rdma_destroy_ep(struct rdma_cm_id *id)
  2473. {
  2474. struct cma_id_private *id_priv;
  2475.  
  2476. if (id->qp)
  2477. rdma_destroy_qp(id);
  2478.  
  2479. if (id->srq)
  2480. rdma_destroy_srq(id);
  2481.  
  2482. id_priv = container_of(id, struct cma_id_private, id);
  2483. if (id_priv->qp_init_attr)
  2484. free(id_priv->qp_init_attr);
  2485.  
  2486. rdma_destroy_id(id);
  2487. }
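/*
 * Illustrative use of rdma_create_ep()/rdma_destroy_ep() (sketch only; the
 * node name "server" and port "7471" are placeholders, error handling is
 * trimmed):
 *
 *     struct rdma_addrinfo hints, *res;
 *     struct ibv_qp_init_attr attr;
 *     struct rdma_cm_id *id;
 *
 *     memset(&hints, 0, sizeof hints);
 *     hints.ai_port_space = RDMA_PS_TCP;
 *     memset(&attr, 0, sizeof attr);
 *     attr.cap.max_send_wr = attr.cap.max_recv_wr = 1;
 *
 *     if (rdma_getaddrinfo("server", "7471", &hints, &res))
 *         return;                                // resolution failed
 *     if (!rdma_create_ep(&id, res, NULL, &attr))
 *         rdma_destroy_ep(id);                   // frees qp, srq, and the id
 *     rdma_freeaddrinfo(res);
 */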
  2488.  
  2489. int ucma_max_qpsize(struct rdma_cm_id *id)
  2490. {
  2491. struct cma_id_private *id_priv;
  2492. int i, max_size = 0;
  2493.  
  2494. id_priv = container_of(id, struct cma_id_private, id);
  2495. if (id && id_priv->cma_dev) {
  2496. max_size = id_priv->cma_dev->max_qpsize;
  2497. } else {
  2498. ucma_init();
  2499. for (i = 0; i < cma_dev_cnt; i++) {
  2500. if (!max_size || max_size > cma_dev_array[i].max_qpsize)
  2501. max_size = cma_dev_array[i].max_qpsize;
  2502. }
  2503. }
  2504. return max_size;
  2505. }
  2506.  
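/*
 * ucma_get_port() returns the port in network byte order: sin_port/sin6_port
 * directly, or, for AF_IB, the low 16 bits of the service ID converted back
 * to network order.
 */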
  2507. uint16_t ucma_get_port(struct sockaddr *addr)
  2508. {
  2509. switch (addr->sa_family) {
  2510. case AF_INET:
  2511. return ((struct sockaddr_in *) addr)->sin_port;
  2512. case AF_INET6:
  2513. return ((struct sockaddr_in6 *) addr)->sin6_port;
  2514. case AF_IB:
  2515. return htons((uint16_t) ntohll(((struct sockaddr_ib *) addr)->sib_sid));
  2516. default:
  2517. return 0;
  2518. }
  2519. }
  2520.  
  2521. uint16_t rdma_get_src_port(struct rdma_cm_id *id)
  2522. {
  2523. return ucma_get_port(&id->route.addr.p1.src_addr);
  2524. }
  2525.  
  2526. uint16_t rdma_get_dst_port(struct rdma_cm_id *id)
  2527. {
  2528. return ucma_get_port(&id->route.addr.p2.dst_addr);
  2529. }