Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/smc: register RMB-related memory region

A memory region created for a new RMB must be registered explicitly,
before the peer can make use of it for remote DMA transfer.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Ursula Braun; committed by David S. Miller.
Commit 652a1e41 (parent 897e1c24).

+115 -2
+38
net/smc/af_smc.c
··· 338 338 return SMC_CLC_DECL_INTERR; 339 339 340 340 smc_wr_remember_qp_attr(link); 341 + 342 + rc = smc_wr_reg_send(link, 343 + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); 344 + if (rc) 345 + return SMC_CLC_DECL_INTERR; 346 + 341 347 /* send CONFIRM LINK response over RoCE fabric */ 342 348 rc = smc_llc_send_confirm_link(link, 343 349 link->smcibdev->mac[link->ibport - 1], ··· 464 458 if (rc) { 465 459 reason_code = SMC_CLC_DECL_INTERR; 466 460 goto decline_rdma_unlock; 461 + } 462 + } else { 463 + struct smc_buf_desc *buf_desc = smc->conn.rmb_desc; 464 + 465 + if (!buf_desc->reused) { 466 + /* register memory region for new rmb */ 467 + rc = smc_wr_reg_send(link, 468 + buf_desc->mr_rx[SMC_SINGLE_LINK]); 469 + if (rc) { 470 + reason_code = SMC_CLC_DECL_INTERR; 471 + goto decline_rdma_unlock; 472 + } 467 473 } 468 474 } 469 475 ··· 710 692 int rc; 711 693 712 694 link = &lgr->lnk[SMC_SINGLE_LINK]; 695 + 696 + rc = smc_wr_reg_send(link, 697 + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); 698 + if (rc) 699 + return SMC_CLC_DECL_INTERR; 700 + 713 701 /* send CONFIRM LINK request to client over the RoCE fabric */ 714 702 rc = smc_llc_send_confirm_link(link, 715 703 link->smcibdev->mac[link->ibport - 1], ··· 826 802 827 803 smc_close_init(new_smc); 828 804 smc_rx_init(new_smc); 805 + 806 + if (local_contact != SMC_FIRST_CONTACT) { 807 + struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc; 808 + 809 + if (!buf_desc->reused) { 810 + /* register memory region for new rmb */ 811 + rc = smc_wr_reg_send(link, 812 + buf_desc->mr_rx[SMC_SINGLE_LINK]); 813 + if (rc) { 814 + reason_code = SMC_CLC_DECL_INTERR; 815 + goto decline_rdma; 816 + } 817 + } 818 + } 829 819 830 820 rc = smc_clc_send_accept(new_smc, local_contact); 831 821 if (rc)
-1
net/smc/smc_core.c
··· 175 175 rc = smc_wr_alloc_link_mem(lnk); 176 176 if (rc) 177 177 goto free_lgr; 178 - init_waitqueue_head(&lnk->wr_tx_wait); 179 178 rc = smc_ib_create_protection_domain(lnk); 180 179 if (rc) 181 180 goto free_link_mem;
+12
net/smc/smc_core.h
··· 37 37 u8 raw[SMC_WR_BUF_SIZE]; 38 38 }; 39 39 40 + #define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ 41 + 42 + enum smc_wr_reg_state { 43 + POSTED, /* ib_wr_reg_mr request posted */ 44 + CONFIRMED, /* ib_wr_reg_mr response: successful */ 45 + FAILED /* ib_wr_reg_mr response: failure */ 46 + }; 47 + 40 48 struct smc_link { 41 49 struct smc_ib_device *smcibdev; /* ib-device */ 42 50 u8 ibport; /* port - values 1 | 2 */ ··· 72 64 dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ 73 65 u64 wr_rx_id; /* seq # of last recv WR */ 74 66 u32 wr_rx_cnt; /* number of WR recv buffers */ 67 + 68 + struct ib_reg_wr wr_reg; /* WR register memory region */ 69 + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ 70 + enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ 75 71 76 72 union ib_gid gid; /* gid matching used vlan id */ 77 73 u32 peer_qpn; /* QP number of peer */
+1 -1
net/smc/smc_ib.c
··· 231 231 .recv_cq = lnk->smcibdev->roce_cq_recv, 232 232 .srq = NULL, 233 233 .cap = { 234 - .max_send_wr = SMC_WR_BUF_CNT, 235 234 /* include unsolicited rdma_writes as well, 236 235 * there are max. 2 RDMA_WRITE per 1 WR_SEND 237 236 */ 237 + .max_send_wr = SMC_WR_BUF_CNT * 3, 238 238 .max_recv_wr = SMC_WR_BUF_CNT * 3, 239 239 .max_send_sge = SMC_IB_MAX_SEND_SGE, 240 240 .max_recv_sge = 1,
+63
net/smc/smc_wr.c
··· 68 68 int i; 69 69 70 70 link = wc->qp->qp_context; 71 + 72 + if (wc->opcode == IB_WC_REG_MR) { 73 + if (wc->status) 74 + link->wr_reg_state = FAILED; 75 + else 76 + link->wr_reg_state = CONFIRMED; 77 + wake_up(&link->wr_reg_wait); 78 + return; 79 + } 80 + 71 81 pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); 72 82 if (pnd_snd_idx == link->wr_tx_cnt) 73 83 return; ··· 250 240 &failed_wr); 251 241 if (rc) 252 242 smc_wr_tx_put_slot(link, priv); 243 + return rc; 244 + } 245 + 246 + /* Register a memory region and wait for result. */ 247 + int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) 248 + { 249 + struct ib_send_wr *failed_wr = NULL; 250 + int rc; 251 + 252 + ib_req_notify_cq(link->smcibdev->roce_cq_send, 253 + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); 254 + link->wr_reg_state = POSTED; 255 + link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; 256 + link->wr_reg.mr = mr; 257 + link->wr_reg.key = mr->rkey; 258 + failed_wr = &link->wr_reg.wr; 259 + rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); 260 + WARN_ON(failed_wr != &link->wr_reg.wr); 261 + if (rc) 262 + return rc; 263 + 264 + rc = wait_event_interruptible_timeout(link->wr_reg_wait, 265 + (link->wr_reg_state != POSTED), 266 + SMC_WR_REG_MR_WAIT_TIME); 267 + if (!rc) { 268 + /* timeout - terminate connections */ 269 + struct smc_link_group *lgr; 270 + 271 + lgr = container_of(link, struct smc_link_group, 272 + lnk[SMC_SINGLE_LINK]); 273 + smc_lgr_terminate(lgr); 274 + return -EPIPE; 275 + } 276 + if (rc == -ERESTARTSYS) 277 + return -EINTR; 278 + switch (link->wr_reg_state) { 279 + case CONFIRMED: 280 + rc = 0; 281 + break; 282 + case FAILED: 283 + rc = -EIO; 284 + break; 285 + case POSTED: 286 + rc = -EPIPE; 287 + break; 288 + } 253 289 return rc; 254 290 } 255 291 ··· 514 458 lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; 515 459 lnk->wr_rx_ibs[i].num_sge = 1; 516 460 } 461 + lnk->wr_reg.wr.next = NULL; 462 + lnk->wr_reg.wr.num_sge = 0; 463 + lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED; 464 + lnk->wr_reg.wr.opcode = IB_WR_REG_MR; 465 + lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; 517 466 } 518 467 519 468 void smc_wr_free_link(struct smc_link *lnk) ··· 663 602 smc_wr_init_sge(lnk); 664 603 memset(lnk->wr_tx_mask, 0, 665 604 BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); 605 + init_waitqueue_head(&lnk->wr_tx_wait); 606 + init_waitqueue_head(&lnk->wr_reg_wait); 666 607 return rc; 667 608 668 609 dma_unmap:
+1
net/smc/smc_wr.h
··· 102 102 int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); 103 103 int smc_wr_rx_post_init(struct smc_link *link); 104 104 void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context); 105 + int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr); 105 106 106 107 #endif /* SMC_WR_H */