Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-smc-two-features-for-smc-r'

Guangguan Wang says:

====================
net/smc: Two features for smc-r

v2: https://lore.kernel.org/netdev/20241202125203.48821-1-guangguan.wang@linux.alibaba.com/
v1: https://lore.kernel.org/oe-kbuild-all/202411282154.DjX7ilwF-lkp@intel.com/
====================

Link: https://patch.msgid.link/20241211023055.89610-1-guangguan.wang@linux.alibaba.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+56 -31
+4 -1
net/smc/af_smc.c
··· 1117 1117 ini->check_smcrv2 = true; 1118 1118 ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; 1119 1119 if (!(ini->smcr_version & SMC_V2) || 1120 - smc->clcsock->sk->sk_family != AF_INET || 1120 + #if IS_ENABLED(CONFIG_IPV6) 1121 + (smc->clcsock->sk->sk_family == AF_INET6 && 1122 + !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) || 1123 + #endif 1121 1124 !smc_clc_ueid_count() || 1122 1125 smc_find_rdma_device(smc, ini)) 1123 1126 ini->smcr_version &= ~SMC_V2;
+5
net/smc/smc_core.c
··· 795 795 if (lgr->smc_version == SMC_V2) { 796 796 lnk->smcibdev = ini->smcrv2.ib_dev_v2; 797 797 lnk->ibport = ini->smcrv2.ib_port_v2; 798 + lnk->wr_rx_sge_cnt = lnk->smcibdev->ibdev->attrs.max_recv_sge < 2 ? 1 : 2; 799 + lnk->wr_rx_buflen = smc_link_shared_v2_rxbuf(lnk) ? 800 + SMC_WR_BUF_SIZE : SMC_WR_BUF_V2_SIZE; 798 801 } else { 799 802 lnk->smcibdev = ini->ib_dev; 800 803 lnk->ibport = ini->ib_port; 804 + lnk->wr_rx_sge_cnt = 1; 805 + lnk->wr_rx_buflen = SMC_WR_BUF_SIZE; 801 806 } 802 807 get_device(&lnk->smcibdev->ibdev->dev); 803 808 atomic_inc(&lnk->smcibdev->lnk_cnt);
+10 -1
net/smc/smc_core.h
··· 122 122 } ____cacheline_aligned_in_smp; 123 123 struct completion tx_ref_comp; 124 124 125 - struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ 125 + u8 *wr_rx_bufs; /* WR recv payload buffers */ 126 126 struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ 127 127 struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ 128 128 /* above three vectors have wr_rx_cnt elements and use the same index */ 129 + int wr_rx_sge_cnt; /* rx sge, V1 is 1, V2 is either 2 or 1 */ 130 + int wr_rx_buflen; /* buffer len for the first sge, len for the 131 + * second sge is lgr shared if rx sge is 2. 132 + */ 129 133 dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ 130 134 dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ 131 135 u64 wr_rx_id; /* seq # of last recv WR */ ··· 508 504 static inline bool smc_link_active(struct smc_link *lnk) 509 505 { 510 506 return lnk->state == SMC_LNK_ACTIVE; 507 + } 508 + 509 + static inline bool smc_link_shared_v2_rxbuf(struct smc_link *lnk) 510 + { 511 + return lnk->wr_rx_sge_cnt > 1; 511 512 } 512 513 513 514 static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw)
+1 -2
net/smc/smc_ib.c
··· 662 662 /* create a queue pair within the protection domain for a link */ 663 663 int smc_ib_create_queue_pair(struct smc_link *lnk) 664 664 { 665 - int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; 666 665 struct ib_qp_init_attr qp_attr = { 667 666 .event_handler = smc_ib_qp_event_handler, 668 667 .qp_context = lnk, ··· 675 676 .max_send_wr = SMC_WR_BUF_CNT * 3, 676 677 .max_recv_wr = SMC_WR_BUF_CNT * 3, 677 678 .max_send_sge = SMC_IB_MAX_SEND_SGE, 678 - .max_recv_sge = sges_per_buf, 679 + .max_recv_sge = lnk->wr_rx_sge_cnt, 679 680 .max_inline_data = 0, 680 681 }, 681 682 .sq_sig_type = IB_SIGNAL_REQ_WR,
+15 -6
net/smc/smc_llc.c
··· 997 997 } 998 998 999 999 static void smc_llc_save_add_link_rkeys(struct smc_link *link, 1000 - struct smc_link *link_new) 1000 + struct smc_link *link_new, 1001 + u8 *llc_msg) 1001 1002 { 1002 1003 struct smc_llc_msg_add_link_v2_ext *ext; 1003 1004 struct smc_link_group *lgr = link->lgr; 1004 1005 int max, i; 1005 1006 1006 - ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 + 1007 + ext = (struct smc_llc_msg_add_link_v2_ext *)(llc_msg + 1007 1008 SMC_WR_TX_SIZE); 1008 1009 max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); 1009 1010 down_write(&lgr->rmbs_lock); ··· 1099 1098 if (rc) 1100 1099 goto out_clear_lnk; 1101 1100 if (lgr->smc_version == SMC_V2) { 1102 - smc_llc_save_add_link_rkeys(link, lnk_new); 1101 + u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ? 1102 + (u8 *)lgr->wr_rx_buf_v2 : (u8 *)llc; 1103 + smc_llc_save_add_link_rkeys(link, lnk_new, llc_msg); 1103 1104 } else { 1104 1105 rc = smc_llc_cli_rkey_exchange(link, lnk_new); 1105 1106 if (rc) { ··· 1501 1498 if (rc) 1502 1499 goto out_err; 1503 1500 if (lgr->smc_version == SMC_V2) { 1504 - smc_llc_save_add_link_rkeys(link, link_new); 1501 + u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ? 1502 + (u8 *)lgr->wr_rx_buf_v2 : (u8 *)add_llc; 1503 + smc_llc_save_add_link_rkeys(link, link_new, llc_msg); 1505 1504 } else { 1506 1505 rc = smc_llc_srv_rkey_exchange(link, link_new); 1507 1506 if (rc) ··· 1812 1807 if (lgr->smc_version == SMC_V2) { 1813 1808 struct smc_llc_msg_delete_rkey_v2 *llcv2; 1814 1809 1815 - memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); 1816 - llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; 1810 + if (smc_link_shared_v2_rxbuf(link)) { 1811 + memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); 1812 + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; 1813 + } else { 1814 + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)llc; 1815 + } 1817 1816 llcv2->num_inval_rkeys = 0; 1818 1817 1819 1818 max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
+21 -21
net/smc/smc_wr.c
··· 439 439 return; /* short message */ 440 440 temp_wr_id = wc->wr_id; 441 441 index = do_div(temp_wr_id, link->wr_rx_cnt); 442 - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index]; 442 + wr_rx = (struct smc_wr_rx_hdr *)(link->wr_rx_bufs + index * link->wr_rx_buflen); 443 443 hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) { 444 444 if (handler->type == wr_rx->type) 445 445 handler->handler(wc, wr_rx); ··· 555 555 556 556 static void smc_wr_init_sge(struct smc_link *lnk) 557 557 { 558 - int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; 559 558 bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE); 560 559 u32 i; 561 560 ··· 607 608 * the larger spillover buffer, allowing easy data mapping. 608 609 */ 609 610 for (i = 0; i < lnk->wr_rx_cnt; i++) { 610 - int x = i * sges_per_buf; 611 + int x = i * lnk->wr_rx_sge_cnt; 611 612 612 613 lnk->wr_rx_sges[x].addr = 613 - lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; 614 - lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE; 614 + lnk->wr_rx_dma_addr + i * lnk->wr_rx_buflen; 615 + lnk->wr_rx_sges[x].length = smc_link_shared_v2_rxbuf(lnk) ? 616 + SMC_WR_TX_SIZE : lnk->wr_rx_buflen; 615 617 lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey; 616 - if (lnk->lgr->smc_version == SMC_V2) { 618 + if (lnk->lgr->smc_version == SMC_V2 && smc_link_shared_v2_rxbuf(lnk)) { 617 619 lnk->wr_rx_sges[x + 1].addr = 618 620 lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE; 619 621 lnk->wr_rx_sges[x + 1].length = ··· 624 624 } 625 625 lnk->wr_rx_ibs[i].next = NULL; 626 626 lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x]; 627 - lnk->wr_rx_ibs[i].num_sge = sges_per_buf; 627 + lnk->wr_rx_ibs[i].num_sge = lnk->wr_rx_sge_cnt; 628 628 } 629 629 lnk->wr_reg.wr.next = NULL; 630 630 lnk->wr_reg.wr.num_sge = 0; ··· 655 655 656 656 if (lnk->wr_rx_dma_addr) { 657 657 ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, 658 - SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, 658 + lnk->wr_rx_buflen * lnk->wr_rx_cnt, 659 659 DMA_FROM_DEVICE); 660 660 lnk->wr_rx_dma_addr = 0; 661 661 } ··· 740 740 741 741 int smc_wr_alloc_link_mem(struct smc_link *link) 742 742 { 743 - int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 2 : 1; 744 - 745 743 /* allocate link related memory */ 746 744 link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); 747 745 if (!link->wr_tx_bufs) 748 746 goto no_mem; 749 - link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE, 747 + link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen, 750 748 GFP_KERNEL); 751 749 if (!link->wr_rx_bufs) 752 750 goto no_mem_wr_tx_bufs; ··· 772 774 if (!link->wr_tx_sges) 773 775 goto no_mem_wr_tx_rdma_sges; 774 776 link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, 775 - sizeof(link->wr_rx_sges[0]) * sges_per_buf, 777 + sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt, 776 778 GFP_KERNEL); 777 779 if (!link->wr_rx_sges) 778 780 goto no_mem_wr_tx_sges; ··· 870 872 smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0); 871 873 lnk->wr_rx_id = 0; 872 874 lnk->wr_rx_dma_addr = ib_dma_map_single( 873 - ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, 875 + ibdev, lnk->wr_rx_bufs, lnk->wr_rx_buflen * lnk->wr_rx_cnt, 874 876 DMA_FROM_DEVICE); 875 877 if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) { 876 878 lnk->wr_rx_dma_addr = 0; ··· 878 880 goto out; 879 881 } 880 882 if (lnk->lgr->smc_version == SMC_V2) { 881 - lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev, 882 - lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE, 883 - DMA_FROM_DEVICE); 884 - if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { 885 - lnk->wr_rx_v2_dma_addr = 0; 886 - rc = -EIO; 887 - goto dma_unmap; 883 + if (smc_link_shared_v2_rxbuf(lnk)) { 884 + lnk->wr_rx_v2_dma_addr = 885 + ib_dma_map_single(ibdev, lnk->lgr->wr_rx_buf_v2, 886 + SMC_WR_BUF_V2_SIZE, DMA_FROM_DEVICE); 887 + if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { 888 + lnk->wr_rx_v2_dma_addr = 0; 889 + rc = -EIO; 890 + goto dma_unmap; 891 + } 888 892 } 889 893 lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev, 890 894 lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE, ··· 935 935 lnk->wr_tx_v2_dma_addr = 0; 936 936 } 937 937 ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, 938 - SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, 938 + lnk->wr_rx_buflen * lnk->wr_rx_cnt, 939 939 DMA_FROM_DEVICE); 940 940 lnk->wr_rx_dma_addr = 0; 941 941 out: