Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/hns: Support DSCP

Add support for DSCP configuration. For DSCP, get dscp-prio mapping
via the hns3 NIC driver API .get_dscp_prio() and fill the SL (in WQE for
UD or in QPC for RC) with the priority value. The prio-tc mapping is
configured to HW by hns3 nic driver. HW will select a corresponding
TC according to SL and the prio-tc mapping.

Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://lore.kernel.org/r/20240315093551.1650088-1-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>

Authored by Junxian Huang and committed by Leon Romanovsky
ee20cc17 2ca7e93b

+115 -29
+21 -10
drivers/infiniband/hw/hns/hns_roce_ah.c
··· 59 59 struct hns_roce_dev *hr_dev = to_hr_dev(ibah->device); 60 60 struct hns_roce_ib_create_ah_resp resp = {}; 61 61 struct hns_roce_ah *ah = to_hr_ah(ibah); 62 - int ret = 0; 63 - u32 max_sl; 62 + u8 tclass = get_tclass(grh); 63 + u8 priority = 0; 64 + u8 tc_mode = 0; 65 + int ret; 64 66 65 67 if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 && udata) 66 68 return -EOPNOTSUPP; ··· 76 74 ah->av.hop_limit = grh->hop_limit; 77 75 ah->av.flowlabel = grh->flow_label; 78 76 ah->av.udp_sport = get_ah_udp_sport(ah_attr); 79 - ah->av.tclass = get_tclass(grh); 77 + ah->av.tclass = tclass; 80 78 81 - ah->av.sl = rdma_ah_get_sl(ah_attr); 82 - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); 83 - if (unlikely(ah->av.sl > max_sl)) { 84 - ibdev_err_ratelimited(&hr_dev->ib_dev, 85 - "failed to set sl, sl (%u) shouldn't be larger than %u.\n", 86 - ah->av.sl, max_sl); 79 + ret = hr_dev->hw->get_dscp(hr_dev, tclass, &tc_mode, &priority); 80 + if (ret == -EOPNOTSUPP) 81 + ret = 0; 82 + 83 + if (ret && grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 84 + return ret; 85 + 86 + if (tc_mode == HNAE3_TC_MAP_MODE_DSCP && 87 + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 88 + ah->av.sl = priority; 89 + else 90 + ah->av.sl = rdma_ah_get_sl(ah_attr); 91 + 92 + if (!check_sl_valid(hr_dev, ah->av.sl)) 87 93 return -EINVAL; 88 - } 89 94 90 95 memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); 91 96 memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN); ··· 108 99 } 109 100 110 101 if (udata) { 102 + resp.priority = ah->av.sl; 103 + resp.tc_mode = tc_mode; 111 104 memcpy(resp.dmac, ah_attr->roce.dmac, ETH_ALEN); 112 105 ret = ib_copy_to_udata(udata, &resp, 113 106 min(udata->outlen, sizeof(resp)));
+6
drivers/infiniband/hw/hns/hns_roce_device.h
··· 645 645 struct hns_user_mmap_entry *dwqe_mmap_entry; 646 646 u32 config; 647 647 enum hns_roce_cong_type cong_type; 648 + u8 tc_mode; 649 + u8 priority; 648 650 }; 649 651 650 652 struct hns_roce_ib_iboe { ··· 952 950 int (*query_sccc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); 953 951 int (*query_hw_counter)(struct hns_roce_dev *hr_dev, 954 952 u64 *stats, u32 port, int *hw_counters); 953 + int (*get_dscp)(struct hns_roce_dev *hr_dev, u8 dscp, 954 + u8 *tc_mode, u8 *priority); 955 955 const struct ib_device_ops *hns_roce_dev_ops; 956 956 const struct ib_device_ops *hns_roce_dev_srq_ops; 957 957 }; ··· 1296 1292 hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address, 1297 1293 size_t length, 1298 1294 enum hns_roce_mmap_type mmap_type); 1295 + bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl); 1296 + 1299 1297 #endif /* _HNS_ROCE_DEVICE_H */
+67 -18
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 443 443 hr_reg_write(ud_sq_wqe, UD_SEND_WQE_HOPLIMIT, ah->av.hop_limit); 444 444 hr_reg_write(ud_sq_wqe, UD_SEND_WQE_TCLASS, ah->av.tclass); 445 445 hr_reg_write(ud_sq_wqe, UD_SEND_WQE_FLOW_LABEL, ah->av.flowlabel); 446 - 447 - if (WARN_ON(ah->av.sl > MAX_SERVICE_LEVEL)) 448 - return -EINVAL; 449 - 450 446 hr_reg_write(ud_sq_wqe, UD_SEND_WQE_SL, ah->av.sl); 451 447 452 448 ud_sq_wqe->sgid_index = ah->av.gid_index;
··· 4824 4828 return 0; 4825 4829 } 4826 4830 4831 + static int hns_roce_hw_v2_get_dscp(struct hns_roce_dev *hr_dev, u8 dscp, 4832 + u8 *tc_mode, u8 *priority) 4833 + { 4834 + struct hns_roce_v2_priv *priv = hr_dev->priv; 4835 + struct hnae3_handle *handle = priv->handle; 4836 + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; 4837 + 4838 + if (!ops->get_dscp_prio) 4839 + return -EOPNOTSUPP; 4840 + 4841 + return ops->get_dscp_prio(handle, dscp, tc_mode, priority); 4842 + } 4843 + 4844 + bool check_sl_valid(struct hns_roce_dev *hr_dev, u8 sl) 4845 + { 4846 + u32 max_sl; 4847 + 4848 + max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); 4849 + if (unlikely(sl > max_sl)) { 4850 + ibdev_err_ratelimited(&hr_dev->ib_dev, 4851 + "failed to set SL(%u). Shouldn't be larger than %u.\n", 4852 + sl, max_sl); 4853 + return false; 4854 + } 4855 + 4856 + return true; 4857 + } 4858 + 4859 + static int hns_roce_set_sl(struct ib_qp *ibqp, 4860 + const struct ib_qp_attr *attr, 4861 + struct hns_roce_v2_qp_context *context, 4862 + struct hns_roce_v2_qp_context *qpc_mask) 4863 + { 4864 + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); 4865 + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 4866 + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 4867 + struct ib_device *ibdev = &hr_dev->ib_dev; 4868 + int ret; 4869 + 4870 + ret = hns_roce_hw_v2_get_dscp(hr_dev, get_tclass(&attr->ah_attr.grh), 4871 + &hr_qp->tc_mode, &hr_qp->priority); 4872 + if (ret && ret != -EOPNOTSUPP && 4873 + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { 4874 + ibdev_err_ratelimited(ibdev, 4875 + "failed to get dscp, ret = %d.\n", ret); 4876 + return ret; 4877 + } 4878 + 4879 + if (hr_qp->tc_mode == HNAE3_TC_MAP_MODE_DSCP && 4880 + grh->sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) 4881 + hr_qp->sl = hr_qp->priority; 4882 + else 4883 + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); 4884 + 4885 + if (!check_sl_valid(hr_dev, hr_qp->sl)) 4886 + return -EINVAL; 4887 + 4888 + hr_reg_write(context, QPC_SL, hr_qp->sl); 4889 + hr_reg_clear(qpc_mask, QPC_SL); 4890 + 4891 + return 0; 4892 + } 4893 + 4827 4894 static int hns_roce_v2_set_path(struct ib_qp *ibqp, 4828 4895 const struct ib_qp_attr *attr, 4829 4896 int attr_mask,
··· 4902 4843 int is_roce_protocol; 4903 4844 u16 vlan_id = 0xffff; 4904 4845 bool is_udp = false; 4905 - u32 max_sl; 4906 4846 u8 ib_port; 4907 4847 u8 hr_port; 4908 4848 int ret; 4909 - 4910 - max_sl = min_t(u32, MAX_SERVICE_LEVEL, hr_dev->caps.sl_num - 1); 4911 - if (unlikely(sl > max_sl)) { 4912 - ibdev_err_ratelimited(ibdev, 4913 - "failed to fill QPC, sl (%u) shouldn't be larger than %u.\n", 4914 - sl, max_sl); 4915 - return -EINVAL; 4916 - } 4917 4849 4918 4850 /* 4919 4851 * If free_mr_en of qp is set, it means that this qp comes from
··· 4912 4862 * In the loopback scenario, only sl needs to be set. 4913 4863 */ 4914 4864 if (hr_qp->free_mr_en) { 4865 + if (!check_sl_valid(hr_dev, sl)) 4866 + return -EINVAL; 4915 4867 hr_reg_write(context, QPC_SL, sl); 4916 4868 hr_reg_clear(qpc_mask, QPC_SL); 4917 4869 hr_qp->sl = sl;
··· 4983 4931 memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); 4984 4932 memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); 4985 4933 4986 - hr_qp->sl = sl; 4987 - hr_reg_write(context, QPC_SL, hr_qp->sl); 4988 - hr_reg_clear(qpc_mask, QPC_SL); 4989 - 4990 - return 0; 4934 + return hns_roce_set_sl(ibqp, attr, context, qpc_mask); 4991 4935 } 4992 4936 static bool check_qp_state(enum ib_qp_state cur_state,
··· 6783 6735 .query_srqc = hns_roce_v2_query_srqc, 6784 6736 .query_sccc = hns_roce_v2_query_sccc, 6785 6737 .query_hw_counter = hns_roce_hw_v2_query_counter, 6738 + .get_dscp = hns_roce_hw_v2_get_dscp, 6786 6739 .hns_roce_dev_ops = &hns_roce_v2_dev_ops, 6787 6740 .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, 6788 6741 };
+13
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 1386 1386 int attr_mask, struct ib_udata *udata) 1387 1387 { 1388 1388 struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); 1389 + struct hns_roce_ib_modify_qp_resp resp = {}; 1389 1390 struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); 1390 1391 enum ib_qp_state cur_state, new_state; 1391 1392 int ret = -EINVAL; ··· 1428 1427 1429 1428 ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state, 1430 1429 new_state, udata); 1430 + if (ret) 1431 + goto out; 1432 + 1433 + if (udata && udata->outlen) { 1434 + resp.tc_mode = hr_qp->tc_mode; 1435 + resp.priority = hr_qp->sl; 1436 + ret = ib_copy_to_udata(udata, &resp, 1437 + min(udata->outlen, sizeof(resp))); 1438 + if (ret) 1439 + ibdev_err_ratelimited(&hr_dev->ib_dev, 1440 + "failed to copy modify qp resp.\n"); 1441 + } 1431 1442 1432 1443 out: 1433 1444 mutex_unlock(&hr_qp->mutex);
+8 -1
include/uapi/rdma/hns-abi.h
··· 109 109 __aligned_u64 dwqe_mmap_key; 110 110 }; 111 111 112 + struct hns_roce_ib_modify_qp_resp { 113 + __u8 tc_mode; 114 + __u8 priority; 115 + __u8 reserved[6]; 116 + }; 117 + 112 118 enum { 113 119 HNS_ROCE_EXSGE_FLAGS = 1 << 0, 114 120 HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, ··· 149 143 150 144 struct hns_roce_ib_create_ah_resp { 151 145 __u8 dmac[6]; 152 - __u8 reserved[2]; 146 + __u8 priority; 147 + __u8 tc_mode; 153 148 }; 154 149 155 150 #endif /* HNS_ABI_USER_H */