Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/hns: Support flush cqe for hip08 in kernel space

According to the IB protocol, there are some cases in which work requests must
return the flush error completion status through the completion queue. Due
to a hardware limitation, the driver needs to assist the flush process.

This patch adds support for flush cqe for hip08 in the cases where it is
needed, such as poll cqe, post send, post recv and aeqe handling.

The patch also takes into account the compatibility between kernel and user space.

Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

authored by

Yixian Liu and committed by
Jason Gunthorpe
0425e3e6 75da9606

+241 -20
+2
drivers/infiniband/hw/hns/hns_roce_db.c
··· 41 41 found: 42 42 db->dma = sg_dma_address(page->umem->sg_head.sgl) + 43 43 (virt & ~PAGE_MASK); 44 + page->umem->sg_head.sgl->offset = virt & ~PAGE_MASK; 45 + db->virt_addr = sg_virt(page->umem->sg_head.sgl); 44 46 db->u.user_page = page; 45 47 refcount_inc(&page->refcount); 46 48
+17 -1
drivers/infiniband/hw/hns/hns_roce_device.h
··· 110 110 111 111 enum { 112 112 HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, 113 + HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, 113 114 }; 114 115 115 116 enum { ··· 191 190 HNS_ROCE_CAP_FLAG_REREG_MR = BIT(0), 192 191 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 = BIT(1), 193 192 HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), 194 - HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3) 193 + HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), 194 + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), 195 195 }; 196 196 197 197 enum hns_roce_mtt_type { ··· 387 385 struct hns_roce_user_db_page *user_page; 388 386 } u; 389 387 dma_addr_t dma; 388 + void *virt_addr; 390 389 int index; 391 390 int order; 392 391 }; ··· 527 524 struct hns_roce_buf hr_buf; 528 525 struct hns_roce_wq rq; 529 526 struct hns_roce_db rdb; 527 + struct hns_roce_db sdb; 530 528 u8 rdb_en; 529 + u8 sdb_en; 531 530 u32 doorbell_qpn; 532 531 __le32 sq_signal_bits; 533 532 u32 sq_next_wqe; ··· 646 641 int shift; 647 642 dma_addr_t cur_eqe_ba; 648 643 dma_addr_t nxt_eqe_ba; 644 + int event_type; 645 + int sub_type; 649 646 }; 650 647 651 648 struct hns_roce_eq_table { ··· 732 725 u32 tpq_buf_pg_sz; 733 726 u32 chunk_sz; /* chunk size in non multihop mode*/ 734 727 u64 flags; 728 + }; 729 + 730 + struct hns_roce_work { 731 + struct hns_roce_dev *hr_dev; 732 + struct work_struct work; 733 + u32 qpn; 734 + int event_type; 735 + int sub_type; 735 736 }; 736 737 737 738 struct hns_roce_hw { ··· 834 819 u32 tptr_size; /*only for hw v1*/ 835 820 const struct hns_roce_hw *hw; 836 821 void *priv; 822 + struct workqueue_struct *irq_workq; 837 823 }; 838 824 839 825 static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
+173 -17
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
··· 165 165 return 0; 166 166 } 167 167 168 + static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, 169 + const struct ib_qp_attr *attr, 170 + int attr_mask, enum ib_qp_state cur_state, 171 + enum ib_qp_state new_state); 172 + 168 173 static int hns_roce_v2_post_send(struct ib_qp *ibqp, 169 174 const struct ib_send_wr *wr, 170 175 const struct ib_send_wr **bad_wr) ··· 181 176 struct hns_roce_qp *qp = to_hr_qp(ibqp); 182 177 struct device *dev = hr_dev->dev; 183 178 struct hns_roce_v2_db sq_db; 179 + struct ib_qp_attr attr; 184 180 unsigned int sge_ind = 0; 185 181 unsigned int owner_bit; 186 182 unsigned long flags; 187 183 unsigned int ind; 188 184 void *wqe = NULL; 189 185 bool loopback; 186 + int attr_mask; 190 187 u32 tmp_len; 191 188 int ret = 0; 192 189 u8 *smac; ··· 531 524 532 525 qp->sq_next_wqe = ind; 533 526 qp->next_sge = sge_ind; 527 + 528 + if (qp->state == IB_QPS_ERR) { 529 + attr_mask = IB_QP_STATE; 530 + attr.qp_state = IB_QPS_ERR; 531 + 532 + ret = hns_roce_v2_modify_qp(&qp->ibqp, &attr, attr_mask, 533 + qp->state, IB_QPS_ERR); 534 + if (ret) { 535 + spin_unlock_irqrestore(&qp->sq.lock, flags); 536 + *bad_wr = wr; 537 + return ret; 538 + } 539 + } 534 540 } 535 541 536 542 spin_unlock_irqrestore(&qp->sq.lock, flags); ··· 560 540 struct hns_roce_v2_wqe_data_seg *dseg; 561 541 struct hns_roce_rinl_sge *sge_list; 562 542 struct device *dev = hr_dev->dev; 543 + struct ib_qp_attr attr; 563 544 unsigned long flags; 564 545 void *wqe = NULL; 546 + int attr_mask; 565 547 int ret = 0; 566 548 int nreq; 567 549 int ind; ··· 632 610 wmb(); 633 611 634 612 *hr_qp->rdb.db_record = hr_qp->rq.head & 0xffff; 613 + 614 + if (hr_qp->state == IB_QPS_ERR) { 615 + attr_mask = IB_QP_STATE; 616 + attr.qp_state = IB_QPS_ERR; 617 + 618 + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, 619 + attr_mask, hr_qp->state, 620 + IB_QPS_ERR); 621 + if (ret) { 622 + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); 623 + *bad_wr = wr; 624 + return ret; 625 + } 626 + } 635 627 } 636 
628 spin_unlock_irqrestore(&hr_qp->rq.lock, flags); 637 629 ··· 1253 1217 caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | 1254 1218 HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | 1255 1219 HNS_ROCE_CAP_FLAG_RQ_INLINE | 1256 - HNS_ROCE_CAP_FLAG_RECORD_DB; 1220 + HNS_ROCE_CAP_FLAG_RECORD_DB | 1221 + HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; 1257 1222 caps->pkey_table_len[0] = 1; 1258 1223 caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; 1259 1224 caps->ceqe_depth = HNS_ROCE_V2_COMP_EQE_NUM; ··· 2046 2009 struct hns_roce_v2_cqe *cqe; 2047 2010 struct hns_roce_qp *hr_qp; 2048 2011 struct hns_roce_wq *wq; 2012 + struct ib_qp_attr attr; 2013 + int attr_mask; 2049 2014 int is_send; 2050 2015 u16 wqe_ctr; 2051 2016 u32 opcode; ··· 2134 2095 break; 2135 2096 } 2136 2097 2137 - /* CQE status error, directly return */ 2138 - if (wc->status != IB_WC_SUCCESS) 2098 + /* flush cqe if wc status is error, excluding flush error */ 2099 + if ((wc->status != IB_WC_SUCCESS) && 2100 + (wc->status != IB_WC_WR_FLUSH_ERR)) { 2101 + attr_mask = IB_QP_STATE; 2102 + attr.qp_state = IB_QPS_ERR; 2103 + return hns_roce_v2_modify_qp(&(*cur_qp)->ibqp, 2104 + &attr, attr_mask, 2105 + (*cur_qp)->state, IB_QPS_ERR); 2106 + } 2107 + 2108 + if (wc->status == IB_WC_WR_FLUSH_ERR) 2139 2109 return 0; 2140 2110 2141 2111 if (is_send) { ··· 3498 3450 goto out; 3499 3451 } 3500 3452 3453 + /* When QP state is err, SQ and RQ WQE should be flushed */ 3454 + if (new_state == IB_QPS_ERR) { 3455 + roce_set_field(context->byte_160_sq_ci_pi, 3456 + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, 3457 + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 3458 + hr_qp->sq.head); 3459 + roce_set_field(qpc_mask->byte_160_sq_ci_pi, 3460 + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, 3461 + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); 3462 + roce_set_field(context->byte_84_rq_ci_pi, 3463 + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, 3464 + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 3465 + hr_qp->rq.head); 3466 + roce_set_field(qpc_mask->byte_84_rq_ci_pi, 3467 + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, 3468 + 
V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); 3469 + } 3470 + 3501 3471 if (attr_mask & IB_QP_AV) { 3502 3472 const struct ib_global_route *grh = 3503 3473 rdma_ah_read_grh(&attr->ah_attr); ··· 3872 3806 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); 3873 3807 3874 3808 if (is_user) { 3809 + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) 3810 + hns_roce_db_unmap_user( 3811 + to_hr_ucontext(hr_qp->ibqp.uobject->context), 3812 + &hr_qp->sdb); 3813 + 3875 3814 if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) 3876 3815 hns_roce_db_unmap_user( 3877 3816 to_hr_ucontext(hr_qp->ibqp.uobject->context), ··· 3957 3886 dev_err(hr_dev->dev, "MODIFY CQ Failed to cmd mailbox.\n"); 3958 3887 3959 3888 return ret; 3889 + } 3890 + 3891 + static void hns_roce_set_qps_to_err(struct hns_roce_dev *hr_dev, u32 qpn) 3892 + { 3893 + struct hns_roce_qp *hr_qp; 3894 + struct ib_qp_attr attr; 3895 + int attr_mask; 3896 + int ret; 3897 + 3898 + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); 3899 + if (!hr_qp) { 3900 + dev_warn(hr_dev->dev, "no hr_qp can be found!\n"); 3901 + return; 3902 + } 3903 + 3904 + if (hr_qp->ibqp.uobject) { 3905 + if (hr_qp->sdb_en == 1) { 3906 + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); 3907 + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); 3908 + } else { 3909 + dev_warn(hr_dev->dev, "flush cqe is unsupported in userspace!\n"); 3910 + return; 3911 + } 3912 + } 3913 + 3914 + attr_mask = IB_QP_STATE; 3915 + attr.qp_state = IB_QPS_ERR; 3916 + ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, &attr, attr_mask, 3917 + hr_qp->state, IB_QPS_ERR); 3918 + if (ret) 3919 + dev_err(hr_dev->dev, "failed to modify qp %d to err state.\n", 3920 + qpn); 3921 + } 3922 + 3923 + static void hns_roce_irq_work_handle(struct work_struct *work) 3924 + { 3925 + struct hns_roce_work *irq_work = 3926 + container_of(work, struct hns_roce_work, work); 3927 + u32 qpn = irq_work->qpn; 3928 + 3929 + switch (irq_work->event_type) { 3930 + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 3931 + case 
HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 3932 + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 3933 + hns_roce_set_qps_to_err(irq_work->hr_dev, qpn); 3934 + break; 3935 + default: 3936 + break; 3937 + } 3938 + 3939 + kfree(irq_work); 3940 + } 3941 + 3942 + static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, 3943 + struct hns_roce_eq *eq, u32 qpn) 3944 + { 3945 + struct hns_roce_work *irq_work; 3946 + 3947 + irq_work = kzalloc(sizeof(struct hns_roce_work), GFP_ATOMIC); 3948 + if (!irq_work) 3949 + return; 3950 + 3951 + INIT_WORK(&(irq_work->work), hns_roce_irq_work_handle); 3952 + irq_work->hr_dev = hr_dev; 3953 + irq_work->qpn = qpn; 3954 + irq_work->event_type = eq->event_type; 3955 + irq_work->sub_type = eq->sub_type; 3956 + queue_work(hr_dev->irq_workq, &(irq_work->work)); 3960 3957 } 3961 3958 3962 3959 static void set_eq_cons_index_v2(struct hns_roce_eq *eq) ··· 4129 3990 4130 3991 static void hns_roce_v2_qp_err_handle(struct hns_roce_dev *hr_dev, 4131 3992 struct hns_roce_aeqe *aeqe, 4132 - int event_type) 3993 + int event_type, u32 qpn) 4133 3994 { 4134 3995 struct device *dev = hr_dev->dev; 4135 - u32 qpn; 4136 - 4137 - qpn = roce_get_field(aeqe->event.qp_event.qp, 4138 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 4139 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 4140 3996 4141 3997 switch (event_type) { 4142 3998 case HNS_ROCE_EVENT_TYPE_COMM_EST: ··· 4158 4024 4159 4025 static void hns_roce_v2_cq_err_handle(struct hns_roce_dev *hr_dev, 4160 4026 struct hns_roce_aeqe *aeqe, 4161 - int event_type) 4027 + int event_type, u32 cqn) 4162 4028 { 4163 4029 struct device *dev = hr_dev->dev; 4164 - u32 cqn; 4165 - 4166 - cqn = roce_get_field(aeqe->event.cq_event.cq, 4167 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 4168 - HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 4169 4030 4170 4031 switch (event_type) { 4171 4032 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: ··· 4225 4096 struct hns_roce_aeqe *aeqe; 4226 4097 int aeqe_found = 0; 4227 4098 int event_type; 4099 + int 
sub_type; 4100 + u32 qpn; 4101 + u32 cqn; 4228 4102 4229 4103 while ((aeqe = next_aeqe_sw_v2(eq))) { 4230 4104 ··· 4239 4107 event_type = roce_get_field(aeqe->asyn, 4240 4108 HNS_ROCE_V2_AEQE_EVENT_TYPE_M, 4241 4109 HNS_ROCE_V2_AEQE_EVENT_TYPE_S); 4110 + sub_type = roce_get_field(aeqe->asyn, 4111 + HNS_ROCE_V2_AEQE_SUB_TYPE_M, 4112 + HNS_ROCE_V2_AEQE_SUB_TYPE_S); 4113 + qpn = roce_get_field(aeqe->event.qp_event.qp, 4114 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 4115 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 4116 + cqn = roce_get_field(aeqe->event.cq_event.cq, 4117 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, 4118 + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); 4242 4119 4243 4120 switch (event_type) { 4244 4121 case HNS_ROCE_EVENT_TYPE_PATH_MIG: ··· 4261 4120 case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: 4262 4121 case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: 4263 4122 case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: 4264 - hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type); 4123 + hns_roce_v2_qp_err_handle(hr_dev, aeqe, event_type, 4124 + qpn); 4265 4125 break; 4266 4126 case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: 4267 4127 case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: ··· 4271 4129 break; 4272 4130 case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: 4273 4131 case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: 4274 - hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type); 4132 + hns_roce_v2_cq_err_handle(hr_dev, aeqe, event_type, 4133 + cqn); 4275 4134 break; 4276 4135 case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: 4277 4136 dev_warn(dev, "DB overflow.\n"); ··· 4295 4152 break; 4296 4153 }; 4297 4154 4155 + eq->event_type = event_type; 4156 + eq->sub_type = sub_type; 4298 4157 ++eq->cons_index; 4299 4158 aeqe_found = 1; 4300 4159 ··· 4304 4159 dev_warn(dev, "cons_index overflow, set back to 0.\n"); 4305 4160 eq->cons_index = 0; 4306 4161 } 4162 + hns_roce_v2_init_irq_work(hr_dev, eq, qpn); 4307 4163 } 4308 4164 4309 4165 set_eq_cons_index_v2(eq); ··· 5121 4975 } 5122 4976 } 5123 4977 4978 + hr_dev->irq_workq = 4979 + 
create_singlethread_workqueue("hns_roce_irq_workqueue"); 4980 + if (!hr_dev->irq_workq) { 4981 + dev_err(dev, "Create irq workqueue failed!\n"); 4982 + goto err_request_irq_fail; 4983 + } 4984 + 5124 4985 return 0; 5125 4986 5126 4987 err_request_irq_fail: ··· 5178 5025 kfree(hr_dev->irq_names[i]); 5179 5026 5180 5027 kfree(eq_table->eq); 5028 + 5029 + flush_workqueue(hr_dev->irq_workq); 5030 + destroy_workqueue(hr_dev->irq_workq); 5181 5031 } 5182 5032 5183 5033 static const struct hns_roce_hw hns_roce_hw_v2 = {
+48 -2
drivers/infiniband/hw/hns/hns_roce_qp.c
··· 489 489 return 0; 490 490 } 491 491 492 + static int hns_roce_qp_has_sq(struct ib_qp_init_attr *attr) 493 + { 494 + if (attr->qp_type == IB_QPT_XRC_TGT) 495 + return 0; 496 + 497 + return 1; 498 + } 499 + 492 500 static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) 493 501 { 494 502 if (attr->qp_type == IB_QPT_XRC_INI || ··· 621 613 goto err_mtt; 622 614 } 623 615 616 + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && 617 + (udata->inlen >= sizeof(ucmd)) && 618 + (udata->outlen >= sizeof(resp)) && 619 + hns_roce_qp_has_sq(init_attr)) { 620 + ret = hns_roce_db_map_user( 621 + to_hr_ucontext(ib_pd->uobject->context), 622 + ucmd.sdb_addr, &hr_qp->sdb); 623 + if (ret) { 624 + dev_err(dev, "sq record doorbell map failed!\n"); 625 + goto err_mtt; 626 + } 627 + 628 + /* indicate kernel supports sq record db */ 629 + resp.cap_flags |= HNS_ROCE_SUPPORT_SQ_RECORD_DB; 630 + hr_qp->sdb_en = 1; 631 + } 632 + 624 633 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && 625 634 (udata->outlen >= sizeof(resp)) && 626 635 hns_roce_qp_has_rq(init_attr)) { ··· 646 621 ucmd.db_addr, &hr_qp->rdb); 647 622 if (ret) { 648 623 dev_err(dev, "rq record doorbell map failed!\n"); 649 - goto err_mtt; 624 + goto err_sq_dbmap; 650 625 } 651 626 } 652 627 } else { ··· 759 734 if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) && 760 735 (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) { 761 736 762 - /* indicate kernel supports record db */ 737 + /* indicate kernel supports rq record db */ 763 738 resp.cap_flags |= HNS_ROCE_SUPPORT_RQ_RECORD_DB; 764 739 ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); 765 740 if (ret) ··· 794 769 kfree(hr_qp->sq.wrid); 795 770 kfree(hr_qp->rq.wrid); 796 771 } 772 + 773 + err_sq_dbmap: 774 + if (ib_pd->uobject) 775 + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && 776 + (udata->inlen >= sizeof(ucmd)) && 777 + (udata->outlen >= sizeof(resp)) && 778 + hns_roce_qp_has_sq(init_attr)) 779 + hns_roce_db_unmap_user( 780 + 
to_hr_ucontext(ib_pd->uobject->context), 781 + &hr_qp->sdb); 797 782 798 783 err_mtt: 799 784 hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); ··· 937 902 attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; 938 903 new_state = attr_mask & IB_QP_STATE ? 939 904 attr->qp_state : cur_state; 905 + 906 + if (ibqp->uobject && 907 + (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { 908 + if (hr_qp->sdb_en == 1) { 909 + hr_qp->sq.head = *(int *)(hr_qp->sdb.virt_addr); 910 + hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); 911 + } else { 912 + dev_warn(dev, "flush cqe is not supported in userspace!\n"); 913 + goto out; 914 + } 915 + } 940 916 941 917 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, 942 918 IB_LINK_LAYER_ETHERNET)) {
+1
include/uapi/rdma/hns-abi.h
··· 53 53 __u8 log_sq_stride; 54 54 __u8 sq_no_prefetch; 55 55 __u8 reserved[5]; 56 + __aligned_u64 sdb_addr; 56 57 }; 57 58 58 59 struct hns_roce_ib_create_qp_resp {