IB/ehca: Fix problem with generated flush work completions

This fix enables the ehca device driver to generate flush work completions
even if the application doesn't request completions for all work
requests. The current implementation of ehca will generate flush work
completions for the wrong work requests if an application uses
non-signaled work completions.

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

authored by Stefan Roscher and committed by Roland Dreier 7ec4f463 6b1f9d64

+56 -31
+3 -1
drivers/infiniband/hw/ehca/ehca_classes.h
···
163 163		/* struct for tracking if cqes have been reported to the application */
164 164	struct ehca_qmap_entry {
165 165		u16 app_wr_id;
166 -		u16 reported;
166 +		u8 reported;
167 +		u8 cqe_req;
167 168	};
168 169
169 170	struct ehca_queue_map {
···
172 171		unsigned int entries;
173 172		unsigned int tail;
174 173		unsigned int left_to_poll;
174 +		unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
175 175	};
176 176
177 177	struct ehca_qp {
+20 -6
drivers/infiniband/hw/ehca/ehca_qp.c
···
435 435	{
436 436		int i;
437 437
438 -		qmap->tail = 0;
439 -		for (i = 0; i < qmap->entries; i++)
438 +		qmap->tail = qmap->entries - 1;
439 +		qmap->left_to_poll = 0;
440 +		qmap->next_wqe_idx = 0;
441 +		for (i = 0; i < qmap->entries; i++) {
440 442			qmap->map[i].reported = 1;
443 +			qmap->map[i].cqe_req = 0;
444 +		}
441 445	}
442 446
443 447	/*
···
1125 1121		void *wqe_v;
1126 1122		u64 q_ofs;
1127 1123		u32 wqe_idx;
1124 +		unsigned int tail_idx;
1128 1125
1129 1126		/* convert real to abs address */
1130 1127		wqe_p = wqe_p & (~(1UL << 63));
···
1138 1133		return -EFAULT;
1139 1134	}
1140 1135
1136 +		tail_idx = (qmap->tail + 1) % qmap->entries;
1141 1137		wqe_idx = q_ofs / ipz_queue->qe_size;
1142 -		if (wqe_idx < qmap->tail)
1143 -			qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
1144 -		else
1145 -			qmap->left_to_poll = wqe_idx - qmap->tail;
1146 1138
1139 +		/* check all processed wqes, whether a cqe is requested or not */
1140 +		while (tail_idx != wqe_idx) {
1141 +			if (qmap->map[tail_idx].cqe_req)
1142 +				qmap->left_to_poll++;
1143 +			tail_idx = (tail_idx + 1) % qmap->entries;
1144 +		}
1145 +		/* save index in queue, where we have to start flushing */
1146 +		qmap->next_wqe_idx = wqe_idx;
1147 1147		return 0;
1148 1148	}
1149 1149
···
1195 1185	} else {
1196 1186		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
1197 1187		my_qp->sq_map.left_to_poll = 0;
1188 +		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
1189 +			my_qp->sq_map.entries;
1198 1190		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
1199 1191
1200 1192		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
1201 1193		my_qp->rq_map.left_to_poll = 0;
1194 +		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
1195 +			my_qp->rq_map.entries;
1202 1196		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
1203 1197	}
1204 1198
+33 -24
drivers/infiniband/hw/ehca/ehca_reqs.c
···
179 179
180 180	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
181 181	qmap_entry->reported = 0;
182 +	qmap_entry->cqe_req = 0;
182 183
183 184	switch (send_wr->opcode) {
184 185	case IB_WR_SEND:
···
204 203
205 204	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
206 205	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
207 -	    && !hidden)
206 +	    && !hidden) {
208 207		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
208 +		qmap_entry->cqe_req = 1;
209 +	}
209 210
210 211	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
211 212	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
···
572 569		qmap_entry = &my_qp->rq_map.map[rq_map_idx];
573 570		qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
574 571		qmap_entry->reported = 0;
572 +		qmap_entry->cqe_req = 1;
575 573
576 574		wqe_cnt++;
577 575	} /* eof for cur_recv_wr */
···
710 706		goto repoll;
711 707	wc->qp = &my_qp->ib_qp;
712 708
713 -	if (is_error) {
714 -		/*
715 -		 * set left_to_poll to 0 because in error state, we will not
716 -		 * get any additional CQEs
717 -		 */
718 -		ehca_add_to_err_list(my_qp, 1);
719 -		my_qp->sq_map.left_to_poll = 0;
720 -
721 -		if (HAS_RQ(my_qp))
722 -			ehca_add_to_err_list(my_qp, 0);
723 -		my_qp->rq_map.left_to_poll = 0;
724 -	}
725 -
726 709	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
727 710	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
728 711		/* We got a send completion. */
···
717 726	else
718 727		/* We got a receive completion. */
719 728		qmap = &my_qp->rq_map;
729 +
730 +	/* advance the tail pointer */
731 +	qmap->tail = qmap_tail_idx;
732 +
733 +	if (is_error) {
734 +		/*
735 +		 * set left_to_poll to 0 because in error state, we will not
736 +		 * get any additional CQEs
737 +		 */
738 +		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
739 +			my_qp->sq_map.entries;
740 +		my_qp->sq_map.left_to_poll = 0;
741 +		ehca_add_to_err_list(my_qp, 1);
742 +
743 +		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
744 +			my_qp->rq_map.entries;
745 +		my_qp->rq_map.left_to_poll = 0;
746 +		if (HAS_RQ(my_qp))
747 +			ehca_add_to_err_list(my_qp, 0);
748 +	}
720 749
721 750	qmap_entry = &qmap->map[qmap_tail_idx];
722 751	if (qmap_entry->reported) {
···
748 737
749 738	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
750 739	qmap_entry->reported = 1;
751 -
752 -	/* this is a proper completion, we need to advance the tail pointer */
753 -	if (++qmap->tail == qmap->entries)
754 -		qmap->tail = 0;
755 740
756 741	/* if left_to_poll is decremented to 0, add the QP to the error list */
757 742	if (qmap->left_to_poll > 0) {
···
812 805	else
813 806		qmap = &my_qp->rq_map;
814 807
815 -	qmap_entry = &qmap->map[qmap->tail];
808 +	qmap_entry = &qmap->map[qmap->next_wqe_idx];
816 809
817 810	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
818 811		/* generate flush CQE */
812 +
819 813		memset(wc, 0, sizeof(*wc));
820 814
821 -		offset = qmap->tail * ipz_queue->qe_size;
815 +		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
822 816		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
823 817		if (!wqe) {
824 818			ehca_err(cq->device, "Invalid wqe offset=%#lx on "
···
858 850
859 851		wc->qp = &my_qp->ib_qp;
860 852
861 -		/* mark as reported and advance tail pointer */
853 +		/* mark as reported and advance next_wqe pointer */
862 854		qmap_entry->reported = 1;
863 -		if (++qmap->tail == qmap->entries)
864 -			qmap->tail = 0;
865 -		qmap_entry = &qmap->map[qmap->tail];
855 +		qmap->next_wqe_idx++;
856 +		if (qmap->next_wqe_idx == qmap->entries)
857 +			qmap->next_wqe_idx = 0;
858 +		qmap_entry = &qmap->map[qmap->next_wqe_idx];
866 859
867 860		wc++; nr++;
868 861	}