IB/ehca: Fix problem with generated flush work completions

This fix enables the ehca device driver to generate flush work completions
even if the application doesn't request completions for all work
requests. The current implementation generates flush work completions
for the wrong work requests when an application uses non-signaled work
completions.
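
The failure mode is easiest to see from the application side. Below is a
hedged user-space sketch using libibverbs (post_batch is a hypothetical
helper, not part of this patch; the QP, SGE, and the usual device/PD/CQ
setup are assumed and omitted). With the QP created with sq_sig_all = 0,
only the last WR of each batch requests a CQE, so the driver cannot assume
every posted WR maps to a completion:

#include <infiniband/verbs.h>

/* Hypothetical helper: post n sends, of which only the last is signaled.
 * Assumes a QP created with sq_sig_all = 0 and a ready-to-use SGE. */
static int post_batch(struct ibv_qp *qp, struct ibv_sge *sge, int n)
{
	struct ibv_send_wr wr = {0};
	struct ibv_send_wr *bad_wr;
	int i, ret;

	for (i = 0; i < n; i++) {
		wr.wr_id      = (uint64_t)i;
		wr.sg_list    = sge;
		wr.num_sge    = 1;
		wr.opcode     = IBV_WR_SEND;
		/* only the final WR of the batch requests a completion */
		wr.send_flags = (i == n - 1) ? IBV_SEND_SIGNALED : 0;

		ret = ibv_post_send(qp, &wr, &bad_wr);
		if (ret)
			return ret;
	}
	return 0;
}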

Signed-off-by: Stefan Roscher <stefan.roscher@de.ibm.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Stefan Roscher; committed by Roland Dreier (7ec4f463, 6b1f9d64)

3 files changed, 56 insertions(+), 31 deletions(-)

drivers/infiniband/hw/ehca/ehca_classes.h (+3 -1)
@@ -163,7 +163,8 @@
 /* struct for tracking if cqes have been reported to the application */
 struct ehca_qmap_entry {
 	u16 app_wr_id;
-	u16 reported;
+	u8 reported;
+	u8 cqe_req;
 };
 
 struct ehca_queue_map {
@@ -171,6 +172,7 @@
 	unsigned int entries;
 	unsigned int tail;
 	unsigned int left_to_poll;
+	unsigned int next_wqe_idx;   /* Idx to first wqe to be flushed */
 };
 
 struct ehca_qp {
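
A side note on the structure change: splitting the u16 reported field into
two u8 flags records whether each WQE asked for a CQE without growing the
map entry. A stand-alone check (hypothetical, not from the patch) confirms
the entry stays at 4 bytes:

#include <stdint.h>
#include <stdio.h>

/* user-space mirror of the patched kernel structure */
struct ehca_qmap_entry {
	uint16_t app_wr_id;
	uint8_t reported;
	uint8_t cqe_req;
};

int main(void)
{
	/* u16 + u8 + u8 packs without padding: still 4 bytes */
	printf("sizeof(ehca_qmap_entry) = %zu\n",
	       sizeof(struct ehca_qmap_entry));
	return 0;
}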
drivers/infiniband/hw/ehca/ehca_qp.c (+20 -6)
@@ -435,9 +435,13 @@
 {
 	int i;
 
-	qmap->tail = 0;
-	for (i = 0; i < qmap->entries; i++)
+	qmap->tail = qmap->entries - 1;
+	qmap->left_to_poll = 0;
+	qmap->next_wqe_idx = 0;
+	for (i = 0; i < qmap->entries; i++) {
 		qmap->map[i].reported = 1;
+		qmap->map[i].cqe_req = 0;
+	}
 }
 
 /*
@@ -1121,6 +1125,7 @@
 	void *wqe_v;
 	u64 q_ofs;
 	u32 wqe_idx;
+	unsigned int tail_idx;
 
 	/* convert real to abs address */
 	wqe_p = wqe_p & (~(1UL << 63));
@@ -1133,11 +1138,16 @@
 		return -EFAULT;
 	}
 
+	tail_idx = (qmap->tail + 1) % qmap->entries;
 	wqe_idx = q_ofs / ipz_queue->qe_size;
-	if (wqe_idx < qmap->tail)
-		qmap->left_to_poll = (qmap->entries - qmap->tail) + wqe_idx;
-	else
-		qmap->left_to_poll = wqe_idx - qmap->tail;
 
+	/* check all processed wqes, whether a cqe is requested or not */
+	while (tail_idx != wqe_idx) {
+		if (qmap->map[tail_idx].cqe_req)
+			qmap->left_to_poll++;
+		tail_idx = (tail_idx + 1) % qmap->entries;
+	}
+	/* save index in queue, where we have to start flushing */
+	qmap->next_wqe_idx = wqe_idx;
 	return 0;
 }
@@ -1185,10 +1195,14 @@
 	} else {
 		spin_lock_irqsave(&my_qp->send_cq->spinlock, flags);
 		my_qp->sq_map.left_to_poll = 0;
+		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+						my_qp->sq_map.entries;
 		spin_unlock_irqrestore(&my_qp->send_cq->spinlock, flags);
 
 		spin_lock_irqsave(&my_qp->recv_cq->spinlock, flags);
 		my_qp->rq_map.left_to_poll = 0;
+		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+						my_qp->rq_map.entries;
 		spin_unlock_irqrestore(&my_qp->recv_cq->spinlock, flags);
 	}
 
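
The reworked logic above walks the ring from the entry after the tail up to
the WQE the hardware stopped at, counting only the entries that actually
requested a CQE, and remembers that stop index in next_wqe_idx as the point
where flushing must start. A stand-alone model of the counting loop
(simplified types and names, not driver code):

#include <stdio.h>

#define ENTRIES 8

struct entry { int cqe_req; };

/* count CQE-requesting entries in (tail, wqe_idx) on a circular map */
static unsigned int count_left_to_poll(const struct entry *map,
				       unsigned int entries,
				       unsigned int tail,
				       unsigned int wqe_idx)
{
	unsigned int tail_idx = (tail + 1) % entries;
	unsigned int left = 0;

	while (tail_idx != wqe_idx) {
		if (map[tail_idx].cqe_req)
			left++;
		tail_idx = (tail_idx + 1) % entries;
	}
	return left;
}

int main(void)
{
	/* every third WQE was posted signaled */
	struct entry map[ENTRIES] = {
		{0}, {0}, {1}, {0}, {0}, {1}, {0}, {0}
	};

	/* tail = 1, hardware stopped at wqe_idx = 6: entries 2..5 are
	 * examined and only 2 and 5 requested a CQE, so this prints 2 */
	printf("left_to_poll = %u\n",
	       count_left_to_poll(map, ENTRIES, 1, 6));
	return 0;
}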
drivers/infiniband/hw/ehca/ehca_reqs.c (+33 -24)
@@ -179,6 +179,7 @@
 
 	qmap_entry->app_wr_id = get_app_wr_id(send_wr->wr_id);
 	qmap_entry->reported = 0;
+	qmap_entry->cqe_req = 0;
 
 	switch (send_wr->opcode) {
 	case IB_WR_SEND:
@@ -203,8 +204,10 @@
 
 	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
 	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
-	    && !hidden)
+	    && !hidden) {
 		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+		qmap_entry->cqe_req = 1;
+	}
 
 	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
 	    send_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
@@ -569,6 +572,7 @@
 	qmap_entry = &my_qp->rq_map.map[rq_map_idx];
 	qmap_entry->app_wr_id = get_app_wr_id(cur_recv_wr->wr_id);
 	qmap_entry->reported = 0;
+	qmap_entry->cqe_req = 1;
 
 	wqe_cnt++;
 } /* eof for cur_recv_wr */
@@ -706,19 +710,6 @@
 		goto repoll;
 	wc->qp = &my_qp->ib_qp;
 
-	if (is_error) {
-		/*
-		 * set left_to_poll to 0 because in error state, we will not
-		 * get any additional CQEs
-		 */
-		ehca_add_to_err_list(my_qp, 1);
-		my_qp->sq_map.left_to_poll = 0;
-
-		if (HAS_RQ(my_qp))
-			ehca_add_to_err_list(my_qp, 0);
-		my_qp->rq_map.left_to_poll = 0;
-	}
-
 	qmap_tail_idx = get_app_wr_id(cqe->work_request_id);
 	if (!(cqe->w_completion_flags & WC_SEND_RECEIVE_BIT))
 		/* We got a send completion. */
@@ -726,6 +717,26 @@
 	else
 		/* We got a receive completion. */
 		qmap = &my_qp->rq_map;
+
+	/* advance the tail pointer */
+	qmap->tail = qmap_tail_idx;
+
+	if (is_error) {
+		/*
+		 * set left_to_poll to 0 because in error state, we will not
+		 * get any additional CQEs
+		 */
+		my_qp->sq_map.next_wqe_idx = (my_qp->sq_map.tail + 1) %
+						my_qp->sq_map.entries;
+		my_qp->sq_map.left_to_poll = 0;
+		ehca_add_to_err_list(my_qp, 1);
+
+		my_qp->rq_map.next_wqe_idx = (my_qp->rq_map.tail + 1) %
+						my_qp->rq_map.entries;
+		my_qp->rq_map.left_to_poll = 0;
+		if (HAS_RQ(my_qp))
+			ehca_add_to_err_list(my_qp, 0);
+	}
 
 	qmap_entry = &qmap->map[qmap_tail_idx];
 	if (qmap_entry->reported) {
@@ -737,10 +748,6 @@
 
 	wc->wr_id = replace_wr_id(cqe->work_request_id, qmap_entry->app_wr_id);
 	qmap_entry->reported = 1;
-
-	/* this is a proper completion, we need to advance the tail pointer */
-	if (++qmap->tail == qmap->entries)
-		qmap->tail = 0;
 
 	/* if left_to_poll is decremented to 0, add the QP to the error list */
 	if (qmap->left_to_poll > 0) {
@@ -805,13 +812,14 @@
 	else
 		qmap = &my_qp->rq_map;
 
-	qmap_entry = &qmap->map[qmap->tail];
+	qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
 	while ((nr < num_entries) && (qmap_entry->reported == 0)) {
 		/* generate flush CQE */
+
 		memset(wc, 0, sizeof(*wc));
 
-		offset = qmap->tail * ipz_queue->qe_size;
+		offset = qmap->next_wqe_idx * ipz_queue->qe_size;
 		wqe = (struct ehca_wqe *)ipz_qeit_calc(ipz_queue, offset);
 		if (!wqe) {
 			ehca_err(cq->device, "Invalid wqe offset=%#lx on "
@@ -850,11 +858,12 @@
 
 	wc->qp = &my_qp->ib_qp;
 
-	/* mark as reported and advance tail pointer */
+	/* mark as reported and advance next_wqe pointer */
 	qmap_entry->reported = 1;
-	if (++qmap->tail == qmap->entries)
-		qmap->tail = 0;
-	qmap_entry = &qmap->map[qmap->tail];
+	qmap->next_wqe_idx++;
+	if (qmap->next_wqe_idx == qmap->entries)
+		qmap->next_wqe_idx = 0;
+	qmap_entry = &qmap->map[qmap->next_wqe_idx];
 
 	wc++; nr++;
 }
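
With next_wqe_idx in place, the flush loop above no longer consumes tail
(which now tracks only real completions); it starts at next_wqe_idx and
advances that index as it fabricates flush completions. A simplified
stand-alone model of that walk (hypothetical names, printf standing in for
filling in struct ib_wc):

#include <stdio.h>

#define ENTRIES 8

struct qmap_entry { int reported; };

struct queue_map {
	struct qmap_entry map[ENTRIES];
	unsigned int entries;
	unsigned int next_wqe_idx;
};

/* emit flush "completions" for every not-yet-reported entry,
 * starting at next_wqe_idx, up to num_entries of them */
static int flush_unreported(struct queue_map *qmap, int num_entries)
{
	struct qmap_entry *e = &qmap->map[qmap->next_wqe_idx];
	int nr = 0;

	while (nr < num_entries && e->reported == 0) {
		printf("flush CQE for wqe %u\n", qmap->next_wqe_idx);

		/* mark as reported and advance the next_wqe pointer */
		e->reported = 1;
		if (++qmap->next_wqe_idx == qmap->entries)
			qmap->next_wqe_idx = 0;
		e = &qmap->map[qmap->next_wqe_idx];
		nr++;
	}
	return nr;
}

int main(void)
{
	struct queue_map qmap = { .entries = ENTRIES, .next_wqe_idx = 5 };
	unsigned int i;

	/* entries 5..7 were posted but never completed */
	for (i = 0; i < qmap.entries; i++)
		qmap.map[i].reported = (i < 5);

	/* flushes wqes 5, 6 and 7, then stops at the reported entry 0 */
	printf("flushed %d wqes\n", flush_unreported(&qmap, 16));
	return 0;
}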