IB/mthca: Pre-link receive WQEs in Tavor mode

We have recently discovered that Tavor mode requires each WQE in a
posted list of receive WQEs to have a valid NDA field at all times.
This requirement holds for regular QPs as well as for SRQs. This
patch pre-links the receive queue in a regular QP and keeps the SRQ
free list properly linked at all times.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Reviewed-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Eli Cohen, committed by Roland Dreier (commits 1d368c54, 1203c42e)
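
As background for the diff below, here is a minimal standalone sketch of the pre-linking scheme the commit message describes. It is illustrative only: fake_next_seg, RQ_DEPTH and WQE_SHIFT are invented for the example and are not the driver's types; the point is simply that every receive WQE's NDA field is set to the byte offset of the next WQE (wrapping around the queue, low bit set the way the driver code does), so the chain is valid before any work request is ever posted.

/*
 * Illustrative sketch -- not mthca driver code.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>          /* htonl(): NDA fields are big-endian */

#define RQ_DEPTH   8            /* hypothetical receive queue depth  */
#define WQE_SHIFT  6            /* hypothetical WQE stride: 64 bytes */

struct fake_next_seg {
        uint32_t nda_op;        /* next-WQE offset | low bit, like nda_op */
        uint32_t ee_nds;
};

static struct fake_next_seg rq[RQ_DEPTH];

/* Link every WQE to its successor, wrapping at the end of the queue,
 * the way the patch does for Tavor-mode QPs at creation time. */
static void prelink_rq(void)
{
        for (int i = 0; i < RQ_DEPTH; ++i) {
                int next = (i + 1) % RQ_DEPTH;
                rq[i].nda_op = htonl((next << WQE_SHIFT) | 1);
        }
}

int main(void)
{
        prelink_rq();
        for (int i = 0; i < RQ_DEPTH; ++i)
                printf("wqe %d -> nda_op 0x%08x\n",
                       i, (unsigned) ntohl(rq[i].nda_op));
        return 0;
}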

2 files changed: +22 -14

drivers/infiniband/hw/mthca/mthca_qp.c (+8 -5)
···
 {
         int ret;
         int i;
+        struct mthca_next_seg *next;

         qp->refcount = 1;
         init_waitqueue_head(&qp->wait);
···
         }

         if (mthca_is_memfree(dev)) {
-                struct mthca_next_seg *next;
                 struct mthca_data_seg *scatter;
                 int size = (sizeof (struct mthca_next_seg) +
                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
···
                                             qp->sq.wqe_shift) +
                                            qp->send_wqe_offset);
                 }
+        } else {
+                for (i = 0; i < qp->rq.max; ++i) {
+                        next = get_recv_wqe(qp, i);
+                        next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+                                              qp->rq.wqe_shift) | 1);
+                }
+
         }

         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
···
                 prev_wqe = qp->rq.last;
                 qp->rq.last = wqe;

-                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                 ((struct mthca_next_seg *) wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD);
                 ((struct mthca_next_seg *) wqe)->flags = 0;
···
                 qp->wrid[ind] = wr->wr_id;

-                ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                        cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
-                wmb();
                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD | size);
···
drivers/infiniband/hw/mthca/mthca_srq.c (+14 -9)
···
          * scatter list L_Keys to the sentry value of 0x100.
          */
         for (i = 0; i < srq->max; ++i) {
-                wqe = get_wqe(srq, i);
+                struct mthca_next_seg *next;

-                *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1;
+                next = wqe = get_wqe(srq, i);
+
+                if (i < srq->max - 1) {
+                        *wqe_to_link(wqe) = i + 1;
+                        next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
+                } else {
+                        *wqe_to_link(wqe) = -1;
+                        next->nda_op = 0;
+                }

                 for (scatter = wqe + sizeof (struct mthca_next_seg);
                      (void *) scatter < wqe + (1 << srq->wqe_shift);
···
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
 {
         int ind;
+        struct mthca_next_seg *last_free;

         ind = wqe_addr >> srq->wqe_shift;

         spin_lock(&srq->lock);

-        *wqe_to_link(get_wqe(srq, srq->last_free)) = ind;
+        last_free = get_wqe(srq, srq->last_free);
+        *wqe_to_link(last_free) = ind;
+        last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
         *wqe_to_link(get_wqe(srq, ind)) = -1;
         srq->last_free = ind;
···
                 prev_wqe = srq->last;
                 srq->last = wqe;

-                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                 /* flags field will always remain 0 */
···
                 if (i < srq->max_gs)
                         mthca_set_data_seg_inval(wqe);

-                ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                        cpu_to_be32((ind << srq->wqe_shift) | 1);
-                wmb();
                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD);
···
                         break;
                 }

-                ((struct mthca_next_seg *) wqe)->nda_op =
-                        cpu_to_be32((next_ind << srq->wqe_shift) | 1);
                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                 /* flags field will always remain 0 */
···
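
Similarly, and again purely as an illustration of the mthca_free_srq_wqe() change above rather than the driver code itself (fake_srq, SRQ_DEPTH and WQE_SHIFT are invented names), here is a sketch of keeping an SRQ free list linked for the hardware when a WQE is returned to it: the old tail's NDA is pointed at the freed entry, which then becomes the new tail.

/*
 * Illustrative sketch -- not mthca driver code.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define SRQ_DEPTH  16
#define WQE_SHIFT  7

struct fake_srq {
        int      link[SRQ_DEPTH];       /* software free-list links    */
        uint32_t nda[SRQ_DEPTH];        /* hardware-visible NDA fields */
        int      last_free;             /* index of the free-list tail */
};

/* Return WQE 'ind' to the free list: point the old tail's NDA at it
 * (byte offset | low bit, big-endian) and make it the new tail, so the
 * hardware always sees a fully linked chain. */
static void free_srq_wqe(struct fake_srq *srq, int ind)
{
        srq->link[srq->last_free] = ind;
        srq->nda[srq->last_free]  = htonl((ind << WQE_SHIFT) | 1);
        srq->link[ind]  = -1;           /* new end of the free list */
        srq->last_free  = ind;
}

int main(void)
{
        static struct fake_srq srq = { .last_free = 0 };

        free_srq_wqe(&srq, 5);          /* e.g. WQE 5 just completed */
        printf("tail %d, nda[0] = 0x%08x\n",
               srq.last_free, (unsigned) ntohl(srq.nda[0]));
        return 0;
}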