IB/mthca: Pre-link receive WQEs in Tavor mode

We have recently discovered that Tavor mode requires each WQE in a
posted list of receive WQEs to have a valid NDA field at all times.
This requirement holds true for regular QPs as well as for SRQs. This
patch prelinks the receive queue in a regular QP and keeps the free
list in SRQ always properly linked.

Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Reviewed-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

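For illustration only: the pre-linking introduced by this patch amounts to building a circular singly linked list through the receive WQEs' NDA words before any receive is posted. The sketch below is a standalone toy model (simplified structure and made-up RQ_MAX/RQ_WQE_SHIFT values, not the driver's real struct mthca_next_seg layout); it only shows how the nda_op values are computed.

    /* Toy model of the Tavor receive-queue pre-linking; not driver code. */
    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>          /* htonl()/ntohl() */

    #define RQ_MAX          8       /* example queue depth */
    #define RQ_WQE_SHIFT    6       /* example log2 of WQE stride (64 bytes) */

    struct toy_next_seg {           /* stand-in for struct mthca_next_seg */
            uint32_t nda_op;        /* next WQE offset (BE), bit 0 = valid */
            uint32_t ee_nds;
            uint32_t flags;
            uint32_t imm;
    };

    static struct toy_next_seg rq[RQ_MAX];

    int main(void)
    {
            int i;

            /* Link every WQE to its successor, wrapping the last one back to
             * WQE 0, so each posted WQE always carries a valid NDA field. */
            for (i = 0; i < RQ_MAX; ++i)
                    rq[i].nda_op = htonl((((i + 1) % RQ_MAX) << RQ_WQE_SHIFT) | 1);

            for (i = 0; i < RQ_MAX; ++i)
                    printf("wqe %d: nda_op = 0x%08x\n", i,
                           (unsigned) ntohl(rq[i].nda_op));

            return 0;
    }

In the patch itself this circular computation is done once when the QP's receive queue is set up (the new else branch in the mthca_qp.c hunk below), which is why the per-post nda_op store and its wmb() barrier can be dropped from the posting paths.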

2 files changed, 22 insertions(+), 14 deletions(-)

drivers/infiniband/hw/mthca/mthca_qp.c (+8 -5)
···
 {
         int ret;
         int i;
+        struct mthca_next_seg *next;
 
         qp->refcount = 1;
         init_waitqueue_head(&qp->wait);
···
         }
 
         if (mthca_is_memfree(dev)) {
-                struct mthca_next_seg *next;
                 struct mthca_data_seg *scatter;
                 int size = (sizeof (struct mthca_next_seg) +
                             qp->rq.max_gs * sizeof (struct mthca_data_seg)) / 16;
···
                                             qp->sq.wqe_shift) +
                                            qp->send_wqe_offset);
                 }
+        } else {
+                for (i = 0; i < qp->rq.max; ++i) {
+                        next = get_recv_wqe(qp, i);
+                        next->nda_op = htonl((((i + 1) % qp->rq.max) <<
+                                              qp->rq.wqe_shift) | 1);
+                }
+
         }
 
         qp->sq.last = get_send_wqe(qp, qp->sq.max - 1);
···
                 prev_wqe = qp->rq.last;
                 qp->rq.last = wqe;
 
-                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                 ((struct mthca_next_seg *) wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD);
                 ((struct mthca_next_seg *) wqe)->flags = 0;
···
 
                 qp->wrid[ind] = wr->wr_id;
 
-                ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                        cpu_to_be32((ind << qp->rq.wqe_shift) | 1);
-                wmb();
                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD | size);
 
drivers/infiniband/hw/mthca/mthca_srq.c (+14 -9)
···
          * scatter list L_Keys to the sentry value of 0x100.
          */
         for (i = 0; i < srq->max; ++i) {
-                wqe = get_wqe(srq, i);
+                struct mthca_next_seg *next;
 
-                *wqe_to_link(wqe) = i < srq->max - 1 ? i + 1 : -1;
+                next = wqe = get_wqe(srq, i);
+
+                if (i < srq->max - 1) {
+                        *wqe_to_link(wqe) = i + 1;
+                        next->nda_op = htonl(((i + 1) << srq->wqe_shift) | 1);
+                } else {
+                        *wqe_to_link(wqe) = -1;
+                        next->nda_op = 0;
+                }
 
                 for (scatter = wqe + sizeof (struct mthca_next_seg);
                      (void *) scatter < wqe + (1 << srq->wqe_shift);
···
 void mthca_free_srq_wqe(struct mthca_srq *srq, u32 wqe_addr)
 {
         int ind;
+        struct mthca_next_seg *last_free;
 
         ind = wqe_addr >> srq->wqe_shift;
 
         spin_lock(&srq->lock);
 
-        *wqe_to_link(get_wqe(srq, srq->last_free)) = ind;
+        last_free = get_wqe(srq, srq->last_free);
+        *wqe_to_link(last_free) = ind;
+        last_free->nda_op = htonl((ind << srq->wqe_shift) | 1);
         *wqe_to_link(get_wqe(srq, ind)) = -1;
         srq->last_free = ind;
 
···
                 prev_wqe = srq->last;
                 srq->last = wqe;
 
-                ((struct mthca_next_seg *) wqe)->nda_op = 0;
                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                 /* flags field will always remain 0 */
 
···
                 if (i < srq->max_gs)
                         mthca_set_data_seg_inval(wqe);
 
-                ((struct mthca_next_seg *) prev_wqe)->nda_op =
-                        cpu_to_be32((ind << srq->wqe_shift) | 1);
-                wmb();
                 ((struct mthca_next_seg *) prev_wqe)->ee_nds =
                         cpu_to_be32(MTHCA_NEXT_DBD);
 
···
                         break;
                 }
 
-                ((struct mthca_next_seg *) wqe)->nda_op =
-                        cpu_to_be32((next_ind << srq->wqe_shift) | 1);
                 ((struct mthca_next_seg *) wqe)->ee_nds = 0;
                 /* flags field will always remain 0 */
 