IPoIB: Add send gather support

This patch acts as a preparation for using checksum offload for IB
devices capable of inserting/verifying checksum in IP packets. The
patch does not actually turn on NETIF_F_SG - we defer that to the
patches adding checksum offload capabilities.
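
For illustration only, a minimal sketch of how a later patch in the series might
flip the feature bit once checksum offload is wired up; the capability check
shown here is a placeholder, not an interface this patch adds:

        /*
         * Hypothetical follow-up (not part of this patch): advertise
         * scatter/gather only together with hardware checksum support,
         * since send gather alone buys us nothing here.
         * hca_supports_ud_csum() is a placeholder for whatever
         * capability test the checksum-offload patches introduce.
         */
        if (hca_supports_ud_csum(priv->ca))
                dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;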

We add send gather support only for datagram mode, since existing
HW does not support checksum offload on connected QPs.
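
Concretely, only the UD QP is sized for multiple gather entries (see the
ipoib_verbs.c hunk below); the connected-mode path keeps posting a single
SGE, which is why the ipoib_cm.c changes below touch only index 0 of the
mapping and tx_sge arrays:

        /* UD (datagram) QP creation -- quoted from the ipoib_verbs.c
         * change below.  NETIF_F_SG itself is only set by the later
         * checksum-offload patches, so this stays dormant for now.
         */
        if (dev->features & NETIF_F_SG)
                init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;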

Signed-off-by: Michael S. Tsirkin <mst@mellanox.co.il>
Signed-off-by: Eli Cohen <eli@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Eli Cohen and committed by Roland Dreier (7143740d, eb14032f)

4 files changed, 83 insertions(+), 30 deletions(-)

drivers/infiniband/ulp/ipoib/ipoib.h | +2 -2
···
 
 struct ipoib_tx_buf {
         struct sk_buff *skb;
-        u64 mapping;
+        u64 mapping[MAX_SKB_FRAGS + 1];
 };
 
 struct ib_cm_id;
···
         struct ipoib_tx_buf *tx_ring;
         unsigned tx_head;
         unsigned tx_tail;
-        struct ib_sge tx_sge;
+        struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
         struct ib_send_wr tx_wr;
         unsigned tx_outstanding;
 
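
The sizing of both arrays follows directly from how an skb is laid out: one
gather entry for the linear header plus one per page fragment, so
MAX_SKB_FRAGS + 1 covers the worst case.  A minimal sketch of the count
(the helper name is illustrative, not part of the patch):

        /* Illustrative only: how many send SGEs this skb occupies.
         * The linear part always takes entry 0; each paged fragment
         * adds one more, bounded above by MAX_SKB_FRAGS + 1.
         */
        static inline int ipoib_send_sge_count(struct sk_buff *skb)
        {
                return 1 + skb_shinfo(skb)->nr_frags;
        }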

drivers/infiniband/ulp/ipoib/ipoib_cm.c | +5 -5
···
 {
         struct ib_send_wr *bad_wr;
 
-        priv->tx_sge.addr = addr;
-        priv->tx_sge.length = len;
+        priv->tx_sge[0].addr = addr;
+        priv->tx_sge[0].length = len;
 
         priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
 
···
                 return;
         }
 
-        tx_req->mapping = addr;
+        tx_req->mapping[0] = addr;
 
         if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
                                addr, skb->len))) {
···
 
         tx_req = &tx->tx_ring[wr_id];
 
-        ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+        ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
 
         /* FIXME: is this right? Shouldn't we only increment on success? */
         ++dev->stats.tx_packets;
···
 
         while ((int) p->tx_tail - (int) p->tx_head < 0) {
                 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-                ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+                ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
                                     DMA_TO_DEVICE);
                 dev_kfree_skb_any(tx_req->skb);
                 ++p->tx_tail;

drivers/infiniband/ulp/ipoib/ipoib_ib.c | +69 -20
···
                            "for buf %d\n", wr_id);
 }
 
+static int ipoib_dma_map_tx(struct ib_device *ca,
+                            struct ipoib_tx_buf *tx_req)
+{
+        struct sk_buff *skb = tx_req->skb;
+        u64 *mapping = tx_req->mapping;
+        int i;
+
+        mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+                                       DMA_TO_DEVICE);
+        if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+                return -EIO;
+
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+                                                 frag->page_offset, frag->size,
+                                                 DMA_TO_DEVICE);
+                if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+                        goto partial_error;
+        }
+        return 0;
+
+partial_error:
+        ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+        for (; i > 0; --i) {
+                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+                ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+        }
+        return -EIO;
+}
+
+static void ipoib_dma_unmap_tx(struct ib_device *ca,
+                               struct ipoib_tx_buf *tx_req)
+{
+        struct sk_buff *skb = tx_req->skb;
+        u64 *mapping = tx_req->mapping;
+        int i;
+
+        ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+                                  DMA_TO_DEVICE);
+        }
+}
+
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
···
 
         tx_req = &priv->tx_ring[wr_id];
 
-        ib_dma_unmap_single(priv->ca, tx_req->mapping,
-                            tx_req->skb->len, DMA_TO_DEVICE);
+        ipoib_dma_unmap_tx(priv->ca, tx_req);
 
         ++dev->stats.tx_packets;
         dev->stats.tx_bytes += tx_req->skb->len;
···
 static inline int post_send(struct ipoib_dev_priv *priv,
                             unsigned int wr_id,
                             struct ib_ah *address, u32 qpn,
-                            u64 addr, int len)
+                            u64 *mapping, int headlen,
+                            skb_frag_t *frags,
+                            int nr_frags)
 {
         struct ib_send_wr *bad_wr;
+        int i;
 
-        priv->tx_sge.addr = addr;
-        priv->tx_sge.length = len;
-
-        priv->tx_wr.wr_id = wr_id;
-        priv->tx_wr.wr.ud.remote_qpn = qpn;
-        priv->tx_wr.wr.ud.ah = address;
+        priv->tx_sge[0].addr = mapping[0];
+        priv->tx_sge[0].length = headlen;
+        for (i = 0; i < nr_frags; ++i) {
+                priv->tx_sge[i + 1].addr = mapping[i + 1];
+                priv->tx_sge[i + 1].length = frags[i].size;
+        }
+        priv->tx_wr.num_sge = nr_frags + 1;
+        priv->tx_wr.wr_id = wr_id;
+        priv->tx_wr.wr.ud.remote_qpn = qpn;
+        priv->tx_wr.wr.ud.ah = address;
 
         return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
···
 {
         struct ipoib_dev_priv *priv = netdev_priv(dev);
         struct ipoib_tx_buf *tx_req;
-        u64 addr;
 
         if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
                 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
···
          */
         tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
         tx_req->skb = skb;
-        addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
-                                 DMA_TO_DEVICE);
-        if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+        if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
                 ++dev->stats.tx_errors;
                 dev_kfree_skb_any(skb);
                 return;
         }
-        tx_req->mapping = addr;
 
         if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-                               address->ah, qpn, addr, skb->len))) {
+                               address->ah, qpn,
+                               tx_req->mapping, skb_headlen(skb),
+                               skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
                 ipoib_warn(priv, "post_send failed\n");
                 ++dev->stats.tx_errors;
-                ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+                ipoib_dma_unmap_tx(priv->ca, tx_req);
                 dev_kfree_skb_any(skb);
         } else {
                 dev->trans_start = jiffies;
···
                         while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
                                 tx_req = &priv->tx_ring[priv->tx_tail &
                                                         (ipoib_sendq_size - 1)];
-                                ib_dma_unmap_single(priv->ca,
-                                                    tx_req->mapping,
-                                                    tx_req->skb->len,
-                                                    DMA_TO_DEVICE);
+                                ipoib_dma_unmap_tx(priv->ca, tx_req);
                                 dev_kfree_skb_any(tx_req->skb);
                                 ++priv->tx_tail;
                                 --priv->tx_outstanding;
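
The post_send() rework relies on the gather list covering exactly skb->len
bytes: the linear head (skb_headlen()) plus the sum of the fragment sizes.
A small sanity sketch of that invariant, using the same skb_frag_t fields
(frags[i].size) as the patch; the helper itself is not code from the patch:

        /* Sanity sketch: the SGE list built by post_send() must cover
         * the whole skb -- linear head plus every page fragment.
         */
        static inline int ipoib_sg_covers_skb(struct sk_buff *skb)
        {
                int i, len = skb_headlen(skb);

                for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i)
                        len += skb_shinfo(skb)->frags[i].size;

                return len == skb->len;
        }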

drivers/infiniband/ulp/ipoib/ipoib_verbs.c | +7 -3
···
         };
 
         int ret, size;
+        int i;
 
         priv->pd = ib_alloc_pd(priv->ca);
         if (IS_ERR(priv->pd)) {
···
         init_attr.send_cq = priv->cq;
         init_attr.recv_cq = priv->cq;
 
+        if (dev->features & NETIF_F_SG)
+                init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
         priv->qp = ib_create_qp(priv->pd, &init_attr);
         if (IS_ERR(priv->qp)) {
                 printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
···
         priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
         priv->dev->dev_addr[3] = (priv->qp->qp_num     ) & 0xff;
 
-        priv->tx_sge.lkey = priv->mr->lkey;
+        for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+                priv->tx_sge[i].lkey = priv->mr->lkey;
 
         priv->tx_wr.opcode = IB_WR_SEND;
-        priv->tx_wr.sg_list = &priv->tx_sge;
-        priv->tx_wr.num_sge = 1;
+        priv->tx_wr.sg_list = priv->tx_sge;
         priv->tx_wr.send_flags = IB_SEND_SIGNALED;
 
         return 0;