Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bnx2: Add prefetches to rx path.

Add prefetches of the skb and the next rx descriptor to speed up the rx path.

Use prefetchw() for the skb [suggested by Eric Dumazet].

The rx descriptor is in skb->data, which is mapped for streaming-mode DMA.
Eric Dumazet pointed out that we should not prefetch the data before
dma_sync, so we prefetch only if dma_sync is a no-op on the system.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Michael Chan and committed by
David S. Miller
a33fa66b c67938a9

+14 -3
+13 -3
drivers/net/bnx2.c
··· 2719 2719 } 2720 2720 2721 2721 rx_buf->skb = skb; 2722 + rx_buf->desc = (struct l2_fhdr *) skb->data; 2722 2723 dma_unmap_addr_set(rx_buf, mapping, mapping); 2723 2724 2724 2725 rxbd->rx_bd_haddr_hi = (u64) mapping >> 32; ··· 2942 2941 rxr->rx_prod_bseq += bp->rx_buf_use_size; 2943 2942 2944 2943 prod_rx_buf->skb = skb; 2944 + prod_rx_buf->desc = (struct l2_fhdr *) skb->data; 2945 2945 2946 2946 if (cons == prod) 2947 2947 return; ··· 3076 3074 u16 hw_cons, sw_cons, sw_ring_cons, sw_prod, sw_ring_prod; 3077 3075 struct l2_fhdr *rx_hdr; 3078 3076 int rx_pkt = 0, pg_ring_used = 0; 3077 + struct pci_dev *pdev = bp->pdev; 3079 3078 3080 3079 hw_cons = bnx2_get_hw_rx_cons(bnapi); 3081 3080 sw_cons = rxr->rx_cons; ··· 3089 3086 while (sw_cons != hw_cons) { 3090 3087 unsigned int len, hdr_len; 3091 3088 u32 status; 3092 - struct sw_bd *rx_buf; 3089 + struct sw_bd *rx_buf, *next_rx_buf; 3093 3090 struct sk_buff *skb; 3094 3091 dma_addr_t dma_addr; 3095 3092 u16 vtag = 0; ··· 3100 3097 3101 3098 rx_buf = &rxr->rx_buf_ring[sw_ring_cons]; 3102 3099 skb = rx_buf->skb; 3100 + prefetchw(skb); 3103 3101 3102 + if (!get_dma_ops(&pdev->dev)->sync_single_for_cpu) { 3103 + next_rx_buf = 3104 + &rxr->rx_buf_ring[ 3105 + RX_RING_IDX(NEXT_RX_BD(sw_cons))]; 3106 + prefetch(next_rx_buf->desc); 3107 + } 3104 3108 rx_buf->skb = NULL; 3105 3109 3106 3110 dma_addr = dma_unmap_addr(rx_buf, mapping); ··· 3116 3106 BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, 3117 3107 PCI_DMA_FROMDEVICE); 3118 3108 3119 - rx_hdr = (struct l2_fhdr *) skb->data; 3109 + rx_hdr = rx_buf->desc; 3120 3110 len = rx_hdr->l2_fhdr_pkt_len; 3121 3111 status = rx_hdr->l2_fhdr_status; 3122 3112 ··· 5774 5764 rx_buf = &rxr->rx_buf_ring[rx_start_idx]; 5775 5765 rx_skb = rx_buf->skb; 5776 5766 5777 - rx_hdr = (struct l2_fhdr *) rx_skb->data; 5767 + rx_hdr = rx_buf->desc; 5778 5768 skb_reserve(rx_skb, BNX2_RX_OFFSET); 5779 5769 5780 5770 pci_dma_sync_single_for_cpu(bp->pdev,
+1
drivers/net/bnx2.h
··· 6551 6551 6552 6552 struct sw_bd { 6553 6553 struct sk_buff *skb; 6554 + struct l2_fhdr *desc; 6554 6555 DEFINE_DMA_UNMAP_ADDR(mapping); 6555 6556 }; 6556 6557