
bcm63xx_enet: convert to build_skb

We can increase the efficiency of the RX path by using buffers to receive
packets, then building SKBs around them just before passing them into the
network stack. Preallocating SKBs too early, by contrast, reduces CPU cache
efficiency.
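In outline, the RX path now hands raw page fragments to the hardware and
only wraps one in an SKB once a packet has actually arrived. A minimal
sketch of the pattern, simplified from the driver changes below (error
handling and DMA sync omitted):

    /* refill: give the hardware a bare buffer, no SKB yet */
    void *buf = napi_alloc_frag(priv->rx_frag_size);
    desc->address = dma_map_single(&priv->pdev->dev,
                                   buf + priv->rx_buf_offset,
                                   priv->rx_buf_size, DMA_FROM_DEVICE);

    /* receive: build the SKB around the already-filled buffer */
    struct sk_buff *skb = build_skb(buf, priv->rx_frag_size);
    skb_reserve(skb, priv->rx_buf_offset);  /* skip the headroom */
    skb_put(skb, len);                      /* len bytes were received */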

Check whether we are in NAPI context when refilling RX. Normally we are
almost always running in NAPI context, so dispatch directly to
napi_alloc_frag() instead of relying on netdev_alloc_frag(), which does the
same thing but with the overhead of local_bh_disable/enable.
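The refill helper gains a napi_mode flag for this; the relevant hunk from
the patch below:

    if (likely(napi_mode))
        buf = napi_alloc_frag(priv->rx_frag_size);
    else
        buf = netdev_alloc_frag(priv->rx_frag_size);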

Tested on a BCM6328 at 320 MHz, using iperf3 -M 512 to measure
packets-per-second performance. The netif_receive_skb_list and NET_IP_ALIGN
optimizations are included.
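The client invocation was presumably along these lines (only the -M 512 MSS
clamp is stated above; server address and duration here are assumptions):

    iperf3 -c 192.168.1.2 -M 512 -t 30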

Before:
[ ID] Interval           Transfer     Bandwidth       Retr
[  4]   0.00-10.00 sec   49.9 MBytes  41.9 Mbits/sec  197    sender
[  4]   0.00-10.00 sec   49.3 MBytes  41.3 Mbits/sec         receiver

After:
[ ID] Interval           Transfer     Bandwidth       Retr
[  4]   0.00-30.00 sec   171 MBytes   47.8 Mbits/sec  272    sender
[  4]   0.00-30.00 sec   170 MBytes   47.6 Mbits/sec         receiver

Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>


2 files changed, 71 insertions(+), 54 deletions(-)

drivers/net/ethernet/broadcom/bcm63xx_enet.c (+61 -50)
···
 /*
  * refill rx queue
  */
-static int bcm_enet_refill_rx(struct net_device *dev)
+static int bcm_enet_refill_rx(struct net_device *dev, bool napi_mode)
 {
         struct bcm_enet_priv *priv;

···
         while (priv->rx_desc_count < priv->rx_ring_size) {
                 struct bcm_enet_desc *desc;
-                struct sk_buff *skb;
-                dma_addr_t p;
                 int desc_idx;
                 u32 len_stat;

                 desc_idx = priv->rx_dirty_desc;
                 desc = &priv->rx_desc_cpu[desc_idx];

-                if (!priv->rx_skb[desc_idx]) {
-                        if (priv->enet_is_sw)
-                                skb = netdev_alloc_skb_ip_align(dev, priv->rx_skb_size);
+                if (!priv->rx_buf[desc_idx]) {
+                        void *buf;
+
+                        if (likely(napi_mode))
+                                buf = napi_alloc_frag(priv->rx_frag_size);
                         else
-                                skb = netdev_alloc_skb(dev, priv->rx_skb_size);
-                        if (!skb)
+                                buf = netdev_alloc_frag(priv->rx_frag_size);
+                        if (unlikely(!buf))
                                 break;
-                        priv->rx_skb[desc_idx] = skb;
-                        p = dma_map_single(&priv->pdev->dev, skb->data,
-                                           priv->rx_skb_size,
-                                           DMA_FROM_DEVICE);
-                        desc->address = p;
+                        priv->rx_buf[desc_idx] = buf;
+                        desc->address = dma_map_single(&priv->pdev->dev,
+                                                       buf + priv->rx_buf_offset,
+                                                       priv->rx_buf_size,
+                                                       DMA_FROM_DEVICE);
                 }

-                len_stat = priv->rx_skb_size << DMADESC_LENGTH_SHIFT;
+                len_stat = priv->rx_buf_size << DMADESC_LENGTH_SHIFT;
                 len_stat |= DMADESC_OWNER_MASK;
                 if (priv->rx_dirty_desc == priv->rx_ring_size - 1) {
                         len_stat |= (DMADESC_WRAP_MASK >> priv->dma_desc_shift);
···
         struct net_device *dev = priv->net_dev;

         spin_lock(&priv->rx_lock);
-        bcm_enet_refill_rx(dev);
+        bcm_enet_refill_rx(dev, false);
         spin_unlock(&priv->rx_lock);
 }
···
                 int desc_idx;
                 u32 len_stat;
                 unsigned int len;
+                void *buf;

                 desc_idx = priv->rx_curr_desc;
                 desc = &priv->rx_desc_cpu[desc_idx];
···
                 }

                 /* valid packet */
-                skb = priv->rx_skb[desc_idx];
+                buf = priv->rx_buf[desc_idx];
                 len = (len_stat & DMADESC_LENGTH_MASK) >> DMADESC_LENGTH_SHIFT;
                 /* don't include FCS */
                 len -= 4;

                 if (len < copybreak) {
-                        struct sk_buff *nskb;
-
-                        nskb = napi_alloc_skb(&priv->napi, len);
-                        if (!nskb) {
+                        skb = napi_alloc_skb(&priv->napi, len);
+                        if (unlikely(!skb)) {
                                 /* forget packet, just rearm desc */
                                 dev->stats.rx_dropped++;
                                 continue;
···
                         dma_sync_single_for_cpu(kdev, desc->address,
                                                 len, DMA_FROM_DEVICE);
-                        memcpy(nskb->data, skb->data, len);
+                        memcpy(skb->data, buf + priv->rx_buf_offset, len);
                         dma_sync_single_for_device(kdev, desc->address,
                                                    len, DMA_FROM_DEVICE);
-                        skb = nskb;
                 } else {
-                        dma_unmap_single(&priv->pdev->dev, desc->address,
-                                         priv->rx_skb_size, DMA_FROM_DEVICE);
-                        priv->rx_skb[desc_idx] = NULL;
+                        dma_unmap_single(kdev, desc->address,
+                                         priv->rx_buf_size, DMA_FROM_DEVICE);
+                        priv->rx_buf[desc_idx] = NULL;
+
+                        skb = build_skb(buf, priv->rx_frag_size);
+                        if (unlikely(!skb)) {
+                                skb_free_frag(buf);
+                                dev->stats.rx_dropped++;
+                                continue;
+                        }
+                        skb_reserve(skb, priv->rx_buf_offset);
                 }

                 skb_put(skb, len);
···
         netif_receive_skb_list(&rx_list);

         if (processed || !priv->rx_desc_count) {
-                bcm_enet_refill_rx(dev);
+                bcm_enet_refill_rx(dev, true);

                 /* kick rx dma */
                 enet_dmac_writel(priv, priv->dma_chan_en_mask,
···
                  priv->pause_tx ? "tx" : "off");
 }

-static void bcm_enet_free_rx_skb_ring(struct device *kdev, struct bcm_enet_priv *priv)
+static void bcm_enet_free_rx_buf_ring(struct device *kdev, struct bcm_enet_priv *priv)
 {
         int i;

         for (i = 0; i < priv->rx_ring_size; i++) {
                 struct bcm_enet_desc *desc;

-                if (!priv->rx_skb[i])
+                if (!priv->rx_buf[i])
                         continue;

                 desc = &priv->rx_desc_cpu[i];
-                dma_unmap_single(kdev, desc->address, priv->rx_skb_size,
+                dma_unmap_single(kdev, desc->address, priv->rx_buf_size,
                                  DMA_FROM_DEVICE);
-                kfree_skb(priv->rx_skb[i]);
+                skb_free_frag(priv->rx_buf[i]);
         }
-        kfree(priv->rx_skb);
+        kfree(priv->rx_buf);
 }
···
         priv->tx_curr_desc = 0;
         spin_lock_init(&priv->tx_lock);

-        /* init & fill rx ring with skbs */
-        priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
+        /* init & fill rx ring with buffers */
+        priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
                                GFP_KERNEL);
-        if (!priv->rx_skb) {
+        if (!priv->rx_buf) {
                 ret = -ENOMEM;
                 goto out_free_tx_skb;
         }
···
         enet_dmac_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
                          ENETDMAC_BUFALLOC, priv->rx_chan);

-        if (bcm_enet_refill_rx(dev)) {
-                dev_err(kdev, "cannot allocate rx skb queue\n");
+        if (bcm_enet_refill_rx(dev, false)) {
+                dev_err(kdev, "cannot allocate rx buffer queue\n");
                 ret = -ENOMEM;
                 goto out;
         }
···
         return 0;

 out:
-        bcm_enet_free_rx_skb_ring(kdev, priv);
+        bcm_enet_free_rx_buf_ring(kdev, priv);

 out_free_tx_skb:
         kfree(priv->tx_skb);
···
         /* force reclaim of all tx buffers */
         bcm_enet_tx_reclaim(dev, 1);

-        /* free the rx skb ring */
-        bcm_enet_free_rx_skb_ring(kdev, priv);
+        /* free the rx buffer ring */
+        bcm_enet_free_rx_buf_ring(kdev, priv);

         /* free remaining allocated memory */
         kfree(priv->tx_skb);
···
         /*
          * align rx buffer size to dma burst len, account FCS since
          * it's appended
          */
-        priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN,
+        priv->rx_buf_size = ALIGN(actual_mtu + ETH_FCS_LEN,
                                   priv->dma_maxburst * 4);
+
+        priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) +
+                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

         dev->mtu = new_mtu;
         return 0;
···
         priv->enet_is_sw = false;
         priv->dma_maxburst = BCMENET_DMA_MAXBURST;
+        priv->rx_buf_offset = NET_SKB_PAD;

         ret = bcm_enet_change_mtu(dev, dev->mtu);
         if (ret)
···
         priv->tx_skb = kcalloc(priv->tx_ring_size, sizeof(struct sk_buff *),
                                GFP_KERNEL);
         if (!priv->tx_skb) {
-                dev_err(kdev, "cannot allocate rx skb queue\n");
+                dev_err(kdev, "cannot allocate tx skb queue\n");
                 ret = -ENOMEM;
                 goto out_free_tx_ring;
         }
···
         priv->tx_curr_desc = 0;
         spin_lock_init(&priv->tx_lock);

-        /* init & fill rx ring with skbs */
-        priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
+        /* init & fill rx ring with buffers */
+        priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
                                GFP_KERNEL);
-        if (!priv->rx_skb) {
-                dev_err(kdev, "cannot allocate rx skb queue\n");
+        if (!priv->rx_buf) {
+                dev_err(kdev, "cannot allocate rx buffer queue\n");
                 ret = -ENOMEM;
                 goto out_free_tx_skb;
         }
···
         enet_dma_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
                         ENETDMA_BUFALLOC_REG(priv->rx_chan));

-        if (bcm_enet_refill_rx(dev)) {
-                dev_err(kdev, "cannot allocate rx skb queue\n");
+        if (bcm_enet_refill_rx(dev, false)) {
+                dev_err(kdev, "cannot allocate rx buffer queue\n");
                 ret = -ENOMEM;
                 goto out;
         }
···
         return 0;

 out:
-        bcm_enet_free_rx_skb_ring(kdev, priv);
+        bcm_enet_free_rx_buf_ring(kdev, priv);

 out_free_tx_skb:
         kfree(priv->tx_skb);
···
         /* force reclaim of all tx buffers */
         bcm_enet_tx_reclaim(dev, 1);

-        /* free the rx skb ring */
-        bcm_enet_free_rx_skb_ring(kdev, priv);
+        /* free the rx buffer ring */
+        bcm_enet_free_rx_buf_ring(kdev, priv);

         /* free remaining allocated memory */
         kfree(priv->tx_skb);
···
         priv->rx_ring_size = BCMENET_DEF_RX_DESC;
         priv->tx_ring_size = BCMENET_DEF_TX_DESC;
         priv->dma_maxburst = BCMENETSW_DMA_MAXBURST;
+        priv->rx_buf_offset = NET_SKB_PAD + NET_IP_ALIGN;

         pd = dev_get_platdata(&pdev->dev);
         if (pd) {
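Note how rx_frag_size is derived in bcm_enet_change_mtu() above: build_skb()
expects the buffer to carry headroom at the front and room for the struct
skb_shared_info it places at the tail, so the patch sizes the frag as:

    priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) +
                         SKB_DATA_ALIGN(sizeof(struct skb_shared_info));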
drivers/net/ethernet/broadcom/bcm63xx_enet.h (+10 -4)
···
         /* next dirty rx descriptor to refill */
         int rx_dirty_desc;

-        /* size of allocated rx skbs */
-        unsigned int rx_skb_size;
+        /* size of allocated rx buffers */
+        unsigned int rx_buf_size;

-        /* list of skb given to hw for rx */
-        struct sk_buff **rx_skb;
+        /* allocated rx buffer offset */
+        unsigned int rx_buf_offset;
+
+        /* size of allocated rx frag */
+        unsigned int rx_frag_size;
+
+        /* list of buffer given to hw for rx */
+        void **rx_buf;

         /* used when rx skb allocation failed, so we defer rx queue
          * refill */
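The two probe paths pick different offsets for the new rx_buf_offset field
(both assignments quoted from the patch): the internal MAC reserves plain
NET_SKB_PAD of headroom, while the switch variant adds NET_IP_ALIGN so that,
after the 14-byte Ethernet header, the IP header lands on a 4-byte boundary.

    priv->rx_buf_offset = NET_SKB_PAD;                 /* enet probe */
    priv->rx_buf_offset = NET_SKB_PAD + NET_IP_ALIGN;  /* enetsw probe */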