Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'systemport-tx-napi-improvements'

Florian Fainelli says:

====================
net: systemport: TX/NAPI improvements

This patch series builds on Doug's latest changes in BCMGENET to reduce
the number of spurious interrupts in NAPI, simplify pointer arithmetic, and
finally make the tracking of per-TX-ring statistics SMP friendly.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+74 -12
+69 -12
drivers/net/ethernet/broadcom/bcmsysport.c
··· 284 284 STAT_MIB_SOFT("alloc_rx_buff_failed", mib.alloc_rx_buff_failed), 285 285 STAT_MIB_SOFT("rx_dma_failed", mib.rx_dma_failed), 286 286 STAT_MIB_SOFT("tx_dma_failed", mib.tx_dma_failed), 287 + /* Per TX-queue statistics are dynamically appended */ 287 288 }; 288 289 289 290 #define BCM_SYSPORT_STATS_LEN ARRAY_SIZE(bcm_sysport_gstrings_stats) ··· 339 338 continue; 340 339 j++; 341 340 } 342 - return j; 341 + /* Include per-queue statistics */ 342 + return j + dev->num_tx_queues * NUM_SYSPORT_TXQ_STAT; 343 343 default: 344 344 return -EOPNOTSUPP; 345 345 } ··· 351 349 { 352 350 struct bcm_sysport_priv *priv = netdev_priv(dev); 353 351 const struct bcm_sysport_stats *s; 352 + char buf[128]; 354 353 int i, j; 355 354 356 355 switch (stringset) { ··· 363 360 continue; 364 361 365 362 memcpy(data + j * ETH_GSTRING_LEN, s->stat_string, 363 + ETH_GSTRING_LEN); 364 + j++; 365 + } 366 + 367 + for (i = 0; i < dev->num_tx_queues; i++) { 368 + snprintf(buf, sizeof(buf), "txq%d_packets", i); 369 + memcpy(data + j * ETH_GSTRING_LEN, buf, 370 + ETH_GSTRING_LEN); 371 + j++; 372 + 373 + snprintf(buf, sizeof(buf), "txq%d_bytes", i); 374 + memcpy(data + j * ETH_GSTRING_LEN, buf, 366 375 ETH_GSTRING_LEN); 367 376 j++; 368 377 } ··· 433 418 struct ethtool_stats *stats, u64 *data) 434 419 { 435 420 struct bcm_sysport_priv *priv = netdev_priv(dev); 421 + struct bcm_sysport_tx_ring *ring; 436 422 int i, j; 437 423 438 424 if (netif_running(dev)) ··· 450 434 p = (char *)priv; 451 435 p += s->stat_offset; 452 436 data[j] = *(unsigned long *)p; 437 + j++; 438 + } 439 + 440 + /* For SYSTEMPORT Lite since we have holes in our statistics, j would 441 + * be equal to BCM_SYSPORT_STATS_LEN at the end of the loop, but it 442 + * needs to point to how many total statistics we have minus the 443 + * number of per TX queue statistics 444 + */ 445 + j = bcm_sysport_get_sset_count(dev, ETH_SS_STATS) - 446 + dev->num_tx_queues * NUM_SYSPORT_TXQ_STAT; 447 + 448 + for (i = 0; i < 
dev->num_tx_queues; i++) { 449 + ring = &priv->tx_rings[i]; 450 + data[j] = ring->packets; 451 + j++; 452 + data[j] = ring->bytes; 453 453 j++; 454 454 } 455 455 } ··· 669 637 u16 len, status; 670 638 struct bcm_rsb *rsb; 671 639 640 + /* Clear status before servicing to reduce spurious interrupts */ 641 + intrl2_0_writel(priv, INTRL2_0_RDMA_MBDONE, INTRL2_CPU_CLEAR); 642 + 672 643 /* Determine how much we should process since last call, SYSTEMPORT Lite 673 644 * groups the producer and consumer indexes into the same 32-bit 674 645 * which we access using RDMA_CONS_INDEX ··· 682 647 p_index = rdma_readl(priv, RDMA_CONS_INDEX); 683 648 p_index &= RDMA_PROD_INDEX_MASK; 684 649 685 - if (p_index < priv->rx_c_index) 686 - to_process = (RDMA_CONS_INDEX_MASK + 1) - 687 - priv->rx_c_index + p_index; 688 - else 689 - to_process = p_index - priv->rx_c_index; 650 + to_process = (p_index - priv->rx_c_index) & RDMA_CONS_INDEX_MASK; 690 651 691 652 netif_dbg(priv, rx_status, ndev, 692 653 "p_index=%d rx_c_index=%d to_process=%d\n", ··· 777 746 return processed; 778 747 } 779 748 780 - static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_priv *priv, 749 + static void bcm_sysport_tx_reclaim_one(struct bcm_sysport_tx_ring *ring, 781 750 struct bcm_sysport_cb *cb, 782 751 unsigned int *bytes_compl, 783 752 unsigned int *pkts_compl) 784 753 { 754 + struct bcm_sysport_priv *priv = ring->priv; 785 755 struct device *kdev = &priv->pdev->dev; 786 - struct net_device *ndev = priv->netdev; 787 756 788 757 if (cb->skb) { 789 - ndev->stats.tx_bytes += cb->skb->len; 758 + ring->bytes += cb->skb->len; 790 759 *bytes_compl += cb->skb->len; 791 760 dma_unmap_single(kdev, dma_unmap_addr(cb, dma_addr), 792 761 dma_unmap_len(cb, dma_len), 793 762 DMA_TO_DEVICE); 794 - ndev->stats.tx_packets++; 763 + ring->packets++; 795 764 (*pkts_compl)++; 796 765 bcm_sysport_free_cb(cb); 797 766 /* SKB fragment */ 798 767 } else if (dma_unmap_addr(cb, dma_addr)) { 799 - ndev->stats.tx_bytes += 
dma_unmap_len(cb, dma_len); 768 + ring->bytes += dma_unmap_len(cb, dma_len); 800 769 dma_unmap_page(kdev, dma_unmap_addr(cb, dma_addr), 801 770 dma_unmap_len(cb, dma_len), DMA_TO_DEVICE); 802 771 dma_unmap_addr_set(cb, dma_addr, 0); ··· 812 781 unsigned int pkts_compl = 0, bytes_compl = 0; 813 782 struct bcm_sysport_cb *cb; 814 783 u32 hw_ind; 784 + 785 + /* Clear status before servicing to reduce spurious interrupts */ 786 + if (!ring->priv->is_lite) 787 + intrl2_1_writel(ring->priv, BIT(ring->index), INTRL2_CPU_CLEAR); 788 + else 789 + intrl2_0_writel(ring->priv, BIT(ring->index + 790 + INTRL2_0_TDMA_MBDONE_SHIFT), INTRL2_CPU_CLEAR); 815 791 816 792 /* Compute how many descriptors have been processed since last call */ 817 793 hw_ind = tdma_readl(priv, TDMA_DESC_RING_PROD_CONS_INDEX(ring->index)); ··· 841 803 842 804 while (last_tx_cn-- > 0) { 843 805 cb = ring->cbs + last_c_index; 844 - bcm_sysport_tx_reclaim_one(priv, cb, &bytes_compl, &pkts_compl); 806 + bcm_sysport_tx_reclaim_one(ring, cb, &bytes_compl, &pkts_compl); 845 807 846 808 ring->desc_count++; 847 809 last_c_index++; ··· 1670 1632 return 0; 1671 1633 } 1672 1634 1635 + static struct net_device_stats *bcm_sysport_get_nstats(struct net_device *dev) 1636 + { 1637 + struct bcm_sysport_priv *priv = netdev_priv(dev); 1638 + unsigned long tx_bytes = 0, tx_packets = 0; 1639 + struct bcm_sysport_tx_ring *ring; 1640 + unsigned int q; 1641 + 1642 + for (q = 0; q < dev->num_tx_queues; q++) { 1643 + ring = &priv->tx_rings[q]; 1644 + tx_bytes += ring->bytes; 1645 + tx_packets += ring->packets; 1646 + } 1647 + 1648 + dev->stats.tx_bytes = tx_bytes; 1649 + dev->stats.tx_packets = tx_packets; 1650 + return &dev->stats; 1651 + } 1652 + 1673 1653 static void bcm_sysport_netif_start(struct net_device *dev) 1674 1654 { 1675 1655 struct bcm_sysport_priv *priv = netdev_priv(dev); ··· 1949 1893 #ifdef CONFIG_NET_POLL_CONTROLLER 1950 1894 .ndo_poll_controller = bcm_sysport_poll_controller, 1951 1895 #endif 1896 + 
.ndo_get_stats = bcm_sysport_get_nstats, 1952 1897 }; 1953 1898 1954 1899 #define REV_FMT "v%2x.%02x"
+5
drivers/net/ethernet/broadcom/bcmsysport.h
··· 647 647 .reg_offset = ofs, \ 648 648 } 649 649 650 + /* TX bytes and packets */ 651 + #define NUM_SYSPORT_TXQ_STAT 2 652 + 650 653 struct bcm_sysport_stats { 651 654 char stat_string[ETH_GSTRING_LEN]; 652 655 int stat_sizeof; ··· 693 690 struct bcm_sysport_cb *cbs; /* Transmit control blocks */ 694 691 struct dma_desc *desc_cpu; /* CPU view of the descriptor */ 695 692 struct bcm_sysport_priv *priv; /* private context backpointer */ 693 + unsigned long packets; /* packets statistics */ 694 + unsigned long bytes; /* bytes statistics */ 696 695 }; 697 696 698 697 /* Driver private structure */