Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add per-cpu storage and net->core_stats

Before adding yet another possibly contended atomic_long_t,
it is time to add per-cpu storage for existing ones:
dev->tx_dropped, dev->rx_dropped, and dev->rx_nohandler

Because many devices do not have to increment such counters,
allocate the per-cpu storage on demand, so that dev_get_stats()
does not have to spend considerable time folding zero counters.

Note that some drivers have abused these counters which
were supposed to be only used by core networking stack.

v4: should use per_cpu_ptr() in dev_get_stats() (Jakub)
v3: added a READ_ONCE() in netdev_core_stats_alloc() (Paolo)
v2: add a missing include (reported by kernel test robot <lkp@intel.com>)
Change in netdev_core_stats_alloc() (Jakub)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: jeffreyji <jeffreyji@google.com>
Reviewed-by: Brian Vazquez <brianvv@google.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20220311051420.2608812-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski.
625788b5 a8c06337

+101 -40
+1 -1
drivers/net/bonding/bond_main.c
··· 5120 5120 if (xmit_suc) 5121 5121 return NETDEV_TX_OK; 5122 5122 5123 - atomic_long_inc(&bond_dev->tx_dropped); 5123 + dev_core_stats_tx_dropped_inc(bond_dev); 5124 5124 return NET_XMIT_DROP; 5125 5125 } 5126 5126
+2 -2
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 370 370 i = skb_get_queue_mapping(skb); 371 371 if (unlikely(i >= bp->tx_nr_rings)) { 372 372 dev_kfree_skb_any(skb); 373 - atomic_long_inc(&dev->tx_dropped); 373 + dev_core_stats_tx_dropped_inc(dev); 374 374 return NETDEV_TX_OK; 375 375 } 376 376 ··· 646 646 if (txr->kick_pending) 647 647 bnxt_txr_db_kick(bp, txr, txr->tx_prod); 648 648 txr->tx_buf_ring[txr->tx_prod].skb = NULL; 649 - atomic_long_inc(&dev->tx_dropped); 649 + dev_core_stats_tx_dropped_inc(dev); 650 650 return NETDEV_TX_OK; 651 651 } 652 652
+2 -2
drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
··· 887 887 p[21] = net_stats->rx_compressed; 888 888 p[22] = net_stats->tx_compressed; 889 889 890 - p[23] = netdev->rx_dropped.counter; 891 - p[24] = netdev->tx_dropped.counter; 890 + p[23] = 0; /* was netdev->rx_dropped.counter */ 891 + p[24] = 0; /* was netdev->tx_dropped.counter */ 892 892 893 893 p[25] = priv->tx_timeout_count; 894 894
+1 -1
drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
··· 207 207 dev = skb->dev; 208 208 port = rmnet_get_port_rcu(dev); 209 209 if (unlikely(!port)) { 210 - atomic_long_inc(&skb->dev->rx_nohandler); 210 + dev_core_stats_rx_nohandler_inc(skb->dev); 211 211 kfree_skb(skb); 212 212 goto done; 213 213 }
+1 -1
drivers/net/ipvlan/ipvlan_core.c
··· 555 555 schedule_work(&port->wq); 556 556 } else { 557 557 spin_unlock(&port->backlog.lock); 558 - atomic_long_inc(&skb->dev->rx_dropped); 558 + dev_core_stats_rx_dropped_inc(skb->dev); 559 559 kfree_skb(skb); 560 560 } 561 561 }
+1 -1
drivers/net/macvlan.c
··· 371 371 free_nskb: 372 372 kfree_skb(nskb); 373 373 err: 374 - atomic_long_inc(&skb->dev->rx_dropped); 374 + dev_core_stats_rx_dropped_inc(skb->dev); 375 375 } 376 376 377 377 static void macvlan_flush_sources(struct macvlan_port *port,
+1 -1
drivers/net/net_failover.c
··· 89 89 static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb, 90 90 struct net_device *dev) 91 91 { 92 - atomic_long_inc(&dev->tx_dropped); 92 + dev_core_stats_tx_dropped_inc(dev); 93 93 dev_kfree_skb_any(skb); 94 94 return NETDEV_TX_OK; 95 95 }
+8 -8
drivers/net/tun.c
··· 1135 1135 return NETDEV_TX_OK; 1136 1136 1137 1137 drop: 1138 - atomic_long_inc(&dev->tx_dropped); 1138 + dev_core_stats_tx_dropped_inc(dev); 1139 1139 skb_tx_error(skb); 1140 1140 kfree_skb_reason(skb, drop_reason); 1141 1141 rcu_read_unlock(); ··· 1291 1291 void *frame = tun_xdp_to_ptr(xdp); 1292 1292 1293 1293 if (__ptr_ring_produce(&tfile->tx_ring, frame)) { 1294 - atomic_long_inc(&dev->tx_dropped); 1294 + dev_core_stats_tx_dropped_inc(dev); 1295 1295 break; 1296 1296 } 1297 1297 nxmit++; ··· 1626 1626 trace_xdp_exception(tun->dev, xdp_prog, act); 1627 1627 fallthrough; 1628 1628 case XDP_DROP: 1629 - atomic_long_inc(&tun->dev->rx_dropped); 1629 + dev_core_stats_rx_dropped_inc(tun->dev); 1630 1630 break; 1631 1631 } 1632 1632 ··· 1797 1797 */ 1798 1798 skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); 1799 1799 if (IS_ERR(skb)) { 1800 - atomic_long_inc(&tun->dev->rx_dropped); 1800 + dev_core_stats_rx_dropped_inc(tun->dev); 1801 1801 return PTR_ERR(skb); 1802 1802 } 1803 1803 if (!skb) ··· 1826 1826 1827 1827 if (IS_ERR(skb)) { 1828 1828 if (PTR_ERR(skb) != -EAGAIN) 1829 - atomic_long_inc(&tun->dev->rx_dropped); 1829 + dev_core_stats_rx_dropped_inc(tun->dev); 1830 1830 if (frags) 1831 1831 mutex_unlock(&tfile->napi_mutex); 1832 1832 return PTR_ERR(skb); ··· 1841 1841 err = -EFAULT; 1842 1842 drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; 1843 1843 drop: 1844 - atomic_long_inc(&tun->dev->rx_dropped); 1844 + dev_core_stats_rx_dropped_inc(tun->dev); 1845 1845 kfree_skb_reason(skb, drop_reason); 1846 1846 if (frags) { 1847 1847 tfile->napi.skb = NULL; ··· 1876 1876 pi.proto = htons(ETH_P_IPV6); 1877 1877 break; 1878 1878 default: 1879 - atomic_long_inc(&tun->dev->rx_dropped); 1879 + dev_core_stats_rx_dropped_inc(tun->dev); 1880 1880 kfree_skb(skb); 1881 1881 return -EINVAL; 1882 1882 } ··· 1956 1956 skb_headlen(skb)); 1957 1957 1958 1958 if (unlikely(headlen > skb_headlen(skb))) { 1959 - atomic_long_inc(&tun->dev->rx_dropped); 1959 + dev_core_stats_rx_dropped_inc(tun->dev); 1960 1960 napi_free_frags(&tfile->napi); 1961 1961 rcu_read_unlock(); 1962 1962 mutex_unlock(&tfile->napi_mutex);
+1 -1
drivers/net/vxlan/vxlan_core.c
··· 1760 1760 1761 1761 if (unlikely(!(vxlan->dev->flags & IFF_UP))) { 1762 1762 rcu_read_unlock(); 1763 - atomic_long_inc(&vxlan->dev->rx_dropped); 1763 + dev_core_stats_rx_dropped_inc(vxlan->dev); 1764 1764 vxlan_vnifilter_count(vxlan, vni, vninode, 1765 1765 VXLAN_VNI_STATS_RX_DROPS, 0); 1766 1766 goto drop;
+37 -9
include/linux/netdevice.h
··· 28 28 #include <linux/prefetch.h> 29 29 #include <asm/cache.h> 30 30 #include <asm/byteorder.h> 31 + #include <asm/local.h> 31 32 32 33 #include <linux/percpu.h> 33 34 #include <linux/rculist.h> ··· 195 194 unsigned long tx_compressed; 196 195 }; 197 196 197 + /* per-cpu stats, allocated on demand. 198 + * Try to fit them in a single cache line, for dev_get_stats() sake. 199 + */ 200 + struct net_device_core_stats { 201 + local_t rx_dropped; 202 + local_t tx_dropped; 203 + local_t rx_nohandler; 204 + } __aligned(4 * sizeof(local_t)); 198 205 199 206 #include <linux/cache.h> 200 207 #include <linux/skbuff.h> ··· 1744 1735 * @stats: Statistics struct, which was left as a legacy, use 1745 1736 * rtnl_link_stats64 instead 1746 1737 * 1747 - * @rx_dropped: Dropped packets by core network, 1738 + * @core_stats: core networking counters, 1748 1739 * do not use this in drivers 1749 - * @tx_dropped: Dropped packets by core network, 1750 - * do not use this in drivers 1751 - * @rx_nohandler: nohandler dropped packets by core network on 1752 - * inactive devices, do not use this in drivers 1753 1740 * @carrier_up_count: Number of times the carrier has been up 1754 1741 * @carrier_down_count: Number of times the carrier has been down 1755 1742 * ··· 2028 2023 2029 2024 struct net_device_stats stats; /* not used by modern drivers */ 2030 2025 2031 - atomic_long_t rx_dropped; 2032 - atomic_long_t tx_dropped; 2033 - atomic_long_t rx_nohandler; 2026 + struct net_device_core_stats __percpu *core_stats; 2034 2027 2035 2028 /* Stats to monitor link on/off, flapping */ 2036 2029 atomic_t carrier_up_count; ··· 3842 3839 return false; 3843 3840 } 3844 3841 3842 + struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev); 3843 + 3844 + static inline struct net_device_core_stats *dev_core_stats(struct net_device *dev) 3845 + { 3846 + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ 3847 + struct net_device_core_stats __percpu *p = READ_ONCE(dev->core_stats); 3848 + 3849 + if (likely(p)) 3850 + return this_cpu_ptr(p); 3851 + 3852 + return netdev_core_stats_alloc(dev); 3853 + } 3854 + 3855 + #define DEV_CORE_STATS_INC(FIELD) \ 3856 + static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ 3857 + { \ 3858 + struct net_device_core_stats *p = dev_core_stats(dev); \ 3859 + \ 3860 + if (p) \ 3861 + local_inc(&p->FIELD); \ 3862 + } 3863 + DEV_CORE_STATS_INC(rx_dropped) 3864 + DEV_CORE_STATS_INC(tx_dropped) 3865 + DEV_CORE_STATS_INC(rx_nohandler) 3866 + 3845 3867 static __always_inline int ____dev_forward_skb(struct net_device *dev, 3846 3868 struct sk_buff *skb, 3847 3869 const bool check_mtu) 3848 3870 { 3849 3871 if (skb_orphan_frags(skb, GFP_ATOMIC) || 3850 3872 unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { 3851 - atomic_long_inc(&dev->rx_dropped); 3873 + dev_core_stats_rx_dropped_inc(dev); 3852 3874 kfree_skb(skb); 3853 3875 return NET_RX_DROP; 3854 3876 }
+1 -1
include/net/bonding.h
··· 770 770 771 771 static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) 772 772 { 773 - atomic_long_inc(&dev->tx_dropped); 773 + dev_core_stats_tx_dropped_inc(dev); 774 774 dev_kfree_skb_any(skb); 775 775 return NET_XMIT_DROP; 776 776 }
+42 -9
net/core/dev.c
··· 3633 3633 out_kfree_skb: 3634 3634 kfree_skb(skb); 3635 3635 out_null: 3636 - atomic_long_inc(&dev->tx_dropped); 3636 + dev_core_stats_tx_dropped_inc(dev); 3637 3637 return NULL; 3638 3638 } 3639 3639 ··· 4184 4184 rc = -ENETDOWN; 4185 4185 rcu_read_unlock_bh(); 4186 4186 4187 - atomic_long_inc(&dev->tx_dropped); 4187 + dev_core_stats_tx_dropped_inc(dev); 4188 4188 kfree_skb_list(skb); 4189 4189 return rc; 4190 4190 out: ··· 4236 4236 local_bh_enable(); 4237 4237 return ret; 4238 4238 drop: 4239 - atomic_long_inc(&dev->tx_dropped); 4239 + dev_core_stats_tx_dropped_inc(dev); 4240 4240 kfree_skb_list(skb); 4241 4241 return NET_XMIT_DROP; 4242 4242 } ··· 4602 4602 sd->dropped++; 4603 4603 rps_unlock_irq_restore(sd, &flags); 4604 4604 4605 - atomic_long_inc(&skb->dev->rx_dropped); 4605 + dev_core_stats_rx_dropped_inc(skb->dev); 4606 4606 kfree_skb_reason(skb, reason); 4607 4607 return NET_RX_DROP; 4608 4608 } ··· 5357 5357 } else { 5358 5358 drop: 5359 5359 if (!deliver_exact) { 5360 - atomic_long_inc(&skb->dev->rx_dropped); 5360 + dev_core_stats_rx_dropped_inc(skb->dev); 5361 5361 kfree_skb_reason(skb, SKB_DROP_REASON_PTYPE_ABSENT); 5362 5362 } else { 5363 - atomic_long_inc(&skb->dev->rx_nohandler); 5363 + dev_core_stats_rx_nohandler_inc(skb->dev); 5364 5364 kfree_skb(skb); 5365 5365 } 5366 5366 /* Jamal, now you will not able to escape explaining ··· 10280 10280 } 10281 10281 EXPORT_SYMBOL(netdev_stats_to_stats64); 10282 10282 10283 + struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev) 10284 + { 10285 + struct net_device_core_stats __percpu *p; 10286 + 10287 + p = alloc_percpu_gfp(struct net_device_core_stats, 10288 + GFP_ATOMIC | __GFP_NOWARN); 10289 + 10290 + if (p && cmpxchg(&dev->core_stats, NULL, p)) 10291 + free_percpu(p); 10292 + 10293 + /* This READ_ONCE() pairs with the cmpxchg() above */ 10294 + p = READ_ONCE(dev->core_stats); 10295 + if (!p) 10296 + return NULL; 10297 + 10298 + return this_cpu_ptr(p); 10299 + } 10300 + EXPORT_SYMBOL(netdev_core_stats_alloc); 10301 + 10283 10302 /** 10284 10303 * dev_get_stats - get network device statistics 10285 10304 * @dev: device to get statistics from ··· 10313 10294 struct rtnl_link_stats64 *storage) 10314 10295 { 10315 10296 const struct net_device_ops *ops = dev->netdev_ops; 10297 + const struct net_device_core_stats __percpu *p; 10316 10298 10317 10299 if (ops->ndo_get_stats64) { 10318 10300 memset(storage, 0, sizeof(*storage)); ··· 10323 10303 } else { 10324 10304 netdev_stats_to_stats64(storage, &dev->stats); 10325 10305 } 10326 - storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped); 10327 - storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped); 10328 - storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler); 10306 + 10307 + /* This READ_ONCE() pairs with the write in netdev_core_stats_alloc() */ 10308 + p = READ_ONCE(dev->core_stats); 10309 + if (p) { 10310 + const struct net_device_core_stats *core_stats; 10311 + int i; 10312 + 10313 + for_each_possible_cpu(i) { 10314 + core_stats = per_cpu_ptr(p, i); 10315 + storage->rx_dropped += local_read(&core_stats->rx_dropped); 10316 + storage->tx_dropped += local_read(&core_stats->tx_dropped); 10317 + storage->rx_nohandler += local_read(&core_stats->rx_nohandler); 10318 + } 10319 + } 10329 10320 return storage; 10330 10321 } 10331 10322 EXPORT_SYMBOL(dev_get_stats); ··· 10598 10567 free_percpu(dev->pcpu_refcnt); 10599 10568 dev->pcpu_refcnt = NULL; 10600 10569 #endif 10570 + free_percpu(dev->core_stats); 10571 + dev->core_stats = NULL; 10601 10572 free_percpu(dev->xdp_bulkq); 10602 10573 dev->xdp_bulkq = NULL; 10603 10574
+1 -1
net/core/gro_cells.c
··· 28 28 29 29 if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { 30 30 drop: 31 - atomic_long_inc(&dev->rx_dropped); 31 + dev_core_stats_rx_dropped_inc(dev); 32 32 kfree_skb(skb); 33 33 res = NET_RX_DROP; 34 34 goto unlock;
+1 -1
net/hsr/hsr_device.c
··· 221 221 skb_reset_mac_len(skb); 222 222 hsr_forward_skb(skb, master); 223 223 } else { 224 - atomic_long_inc(&dev->tx_dropped); 224 + dev_core_stats_tx_dropped_inc(dev); 225 225 dev_kfree_skb_any(skb); 226 226 } 227 227 return NETDEV_TX_OK;
+1 -1
net/xfrm/xfrm_device.c
··· 143 143 segs = skb_gso_segment(skb, esp_features); 144 144 if (IS_ERR(segs)) { 145 145 kfree_skb(skb); 146 - atomic_long_inc(&dev->tx_dropped); 146 + dev_core_stats_tx_dropped_inc(dev); 147 147 return NULL; 148 148 } else { 149 149 consume_skb(skb);