Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'netpoll-second-round-of-fixes'

Eric Dumazet says:

====================
netpoll: second round of fixes.

As diagnosed by Song Liu, ndo_poll_controller() can
be very dangerous on loaded hosts, since the cpu
calling ndo_poll_controller() might steal all NAPI
contexts (for all RX/TX queues of the NIC).

This capture, showing one ksoftirqd eating all cycles,
can last for an unlimited amount of time, since one
cpu is generally not able to drain all the queues under load.

It seems that all networking drivers that do use NAPI
for their TX completions should not provide an ndo_poll_controller():

Most NAPI drivers have netpoll support already handled
in core networking stack, since netpoll_poll_dev()
uses poll_napi(dev) to iterate through registered
NAPI contexts for a device.

First patch is a fix in poll_one_napi().

Then following patches take care of ten drivers.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+1 -243
-22
drivers/net/ethernet/amazon/ena/ena_netdev.c
··· 2185 2185 return NETDEV_TX_OK; 2186 2186 } 2187 2187 2188 - #ifdef CONFIG_NET_POLL_CONTROLLER 2189 - static void ena_netpoll(struct net_device *netdev) 2190 - { 2191 - struct ena_adapter *adapter = netdev_priv(netdev); 2192 - int i; 2193 - 2194 - /* Dont schedule NAPI if the driver is in the middle of reset 2195 - * or netdev is down. 2196 - */ 2197 - 2198 - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags) || 2199 - test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags)) 2200 - return; 2201 - 2202 - for (i = 0; i < adapter->num_queues; i++) 2203 - napi_schedule(&adapter->ena_napi[i].napi); 2204 - } 2205 - #endif /* CONFIG_NET_POLL_CONTROLLER */ 2206 - 2207 2188 static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, 2208 2189 struct net_device *sb_dev, 2209 2190 select_queue_fallback_t fallback) ··· 2350 2369 .ndo_change_mtu = ena_change_mtu, 2351 2370 .ndo_set_mac_address = NULL, 2352 2371 .ndo_validate_addr = eth_validate_addr, 2353 - #ifdef CONFIG_NET_POLL_CONTROLLER 2354 - .ndo_poll_controller = ena_netpoll, 2355 - #endif /* CONFIG_NET_POLL_CONTROLLER */ 2356 2372 }; 2357 2373 2358 2374 static int ena_device_validate_params(struct ena_adapter *adapter,
-18
drivers/net/ethernet/hisilicon/hns/hns_enet.c
··· 1503 1503 return phy_mii_ioctl(phy_dev, ifr, cmd); 1504 1504 } 1505 1505 1506 - /* use only for netconsole to poll with the device without interrupt */ 1507 - #ifdef CONFIG_NET_POLL_CONTROLLER 1508 - static void hns_nic_poll_controller(struct net_device *ndev) 1509 - { 1510 - struct hns_nic_priv *priv = netdev_priv(ndev); 1511 - unsigned long flags; 1512 - int i; 1513 - 1514 - local_irq_save(flags); 1515 - for (i = 0; i < priv->ae_handle->q_num * 2; i++) 1516 - napi_schedule(&priv->ring_data[i].napi); 1517 - local_irq_restore(flags); 1518 - } 1519 - #endif 1520 - 1521 1506 static netdev_tx_t hns_nic_net_xmit(struct sk_buff *skb, 1522 1507 struct net_device *ndev) 1523 1508 { ··· 1955 1970 .ndo_set_features = hns_nic_set_features, 1956 1971 .ndo_fix_features = hns_nic_fix_features, 1957 1972 .ndo_get_stats64 = hns_nic_get_stats64, 1958 - #ifdef CONFIG_NET_POLL_CONTROLLER 1959 - .ndo_poll_controller = hns_nic_poll_controller, 1960 - #endif 1961 1973 .ndo_set_rx_mode = hns_nic_set_rx_mode, 1962 1974 .ndo_select_queue = hns_nic_select_queue, 1963 1975 };
-20
drivers/net/ethernet/huawei/hinic/hinic_main.c
··· 789 789 stats->tx_errors = nic_tx_stats->tx_dropped; 790 790 } 791 791 792 - #ifdef CONFIG_NET_POLL_CONTROLLER 793 - static void hinic_netpoll(struct net_device *netdev) 794 - { 795 - struct hinic_dev *nic_dev = netdev_priv(netdev); 796 - int i, num_qps; 797 - 798 - num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); 799 - for (i = 0; i < num_qps; i++) { 800 - struct hinic_txq *txq = &nic_dev->txqs[i]; 801 - struct hinic_rxq *rxq = &nic_dev->rxqs[i]; 802 - 803 - napi_schedule(&txq->napi); 804 - napi_schedule(&rxq->napi); 805 - } 806 - } 807 - #endif 808 - 809 792 static const struct net_device_ops hinic_netdev_ops = { 810 793 .ndo_open = hinic_open, 811 794 .ndo_stop = hinic_close, ··· 801 818 .ndo_start_xmit = hinic_xmit_frame, 802 819 .ndo_tx_timeout = hinic_tx_timeout, 803 820 .ndo_get_stats64 = hinic_get_stats64, 804 - #ifdef CONFIG_NET_POLL_CONTROLLER 805 - .ndo_poll_controller = hinic_netpoll, 806 - #endif 807 821 }; 808 822 809 823 static void netdev_features_init(struct net_device *netdev)
-14
drivers/net/ethernet/ibm/ehea/ehea_main.c
··· 921 921 return rx; 922 922 } 923 923 924 - #ifdef CONFIG_NET_POLL_CONTROLLER 925 - static void ehea_netpoll(struct net_device *dev) 926 - { 927 - struct ehea_port *port = netdev_priv(dev); 928 - int i; 929 - 930 - for (i = 0; i < port->num_def_qps; i++) 931 - napi_schedule(&port->port_res[i].napi); 932 - } 933 - #endif 934 - 935 924 static irqreturn_t ehea_recv_irq_handler(int irq, void *param) 936 925 { 937 926 struct ehea_port_res *pr = param; ··· 2942 2953 .ndo_open = ehea_open, 2943 2954 .ndo_stop = ehea_stop, 2944 2955 .ndo_start_xmit = ehea_start_xmit, 2945 - #ifdef CONFIG_NET_POLL_CONTROLLER 2946 - .ndo_poll_controller = ehea_netpoll, 2947 - #endif 2948 2956 .ndo_get_stats64 = ehea_get_stats64, 2949 2957 .ndo_set_mac_address = ehea_set_mac_addr, 2950 2958 .ndo_validate_addr = eth_validate_addr,
-16
drivers/net/ethernet/ibm/ibmvnic.c
··· 2207 2207 return frames_processed; 2208 2208 } 2209 2209 2210 - #ifdef CONFIG_NET_POLL_CONTROLLER 2211 - static void ibmvnic_netpoll_controller(struct net_device *dev) 2212 - { 2213 - struct ibmvnic_adapter *adapter = netdev_priv(dev); 2214 - int i; 2215 - 2216 - replenish_pools(netdev_priv(dev)); 2217 - for (i = 0; i < adapter->req_rx_queues; i++) 2218 - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, 2219 - adapter->rx_scrq[i]); 2220 - } 2221 - #endif 2222 - 2223 2210 static int wait_for_reset(struct ibmvnic_adapter *adapter) 2224 2211 { 2225 2212 int rc, ret; ··· 2279 2292 .ndo_set_mac_address = ibmvnic_set_mac, 2280 2293 .ndo_validate_addr = eth_validate_addr, 2281 2294 .ndo_tx_timeout = ibmvnic_tx_timeout, 2282 - #ifdef CONFIG_NET_POLL_CONTROLLER 2283 - .ndo_poll_controller = ibmvnic_netpoll_controller, 2284 - #endif 2285 2295 .ndo_change_mtu = ibmvnic_change_mtu, 2286 2296 .ndo_features_check = ibmvnic_features_check, 2287 2297 };
-23
drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
··· 72 72 work_func_t func, int delay); 73 73 static void netxen_cancel_fw_work(struct netxen_adapter *adapter); 74 74 static int netxen_nic_poll(struct napi_struct *napi, int budget); 75 - #ifdef CONFIG_NET_POLL_CONTROLLER 76 - static void netxen_nic_poll_controller(struct net_device *netdev); 77 - #endif 78 75 79 76 static void netxen_create_sysfs_entries(struct netxen_adapter *adapter); 80 77 static void netxen_remove_sysfs_entries(struct netxen_adapter *adapter); ··· 578 581 .ndo_tx_timeout = netxen_tx_timeout, 579 582 .ndo_fix_features = netxen_fix_features, 580 583 .ndo_set_features = netxen_set_features, 581 - #ifdef CONFIG_NET_POLL_CONTROLLER 582 - .ndo_poll_controller = netxen_nic_poll_controller, 583 - #endif 584 584 }; 585 585 586 586 static inline bool netxen_function_zero(struct pci_dev *pdev) ··· 2395 2401 2396 2402 return work_done; 2397 2403 } 2398 - 2399 - #ifdef CONFIG_NET_POLL_CONTROLLER 2400 - static void netxen_nic_poll_controller(struct net_device *netdev) 2401 - { 2402 - int ring; 2403 - struct nx_host_sds_ring *sds_ring; 2404 - struct netxen_adapter *adapter = netdev_priv(netdev); 2405 - struct netxen_recv_context *recv_ctx = &adapter->recv_ctx; 2406 - 2407 - disable_irq(adapter->irq); 2408 - for (ring = 0; ring < adapter->max_sds_rings; ring++) { 2409 - sds_ring = &recv_ctx->sds_rings[ring]; 2410 - netxen_intr(adapter->irq, sds_ring); 2411 - } 2412 - enable_irq(adapter->irq); 2413 - } 2414 - #endif 2415 2404 2416 2405 static int 2417 2406 nx_incr_dev_ref_cnt(struct netxen_adapter *adapter)
-45
drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
··· 59 59 static void qlcnic_tx_timeout(struct net_device *netdev); 60 60 static void qlcnic_attach_work(struct work_struct *work); 61 61 static void qlcnic_fwinit_work(struct work_struct *work); 62 - #ifdef CONFIG_NET_POLL_CONTROLLER 63 - static void qlcnic_poll_controller(struct net_device *netdev); 64 - #endif 65 62 66 63 static void qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding); 67 64 static int qlcnic_can_start_firmware(struct qlcnic_adapter *adapter); ··· 542 545 .ndo_udp_tunnel_add = qlcnic_add_vxlan_port, 543 546 .ndo_udp_tunnel_del = qlcnic_del_vxlan_port, 544 547 .ndo_features_check = qlcnic_features_check, 545 - #ifdef CONFIG_NET_POLL_CONTROLLER 546 - .ndo_poll_controller = qlcnic_poll_controller, 547 - #endif 548 548 #ifdef CONFIG_QLCNIC_SRIOV 549 549 .ndo_set_vf_mac = qlcnic_sriov_set_vf_mac, 550 550 .ndo_set_vf_rate = qlcnic_sriov_set_vf_tx_rate, ··· 3193 3199 napi_schedule(&tx_ring->napi); 3194 3200 return IRQ_HANDLED; 3195 3201 } 3196 - 3197 - #ifdef CONFIG_NET_POLL_CONTROLLER 3198 - static void qlcnic_poll_controller(struct net_device *netdev) 3199 - { 3200 - struct qlcnic_adapter *adapter = netdev_priv(netdev); 3201 - struct qlcnic_host_sds_ring *sds_ring; 3202 - struct qlcnic_recv_context *recv_ctx; 3203 - struct qlcnic_host_tx_ring *tx_ring; 3204 - int ring; 3205 - 3206 - if (!test_bit(__QLCNIC_DEV_UP, &adapter->state)) 3207 - return; 3208 - 3209 - recv_ctx = adapter->recv_ctx; 3210 - 3211 - for (ring = 0; ring < adapter->drv_sds_rings; ring++) { 3212 - sds_ring = &recv_ctx->sds_rings[ring]; 3213 - qlcnic_disable_sds_intr(adapter, sds_ring); 3214 - napi_schedule(&sds_ring->napi); 3215 - } 3216 - 3217 - if (adapter->flags & QLCNIC_MSIX_ENABLED) { 3218 - /* Only Multi-Tx queue capable devices need to 3219 - * schedule NAPI for TX rings 3220 - */ 3221 - if ((qlcnic_83xx_check(adapter) && 3222 - (adapter->flags & QLCNIC_TX_INTR_SHARED)) || 3223 - (qlcnic_82xx_check(adapter) && 3224 - !qlcnic_check_multi_tx(adapter))) 3225 - 
return; 3226 - 3227 - for (ring = 0; ring < adapter->drv_tx_rings; ring++) { 3228 - tx_ring = &adapter->tx_ring[ring]; 3229 - qlcnic_disable_tx_intr(adapter, tx_ring); 3230 - napi_schedule(&tx_ring->napi); 3231 - } 3232 - } 3233 - } 3234 - #endif 3235 3202 3236 3203 static void 3237 3204 qlcnic_idc_debug_info(struct qlcnic_adapter *adapter, u8 encoding)
-26
drivers/net/ethernet/sfc/efx.c
··· 2208 2208 2209 2209 /************************************************************************** 2210 2210 * 2211 - * Kernel netpoll interface 2212 - * 2213 - *************************************************************************/ 2214 - 2215 - #ifdef CONFIG_NET_POLL_CONTROLLER 2216 - 2217 - /* Although in the common case interrupts will be disabled, this is not 2218 - * guaranteed. However, all our work happens inside the NAPI callback, 2219 - * so no locking is required. 2220 - */ 2221 - static void efx_netpoll(struct net_device *net_dev) 2222 - { 2223 - struct efx_nic *efx = netdev_priv(net_dev); 2224 - struct efx_channel *channel; 2225 - 2226 - efx_for_each_channel(channel, efx) 2227 - efx_schedule_channel(channel); 2228 - } 2229 - 2230 - #endif 2231 - 2232 - /************************************************************************** 2233 - * 2234 2211 * Kernel net device interface 2235 2212 * 2236 2213 *************************************************************************/ ··· 2486 2509 #endif 2487 2510 .ndo_get_phys_port_id = efx_get_phys_port_id, 2488 2511 .ndo_get_phys_port_name = efx_get_phys_port_name, 2489 - #ifdef CONFIG_NET_POLL_CONTROLLER 2490 - .ndo_poll_controller = efx_netpoll, 2491 - #endif 2492 2512 .ndo_setup_tc = efx_setup_tc, 2493 2513 #ifdef CONFIG_RFS_ACCEL 2494 2514 .ndo_rx_flow_steer = efx_filter_rfs,
-26
drivers/net/ethernet/sfc/falcon/efx.c
··· 2054 2054 2055 2055 /************************************************************************** 2056 2056 * 2057 - * Kernel netpoll interface 2058 - * 2059 - *************************************************************************/ 2060 - 2061 - #ifdef CONFIG_NET_POLL_CONTROLLER 2062 - 2063 - /* Although in the common case interrupts will be disabled, this is not 2064 - * guaranteed. However, all our work happens inside the NAPI callback, 2065 - * so no locking is required. 2066 - */ 2067 - static void ef4_netpoll(struct net_device *net_dev) 2068 - { 2069 - struct ef4_nic *efx = netdev_priv(net_dev); 2070 - struct ef4_channel *channel; 2071 - 2072 - ef4_for_each_channel(channel, efx) 2073 - ef4_schedule_channel(channel); 2074 - } 2075 - 2076 - #endif 2077 - 2078 - /************************************************************************** 2079 - * 2080 2057 * Kernel net device interface 2081 2058 * 2082 2059 *************************************************************************/ ··· 2227 2250 .ndo_set_mac_address = ef4_set_mac_address, 2228 2251 .ndo_set_rx_mode = ef4_set_rx_mode, 2229 2252 .ndo_set_features = ef4_set_features, 2230 - #ifdef CONFIG_NET_POLL_CONTROLLER 2231 - .ndo_poll_controller = ef4_netpoll, 2232 - #endif 2233 2253 .ndo_setup_tc = ef4_setup_tc, 2234 2254 #ifdef CONFIG_RFS_ACCEL 2235 2255 .ndo_rx_flow_steer = ef4_filter_rfs,
-14
drivers/net/virtio_net.c
··· 1699 1699 tot->rx_frame_errors = dev->stats.rx_frame_errors; 1700 1700 } 1701 1701 1702 - #ifdef CONFIG_NET_POLL_CONTROLLER 1703 - static void virtnet_netpoll(struct net_device *dev) 1704 - { 1705 - struct virtnet_info *vi = netdev_priv(dev); 1706 - int i; 1707 - 1708 - for (i = 0; i < vi->curr_queue_pairs; i++) 1709 - napi_schedule(&vi->rq[i].napi); 1710 - } 1711 - #endif 1712 - 1713 1702 static void virtnet_ack_link_announce(struct virtnet_info *vi) 1714 1703 { 1715 1704 rtnl_lock(); ··· 2436 2447 .ndo_get_stats64 = virtnet_stats, 2437 2448 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, 2438 2449 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, 2439 - #ifdef CONFIG_NET_POLL_CONTROLLER 2440 - .ndo_poll_controller = virtnet_netpoll, 2441 - #endif 2442 2450 .ndo_bpf = virtnet_xdp, 2443 2451 .ndo_xdp_xmit = virtnet_xdp_xmit, 2444 2452 .ndo_features_check = passthru_features_check,
+1 -19
net/core/netpoll.c
··· 135 135 } 136 136 } 137 137 138 - /* 139 - * Check whether delayed processing was scheduled for our NIC. If so, 140 - * we attempt to grab the poll lock and use ->poll() to pump the card. 141 - * If this fails, either we've recursed in ->poll() or it's already 142 - * running on another CPU. 143 - * 144 - * Note: we don't mask interrupts with this lock because we're using 145 - * trylock here and interrupts are already disabled in the softirq 146 - * case. Further, we test the poll_owner to avoid recursion on UP 147 - * systems where the lock doesn't exist. 148 - */ 149 138 static void poll_one_napi(struct napi_struct *napi) 150 139 { 151 - int work = 0; 152 - 153 - /* net_rx_action's ->poll() invocations and our's are 154 - * synchronized by this test which is only made while 155 - * holding the napi->poll_lock. 156 - */ 157 - if (!test_bit(NAPI_STATE_SCHED, &napi->state)) 158 - return; 140 + int work; 159 141 160 142 /* If we set this bit but see that it has already been set, 161 143 * that indicates that napi has been disabled and we need