Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: stmmac: Rework coalesce timer and fix multi-queue races

This follows David Miller's advice and tries to fix the coalesce timer in
multi-queue scenarios.

We are now using per-queue coalesce values and per-queue TX timer.

The coalesce timer default value was changed to 1ms and the coalesce frames
to 25.

Tested in B2B setup between XGMAC2 and GMAC5.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Fixes: ce736788e8a ("net: stmmac: adding multiple buffers for TX")
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Jerome Brunet <jbrunet@baylibre.com>
Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jose Abreu and committed by
David S. Miller
8fce3331 5211da9c

+146 -106
+2 -2
drivers/net/ethernet/stmicro/stmmac/common.h
··· 258 258 #define MAX_DMA_RIWT 0xff 259 259 #define MIN_DMA_RIWT 0x20 260 260 /* Tx coalesce parameters */ 261 - #define STMMAC_COAL_TX_TIMER 40000 261 + #define STMMAC_COAL_TX_TIMER 1000 262 262 #define STMMAC_MAX_COAL_TX_TICK 100000 263 263 #define STMMAC_TX_MAX_FRAMES 256 264 - #define STMMAC_TX_FRAMES 64 264 + #define STMMAC_TX_FRAMES 25 265 265 266 266 /* Packets types */ 267 267 enum packets_types {
+12 -2
drivers/net/ethernet/stmicro/stmmac/stmmac.h
··· 48 48 49 49 /* Frequently used values are kept adjacent for cache effect */ 50 50 struct stmmac_tx_queue { 51 + u32 tx_count_frames; 52 + struct timer_list txtimer; 51 53 u32 queue_index; 52 54 struct stmmac_priv *priv_data; 53 55 struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp; ··· 75 73 u32 rx_zeroc_thresh; 76 74 dma_addr_t dma_rx_phy; 77 75 u32 rx_tail_addr; 76 + }; 77 + 78 + struct stmmac_channel { 78 79 struct napi_struct napi ____cacheline_aligned_in_smp; 80 + struct stmmac_priv *priv_data; 81 + u32 index; 82 + int has_rx; 83 + int has_tx; 79 84 }; 80 85 81 86 struct stmmac_tc_entry { ··· 118 109 119 110 struct stmmac_priv { 120 111 /* Frequently used values are kept adjacent for cache effect */ 121 - u32 tx_count_frames; 122 112 u32 tx_coal_frames; 123 113 u32 tx_coal_timer; 124 114 125 115 int tx_coalesce; 126 116 int hwts_tx_en; 127 117 bool tx_path_in_lpi_mode; 128 - struct timer_list txtimer; 129 118 bool tso; 130 119 131 120 unsigned int dma_buf_sz; ··· 143 136 144 137 /* TX Queue */ 145 138 struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES]; 139 + 140 + /* Generic channel for NAPI */ 141 + struct stmmac_channel channel[STMMAC_CH_MAX]; 146 142 147 143 bool oldlink; 148 144 int speed;
+131 -102
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
··· 148 148 static void stmmac_disable_all_queues(struct stmmac_priv *priv) 149 149 { 150 150 u32 rx_queues_cnt = priv->plat->rx_queues_to_use; 151 + u32 tx_queues_cnt = priv->plat->tx_queues_to_use; 152 + u32 maxq = max(rx_queues_cnt, tx_queues_cnt); 151 153 u32 queue; 152 154 153 - for (queue = 0; queue < rx_queues_cnt; queue++) { 154 - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; 155 + for (queue = 0; queue < maxq; queue++) { 156 + struct stmmac_channel *ch = &priv->channel[queue]; 155 157 156 - napi_disable(&rx_q->napi); 158 + napi_disable(&ch->napi); 157 159 } 158 160 } 159 161 ··· 166 164 static void stmmac_enable_all_queues(struct stmmac_priv *priv) 167 165 { 168 166 u32 rx_queues_cnt = priv->plat->rx_queues_to_use; 167 + u32 tx_queues_cnt = priv->plat->tx_queues_to_use; 168 + u32 maxq = max(rx_queues_cnt, tx_queues_cnt); 169 169 u32 queue; 170 170 171 - for (queue = 0; queue < rx_queues_cnt; queue++) { 172 - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; 171 + for (queue = 0; queue < maxq; queue++) { 172 + struct stmmac_channel *ch = &priv->channel[queue]; 173 173 174 - napi_enable(&rx_q->napi); 174 + napi_enable(&ch->napi); 175 175 } 176 176 } 177 177 ··· 1847 1843 * @queue: TX queue index 1848 1844 * Description: it reclaims the transmit resources after transmission completes. 
1849 1845 */ 1850 - static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue) 1846 + static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) 1851 1847 { 1852 1848 struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; 1853 1849 unsigned int bytes_compl = 0, pkts_compl = 0; 1854 - unsigned int entry; 1850 + unsigned int entry, count = 0; 1855 1851 1856 - netif_tx_lock(priv->dev); 1852 + __netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue)); 1857 1853 1858 1854 priv->xstats.tx_clean++; 1859 1855 1860 1856 entry = tx_q->dirty_tx; 1861 - while (entry != tx_q->cur_tx) { 1857 + while ((entry != tx_q->cur_tx) && (count < budget)) { 1862 1858 struct sk_buff *skb = tx_q->tx_skbuff[entry]; 1863 1859 struct dma_desc *p; 1864 1860 int status; ··· 1873 1869 /* Check if the descriptor is owned by the DMA */ 1874 1870 if (unlikely(status & tx_dma_own)) 1875 1871 break; 1872 + 1873 + count++; 1876 1874 1877 1875 /* Make sure descriptor fields are read after reading 1878 1876 * the own bit. 
··· 1943 1937 stmmac_enable_eee_mode(priv); 1944 1938 mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); 1945 1939 } 1946 - netif_tx_unlock(priv->dev); 1940 + 1941 + __netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue)); 1942 + 1943 + return count; 1947 1944 } 1948 1945 1949 1946 /** ··· 2029 2020 return false; 2030 2021 } 2031 2022 2023 + static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan) 2024 + { 2025 + int status = stmmac_dma_interrupt_status(priv, priv->ioaddr, 2026 + &priv->xstats, chan); 2027 + struct stmmac_channel *ch = &priv->channel[chan]; 2028 + bool needs_work = false; 2029 + 2030 + if ((status & handle_rx) && ch->has_rx) { 2031 + needs_work = true; 2032 + } else { 2033 + status &= ~handle_rx; 2034 + } 2035 + 2036 + if ((status & handle_tx) && ch->has_tx) { 2037 + needs_work = true; 2038 + } else { 2039 + status &= ~handle_tx; 2040 + } 2041 + 2042 + if (needs_work && napi_schedule_prep(&ch->napi)) { 2043 + stmmac_disable_dma_irq(priv, priv->ioaddr, chan); 2044 + __napi_schedule(&ch->napi); 2045 + } 2046 + 2047 + return status; 2048 + } 2049 + 2032 2050 /** 2033 2051 * stmmac_dma_interrupt - DMA ISR 2034 2052 * @priv: driver private structure ··· 2070 2034 u32 channels_to_check = tx_channel_count > rx_channel_count ? 2071 2035 tx_channel_count : rx_channel_count; 2072 2036 u32 chan; 2073 - bool poll_scheduled = false; 2074 2037 int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)]; 2075 2038 2076 2039 /* Make sure we never check beyond our status buffer. */ 2077 2040 if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status))) 2078 2041 channels_to_check = ARRAY_SIZE(status); 2079 2042 2080 - /* Each DMA channel can be used for rx and tx simultaneously, yet 2081 - * napi_struct is embedded in struct stmmac_rx_queue rather than in a 2082 - * stmmac_channel struct. 2083 - * Because of this, stmmac_poll currently checks (and possibly wakes) 2084 - * all tx queues rather than just a single tx queue. 
2085 - */ 2086 2043 for (chan = 0; chan < channels_to_check; chan++) 2087 - status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr, 2088 - &priv->xstats, chan); 2089 - 2090 - for (chan = 0; chan < rx_channel_count; chan++) { 2091 - if (likely(status[chan] & handle_rx)) { 2092 - struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; 2093 - 2094 - if (likely(napi_schedule_prep(&rx_q->napi))) { 2095 - stmmac_disable_dma_irq(priv, priv->ioaddr, chan); 2096 - __napi_schedule(&rx_q->napi); 2097 - poll_scheduled = true; 2098 - } 2099 - } 2100 - } 2101 - 2102 - /* If we scheduled poll, we already know that tx queues will be checked. 2103 - * If we didn't schedule poll, see if any DMA channel (used by tx) has a 2104 - * completed transmission, if so, call stmmac_poll (once). 2105 - */ 2106 - if (!poll_scheduled) { 2107 - for (chan = 0; chan < tx_channel_count; chan++) { 2108 - if (status[chan] & handle_tx) { 2109 - /* It doesn't matter what rx queue we choose 2110 - * here. We use 0 since it always exists. 2111 - */ 2112 - struct stmmac_rx_queue *rx_q = 2113 - &priv->rx_queue[0]; 2114 - 2115 - if (likely(napi_schedule_prep(&rx_q->napi))) { 2116 - stmmac_disable_dma_irq(priv, 2117 - priv->ioaddr, chan); 2118 - __napi_schedule(&rx_q->napi); 2119 - } 2120 - break; 2121 - } 2122 - } 2123 - } 2044 + status[chan] = stmmac_napi_check(priv, chan); 2124 2045 2125 2046 for (chan = 0; chan < tx_channel_count; chan++) { 2126 2047 if (unlikely(status[chan] & tx_hard_error_bump_tc)) { ··· 2226 2233 return ret; 2227 2234 } 2228 2235 2236 + static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue) 2237 + { 2238 + struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; 2239 + 2240 + mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer)); 2241 + } 2242 + 2229 2243 /** 2230 2244 * stmmac_tx_timer - mitigation sw timer for tx. 
2231 2245 * @data: data pointer ··· 2241 2241 */ 2242 2242 static void stmmac_tx_timer(struct timer_list *t) 2243 2243 { 2244 - struct stmmac_priv *priv = from_timer(priv, t, txtimer); 2245 - u32 tx_queues_count = priv->plat->tx_queues_to_use; 2246 - u32 queue; 2244 + struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer); 2245 + struct stmmac_priv *priv = tx_q->priv_data; 2246 + struct stmmac_channel *ch; 2247 2247 2248 - /* let's scan all the tx queues */ 2249 - for (queue = 0; queue < tx_queues_count; queue++) 2250 - stmmac_tx_clean(priv, queue); 2248 + ch = &priv->channel[tx_q->queue_index]; 2249 + 2250 + if (likely(napi_schedule_prep(&ch->napi))) 2251 + __napi_schedule(&ch->napi); 2251 2252 } 2252 2253 2253 2254 /** ··· 2261 2260 */ 2262 2261 static void stmmac_init_tx_coalesce(struct stmmac_priv *priv) 2263 2262 { 2263 + u32 tx_channel_count = priv->plat->tx_queues_to_use; 2264 + u32 chan; 2265 + 2264 2266 priv->tx_coal_frames = STMMAC_TX_FRAMES; 2265 2267 priv->tx_coal_timer = STMMAC_COAL_TX_TIMER; 2266 - timer_setup(&priv->txtimer, stmmac_tx_timer, 0); 2267 - priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer); 2268 - add_timer(&priv->txtimer); 2268 + 2269 + for (chan = 0; chan < tx_channel_count; chan++) { 2270 + struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan]; 2271 + 2272 + timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0); 2273 + } 2269 2274 } 2270 2275 2271 2276 static void stmmac_set_rings_length(struct stmmac_priv *priv) ··· 2599 2592 static int stmmac_open(struct net_device *dev) 2600 2593 { 2601 2594 struct stmmac_priv *priv = netdev_priv(dev); 2595 + u32 chan; 2602 2596 int ret; 2603 2597 2604 2598 stmmac_check_ether_addr(priv); ··· 2696 2688 if (dev->phydev) 2697 2689 phy_stop(dev->phydev); 2698 2690 2699 - del_timer_sync(&priv->txtimer); 2691 + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) 2692 + del_timer_sync(&priv->tx_queue[chan].txtimer); 2693 + 2700 2694 stmmac_hw_teardown(dev); 2701 2695 init_error: 2702 
2696 free_dma_desc_resources(priv); ··· 2718 2708 static int stmmac_release(struct net_device *dev) 2719 2709 { 2720 2710 struct stmmac_priv *priv = netdev_priv(dev); 2711 + u32 chan; 2721 2712 2722 2713 if (priv->eee_enabled) 2723 2714 del_timer_sync(&priv->eee_ctrl_timer); ··· 2733 2722 2734 2723 stmmac_disable_all_queues(priv); 2735 2724 2736 - del_timer_sync(&priv->txtimer); 2725 + for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) 2726 + del_timer_sync(&priv->tx_queue[chan].txtimer); 2737 2727 2738 2728 /* Free the IRQ lines */ 2739 2729 free_irq(dev->irq, dev); ··· 2948 2936 priv->xstats.tx_tso_nfrags += nfrags; 2949 2937 2950 2938 /* Manage tx mitigation */ 2951 - priv->tx_count_frames += nfrags + 1; 2952 - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { 2953 - mod_timer(&priv->txtimer, 2954 - STMMAC_COAL_TIMER(priv->tx_coal_timer)); 2955 - } else { 2956 - priv->tx_count_frames = 0; 2939 + tx_q->tx_count_frames += nfrags + 1; 2940 + if (priv->tx_coal_frames <= tx_q->tx_count_frames) { 2957 2941 stmmac_set_tx_ic(priv, desc); 2958 2942 priv->xstats.tx_set_ic_bit++; 2943 + tx_q->tx_count_frames = 0; 2944 + } else { 2945 + stmmac_tx_timer_arm(priv, queue); 2959 2946 } 2960 2947 2961 2948 skb_tx_timestamp(skb); ··· 3157 3146 * This approach takes care about the fragments: desc is the first 3158 3147 * element in case of no SG. 
3159 3148 */ 3160 - priv->tx_count_frames += nfrags + 1; 3161 - if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { 3162 - mod_timer(&priv->txtimer, 3163 - STMMAC_COAL_TIMER(priv->tx_coal_timer)); 3164 - } else { 3165 - priv->tx_count_frames = 0; 3149 + tx_q->tx_count_frames += nfrags + 1; 3150 + if (priv->tx_coal_frames <= tx_q->tx_count_frames) { 3166 3151 stmmac_set_tx_ic(priv, desc); 3167 3152 priv->xstats.tx_set_ic_bit++; 3153 + tx_q->tx_count_frames = 0; 3154 + } else { 3155 + stmmac_tx_timer_arm(priv, queue); 3168 3156 } 3169 3157 3170 3158 skb_tx_timestamp(skb); ··· 3209 3199 netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); 3210 3200 3211 3201 stmmac_enable_dma_transmission(priv, priv->ioaddr); 3202 + 3212 3203 stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue); 3213 3204 3214 3205 return NETDEV_TX_OK; ··· 3330 3319 static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) 3331 3320 { 3332 3321 struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; 3322 + struct stmmac_channel *ch = &priv->channel[queue]; 3333 3323 unsigned int entry = rx_q->cur_rx; 3334 3324 int coe = priv->hw->rx_csum; 3335 3325 unsigned int next_entry; ··· 3503 3491 else 3504 3492 skb->ip_summed = CHECKSUM_UNNECESSARY; 3505 3493 3506 - napi_gro_receive(&rx_q->napi, skb); 3494 + napi_gro_receive(&ch->napi, skb); 3507 3495 3508 3496 priv->dev->stats.rx_packets++; 3509 3497 priv->dev->stats.rx_bytes += frame_len; ··· 3526 3514 * Description : 3527 3515 * To look at the incoming frames and clear the tx resources. 
3528 3516 */ 3529 - static int stmmac_poll(struct napi_struct *napi, int budget) 3517 + static int stmmac_napi_poll(struct napi_struct *napi, int budget) 3530 3518 { 3531 - struct stmmac_rx_queue *rx_q = 3532 - container_of(napi, struct stmmac_rx_queue, napi); 3533 - struct stmmac_priv *priv = rx_q->priv_data; 3534 - u32 tx_count = priv->plat->tx_queues_to_use; 3535 - u32 chan = rx_q->queue_index; 3536 - int work_done = 0; 3537 - u32 queue; 3519 + struct stmmac_channel *ch = 3520 + container_of(napi, struct stmmac_channel, napi); 3521 + struct stmmac_priv *priv = ch->priv_data; 3522 + int work_done = 0, work_rem = budget; 3523 + u32 chan = ch->index; 3538 3524 3539 3525 priv->xstats.napi_poll++; 3540 3526 3541 - /* check all the queues */ 3542 - for (queue = 0; queue < tx_count; queue++) 3543 - stmmac_tx_clean(priv, queue); 3527 + if (ch->has_tx) { 3528 + int done = stmmac_tx_clean(priv, work_rem, chan); 3544 3529 3545 - work_done = stmmac_rx(priv, budget, rx_q->queue_index); 3546 - if (work_done < budget) { 3547 - napi_complete_done(napi, work_done); 3548 - stmmac_enable_dma_irq(priv, priv->ioaddr, chan); 3530 + work_done += done; 3531 + work_rem -= done; 3549 3532 } 3533 + 3534 + if (ch->has_rx) { 3535 + int done = stmmac_rx(priv, work_rem, chan); 3536 + 3537 + work_done += done; 3538 + work_rem -= done; 3539 + } 3540 + 3541 + if (work_done < budget && napi_complete_done(napi, work_done)) 3542 + stmmac_enable_dma_irq(priv, priv->ioaddr, chan); 3543 + 3550 3544 return work_done; 3551 3545 } 3552 3546 ··· 4216 4198 { 4217 4199 struct net_device *ndev = NULL; 4218 4200 struct stmmac_priv *priv; 4201 + u32 queue, maxq; 4219 4202 int ret = 0; 4220 - u32 queue; 4221 4203 4222 4204 ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv), 4223 4205 MTL_MAX_TX_QUEUES, ··· 4340 4322 "Enable RX Mitigation via HW Watchdog Timer\n"); 4341 4323 } 4342 4324 4343 - for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { 4344 - struct stmmac_rx_queue *rx_q = 
&priv->rx_queue[queue]; 4325 + /* Setup channels NAPI */ 4326 + maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use); 4345 4327 4346 - netif_napi_add(ndev, &rx_q->napi, stmmac_poll, 4347 - (8 * priv->plat->rx_queues_to_use)); 4328 + for (queue = 0; queue < maxq; queue++) { 4329 + struct stmmac_channel *ch = &priv->channel[queue]; 4330 + 4331 + ch->priv_data = priv; 4332 + ch->index = queue; 4333 + 4334 + if (queue < priv->plat->rx_queues_to_use) 4335 + ch->has_rx = true; 4336 + if (queue < priv->plat->tx_queues_to_use) 4337 + ch->has_tx = true; 4338 + 4339 + netif_napi_add(ndev, &ch->napi, stmmac_napi_poll, 4340 + NAPI_POLL_WEIGHT); 4348 4341 } 4349 4342 4350 4343 mutex_init(&priv->lock); ··· 4401 4372 priv->hw->pcs != STMMAC_PCS_RTBI) 4402 4373 stmmac_mdio_unregister(ndev); 4403 4374 error_mdio_register: 4404 - for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) { 4405 - struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; 4375 + for (queue = 0; queue < maxq; queue++) { 4376 + struct stmmac_channel *ch = &priv->channel[queue]; 4406 4377 4407 - netif_napi_del(&rx_q->napi); 4378 + netif_napi_del(&ch->napi); 4408 4379 } 4409 4380 error_hw_init: 4410 4381 destroy_workqueue(priv->wq);
+1
include/linux/stmmac.h
··· 30 30 31 31 #define MTL_MAX_RX_QUEUES 8 32 32 #define MTL_MAX_TX_QUEUES 8 33 + #define STMMAC_CH_MAX 8 33 34 34 35 #define STMMAC_RX_COE_NONE 0 35 36 #define STMMAC_RX_COE_TYPE1 1