Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

liquidio: prevent rx queues from getting stalled

This commit fixes RX traffic stalls seen when stress testing the driver
with continuous ifconfig up/down under very high traffic conditions.

The reason for the issue is that, in the existing liquidio_stop function,
NAPI is disabled before the actual FW/HW interface is brought down via
send_rx_ctrl_cmd(lio, 0). In the window between NAPI being disabled and the
interface actually going down in firmware, the firmware keeps enqueuing Rx
traffic to the host. When an interrupt arrives for these new packets, the
host irq handler fails to schedule NAPI because NAPI is already disabled.

After "ifconfig <iface> up", the host re-enables NAPI but cannot schedule it
until it receives another Rx interrupt. The host never receives that Rx
interrupt, because it never cleared the Rx interrupt it received during the
interface down operation. The NIC Rx interrupt gets cleared only when the
host processes the queue and clears the queue counts. This anomaly leads to
further issues such as packet overflow in the FW/HW queues and backpressure.

Fix:
This commit fixes the issue by disabling NAPI only after informing the
firmware to stop queueing packets to the host via send_rx_ctrl_cmd(lio, 0).
send_rx_ctrl_cmd is not visible in the patch as it is already there in the
code. After the DOWN command, the stop path also waits (via
lio_wait_for_clean_oq) for any pending packets to be processed by NAPI
before NAPI is disabled, so that the stall described above will not occur.

Signed-off-by: Raghu Vatsavayi <raghu.vatsavayi@cavium.com>
Acked-by: Derek Chickles <derek.chickles@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Raghu Vatsavayi and committed by
David S. Miller
ccdd0b4c 6f14f49c

+53 -19
+23
drivers/net/ethernet/cavium/liquidio/lio_core.c
··· 1146 1146 octeon_free_soft_command(oct, sc); 1147 1147 return 0; 1148 1148 } 1149 + 1150 + int lio_wait_for_clean_oq(struct octeon_device *oct) 1151 + { 1152 + int retry = 100, pending_pkts = 0; 1153 + int idx; 1154 + 1155 + do { 1156 + pending_pkts = 0; 1157 + 1158 + for (idx = 0; idx < MAX_OCTEON_OUTPUT_QUEUES(oct); idx++) { 1159 + if (!(oct->io_qmask.oq & BIT_ULL(idx))) 1160 + continue; 1161 + pending_pkts += 1162 + atomic_read(&oct->droq[idx]->pkts_pending); 1163 + } 1164 + 1165 + if (pending_pkts > 0) 1166 + schedule_timeout_uninterruptible(1); 1167 + 1168 + } while (retry-- && pending_pkts); 1169 + 1170 + return pending_pkts; 1171 + }
+15 -10
drivers/net/ethernet/cavium/liquidio/lio_main.c
··· 2084 2084 struct octeon_device *oct = lio->oct_dev; 2085 2085 struct napi_struct *napi, *n; 2086 2086 2087 - if (oct->props[lio->ifidx].napi_enabled) { 2088 - list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) 2089 - napi_disable(napi); 2090 - 2091 - oct->props[lio->ifidx].napi_enabled = 0; 2092 - 2093 - if (OCTEON_CN23XX_PF(oct)) 2094 - oct->droq[0]->ops.poll_mode = 0; 2095 - } 2096 - 2097 2087 ifstate_reset(lio, LIO_IFSTATE_RUNNING); 2098 2088 2099 2089 netif_tx_disable(netdev); ··· 2107 2117 if (lio->ptp_clock) { 2108 2118 ptp_clock_unregister(lio->ptp_clock); 2109 2119 lio->ptp_clock = NULL; 2120 + } 2121 + 2122 + /* Wait for any pending Rx descriptors */ 2123 + if (lio_wait_for_clean_oq(oct)) 2124 + netif_info(lio, rx_err, lio->netdev, 2125 + "Proceeding with stop interface after partial RX desc processing\n"); 2126 + 2127 + if (oct->props[lio->ifidx].napi_enabled == 1) { 2128 + list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) 2129 + napi_disable(napi); 2130 + 2131 + oct->props[lio->ifidx].napi_enabled = 0; 2132 + 2133 + if (OCTEON_CN23XX_PF(oct)) 2134 + oct->droq[0]->ops.poll_mode = 0; 2110 2135 } 2111 2136 2112 2137 dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
+14 -9
drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
··· 1138 1138 /* tell Octeon to stop forwarding packets to host */ 1139 1139 send_rx_ctrl_cmd(lio, 0); 1140 1140 1141 - if (oct->props[lio->ifidx].napi_enabled) { 1142 - list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) 1143 - napi_disable(napi); 1144 - 1145 - oct->props[lio->ifidx].napi_enabled = 0; 1146 - 1147 - oct->droq[0]->ops.poll_mode = 0; 1148 - } 1149 - 1150 1141 netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n"); 1151 1142 /* Inform that netif carrier is down */ 1152 1143 lio->intf_open = 0; ··· 1149 1158 ifstate_reset(lio, LIO_IFSTATE_RUNNING); 1150 1159 1151 1160 stop_txqs(netdev); 1161 + 1162 + /* Wait for any pending Rx descriptors */ 1163 + if (lio_wait_for_clean_oq(oct)) 1164 + netif_info(lio, rx_err, lio->netdev, 1165 + "Proceeding with stop interface after partial RX desc processing\n"); 1166 + 1167 + if (oct->props[lio->ifidx].napi_enabled == 1) { 1168 + list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) 1169 + napi_disable(napi); 1170 + 1171 + oct->props[lio->ifidx].napi_enabled = 0; 1172 + 1173 + oct->droq[0]->ops.poll_mode = 0; 1174 + } 1152 1175 1153 1176 dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name); 1154 1177
+1
drivers/net/ethernet/cavium/liquidio/octeon_network.h
··· 190 190 191 191 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs); 192 192 193 + int lio_wait_for_clean_oq(struct octeon_device *oct); 193 194 /** 194 195 * \brief Register ethtool operations 195 196 * @param netdev pointer to network device