Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vmxnet3: Fix tx queue race condition with XDP

If XDP traffic runs on a CPU which is greater than or equal to
the number of the Tx queues of the NIC, then vmxnet3_xdp_get_tq()
always picks up queue 0 for transmission as it uses reciprocal scale
instead of simple modulo operation.

vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() use the above
returned queue without any locking which can lead to race conditions
when multiple XDP xmits run in parallel on different CPUs.

This patch uses a simple modulo scheme when the current CPU equals or
exceeds the number of Tx queues on the NIC. It also adds locking in
vmxnet3_xdp_xmit() and vmxnet3_xdp_xmit_frame() functions.

Fixes: 54f00cce1178 ("vmxnet3: Add XDP support.")
Signed-off-by: Sankararaman Jayaraman <sankararaman.jayaraman@broadcom.com>
Signed-off-by: Ronak Doshi <ronak.doshi@broadcom.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250131042340.156547-1-sankararaman.jayaraman@broadcom.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Sankararaman Jayaraman and committed by
Jakub Kicinski
3f1baa91 a8aa6a6d

+12 -2
+12 -2
drivers/net/vmxnet3/vmxnet3_xdp.c
··· 28 28 if (likely(cpu < tq_number)) 29 29 tq = &adapter->tx_queue[cpu]; 30 30 else 31 - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; 31 + tq = &adapter->tx_queue[cpu % tq_number]; 32 32 33 33 return tq; 34 34 } ··· 124 124 u32 buf_size; 125 125 u32 dw2; 126 126 127 + spin_lock_irq(&tq->tx_lock); 127 128 dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; 128 129 dw2 |= xdpf->len; 129 130 ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; ··· 135 134 136 135 if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { 137 136 tq->stats.tx_ring_full++; 137 + spin_unlock_irq(&tq->tx_lock); 138 138 return -ENOSPC; 139 139 } 140 140 ··· 144 142 tbi->dma_addr = dma_map_single(&adapter->pdev->dev, 145 143 xdpf->data, buf_size, 146 144 DMA_TO_DEVICE); 147 - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) 145 + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { 146 + spin_unlock_irq(&tq->tx_lock); 148 147 return -EFAULT; 148 + } 149 149 tbi->map_type |= VMXNET3_MAP_SINGLE; 150 150 } else { /* XDP buffer from page pool */ 151 151 page = virt_to_page(xdpf->data); ··· 186 182 dma_wmb(); 187 183 gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ 188 184 VMXNET3_TXD_GEN); 185 + spin_unlock_irq(&tq->tx_lock); 189 186 190 187 /* No need to handle the case when tx_num_deferred doesn't reach 191 188 * threshold. 
Backend driver at hypervisor side will poll and reset ··· 230 225 { 231 226 struct vmxnet3_adapter *adapter = netdev_priv(dev); 232 227 struct vmxnet3_tx_queue *tq; 228 + struct netdev_queue *nq; 233 229 int i; 234 230 235 231 if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) ··· 242 236 if (tq->stopped) 243 237 return -ENETDOWN; 244 238 239 + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); 240 + 241 + __netif_tx_lock(nq, smp_processor_id()); 245 242 for (i = 0; i < n; i++) { 246 243 if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { 247 244 tq->stats.xdp_xmit_err++; ··· 252 243 } 253 244 } 254 245 tq->stats.xdp_xmit += i; 246 + __netif_tx_unlock(nq); 255 247 256 248 return i; 257 249 }