Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'net-use-read_once-write_once-for-ring-index-accesses'

Jakub Kicinski says:

====================
net: use READ_ONCE/WRITE_ONCE for ring index accesses

Small follow-up to the lockless ring stop/start macros.
Update the doc and the drivers suggested by Eric:
https://lore.kernel.org/all/CANn89iJrBGSybMX1FqrhCEMWT3Nnz2=2+aStsbbwpWzKHjk51g@mail.gmail.com/

====================

Link: https://lore.kernel.org/r/20230412015038.674023-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+42 -48
+27 -34
Documentation/networking/driver.rst
··· 47 47 48 48 .. code-block:: c 49 49 50 + static u32 drv_tx_avail(struct drv_ring *dr) 51 + { 52 + u32 used = READ_ONCE(dr->prod) - READ_ONCE(dr->cons); 53 + 54 + return dr->tx_ring_size - (used & bp->tx_ring_mask); 55 + } 56 + 50 57 static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb, 51 58 struct net_device *dev) 52 59 { 53 60 struct drv *dp = netdev_priv(dev); 61 + struct netdev_queue *txq; 62 + struct drv_ring *dr; 63 + int idx; 54 64 55 - lock_tx(dp); 65 + idx = skb_get_queue_mapping(skb); 66 + dr = dp->tx_rings[idx]; 67 + txq = netdev_get_tx_queue(dev, idx); 68 + 56 69 //... 57 - /* This is a hard error log it. */ 58 - if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) { 70 + /* This should be a very rare race - log it. */ 71 + if (drv_tx_avail(dr) <= skb_shinfo(skb)->nr_frags + 1) { 59 72 netif_stop_queue(dev); 60 - unlock_tx(dp); 61 - printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", 62 - dev->name); 73 + netdev_warn(dev, "Tx Ring full when queue awake!\n"); 63 74 return NETDEV_TX_BUSY; 64 75 } 65 76 66 77 //... queue packet to card ... 67 - //... update tx consumer index ... 68 78 69 - if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1)) 70 - netif_stop_queue(dev); 79 + netdev_tx_sent_queue(txq, skb->len); 71 80 72 - //... 73 - unlock_tx(dp); 81 + //... update tx producer index using WRITE_ONCE() ... 82 + 83 + if (!netif_txq_maybe_stop(txq, drv_tx_avail(dr), 84 + MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS)) 85 + dr->stats.stopped++; 86 + 74 87 //... 75 88 return NETDEV_TX_OK; 76 89 } ··· 92 79 93 80 .. code-block:: c 94 81 95 - if (netif_queue_stopped(dp->dev) && 96 - TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1)) 97 - netif_wake_queue(dp->dev); 82 + //... update tx consumer index using WRITE_ONCE() ... 98 83 99 - For a non-scatter-gather supporting card, the three tests simply become: 100 - 101 - .. code-block:: c 102 - 103 - /* This is a hard error log it. */ 104 - if (TX_BUFFS_AVAIL(dp) <= 0) 105 - 106 - and: 107 - 108 - .. 
code-block:: c 109 - 110 - if (TX_BUFFS_AVAIL(dp) == 0) 111 - 112 - and: 113 - 114 - .. code-block:: c 115 - 116 - if (netif_queue_stopped(dp->dev) && 117 - TX_BUFFS_AVAIL(dp) > 0) 118 - netif_wake_queue(dp->dev); 84 + netif_txq_completed_wake(txq, cmpl_pkts, cmpl_bytes, 85 + drv_tx_avail(dr), 2 * MAX_SKB_FRAGS); 119 86 120 87 Lockless queue stop / wake helper macros 121 88 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+3 -3
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 472 472 prod = NEXT_TX(prod); 473 473 tx_push->doorbell = 474 474 cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); 475 - txr->tx_prod = prod; 475 + WRITE_ONCE(txr->tx_prod, prod); 476 476 477 477 tx_buf->is_push = 1; 478 478 netdev_tx_sent_queue(txq, skb->len); ··· 583 583 wmb(); 584 584 585 585 prod = NEXT_TX(prod); 586 - txr->tx_prod = prod; 586 + WRITE_ONCE(txr->tx_prod, prod); 587 587 588 588 if (!netdev_xmit_more() || netif_xmit_stopped(txq)) 589 589 bnxt_txr_db_kick(bp, txr, prod); ··· 688 688 dev_kfree_skb_any(skb); 689 689 } 690 690 691 - txr->tx_cons = cons; 691 + WRITE_ONCE(txr->tx_cons, cons); 692 692 693 693 __netif_txq_completed_wake(txq, nr_pkts, tx_bytes, 694 694 bnxt_tx_avail(bp, txr), bp->tx_wake_thresh,
+4 -5
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 2231 2231 #define SFF_MODULE_ID_QSFP28 0x11 2232 2232 #define BNXT_MAX_PHY_I2C_RESP_SIZE 64 2233 2233 2234 - static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr) 2234 + static inline u32 bnxt_tx_avail(struct bnxt *bp, 2235 + const struct bnxt_tx_ring_info *txr) 2235 2236 { 2236 - /* Tell compiler to fetch tx indices from memory. */ 2237 - barrier(); 2237 + u32 used = READ_ONCE(txr->tx_prod) - READ_ONCE(txr->tx_cons); 2238 2238 2239 - return bp->tx_ring_size - 2240 - ((txr->tx_prod - txr->tx_cons) & bp->tx_ring_mask); 2239 + return bp->tx_ring_size - (used & bp->tx_ring_mask); 2241 2240 } 2242 2241 2243 2242 static inline void bnxt_writeq(struct bnxt *bp, u64 val,
+3 -3
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
··· 64 64 int frag_len; 65 65 66 66 prod = NEXT_TX(prod); 67 - txr->tx_prod = prod; 67 + WRITE_ONCE(txr->tx_prod, prod); 68 68 69 69 /* first fill up the first buffer */ 70 70 frag_tx_buf = &txr->tx_buf_ring[prod]; ··· 94 94 /* Sync TX BD */ 95 95 wmb(); 96 96 prod = NEXT_TX(prod); 97 - txr->tx_prod = prod; 97 + WRITE_ONCE(txr->tx_prod, prod); 98 98 99 99 return tx_buf; 100 100 } ··· 161 161 } 162 162 tx_cons = NEXT_TX(tx_cons); 163 163 } 164 - txr->tx_cons = tx_cons; 164 + WRITE_ONCE(txr->tx_cons, tx_cons); 165 165 if (rx_doorbell_needed) { 166 166 tx_buf = &txr->tx_buf_ring[last_tx_cons]; 167 167 bnxt_db_write(bp, &rxr->rx_db, tx_buf->rx_prod);
+5 -3
drivers/net/ethernet/mellanox/mlx4/en_tx.c
··· 228 228 229 229 static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring) 230 230 { 231 - return ring->prod - ring->cons > ring->full_size; 231 + u32 used = READ_ONCE(ring->prod) - READ_ONCE(ring->cons); 232 + 233 + return used > ring->full_size; 232 234 } 233 235 234 236 static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, ··· 1085 1083 op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); 1086 1084 } 1087 1085 1088 - ring->prod += nr_txbb; 1086 + WRITE_ONCE(ring->prod, ring->prod + nr_txbb); 1089 1087 1090 1088 /* If we used a bounce buffer then copy descriptor back into place */ 1091 1089 if (unlikely(bounce)) ··· 1216 1214 1217 1215 rx_ring->xdp_tx++; 1218 1216 1219 - ring->prod += MLX4_EN_XDP_TX_NRTXBB; 1217 + WRITE_ONCE(ring->prod, ring->prod + MLX4_EN_XDP_TX_NRTXBB); 1220 1218 1221 1219 /* Ensure new descriptor hits memory 1222 1220 * before setting ownership of this descriptor to HW