Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iavf: pack iavf_ring more efficiently

Before replacing the Rx buffer management with libie, clean up
&iavf_ring a bit.
There are several fields not used anywhere in the code -- simply remove
them. Move ::tail up to remove a hole. Replace the ::arm_wb boolean with
a 1-bit flag in ::flags to free 1 more byte. Finally, move ::prev_pkt_ctr
out of &iavf_tx_queue_stats -- it doesn't belong there, as it is used for
Tx stall detection rather than as a statistic. Place it next to the stats
on the ring itself to fill the 4-byte slot.
The result: no holes and all the hot fields fit into the first 64-byte
cacheline.

Signed-off-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>

Authored by Alexander Lobakin and committed by Tony Nguyen
97cadd3d e6c91556

+9 -25
+6 -6
drivers/net/ethernet/intel/iavf/iavf_txrx.c
··· 185 185 * pending work. 186 186 */ 187 187 packets = tx_ring->stats.packets & INT_MAX; 188 - if (tx_ring->tx_stats.prev_pkt_ctr == packets) { 188 + if (tx_ring->prev_pkt_ctr == packets) { 189 189 iavf_force_wb(vsi, tx_ring->q_vector); 190 190 continue; 191 191 } ··· 194 194 * to iavf_get_tx_pending() 195 195 */ 196 196 smp_rmb(); 197 - tx_ring->tx_stats.prev_pkt_ctr = 197 + tx_ring->prev_pkt_ctr = 198 198 iavf_get_tx_pending(tx_ring, true) ? packets : -1; 199 199 } 200 200 } ··· 320 320 ((j / WB_STRIDE) == 0) && (j > 0) && 321 321 !test_bit(__IAVF_VSI_DOWN, vsi->state) && 322 322 (IAVF_DESC_UNUSED(tx_ring) != tx_ring->count)) 323 - tx_ring->arm_wb = true; 323 + tx_ring->flags |= IAVF_TXR_FLAGS_ARM_WB; 324 324 } 325 325 326 326 /* notify netdev of completed buffers */ ··· 675 675 676 676 tx_ring->next_to_use = 0; 677 677 tx_ring->next_to_clean = 0; 678 - tx_ring->tx_stats.prev_pkt_ctr = -1; 678 + tx_ring->prev_pkt_ctr = -1; 679 679 return 0; 680 680 681 681 err: ··· 1491 1491 clean_complete = false; 1492 1492 continue; 1493 1493 } 1494 - arm_wb |= ring->arm_wb; 1495 - ring->arm_wb = false; 1494 + arm_wb |= !!(ring->flags & IAVF_TXR_FLAGS_ARM_WB); 1495 + ring->flags &= ~IAVF_TXR_FLAGS_ARM_WB; 1496 1496 } 1497 1497 1498 1498 /* Handle case where we are called by netpoll with a budget of 0 */
+3 -19
drivers/net/ethernet/intel/iavf/iavf_txrx.h
··· 227 227 u64 tx_done_old; 228 228 u64 tx_linearize; 229 229 u64 tx_force_wb; 230 - int prev_pkt_ctr; 231 230 u64 tx_lost_interrupt; 232 231 }; 233 232 ··· 234 235 u64 non_eop_descs; 235 236 u64 alloc_page_failed; 236 237 u64 alloc_buff_failed; 237 - }; 238 - 239 - enum iavf_ring_state_t { 240 - __IAVF_TX_FDIR_INIT_DONE, 241 - __IAVF_TX_XPS_INIT_DONE, 242 - __IAVF_RING_STATE_NBITS /* must be last */ 243 238 }; 244 239 245 240 /* some useful defines for virtchannel interface, which ··· 257 264 struct iavf_tx_buffer *tx_bi; 258 265 struct iavf_rx_buffer *rx_bi; 259 266 }; 260 - DECLARE_BITMAP(state, __IAVF_RING_STATE_NBITS); 261 - u16 queue_index; /* Queue number of ring */ 262 - u8 dcb_tc; /* Traffic class of ring */ 263 267 u8 __iomem *tail; 268 + u16 queue_index; /* Queue number of ring */ 264 269 265 270 /* high bit set means dynamic, use accessors routines to read/write. 266 271 * hardware only supports 2us resolution for the ITR registers. ··· 268 277 u16 itr_setting; 269 278 270 279 u16 count; /* Number of descriptors */ 271 - u16 reg_idx; /* HW register index of the ring */ 272 280 273 281 /* used in interrupt processing */ 274 282 u16 next_to_use; 275 283 u16 next_to_clean; 276 284 277 - u8 atr_sample_rate; 278 - u8 atr_count; 279 - 280 - bool ring_active; /* is ring online or not */ 281 - bool arm_wb; /* do something to arm write back */ 282 - u8 packet_stride; 283 - 284 285 u16 flags; 285 286 #define IAVF_TXR_FLAGS_WB_ON_ITR BIT(0) 286 - /* BIT(1) is free, was IAVF_RXR_FLAGS_BUILD_SKB_ENABLED */ 287 + #define IAVF_TXR_FLAGS_ARM_WB BIT(1) 287 288 /* BIT(2) is free */ 288 289 #define IAVF_TXRX_FLAGS_VLAN_TAG_LOC_L2TAG1 BIT(3) 289 290 #define IAVF_TXR_FLAGS_VLAN_TAG_LOC_L2TAG2 BIT(4) ··· 289 306 struct iavf_rx_queue_stats rx_stats; 290 307 }; 291 308 309 + int prev_pkt_ctr; /* For Tx stall detection */ 292 310 unsigned int size; /* length of descriptor ring in bytes */ 293 311 dma_addr_t dma; /* physical address of ring */ 294 312