Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

i40e: Use the xsk batched rx allocation interface

Use the new xsk batched rx allocation interface for the zero-copy data
path. As the array of struct xdp_buff pointers kept by the driver is
really a ring that wraps, the allocation routine is modified to detect
a wrap and in that case call the allocation function twice. The
allocation function cannot deal with wrapped rings, only arrays. As we
now know exactly how many buffers we get and that there is no
wrapping, the allocation function can be simplified even more as all
if-statements in the allocation loop can be removed, improving
performance.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210922075613.12186-6-magnus.karlsson@gmail.com

Authored by Magnus Karlsson and committed by Daniel Borkmann
6aab0bb0 db804cfc

+26 -28
drivers/net/ethernet/intel/i40e/i40e_xsk.c
··· 193 193 { 194 194 u16 ntu = rx_ring->next_to_use; 195 195 union i40e_rx_desc *rx_desc; 196 - struct xdp_buff **bi, *xdp; 196 + struct xdp_buff **xdp; 197 + u32 nb_buffs, i; 197 198 dma_addr_t dma; 198 - bool ok = true; 199 199 200 200 rx_desc = I40E_RX_DESC(rx_ring, ntu); 201 - bi = i40e_rx_bi(rx_ring, ntu); 202 - do { 203 - xdp = xsk_buff_alloc(rx_ring->xsk_pool); 204 - if (!xdp) { 205 - ok = false; 206 - goto no_buffers; 207 - } 208 - *bi = xdp; 209 - dma = xsk_buff_xdp_get_dma(xdp); 201 + xdp = i40e_rx_bi(rx_ring, ntu); 202 + 203 + nb_buffs = min_t(u16, count, rx_ring->count - ntu); 204 + nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); 205 + if (!nb_buffs) 206 + return false; 207 + 208 + i = nb_buffs; 209 + while (i--) { 210 + dma = xsk_buff_xdp_get_dma(*xdp); 210 211 rx_desc->read.pkt_addr = cpu_to_le64(dma); 211 212 rx_desc->read.hdr_addr = 0; 212 213 213 214 rx_desc++; 214 - bi++; 215 - ntu++; 216 - 217 - if (unlikely(ntu == rx_ring->count)) { 218 - rx_desc = I40E_RX_DESC(rx_ring, 0); 219 - bi = i40e_rx_bi(rx_ring, 0); 220 - ntu = 0; 221 - } 222 - } while (--count); 223 - 224 - no_buffers: 225 - if (rx_ring->next_to_use != ntu) { 226 - /* clear the status bits for the next_to_use descriptor */ 227 - rx_desc->wb.qword1.status_error_len = 0; 228 - i40e_release_rx_desc(rx_ring, ntu); 215 + xdp++; 229 216 } 230 217 231 - return ok; 218 + ntu += nb_buffs; 219 + if (ntu == rx_ring->count) { 220 + rx_desc = I40E_RX_DESC(rx_ring, 0); 221 + xdp = i40e_rx_bi(rx_ring, 0); 222 + ntu = 0; 223 + } 224 + 225 + /* clear the status bits for the next_to_use descriptor */ 226 + rx_desc->wb.qword1.status_error_len = 0; 227 + i40e_release_rx_desc(rx_ring, ntu); 228 + 229 + return count == nb_buffs ? 
true : false; 232 230 } 233 231 234 232 /** ··· 363 365 break; 364 366 365 367 bi = *i40e_rx_bi(rx_ring, next_to_clean); 366 - bi->data_end = bi->data + size; 368 + xsk_buff_set_size(bi, size); 367 369 xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool); 368 370 369 371 xdp_res = i40e_run_xdp_zc(rx_ring, bi);