Repository: git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git (Linux kernel mirror, for testing)

Merge branch 'rework-mvneta-napi_poll-loop-for-XDP-multi-buffers'

Lorenzo Bianconi says:

====================
rework mvneta napi_poll loop for XDP multi-buffers

Rework the mvneta_rx_swbm routine in order to process all rx descriptors before
building the skb or running the XDP program attached to the interface.
Introduce xdp_get_shared_info_from_{buff,frame} utility routines to get the
skb_shared_info pointer from xdp_buff or xdp_frame.
This is a preliminary series to enable multi-buffer and jumbo frame support for
XDP, as described in [1].

[1] https://github.com/xdp-project/xdp-project/blob/master/areas/core/xdp-multi-buffer01-design.org

Changes since v1:
- rely on skb_frag_* utility routines to access page/offset/len of the xdp multi-buffer
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
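
As background for the v2 change noted above (relying on the skb_frag_* accessors), the sketch below shows how a caller could walk the fragments of a multi-buffer xdp_buff once the xdp_get_shared_info_from_buff() helper added by this series is available. The function name xdp_mb_frag_bytes() is purely illustrative and is not part of the patch; it assumes <net/xdp.h> and <linux/skbuff.h> are included.

/* Illustrative sketch only (not in the patch): sum the payload carried
 * by the fragments of a multi-buffer xdp_buff.  It assumes the
 * xdp_get_shared_info_from_buff() helper introduced by this series and
 * uses the existing skb_frag_size() accessor.
 */
static unsigned int xdp_mb_frag_bytes(struct xdp_buff *xdp)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	unsigned int i, bytes = 0;

	for (i = 0; i < sinfo->nr_frags; i++)
		bytes += skb_frag_size(&sinfo->frags[i]);

	return bytes;
}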

Diffstat (total): +138 -98

drivers/net/ethernet/marvell/mvneta.c: +123 -98
···
 	/* Index of first RX DMA descriptor to refill */
 	int first_to_refill;
 	u32 refill_num;
-
-	/* pointer to uncomplete skb buffer */
-	struct sk_buff *skb;
-	int left_size;
 };
 
 static enum cpuhp_state online_hpstate;
···
 	return i;
 }
 
+static void
+mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+		    struct xdp_buff *xdp, int sync_len, bool napi)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
+
+	page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data),
+			   sync_len, napi);
+	for (i = 0; i < sinfo->nr_frags; i++)
+		page_pool_put_full_page(rxq->page_pool,
+					skb_frag_page(&sinfo->frags[i]), napi);
+}
+
 static int
 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
 			struct xdp_frame *xdpf, bool dma_map)
···
 static int
 mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 	       struct bpf_prog *prog, struct xdp_buff *xdp,
-	       struct mvneta_stats *stats)
+	       u32 frame_sz, struct mvneta_stats *stats)
 {
-	unsigned int len, sync;
-	struct page *page;
+	unsigned int len, data_len, sync;
 	u32 ret, act;
 
 	len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
+	data_len = xdp->data_end - xdp->data;
 	act = bpf_prog_run_xdp(prog, xdp);
 
 	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
···
 
 		err = xdp_do_redirect(pp->dev, xdp, prog);
 		if (unlikely(err)) {
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
 			ret = MVNETA_XDP_DROPPED;
-			page = virt_to_head_page(xdp->data);
-			page_pool_put_page(rxq->page_pool, page, sync, true);
 		} else {
 			ret = MVNETA_XDP_REDIR;
 			stats->xdp_redirect++;
···
 	}
 	case XDP_TX:
 		ret = mvneta_xdp_xmit_back(pp, xdp);
-		if (ret != MVNETA_XDP_TX) {
-			page = virt_to_head_page(xdp->data);
-			page_pool_put_page(rxq->page_pool, page, sync, true);
-		}
+		if (ret != MVNETA_XDP_TX)
+			mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
···
 		trace_xdp_exception(pp->dev, prog, act);
 		/* fall through */
 	case XDP_DROP:
-		page = virt_to_head_page(xdp->data);
-		page_pool_put_page(rxq->page_pool, page, sync, true);
+		mvneta_xdp_put_buff(pp, rxq, xdp, sync, true);
 		ret = MVNETA_XDP_DROPPED;
 		stats->xdp_drop++;
 		break;
 	}
 
-	stats->rx_bytes += xdp->data_end - xdp->data;
+	stats->rx_bytes += frame_sz + xdp->data_end - xdp->data - data_len;
 	stats->rx_packets++;
 
 	return ret;
 }
 
-static int
+static void
 mvneta_swbm_rx_frame(struct mvneta_port *pp,
 		     struct mvneta_rx_desc *rx_desc,
 		     struct mvneta_rx_queue *rxq,
-		     struct xdp_buff *xdp,
-		     struct bpf_prog *xdp_prog,
+		     struct xdp_buff *xdp, int *size,
 		     struct page *page,
 		     struct mvneta_stats *stats)
 {
···
 	int data_len = -MVNETA_MH_SIZE, len;
 	struct net_device *dev = pp->dev;
 	enum dma_data_direction dma_dir;
-	int ret = 0;
+	struct skb_shared_info *sinfo;
 
 	if (MVNETA_SKB_SIZE(rx_desc->data_size) > PAGE_SIZE) {
 		len = MVNETA_MAX_RX_BUF_SIZE;
···
 	xdp->data_end = xdp->data + data_len;
 	xdp_set_data_meta_invalid(xdp);
 
-	if (xdp_prog) {
-		ret = mvneta_run_xdp(pp, rxq, xdp_prog, xdp, stats);
-		if (ret)
-			goto out;
-	}
+	sinfo = xdp_get_shared_info_from_buff(xdp);
+	sinfo->nr_frags = 0;
 
-	rxq->skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
-	if (unlikely(!rxq->skb)) {
-		struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
-
-		netdev_err(dev, "Can't allocate skb on queue %d\n", rxq->id);
-
-		u64_stats_update_begin(&stats->syncp);
-		stats->es.skb_alloc_error++;
-		stats->rx_dropped++;
-		u64_stats_update_end(&stats->syncp);
-
-		return -ENOMEM;
-	}
-	page_pool_release_page(rxq->page_pool, page);
-
-	skb_reserve(rxq->skb,
-		    xdp->data - xdp->data_hard_start);
-	skb_put(rxq->skb, xdp->data_end - xdp->data);
-	mvneta_rx_csum(pp, rx_desc->status, rxq->skb);
-
-	rxq->left_size = rx_desc->data_size - len;
-
-out:
+	*size = rx_desc->data_size - len;
 	rx_desc->buf_phys_addr = 0;
-
-	return ret;
 }
 
 static void
 mvneta_swbm_add_rx_fragment(struct mvneta_port *pp,
 			    struct mvneta_rx_desc *rx_desc,
 			    struct mvneta_rx_queue *rxq,
+			    struct xdp_buff *xdp, int *size,
 			    struct page *page)
 {
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 	struct net_device *dev = pp->dev;
 	enum dma_data_direction dma_dir;
 	int data_len, len;
 
-	if (rxq->left_size > MVNETA_MAX_RX_BUF_SIZE) {
+	if (*size > MVNETA_MAX_RX_BUF_SIZE) {
 		len = MVNETA_MAX_RX_BUF_SIZE;
 		data_len = len;
 	} else {
-		len = rxq->left_size;
+		len = *size;
 		data_len = len - ETH_FCS_LEN;
 	}
 	dma_dir = page_pool_get_dma_dir(rxq->page_pool);
 	dma_sync_single_for_cpu(dev->dev.parent,
 				rx_desc->buf_phys_addr,
 				len, dma_dir);
-	if (data_len > 0) {
-		/* refill descriptor with new buffer later */
-		skb_add_rx_frag(rxq->skb,
-				skb_shinfo(rxq->skb)->nr_frags,
-				page, pp->rx_offset_correction, data_len,
-				PAGE_SIZE);
+
+	if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) {
+		skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags];
+
+		skb_frag_off_set(frag, pp->rx_offset_correction);
+		skb_frag_size_set(frag, data_len);
+		__skb_frag_set_page(frag, page);
+		sinfo->nr_frags++;
+
+		rx_desc->buf_phys_addr = 0;
 	}
-	page_pool_release_page(rxq->page_pool, page);
-	rx_desc->buf_phys_addr = 0;
-	rxq->left_size -= len;
+	*size -= len;
+}
+
+static struct sk_buff *
+mvneta_swbm_build_skb(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
+		      struct xdp_buff *xdp, u32 desc_status)
+{
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i, num_frags = sinfo->nr_frags;
+	skb_frag_t frags[MAX_SKB_FRAGS];
+	struct sk_buff *skb;
+
+	memcpy(frags, sinfo->frags, sizeof(skb_frag_t) * num_frags);
+
+	skb = build_skb(xdp->data_hard_start, PAGE_SIZE);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	page_pool_release_page(rxq->page_pool, virt_to_page(xdp->data));
+
+	skb_reserve(skb, xdp->data - xdp->data_hard_start);
+	skb_put(skb, xdp->data_end - xdp->data);
+	mvneta_rx_csum(pp, desc_status, skb);
+
+	for (i = 0; i < num_frags; i++) {
+		struct page *page = skb_frag_page(&frags[i]);
+
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+				page, skb_frag_off(&frags[i]),
+				skb_frag_size(&frags[i]), PAGE_SIZE);
+		page_pool_release_page(rxq->page_pool, page);
+	}
+
+	return skb;
 }
 
 /* Main rx processing when using software buffer management */
···
 			  struct mvneta_port *pp, int budget,
 			  struct mvneta_rx_queue *rxq)
 {
-	int rx_proc = 0, rx_todo, refill;
+	int rx_proc = 0, rx_todo, refill, size = 0;
 	struct net_device *dev = pp->dev;
+	struct xdp_buff xdp_buf = {
+		.frame_sz = PAGE_SIZE,
+		.rxq = &rxq->xdp_rxq,
+	};
 	struct mvneta_stats ps = {};
 	struct bpf_prog *xdp_prog;
-	struct xdp_buff xdp_buf;
+	u32 desc_status, frame_sz;
 
 	/* Get number of received packets */
 	rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq);
 
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(pp->xdp_prog);
-	xdp_buf.rxq = &rxq->xdp_rxq;
-	xdp_buf.frame_sz = PAGE_SIZE;
 
 	/* Fairness NAPI loop */
 	while (rx_proc < budget && rx_proc < rx_todo) {
 		struct mvneta_rx_desc *rx_desc = mvneta_rxq_next_desc_get(rxq);
 		u32 rx_status, index;
+		struct sk_buff *skb;
 		struct page *page;
 
 		index = rx_desc - rxq->descs;
···
 		rxq->refill_num++;
 
 		if (rx_status & MVNETA_RXD_FIRST_DESC) {
-			int err;
-
 			/* Check errors only for FIRST descriptor */
 			if (rx_status & MVNETA_RXD_ERR_SUMMARY) {
 				mvneta_rx_error(pp, rx_desc);
-				/* leave the descriptor untouched */
-				continue;
+				goto next;
 			}
 
-			err = mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
-						   xdp_prog, page, &ps);
-			if (err)
-				continue;
+			size = rx_desc->data_size;
+			frame_sz = size - ETH_FCS_LEN;
+			desc_status = rx_desc->status;
+
+			mvneta_swbm_rx_frame(pp, rx_desc, rxq, &xdp_buf,
+					     &size, page, &ps);
 		} else {
-			if (unlikely(!rxq->skb)) {
-				pr_debug("no skb for rx_status 0x%x\n",
-					 rx_status);
+			if (unlikely(!xdp_buf.data_hard_start))
 				continue;
-			}
-			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, page);
+
+			mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf,
+						    &size, page);
 		} /* Middle or Last descriptor */
 
 		if (!(rx_status & MVNETA_RXD_LAST_DESC))
 			/* no last descriptor this time */
 			continue;
 
-		if (rxq->left_size) {
-			pr_err("get last desc, but left_size (%d) != 0\n",
-			       rxq->left_size);
-			dev_kfree_skb_any(rxq->skb);
-			rxq->left_size = 0;
-			rxq->skb = NULL;
-			continue;
+		if (size) {
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
+			goto next;
 		}
 
-		ps.rx_bytes += rxq->skb->len;
+		if (xdp_prog &&
+		    mvneta_run_xdp(pp, rxq, xdp_prog, &xdp_buf, frame_sz, &ps))
+			goto next;
+
+		skb = mvneta_swbm_build_skb(pp, rxq, &xdp_buf, desc_status);
+		if (IS_ERR(skb)) {
+			struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats);
+
+			mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
+
+			u64_stats_update_begin(&stats->syncp);
+			stats->es.skb_alloc_error++;
+			stats->rx_dropped++;
+			u64_stats_update_end(&stats->syncp);
+
+			goto next;
+		}
+
+		ps.rx_bytes += skb->len;
 		ps.rx_packets++;
 
-		/* Linux processing */
-		rxq->skb->protocol = eth_type_trans(rxq->skb, dev);
-
-		napi_gro_receive(napi, rxq->skb);
-
-		/* clean uncomplete skb pointer in queue */
-		rxq->skb = NULL;
+		skb->protocol = eth_type_trans(skb, dev);
+		napi_gro_receive(napi, skb);
+next:
+		xdp_buf.data_hard_start = NULL;
 	}
 	rcu_read_unlock();
+
+	if (xdp_buf.data_hard_start)
+		mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1, true);
 
 	if (ps.xdp_redirect)
 		xdp_do_flush_map();
···
 {
 	mvneta_rxq_drop_pkts(pp, rxq);
 
-	if (rxq->skb)
-		dev_kfree_skb_any(rxq->skb);
-
 	if (rxq->descs)
 		dma_free_coherent(pp->dev->dev.parent,
 				  rxq->size * MVNETA_DESC_ALIGNED_SIZE,
···
 	rxq->descs_phys = 0;
 	rxq->first_to_refill = 0;
 	rxq->refill_num = 0;
-	rxq->skb = NULL;
-	rxq->left_size = 0;
 }
 
 static int mvneta_txq_sw_init(struct mvneta_port *pp,
include/net/xdp.h: +15
···
 	((xdp)->data_hard_start + (xdp)->frame_sz -	\
 	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
+static inline struct skb_shared_info *
+xdp_get_shared_info_from_buff(struct xdp_buff *xdp)
+{
+	return (struct skb_shared_info *)xdp_data_hard_end(xdp);
+}
+
 struct xdp_frame {
 	void *data;
 	u16 len;
···
 	struct xdp_mem_info mem;
 	struct net_device *dev_rx; /* used by cpumap */
 };
+
+static inline struct skb_shared_info *
+xdp_get_shared_info_from_frame(struct xdp_frame *frame)
+{
+	void *data_hard_start = frame->data - frame->headroom - sizeof(*frame);
+
+	return (struct skb_shared_info *)(data_hard_start + frame->frame_sz -
+					  SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+}
 
 /* Clear kernel pointers in xdp_frame */
 static inline void xdp_scrub_frame(struct xdp_frame *frame)
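
The frame variant mirrors the buff variant: the skb_shared_info area sits at the tail of the frame, relative to the reconstructed data_hard_start. As a rough, hypothetical usage sketch (not part of this patch), a consumer turning a multi-buffer xdp_frame back into an skb could attach the fragments as below, relying only on existing skb helpers; the function name xdp_frame_frags_to_skb() is illustrative.

/* Hypothetical sketch: attach the fragments of a multi-buffer xdp_frame
 * to an already-built skb.  xdp_get_shared_info_from_frame() comes from
 * this patch; skb_add_rx_frag(), skb_frag_page(), skb_frag_off() and
 * skb_frag_size() are existing helpers.  truesize is approximated with
 * the frame size.
 */
static void xdp_frame_frags_to_skb(struct xdp_frame *xdpf, struct sk_buff *skb)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
	int i;

	for (i = 0; i < sinfo->nr_frags; i++)
		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
				skb_frag_page(&sinfo->frags[i]),
				skb_frag_off(&sinfo->frags[i]),
				skb_frag_size(&sinfo->frags[i]),
				xdpf->frame_sz);
}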