Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2022-01-24

We've added 80 non-merge commits during the last 14 day(s) which contain
a total of 128 files changed, 4990 insertions(+), 895 deletions(-).

The main changes are:

1) Add XDP multi-buffer support and implement it for the mvneta driver,
from Lorenzo Bianconi, Eelco Chaudron and Toke Høiland-Jørgensen.

2) Add unstable conntrack lookup helpers for BPF by using the BPF kfunc
infra, from Kumar Kartikeya Dwivedi.

3) Extend BPF cgroup programs to export custom ret value to userspace via
two helpers bpf_get_retval() and bpf_set_retval(), from YiFei Zhu.

4) Add support for AF_UNIX iterator batching, from Kuniyuki Iwashima.

5) Complete missing UAPI BPF helper description and change bpf_doc.py script
to enforce consistent & complete helper documentation, from Usama Arif.

6) Deprecate libbpf's legacy BPF map definitions and streamline XDP APIs to
follow tc-based APIs, from Andrii Nakryiko.

7) Support BPF_PROG_QUERY for BPF programs attached to sockmap, from Di Zhu.

8) Deprecate libbpf's bpf_map__def() API and replace users with proper getters
and setters, from Christy Lee.

9) Extend libbpf's btf__add_btf() with an additional hashmap for strings to
reduce overhead, from Kui-Feng Lee.

10) Fix bpftool and libbpf error handling related to libbpf's hashmap__new()
utility function, from Mauricio Vásquez.

11) Add support for BTF program names in bpftool's program dump, from Raman Shukhau.

12) Fix resolve_btfids build to pick up host flags, from Connor O'Brien.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (80 commits)
selftests, bpf: Do not yet switch to new libbpf XDP APIs
selftests, xsk: Fix rx_full stats test
bpf: Fix flexible_array.cocci warnings
xdp: disable XDP_REDIRECT for xdp frags
bpf: selftests: add CPUMAP/DEVMAP selftests for xdp frags
bpf: selftests: introduce bpf_xdp_{load,store}_bytes selftest
net: xdp: introduce bpf_xdp_pointer utility routine
bpf: generalise tail call map compatibility check
libbpf: Add SEC name for xdp frags programs
bpf: selftests: update xdp_adjust_tail selftest to include xdp frags
bpf: test_run: add xdp_shared_info pointer in bpf_test_finish signature
bpf: introduce frags support to bpf_prog_test_run_xdp()
bpf: move user_size out of bpf_test_init
bpf: add frags support to xdp copy helpers
bpf: add frags support to the bpf_xdp_adjust_tail() API
bpf: introduce bpf_xdp_get_buff_len helper
net: mvneta: enable jumbo frames if the loaded XDP program support frags
bpf: introduce BPF_F_XDP_HAS_FRAGS flag in prog_flags loading the ebpf program
net: mvneta: add frags support to XDP_TX
xdp: add frags support to xdp_return_{buff/frame}
...
====================

Link: https://lore.kernel.org/r/20220124221235.18993-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+5000 -905
+14 -18
Documentation/bpf/btf.rst
··· 565 565 In libbpf, the map can be defined with extra annotation like below: 566 566 :: 567 567 568 - struct bpf_map_def SEC("maps") btf_map = { 569 - .type = BPF_MAP_TYPE_ARRAY, 570 - .key_size = sizeof(int), 571 - .value_size = sizeof(struct ipv_counts), 572 - .max_entries = 4, 573 - }; 574 - BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); 568 + struct { 569 + __uint(type, BPF_MAP_TYPE_ARRAY); 570 + __type(key, int); 571 + __type(value, struct ipv_counts); 572 + __uint(max_entries, 4); 573 + } btf_map SEC(".maps"); 575 574 576 - Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and 577 - value types for the map. During ELF parsing, libbpf is able to extract 578 - key/value type_id's and assign them to BPF_MAP_CREATE attributes 579 - automatically. 575 + During ELF parsing, libbpf is able to extract key/value type_id's and assign 576 + them to BPF_MAP_CREATE attributes automatically. 580 577 581 578 .. _BPF_Prog_Load: 582 579 ··· 821 824 ___A b1:4; 822 825 enum A b2:4; 823 826 }; 824 - struct bpf_map_def SEC("maps") tmpmap = { 825 - .type = BPF_MAP_TYPE_ARRAY, 826 - .key_size = sizeof(__u32), 827 - .value_size = sizeof(struct tmp_t), 828 - .max_entries = 1, 829 - }; 830 - BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t); 827 + struct { 828 + __uint(type, BPF_MAP_TYPE_ARRAY); 829 + __type(key, int); 830 + __type(value, struct tmp_t); 831 + __uint(max_entries, 1); 832 + } tmpmap SEC(".maps"); 831 833 832 834 bpftool is able to pretty print like below: 833 835 ::
+124 -80
drivers/net/ethernet/marvell/mvneta.c
··· 1884 1884 bytes_compl += buf->skb->len; 1885 1885 pkts_compl++; 1886 1886 dev_kfree_skb_any(buf->skb); 1887 - } else if (buf->type == MVNETA_TYPE_XDP_TX || 1888 - buf->type == MVNETA_TYPE_XDP_NDO) { 1887 + } else if ((buf->type == MVNETA_TYPE_XDP_TX || 1888 + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { 1889 1889 if (napi && buf->type == MVNETA_TYPE_XDP_TX) 1890 1890 xdp_return_frame_rx_napi(buf->xdpf); 1891 1891 else ··· 2060 2060 2061 2061 static void 2062 2062 mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, 2063 - struct xdp_buff *xdp, struct skb_shared_info *sinfo, 2064 - int sync_len) 2063 + struct xdp_buff *xdp, int sync_len) 2065 2064 { 2065 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2066 2066 int i; 2067 + 2068 + if (likely(!xdp_buff_has_frags(xdp))) 2069 + goto out; 2067 2070 2068 2071 for (i = 0; i < sinfo->nr_frags; i++) 2069 2072 page_pool_put_full_page(rxq->page_pool, 2070 2073 skb_frag_page(&sinfo->frags[i]), true); 2074 + 2075 + out: 2071 2076 page_pool_put_page(rxq->page_pool, virt_to_head_page(xdp->data), 2072 2077 sync_len, true); 2073 2078 } 2074 2079 2075 2080 static int 2076 2081 mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, 2077 - struct xdp_frame *xdpf, bool dma_map) 2082 + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) 2078 2083 { 2079 - struct mvneta_tx_desc *tx_desc; 2080 - struct mvneta_tx_buf *buf; 2081 - dma_addr_t dma_addr; 2084 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 2085 + struct device *dev = pp->dev->dev.parent; 2086 + struct mvneta_tx_desc *tx_desc = NULL; 2087 + int i, num_frames = 1; 2088 + struct page *page; 2082 2089 2083 - if (txq->count >= txq->tx_stop_threshold) 2090 + if (unlikely(xdp_frame_has_frags(xdpf))) 2091 + num_frames += sinfo->nr_frags; 2092 + 2093 + if (txq->count + num_frames >= txq->size) 2084 2094 return MVNETA_XDP_DROPPED; 2085 2095 2086 - tx_desc = mvneta_txq_next_desc_get(txq); 
2096 + for (i = 0; i < num_frames; i++) { 2097 + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; 2098 + skb_frag_t *frag = NULL; 2099 + int len = xdpf->len; 2100 + dma_addr_t dma_addr; 2087 2101 2088 - buf = &txq->buf[txq->txq_put_index]; 2089 - if (dma_map) { 2090 - /* ndo_xdp_xmit */ 2091 - dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data, 2092 - xdpf->len, DMA_TO_DEVICE); 2093 - if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { 2094 - mvneta_txq_desc_put(txq); 2095 - return MVNETA_XDP_DROPPED; 2102 + if (unlikely(i)) { /* paged area */ 2103 + frag = &sinfo->frags[i - 1]; 2104 + len = skb_frag_size(frag); 2096 2105 } 2097 - buf->type = MVNETA_TYPE_XDP_NDO; 2098 - } else { 2099 - struct page *page = virt_to_page(xdpf->data); 2100 2106 2101 - dma_addr = page_pool_get_dma_addr(page) + 2102 - sizeof(*xdpf) + xdpf->headroom; 2103 - dma_sync_single_for_device(pp->dev->dev.parent, dma_addr, 2104 - xdpf->len, DMA_BIDIRECTIONAL); 2105 - buf->type = MVNETA_TYPE_XDP_TX; 2107 + tx_desc = mvneta_txq_next_desc_get(txq); 2108 + if (dma_map) { 2109 + /* ndo_xdp_xmit */ 2110 + void *data; 2111 + 2112 + data = unlikely(frag) ? skb_frag_address(frag) 2113 + : xdpf->data; 2114 + dma_addr = dma_map_single(dev, data, len, 2115 + DMA_TO_DEVICE); 2116 + if (dma_mapping_error(dev, dma_addr)) { 2117 + mvneta_txq_desc_put(txq); 2118 + goto unmap; 2119 + } 2120 + 2121 + buf->type = MVNETA_TYPE_XDP_NDO; 2122 + } else { 2123 + page = unlikely(frag) ? skb_frag_page(frag) 2124 + : virt_to_page(xdpf->data); 2125 + dma_addr = page_pool_get_dma_addr(page); 2126 + if (unlikely(frag)) 2127 + dma_addr += skb_frag_off(frag); 2128 + else 2129 + dma_addr += sizeof(*xdpf) + xdpf->headroom; 2130 + dma_sync_single_for_device(dev, dma_addr, len, 2131 + DMA_BIDIRECTIONAL); 2132 + buf->type = MVNETA_TYPE_XDP_TX; 2133 + } 2134 + buf->xdpf = unlikely(i) ? NULL : xdpf; 2135 + 2136 + tx_desc->command = unlikely(i) ? 
0 : MVNETA_TXD_F_DESC; 2137 + tx_desc->buf_phys_addr = dma_addr; 2138 + tx_desc->data_size = len; 2139 + *nxmit_byte += len; 2140 + 2141 + mvneta_txq_inc_put(txq); 2106 2142 } 2107 - buf->xdpf = xdpf; 2108 2143 2109 - tx_desc->command = MVNETA_TXD_FLZ_DESC; 2110 - tx_desc->buf_phys_addr = dma_addr; 2111 - tx_desc->data_size = xdpf->len; 2144 + /*last descriptor */ 2145 + if (likely(tx_desc)) 2146 + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; 2112 2147 2113 - mvneta_txq_inc_put(txq); 2114 - txq->pending++; 2115 - txq->count++; 2148 + txq->pending += num_frames; 2149 + txq->count += num_frames; 2116 2150 2117 2151 return MVNETA_XDP_TX; 2152 + 2153 + unmap: 2154 + for (i--; i >= 0; i--) { 2155 + mvneta_txq_desc_put(txq); 2156 + tx_desc = txq->descs + txq->next_desc_to_proc; 2157 + dma_unmap_single(dev, tx_desc->buf_phys_addr, 2158 + tx_desc->data_size, 2159 + DMA_TO_DEVICE); 2160 + } 2161 + 2162 + return MVNETA_XDP_DROPPED; 2118 2163 } 2119 2164 2120 2165 static int ··· 2168 2123 struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); 2169 2124 struct mvneta_tx_queue *txq; 2170 2125 struct netdev_queue *nq; 2126 + int cpu, nxmit_byte = 0; 2171 2127 struct xdp_frame *xdpf; 2172 - int cpu; 2173 2128 u32 ret; 2174 2129 2175 2130 xdpf = xdp_convert_buff_to_frame(xdp); ··· 2181 2136 nq = netdev_get_tx_queue(pp->dev, txq->id); 2182 2137 2183 2138 __netif_tx_lock(nq, cpu); 2184 - ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); 2139 + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); 2185 2140 if (ret == MVNETA_XDP_TX) { 2186 2141 u64_stats_update_begin(&stats->syncp); 2187 - stats->es.ps.tx_bytes += xdpf->len; 2142 + stats->es.ps.tx_bytes += nxmit_byte; 2188 2143 stats->es.ps.tx_packets++; 2189 2144 stats->es.ps.xdp_tx++; 2190 2145 u64_stats_update_end(&stats->syncp); ··· 2223 2178 2224 2179 __netif_tx_lock(nq, cpu); 2225 2180 for (i = 0; i < num_frame; i++) { 2226 - ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); 2181 + ret = 
mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, 2182 + true); 2227 2183 if (ret != MVNETA_XDP_TX) 2228 2184 break; 2229 2185 2230 - nxmit_byte += frames[i]->len; 2231 2186 nxmit++; 2232 2187 } 2233 2188 ··· 2250 2205 struct bpf_prog *prog, struct xdp_buff *xdp, 2251 2206 u32 frame_sz, struct mvneta_stats *stats) 2252 2207 { 2253 - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2254 2208 unsigned int len, data_len, sync; 2255 2209 u32 ret, act; 2256 2210 ··· 2270 2226 2271 2227 err = xdp_do_redirect(pp->dev, xdp, prog); 2272 2228 if (unlikely(err)) { 2273 - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); 2229 + mvneta_xdp_put_buff(pp, rxq, xdp, sync); 2274 2230 ret = MVNETA_XDP_DROPPED; 2275 2231 } else { 2276 2232 ret = MVNETA_XDP_REDIR; ··· 2281 2237 case XDP_TX: 2282 2238 ret = mvneta_xdp_xmit_back(pp, xdp); 2283 2239 if (ret != MVNETA_XDP_TX) 2284 - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); 2240 + mvneta_xdp_put_buff(pp, rxq, xdp, sync); 2285 2241 break; 2286 2242 default: 2287 2243 bpf_warn_invalid_xdp_action(pp->dev, prog, act); ··· 2290 2246 trace_xdp_exception(pp->dev, prog, act); 2291 2247 fallthrough; 2292 2248 case XDP_DROP: 2293 - mvneta_xdp_put_buff(pp, rxq, xdp, sinfo, sync); 2249 + mvneta_xdp_put_buff(pp, rxq, xdp, sync); 2294 2250 ret = MVNETA_XDP_DROPPED; 2295 2251 stats->xdp_drop++; 2296 2252 break; ··· 2313 2269 int data_len = -MVNETA_MH_SIZE, len; 2314 2270 struct net_device *dev = pp->dev; 2315 2271 enum dma_data_direction dma_dir; 2316 - struct skb_shared_info *sinfo; 2317 2272 2318 2273 if (*size > MVNETA_MAX_RX_BUF_SIZE) { 2319 2274 len = MVNETA_MAX_RX_BUF_SIZE; ··· 2332 2289 2333 2290 /* Prefetch header */ 2334 2291 prefetch(data); 2292 + xdp_buff_clear_frags_flag(xdp); 2335 2293 xdp_prepare_buff(xdp, data, pp->rx_offset_correction + MVNETA_MH_SIZE, 2336 2294 data_len, false); 2337 - 2338 - sinfo = xdp_get_shared_info_from_buff(xdp); 2339 - sinfo->nr_frags = 0; 2340 2295 } 2341 2296 2342 2297 static void 
··· 2342 2301 struct mvneta_rx_desc *rx_desc, 2343 2302 struct mvneta_rx_queue *rxq, 2344 2303 struct xdp_buff *xdp, int *size, 2345 - struct skb_shared_info *xdp_sinfo, 2346 2304 struct page *page) 2347 2305 { 2306 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2348 2307 struct net_device *dev = pp->dev; 2349 2308 enum dma_data_direction dma_dir; 2350 2309 int data_len, len; ··· 2362 2321 len, dma_dir); 2363 2322 rx_desc->buf_phys_addr = 0; 2364 2323 2365 - if (data_len > 0 && xdp_sinfo->nr_frags < MAX_SKB_FRAGS) { 2366 - skb_frag_t *frag = &xdp_sinfo->frags[xdp_sinfo->nr_frags++]; 2324 + if (!xdp_buff_has_frags(xdp)) 2325 + sinfo->nr_frags = 0; 2326 + 2327 + if (data_len > 0 && sinfo->nr_frags < MAX_SKB_FRAGS) { 2328 + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags++]; 2367 2329 2368 2330 skb_frag_off_set(frag, pp->rx_offset_correction); 2369 2331 skb_frag_size_set(frag, data_len); 2370 2332 __skb_frag_set_page(frag, page); 2333 + 2334 + if (!xdp_buff_has_frags(xdp)) { 2335 + sinfo->xdp_frags_size = *size; 2336 + xdp_buff_set_frags_flag(xdp); 2337 + } 2338 + if (page_is_pfmemalloc(page)) 2339 + xdp_buff_set_frag_pfmemalloc(xdp); 2371 2340 } else { 2372 2341 page_pool_put_full_page(rxq->page_pool, page, true); 2373 - } 2374 - 2375 - /* last fragment */ 2376 - if (len == *size) { 2377 - struct skb_shared_info *sinfo; 2378 - 2379 - sinfo = xdp_get_shared_info_from_buff(xdp); 2380 - sinfo->nr_frags = xdp_sinfo->nr_frags; 2381 - memcpy(sinfo->frags, xdp_sinfo->frags, 2382 - sinfo->nr_frags * sizeof(skb_frag_t)); 2383 2342 } 2384 2343 *size -= len; 2385 2344 } ··· 2389 2348 struct xdp_buff *xdp, u32 desc_status) 2390 2349 { 2391 2350 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 2392 - int i, num_frags = sinfo->nr_frags; 2393 2351 struct sk_buff *skb; 2352 + u8 num_frags; 2353 + 2354 + if (unlikely(xdp_buff_has_frags(xdp))) 2355 + num_frags = sinfo->nr_frags; 2394 2356 2395 2357 skb = build_skb(xdp->data_hard_start, 
PAGE_SIZE); 2396 2358 if (!skb) ··· 2405 2361 skb_put(skb, xdp->data_end - xdp->data); 2406 2362 skb->ip_summed = mvneta_rx_csum(pp, desc_status); 2407 2363 2408 - for (i = 0; i < num_frags; i++) { 2409 - skb_frag_t *frag = &sinfo->frags[i]; 2410 - 2411 - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, 2412 - skb_frag_page(frag), skb_frag_off(frag), 2413 - skb_frag_size(frag), PAGE_SIZE); 2414 - } 2364 + if (unlikely(xdp_buff_has_frags(xdp))) 2365 + xdp_update_skb_shared_info(skb, num_frags, 2366 + sinfo->xdp_frags_size, 2367 + num_frags * xdp->frame_sz, 2368 + xdp_buff_is_frag_pfmemalloc(xdp)); 2415 2369 2416 2370 return skb; 2417 2371 } ··· 2421 2379 { 2422 2380 int rx_proc = 0, rx_todo, refill, size = 0; 2423 2381 struct net_device *dev = pp->dev; 2424 - struct skb_shared_info sinfo; 2425 2382 struct mvneta_stats ps = {}; 2426 2383 struct bpf_prog *xdp_prog; 2427 2384 u32 desc_status, frame_sz; ··· 2428 2387 2429 2388 xdp_init_buff(&xdp_buf, PAGE_SIZE, &rxq->xdp_rxq); 2430 2389 xdp_buf.data_hard_start = NULL; 2431 - 2432 - sinfo.nr_frags = 0; 2433 2390 2434 2391 /* Get number of received packets */ 2435 2392 rx_todo = mvneta_rxq_busy_desc_num_get(pp, rxq); ··· 2470 2431 } 2471 2432 2472 2433 mvneta_swbm_add_rx_fragment(pp, rx_desc, rxq, &xdp_buf, 2473 - &size, &sinfo, page); 2434 + &size, page); 2474 2435 } /* Middle or Last descriptor */ 2475 2436 2476 2437 if (!(rx_status & MVNETA_RXD_LAST_DESC)) ··· 2478 2439 continue; 2479 2440 2480 2441 if (size) { 2481 - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); 2442 + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); 2482 2443 goto next; 2483 2444 } 2484 2445 ··· 2490 2451 if (IS_ERR(skb)) { 2491 2452 struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); 2492 2453 2493 - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); 2454 + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); 2494 2455 2495 2456 u64_stats_update_begin(&stats->syncp); 2496 2457 stats->es.skb_alloc_error++; ··· 2507 2468 napi_gro_receive(napi, skb); 
2508 2469 next: 2509 2470 xdp_buf.data_hard_start = NULL; 2510 - sinfo.nr_frags = 0; 2511 2471 } 2512 2472 2513 2473 if (xdp_buf.data_hard_start) 2514 - mvneta_xdp_put_buff(pp, rxq, &xdp_buf, &sinfo, -1); 2474 + mvneta_xdp_put_buff(pp, rxq, &xdp_buf, -1); 2515 2475 2516 2476 if (ps.xdp_redirect) 2517 2477 xdp_do_flush_map(); ··· 3298 3260 return err; 3299 3261 } 3300 3262 3301 - err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0); 3263 + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0, 3264 + PAGE_SIZE); 3302 3265 if (err < 0) 3303 3266 goto err_free_pp; 3304 3267 ··· 3779 3740 static int mvneta_change_mtu(struct net_device *dev, int mtu) 3780 3741 { 3781 3742 struct mvneta_port *pp = netdev_priv(dev); 3743 + struct bpf_prog *prog = pp->xdp_prog; 3782 3744 int ret; 3783 3745 3784 3746 if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) { ··· 3788 3748 mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8); 3789 3749 } 3790 3750 3791 - if (pp->xdp_prog && mtu > MVNETA_MAX_RX_BUF_SIZE) { 3792 - netdev_info(dev, "Illegal MTU value %d for XDP mode\n", mtu); 3751 + if (prog && !prog->aux->xdp_has_frags && 3752 + mtu > MVNETA_MAX_RX_BUF_SIZE) { 3753 + netdev_info(dev, "Illegal MTU %d for XDP prog without frags\n", 3754 + mtu); 3755 + 3793 3756 return -EINVAL; 3794 3757 } 3795 3758 ··· 4533 4490 struct mvneta_port *pp = netdev_priv(dev); 4534 4491 struct bpf_prog *old_prog; 4535 4492 4536 - if (prog && dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { 4537 - NL_SET_ERR_MSG_MOD(extack, "MTU too large for XDP"); 4493 + if (prog && !prog->aux->xdp_has_frags && 4494 + dev->mtu > MVNETA_MAX_RX_BUF_SIZE) { 4495 + NL_SET_ERR_MSG_MOD(extack, "prog does not support XDP frags"); 4538 4496 return -EOPNOTSUPP; 4539 4497 } 4540 4498
+49 -33
include/linux/bpf.h
··· 194 194 struct work_struct work; 195 195 struct mutex freeze_mutex; 196 196 atomic64_t writecnt; 197 + /* 'Ownership' of program-containing map is claimed by the first program 198 + * that is going to use this map or by the first program which FD is 199 + * stored in the map to make sure that all callers and callees have the 200 + * same prog type, JITed flag and xdp_has_frags flag. 201 + */ 202 + struct { 203 + spinlock_t lock; 204 + enum bpf_prog_type type; 205 + bool jited; 206 + bool xdp_has_frags; 207 + } owner; 197 208 }; 198 209 199 210 static inline bool map_value_has_spin_lock(const struct bpf_map *map) ··· 589 578 const struct btf_type *t, int off, int size, 590 579 enum bpf_access_type atype, 591 580 u32 *next_btf_id); 592 - bool (*check_kfunc_call)(u32 kfunc_btf_id, struct module *owner); 593 581 }; 594 582 595 583 struct bpf_prog_offload_ops { ··· 949 939 bool func_proto_unreliable; 950 940 bool sleepable; 951 941 bool tail_call_reachable; 942 + bool xdp_has_frags; 952 943 struct hlist_node tramp_hlist; 953 944 /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ 954 945 const struct btf_type *attach_func_proto; ··· 1010 999 }; 1011 1000 1012 1001 struct bpf_array_aux { 1013 - /* 'Ownership' of prog array is claimed by the first program that 1014 - * is going to use this map or by the first program which FD is 1015 - * stored in the map to make sure that all callers and callees have 1016 - * the same prog type and JITed flag. 1017 - */ 1018 - struct { 1019 - spinlock_t lock; 1020 - enum bpf_prog_type type; 1021 - bool jited; 1022 - } owner; 1023 1002 /* Programs with direct jumps into programs part of this array. 
*/ 1024 1003 struct list_head poke_progs; 1025 1004 struct bpf_map *map; ··· 1184 1183 struct rcu_head rcu; 1185 1184 }; 1186 1185 1187 - bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); 1186 + static inline bool map_type_contains_progs(struct bpf_map *map) 1187 + { 1188 + return map->map_type == BPF_MAP_TYPE_PROG_ARRAY || 1189 + map->map_type == BPF_MAP_TYPE_DEVMAP || 1190 + map->map_type == BPF_MAP_TYPE_CPUMAP; 1191 + } 1192 + 1193 + bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp); 1188 1194 int bpf_prog_calc_tag(struct bpf_prog *fp); 1189 1195 1190 1196 const struct bpf_func_proto *bpf_get_trace_printk_proto(void); ··· 1259 1251 struct bpf_cg_run_ctx { 1260 1252 struct bpf_run_ctx run_ctx; 1261 1253 const struct bpf_prog_array_item *prog_item; 1254 + int retval; 1262 1255 }; 1263 1256 1264 1257 struct bpf_trace_run_ctx { ··· 1292 1283 1293 1284 typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx); 1294 1285 1295 - static __always_inline u32 1286 + static __always_inline int 1296 1287 BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu, 1297 1288 const void *ctx, bpf_prog_run_fn run_prog, 1298 - u32 *ret_flags) 1289 + int retval, u32 *ret_flags) 1299 1290 { 1300 1291 const struct bpf_prog_array_item *item; 1301 1292 const struct bpf_prog *prog; 1302 1293 const struct bpf_prog_array *array; 1303 1294 struct bpf_run_ctx *old_run_ctx; 1304 1295 struct bpf_cg_run_ctx run_ctx; 1305 - u32 ret = 1; 1306 1296 u32 func_ret; 1307 1297 1298 + run_ctx.retval = retval; 1308 1299 migrate_disable(); 1309 1300 rcu_read_lock(); 1310 1301 array = rcu_dereference(array_rcu); ··· 1313 1304 while ((prog = READ_ONCE(item->prog))) { 1314 1305 run_ctx.prog_item = item; 1315 1306 func_ret = run_prog(prog, ctx); 1316 - ret &= (func_ret & 1); 1307 + if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval)) 1308 + run_ctx.retval = -EPERM; 1317 1309 *(ret_flags) |= (func_ret >> 1); 
1318 1310 item++; 1319 1311 } 1320 1312 bpf_reset_run_ctx(old_run_ctx); 1321 1313 rcu_read_unlock(); 1322 1314 migrate_enable(); 1323 - return ret; 1315 + return run_ctx.retval; 1324 1316 } 1325 1317 1326 - static __always_inline u32 1318 + static __always_inline int 1327 1319 BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu, 1328 - const void *ctx, bpf_prog_run_fn run_prog) 1320 + const void *ctx, bpf_prog_run_fn run_prog, 1321 + int retval) 1329 1322 { 1330 1323 const struct bpf_prog_array_item *item; 1331 1324 const struct bpf_prog *prog; 1332 1325 const struct bpf_prog_array *array; 1333 1326 struct bpf_run_ctx *old_run_ctx; 1334 1327 struct bpf_cg_run_ctx run_ctx; 1335 - u32 ret = 1; 1336 1328 1329 + run_ctx.retval = retval; 1337 1330 migrate_disable(); 1338 1331 rcu_read_lock(); 1339 1332 array = rcu_dereference(array_rcu); ··· 1343 1332 old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 1344 1333 while ((prog = READ_ONCE(item->prog))) { 1345 1334 run_ctx.prog_item = item; 1346 - ret &= run_prog(prog, ctx); 1335 + if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval)) 1336 + run_ctx.retval = -EPERM; 1347 1337 item++; 1348 1338 } 1349 1339 bpf_reset_run_ctx(old_run_ctx); 1350 1340 rcu_read_unlock(); 1351 1341 migrate_enable(); 1352 - return ret; 1342 + return run_ctx.retval; 1353 1343 } 1354 1344 1355 1345 static __always_inline u32 ··· 1403 1391 * 0: NET_XMIT_SUCCESS skb should be transmitted 1404 1392 * 1: NET_XMIT_DROP skb should be dropped and cn 1405 1393 * 2: NET_XMIT_CN skb should be transmitted and cn 1406 - * 3: -EPERM skb should be dropped 1394 + * 3: -err skb should be dropped 1407 1395 */ 1408 1396 #define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \ 1409 1397 ({ \ 1410 1398 u32 _flags = 0; \ 1411 1399 bool _cn; \ 1412 1400 u32 _ret; \ 1413 - _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \ 1401 + _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \ 1414 1402 _cn = _flags & 
BPF_RET_SET_CN; \ 1415 - if (_ret) \ 1403 + if (_ret && !IS_ERR_VALUE((long)_ret)) \ 1404 + _ret = -EFAULT; \ 1405 + if (!_ret) \ 1416 1406 _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \ 1417 1407 else \ 1418 - _ret = (_cn ? NET_XMIT_DROP : -EPERM); \ 1408 + _ret = (_cn ? NET_XMIT_DROP : _ret); \ 1419 1409 _ret; \ 1420 1410 }) 1421 1411 ··· 1738 1724 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, 1739 1725 const union bpf_attr *kattr, 1740 1726 union bpf_attr __user *uattr); 1741 - bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner); 1742 1727 bool btf_ctx_access(int off, int size, enum bpf_access_type type, 1743 1728 const struct bpf_prog *prog, 1744 1729 struct bpf_insn_access_aux *info); ··· 1989 1976 return -ENOTSUPP; 1990 1977 } 1991 1978 1992 - static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, 1993 - struct module *owner) 1994 - { 1995 - return false; 1996 - } 1997 - 1998 1979 static inline void bpf_map_put(struct bpf_map *map) 1999 1980 { 2000 1981 } ··· 2083 2076 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); 2084 2077 int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); 2085 2078 int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); 2079 + int sock_map_bpf_prog_query(const union bpf_attr *attr, 2080 + union bpf_attr __user *uattr); 2081 + 2086 2082 void sock_map_unhash(struct sock *sk); 2087 2083 void sock_map_close(struct sock *sk, long timeout); 2088 2084 #else ··· 2138 2128 u64 flags) 2139 2129 { 2140 2130 return -EOPNOTSUPP; 2131 + } 2132 + 2133 + static inline int sock_map_bpf_prog_query(const union bpf_attr *attr, 2134 + union bpf_attr __user *uattr) 2135 + { 2136 + return -EINVAL; 2141 2137 } 2142 2138 #endif /* CONFIG_BPF_SYSCALL */ 2143 2139 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+7
include/linux/bpf_verifier.h
··· 521 521 522 522 int check_ptr_off_reg(struct bpf_verifier_env *env, 523 523 const struct bpf_reg_state *reg, int regno); 524 + int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 525 + u32 regno); 524 526 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 525 527 u32 regno, u32 mem_size); 526 528 ··· 564 562 static inline u32 type_flag(u32 type) 565 563 { 566 564 return type & ~BPF_BASE_TYPE_MASK; 565 + } 566 + 567 + static inline enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) 568 + { 569 + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; 567 570 } 568 571 569 572 #endif /* _LINUX_BPF_VERIFIER_H */
+36 -41
include/linux/btf.h
··· 12 12 #define BTF_TYPE_EMIT(type) ((void)(type *)0) 13 13 #define BTF_TYPE_EMIT_ENUM(enum_val) ((void)enum_val) 14 14 15 + enum btf_kfunc_type { 16 + BTF_KFUNC_TYPE_CHECK, 17 + BTF_KFUNC_TYPE_ACQUIRE, 18 + BTF_KFUNC_TYPE_RELEASE, 19 + BTF_KFUNC_TYPE_RET_NULL, 20 + BTF_KFUNC_TYPE_MAX, 21 + }; 22 + 15 23 struct btf; 16 24 struct btf_member; 17 25 struct btf_type; 18 26 union bpf_attr; 19 27 struct btf_show; 28 + struct btf_id_set; 29 + 30 + struct btf_kfunc_id_set { 31 + struct module *owner; 32 + union { 33 + struct { 34 + struct btf_id_set *check_set; 35 + struct btf_id_set *acquire_set; 36 + struct btf_id_set *release_set; 37 + struct btf_id_set *ret_null_set; 38 + }; 39 + struct btf_id_set *sets[BTF_KFUNC_TYPE_MAX]; 40 + }; 41 + }; 20 42 21 43 extern const struct file_operations btf_fops; 22 44 ··· 329 307 const char *btf_name_by_offset(const struct btf *btf, u32 offset); 330 308 struct btf *btf_parse_vmlinux(void); 331 309 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); 310 + bool btf_kfunc_id_set_contains(const struct btf *btf, 311 + enum bpf_prog_type prog_type, 312 + enum btf_kfunc_type type, u32 kfunc_btf_id); 313 + int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 314 + const struct btf_kfunc_id_set *s); 332 315 #else 333 316 static inline const struct btf_type *btf_type_by_id(const struct btf *btf, 334 317 u32 type_id) ··· 345 318 { 346 319 return NULL; 347 320 } 348 - #endif 349 - 350 - struct kfunc_btf_id_set { 351 - struct list_head list; 352 - struct btf_id_set *set; 353 - struct module *owner; 354 - }; 355 - 356 - struct kfunc_btf_id_list { 357 - struct list_head list; 358 - struct mutex mutex; 359 - }; 360 - 361 - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 362 - void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 363 - struct kfunc_btf_id_set *s); 364 - void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 365 - struct kfunc_btf_id_set *s); 366 - bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, 
u32 kfunc_id, 367 - struct module *owner); 368 - 369 - extern struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list; 370 - extern struct kfunc_btf_id_list prog_test_kfunc_list; 371 - #else 372 - static inline void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 373 - struct kfunc_btf_id_set *s) 374 - { 375 - } 376 - static inline void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 377 - struct kfunc_btf_id_set *s) 378 - { 379 - } 380 - static inline bool bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, 381 - u32 kfunc_id, struct module *owner) 321 + static inline bool btf_kfunc_id_set_contains(const struct btf *btf, 322 + enum bpf_prog_type prog_type, 323 + enum btf_kfunc_type type, 324 + u32 kfunc_btf_id) 382 325 { 383 326 return false; 384 327 } 385 - 386 - static struct kfunc_btf_id_list bpf_tcp_ca_kfunc_list __maybe_unused; 387 - static struct kfunc_btf_id_list prog_test_kfunc_list __maybe_unused; 328 + static inline int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 329 + const struct btf_kfunc_id_set *s) 330 + { 331 + return 0; 332 + } 388 333 #endif 389 - 390 - #define DEFINE_KFUNC_BTF_ID_SET(set, name) \ 391 - struct kfunc_btf_id_set name = { LIST_HEAD_INIT(name.list), (set), \ 392 - THIS_MODULE } 393 334 394 335 #endif
+7 -6
include/linux/btf_ids.h
··· 11 11 #ifdef CONFIG_DEBUG_INFO_BTF 12 12 13 13 #include <linux/compiler.h> /* for __PASTE */ 14 + #include <linux/compiler_attributes.h> /* for __maybe_unused */ 14 15 15 16 /* 16 17 * Following macros help to define lists of BTF IDs placed ··· 147 146 148 147 #else 149 148 150 - #define BTF_ID_LIST(name) static u32 name[5]; 149 + #define BTF_ID_LIST(name) static u32 __maybe_unused name[5]; 151 150 #define BTF_ID(prefix, name) 152 151 #define BTF_ID_UNUSED 153 - #define BTF_ID_LIST_GLOBAL(name, n) u32 name[n]; 154 - #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 name[1]; 155 - #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 name[1]; 156 - #define BTF_SET_START(name) static struct btf_id_set name = { 0 }; 157 - #define BTF_SET_START_GLOBAL(name) static struct btf_id_set name = { 0 }; 152 + #define BTF_ID_LIST_GLOBAL(name, n) u32 __maybe_unused name[n]; 153 + #define BTF_ID_LIST_SINGLE(name, prefix, typename) static u32 __maybe_unused name[1]; 154 + #define BTF_ID_LIST_GLOBAL_SINGLE(name, prefix, typename) u32 __maybe_unused name[1]; 155 + #define BTF_SET_START(name) static struct btf_id_set __maybe_unused name = { 0 }; 156 + #define BTF_SET_START_GLOBAL(name) static struct btf_id_set __maybe_unused name = { 0 }; 158 157 #define BTF_SET_END(name) 159 158 160 159 #endif /* CONFIG_DEBUG_INFO_BTF */
+4 -1
include/linux/filter.h
··· 1356 1356 s32 level; 1357 1357 s32 optname; 1358 1358 s32 optlen; 1359 - s32 retval; 1359 + /* for retval in struct bpf_cg_run_ctx */ 1360 + struct task_struct *current_task; 1361 + /* Temporary "register" for indirect stores to ppos. */ 1362 + u64 tmp_reg; 1360 1363 }; 1361 1364 1362 1365 int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
+1
include/linux/skbuff.h
··· 557 557 * Warning : all fields before dataref are cleared in __alloc_skb() 558 558 */ 559 559 atomic_t dataref; 560 + unsigned int xdp_frags_size; 560 561 561 562 /* Intermediate layers must ensure that destructor_arg 562 563 * remains valid until skb destructor */
+23
include/net/netfilter/nf_conntrack_bpf.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef _NF_CONNTRACK_BPF_H 4 + #define _NF_CONNTRACK_BPF_H 5 + 6 + #include <linux/btf.h> 7 + #include <linux/kconfig.h> 8 + 9 + #if (IS_BUILTIN(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ 10 + (IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) 11 + 12 + extern int register_nf_conntrack_bpf(void); 13 + 14 + #else 15 + 16 + static inline int register_nf_conntrack_bpf(void) 17 + { 18 + return 0; 19 + } 20 + 21 + #endif 22 + 23 + #endif /* _NF_CONNTRACK_BPF_H */
+104 -4
include/net/xdp.h
··· 60 60 u32 reg_state; 61 61 struct xdp_mem_info mem; 62 62 unsigned int napi_id; 63 + u32 frag_size; 63 64 } ____cacheline_aligned; /* perf critical, avoid false-sharing */ 64 65 65 66 struct xdp_txq_info { 66 67 struct net_device *dev; 68 + }; 69 + 70 + enum xdp_buff_flags { 71 + XDP_FLAGS_HAS_FRAGS = BIT(0), /* non-linear xdp buff */ 72 + XDP_FLAGS_FRAGS_PF_MEMALLOC = BIT(1), /* xdp paged memory is under 73 + * pressure 74 + */ 67 75 }; 68 76 69 77 struct xdp_buff { ··· 82 74 struct xdp_rxq_info *rxq; 83 75 struct xdp_txq_info *txq; 84 76 u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/ 77 + u32 flags; /* supported values defined in xdp_buff_flags */ 85 78 }; 79 + 80 + static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) 81 + { 82 + return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); 83 + } 84 + 85 + static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp) 86 + { 87 + xdp->flags |= XDP_FLAGS_HAS_FRAGS; 88 + } 89 + 90 + static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) 91 + { 92 + xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; 93 + } 94 + 95 + static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) 96 + { 97 + return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); 98 + } 99 + 100 + static __always_inline void xdp_buff_set_frag_pfmemalloc(struct xdp_buff *xdp) 101 + { 102 + xdp->flags |= XDP_FLAGS_FRAGS_PF_MEMALLOC; 103 + } 86 104 87 105 static __always_inline void 88 106 xdp_init_buff(struct xdp_buff *xdp, u32 frame_sz, struct xdp_rxq_info *rxq) 89 107 { 90 108 xdp->frame_sz = frame_sz; 91 109 xdp->rxq = rxq; 110 + xdp->flags = 0; 92 111 } 93 112 94 113 static __always_inline void ··· 146 111 return (struct skb_shared_info *)xdp_data_hard_end(xdp); 147 112 } 148 113 114 + static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) 115 + { 116 + unsigned int len = xdp->data_end - xdp->data; 117 + struct skb_shared_info *sinfo; 118 + 119 + if 
(likely(!xdp_buff_has_frags(xdp))) 120 + goto out; 121 + 122 + sinfo = xdp_get_shared_info_from_buff(xdp); 123 + len += sinfo->xdp_frags_size; 124 + out: 125 + return len; 126 + } 127 + 149 128 struct xdp_frame { 150 129 void *data; 151 130 u16 len; ··· 171 122 */ 172 123 struct xdp_mem_info mem; 173 124 struct net_device *dev_rx; /* used by cpumap */ 125 + u32 flags; /* supported values defined in xdp_buff_flags */ 174 126 }; 127 + 128 + static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) 129 + { 130 + return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); 131 + } 132 + 133 + static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) 134 + { 135 + return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); 136 + } 175 137 176 138 #define XDP_BULK_QUEUE_SIZE 16 177 139 struct xdp_frame_bulk { ··· 219 159 frame->dev_rx = NULL; 220 160 } 221 161 162 + static inline void 163 + xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, 164 + unsigned int size, unsigned int truesize, 165 + bool pfmemalloc) 166 + { 167 + skb_shinfo(skb)->nr_frags = nr_frags; 168 + 169 + skb->len += size; 170 + skb->data_len += size; 171 + skb->truesize += truesize; 172 + skb->pfmemalloc |= pfmemalloc; 173 + } 174 + 222 175 /* Avoids inlining WARN macro in fast-path */ 223 176 void xdp_warn(const char *msg, const char *func, const int line); 224 177 #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) ··· 253 180 xdp->data_end = frame->data + frame->len; 254 181 xdp->data_meta = frame->data - frame->metasize; 255 182 xdp->frame_sz = frame->frame_sz; 183 + xdp->flags = frame->flags; 256 184 } 257 185 258 186 static inline ··· 280 206 xdp_frame->headroom = headroom - sizeof(*xdp_frame); 281 207 xdp_frame->metasize = metasize; 282 208 xdp_frame->frame_sz = xdp->frame_sz; 209 + xdp_frame->flags = xdp->flags; 283 210 284 211 return 0; 285 212 } ··· 305 230 return xdp_frame; 306 231 } 307 232 233 + void __xdp_return(void *data, struct xdp_mem_info *mem, bool 
napi_direct, 234 + struct xdp_buff *xdp); 308 235 void xdp_return_frame(struct xdp_frame *xdpf); 309 236 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); 310 237 void xdp_return_buff(struct xdp_buff *xdp); ··· 323 246 static inline void xdp_release_frame(struct xdp_frame *xdpf) 324 247 { 325 248 struct xdp_mem_info *mem = &xdpf->mem; 249 + struct skb_shared_info *sinfo; 250 + int i; 326 251 327 252 /* Curr only page_pool needs this */ 328 - if (mem->type == MEM_TYPE_PAGE_POOL) 329 - __xdp_release_frame(xdpf->data, mem); 253 + if (mem->type != MEM_TYPE_PAGE_POOL) 254 + return; 255 + 256 + if (likely(!xdp_frame_has_frags(xdpf))) 257 + goto out; 258 + 259 + sinfo = xdp_get_shared_info_from_frame(xdpf); 260 + for (i = 0; i < sinfo->nr_frags; i++) { 261 + struct page *page = skb_frag_page(&sinfo->frags[i]); 262 + 263 + __xdp_release_frame(page_address(page), mem); 264 + } 265 + out: 266 + __xdp_release_frame(xdpf->data, mem); 330 267 } 331 268 332 - int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 333 - struct net_device *dev, u32 queue_index, unsigned int napi_id); 269 + int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 270 + struct net_device *dev, u32 queue_index, 271 + unsigned int napi_id, u32 frag_size); 272 + static inline int 273 + xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 274 + struct net_device *dev, u32 queue_index, 275 + unsigned int napi_id) 276 + { 277 + return __xdp_rxq_info_reg(xdp_rxq, dev, queue_index, napi_id, 0); 278 + } 279 + 334 280 void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); 335 281 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); 336 282 bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq);
+63
include/uapi/linux/bpf.h
··· 330 330 * *ctx_out*, *data_in* and *data_out* must be NULL. 331 331 * *repeat* must be zero. 332 332 * 333 + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. 334 + * 333 335 * Return 334 336 * Returns zero on success. On error, -1 is returned and *errno* 335 337 * is set appropriately. ··· 1113 1111 */ 1114 1112 #define BPF_F_SLEEPABLE (1U << 4) 1115 1113 1114 + /* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program 1115 + * fully support xdp frags. 1116 + */ 1117 + #define BPF_F_XDP_HAS_FRAGS (1U << 5) 1118 + 1116 1119 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have 1117 1120 * the following extensions: 1118 1121 * ··· 1782 1775 * 0 on success, or a negative error in case of failure. 1783 1776 * 1784 1777 * u64 bpf_get_current_pid_tgid(void) 1778 + * Description 1779 + * Get the current pid and tgid. 1785 1780 * Return 1786 1781 * A 64-bit integer containing the current tgid and pid, and 1787 1782 * created as such: ··· 1791 1782 * *current_task*\ **->pid**. 1792 1783 * 1793 1784 * u64 bpf_get_current_uid_gid(void) 1785 + * Description 1786 + * Get the current uid and gid. 1794 1787 * Return 1795 1788 * A 64-bit integer containing the current GID and UID, and 1796 1789 * created as such: *current_gid* **<< 32 \|** *current_uid*. ··· 2267 2256 * The 32-bit hash. 2268 2257 * 2269 2258 * u64 bpf_get_current_task(void) 2259 + * Description 2260 + * Get the current task. 2270 2261 * Return 2271 2262 * A pointer to the current task struct. 2272 2263 * ··· 2382 2369 * indicate that the hash is outdated and to trigger a 2383 2370 * recalculation the next time the kernel tries to access this 2384 2371 * hash or when the **bpf_get_hash_recalc**\ () helper is called. 2372 + * Return 2373 + * void. 2385 2374 * 2386 2375 * long bpf_get_numa_node_id(void) 2387 2376 * Description ··· 2481 2466 * A 8-byte long unique number or 0 if *sk* is NULL. 
2482 2467 * 2483 2468 * u32 bpf_get_socket_uid(struct sk_buff *skb) 2469 + * Description 2470 + * Get the owner UID of the socked associated to *skb*. 2484 2471 * Return 2485 2472 * The owner UID of the socket associated to *skb*. If the socket 2486 2473 * is **NULL**, or if it is not a full socket (i.e. if it is a ··· 3257 3240 * The id is returned or 0 in case the id could not be retrieved. 3258 3241 * 3259 3242 * u64 bpf_get_current_cgroup_id(void) 3243 + * Description 3244 + * Get the current cgroup id based on the cgroup within which 3245 + * the current task is running. 3260 3246 * Return 3261 3247 * A 64-bit integer containing the current cgroup id based 3262 3248 * on the cgroup within which the current task is running. ··· 5038 5018 * 5039 5019 * Return 5040 5020 * The number of arguments of the traced function. 5021 + * 5022 + * int bpf_get_retval(void) 5023 + * Description 5024 + * Get the syscall's return value that will be returned to userspace. 5025 + * 5026 + * This helper is currently supported by cgroup programs only. 5027 + * Return 5028 + * The syscall's return value. 5029 + * 5030 + * int bpf_set_retval(int retval) 5031 + * Description 5032 + * Set the syscall's return value that will be returned to userspace. 5033 + * 5034 + * This helper is currently supported by cgroup programs only. 5035 + * Return 5036 + * 0 on success, or a negative error in case of failure. 5037 + * 5038 + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) 5039 + * Description 5040 + * Get the total size of a given xdp buff (linear and paged area) 5041 + * Return 5042 + * The total size of a given xdp buffer. 5043 + * 5044 + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) 5045 + * Description 5046 + * This helper is provided as an easy way to load data from a 5047 + * xdp buffer. It can be used to load *len* bytes from *offset* from 5048 + * the frame associated to *xdp_md*, into the buffer pointed by 5049 + * *buf*. 
5050 + * Return 5051 + * 0 on success, or a negative error in case of failure. 5052 + * 5053 + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) 5054 + * Description 5055 + * Store *len* bytes from buffer *buf* into the frame 5056 + * associated to *xdp_md*, at *offset*. 5057 + * Return 5058 + * 0 on success, or a negative error in case of failure. 5041 5059 */ 5042 5060 #define __BPF_FUNC_MAPPER(FN) \ 5043 5061 FN(unspec), \ ··· 5264 5206 FN(get_func_arg), \ 5265 5207 FN(get_func_ret), \ 5266 5208 FN(get_func_arg_cnt), \ 5209 + FN(get_retval), \ 5210 + FN(set_retval), \ 5211 + FN(xdp_get_buff_len), \ 5212 + FN(xdp_load_bytes), \ 5213 + FN(xdp_store_bytes), \ 5267 5214 /* */ 5268 5215 5269 5216 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
+1 -3
kernel/bpf/arraymap.c
··· 837 837 static void *prog_fd_array_get_ptr(struct bpf_map *map, 838 838 struct file *map_file, int fd) 839 839 { 840 - struct bpf_array *array = container_of(map, struct bpf_array, map); 841 840 struct bpf_prog *prog = bpf_prog_get(fd); 842 841 843 842 if (IS_ERR(prog)) 844 843 return prog; 845 844 846 - if (!bpf_prog_array_compatible(array, prog)) { 845 + if (!bpf_prog_map_compatible(map, prog)) { 847 846 bpf_prog_put(prog); 848 847 return ERR_PTR(-EINVAL); 849 848 } ··· 1070 1071 INIT_WORK(&aux->work, prog_array_map_clear_deferred); 1071 1072 INIT_LIST_HEAD(&aux->poke_progs); 1072 1073 mutex_init(&aux->poke_mutex); 1073 - spin_lock_init(&aux->owner.lock); 1074 1074 1075 1075 map = array_map_alloc(attr); 1076 1076 if (IS_ERR(map)) {
+334 -46
kernel/bpf/btf.c
··· 198 198 DEFINE_IDR(btf_idr); 199 199 DEFINE_SPINLOCK(btf_idr_lock); 200 200 201 + enum btf_kfunc_hook { 202 + BTF_KFUNC_HOOK_XDP, 203 + BTF_KFUNC_HOOK_TC, 204 + BTF_KFUNC_HOOK_STRUCT_OPS, 205 + BTF_KFUNC_HOOK_MAX, 206 + }; 207 + 208 + enum { 209 + BTF_KFUNC_SET_MAX_CNT = 32, 210 + }; 211 + 212 + struct btf_kfunc_set_tab { 213 + struct btf_id_set *sets[BTF_KFUNC_HOOK_MAX][BTF_KFUNC_TYPE_MAX]; 214 + }; 215 + 201 216 struct btf { 202 217 void *data; 203 218 struct btf_type **types; ··· 227 212 refcount_t refcnt; 228 213 u32 id; 229 214 struct rcu_head rcu; 215 + struct btf_kfunc_set_tab *kfunc_set_tab; 230 216 231 217 /* split BTF support */ 232 218 struct btf *base_btf; ··· 1547 1531 spin_unlock_irqrestore(&btf_idr_lock, flags); 1548 1532 } 1549 1533 1534 + static void btf_free_kfunc_set_tab(struct btf *btf) 1535 + { 1536 + struct btf_kfunc_set_tab *tab = btf->kfunc_set_tab; 1537 + int hook, type; 1538 + 1539 + if (!tab) 1540 + return; 1541 + /* For module BTF, we directly assign the sets being registered, so 1542 + * there is nothing to free except kfunc_set_tab. 
1543 + */ 1544 + if (btf_is_module(btf)) 1545 + goto free_tab; 1546 + for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) { 1547 + for (type = 0; type < ARRAY_SIZE(tab->sets[0]); type++) 1548 + kfree(tab->sets[hook][type]); 1549 + } 1550 + free_tab: 1551 + kfree(tab); 1552 + btf->kfunc_set_tab = NULL; 1553 + } 1554 + 1550 1555 static void btf_free(struct btf *btf) 1551 1556 { 1557 + btf_free_kfunc_set_tab(btf); 1552 1558 kvfree(btf->types); 1553 1559 kvfree(btf->resolved_sizes); 1554 1560 kvfree(btf->resolved_ids); ··· 5654 5616 return true; 5655 5617 } 5656 5618 5619 + static bool is_kfunc_arg_mem_size(const struct btf *btf, 5620 + const struct btf_param *arg, 5621 + const struct bpf_reg_state *reg) 5622 + { 5623 + int len, sfx_len = sizeof("__sz") - 1; 5624 + const struct btf_type *t; 5625 + const char *param_name; 5626 + 5627 + t = btf_type_skip_modifiers(btf, arg->type, NULL); 5628 + if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE) 5629 + return false; 5630 + 5631 + /* In the future, this can be ported to use BTF tagging */ 5632 + param_name = btf_name_by_offset(btf, arg->name_off); 5633 + if (str_is_empty(param_name)) 5634 + return false; 5635 + len = strlen(param_name); 5636 + if (len < sfx_len) 5637 + return false; 5638 + param_name += len - sfx_len; 5639 + if (strncmp(param_name, "__sz", sfx_len)) 5640 + return false; 5641 + 5642 + return true; 5643 + } 5644 + 5657 5645 static int btf_check_func_arg_match(struct bpf_verifier_env *env, 5658 5646 const struct btf *btf, u32 func_id, 5659 5647 struct bpf_reg_state *regs, 5660 5648 bool ptr_to_mem_ok) 5661 5649 { 5662 5650 struct bpf_verifier_log *log = &env->log; 5651 + u32 i, nargs, ref_id, ref_obj_id = 0; 5663 5652 bool is_kfunc = btf_is_kernel(btf); 5664 5653 const char *func_name, *ref_tname; 5665 5654 const struct btf_type *t, *ref_t; 5666 5655 const struct btf_param *args; 5667 - u32 i, nargs, ref_id; 5656 + int ref_regno = 0; 5657 + bool rel = false; 5668 5658 5669 5659 t = btf_type_by_id(btf, 
func_id); 5670 5660 if (!t || !btf_type_is_func(t)) { ··· 5770 5704 if (reg->type == PTR_TO_BTF_ID) { 5771 5705 reg_btf = reg->btf; 5772 5706 reg_ref_id = reg->btf_id; 5707 + /* Ensure only one argument is referenced PTR_TO_BTF_ID */ 5708 + if (reg->ref_obj_id) { 5709 + if (ref_obj_id) { 5710 + bpf_log(log, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", 5711 + regno, reg->ref_obj_id, ref_obj_id); 5712 + return -EFAULT; 5713 + } 5714 + ref_regno = regno; 5715 + ref_obj_id = reg->ref_obj_id; 5716 + } 5773 5717 } else { 5774 5718 reg_btf = btf_vmlinux; 5775 5719 reg_ref_id = *reg2btf_ids[reg->type]; ··· 5803 5727 u32 type_size; 5804 5728 5805 5729 if (is_kfunc) { 5730 + bool arg_mem_size = i + 1 < nargs && is_kfunc_arg_mem_size(btf, &args[i + 1], &regs[regno + 1]); 5731 + 5806 5732 /* Permit pointer to mem, but only when argument 5807 5733 * type is pointer to scalar, or struct composed 5808 5734 * (recursively) of scalars. 5735 + * When arg_mem_size is true, the pointer can be 5736 + * void *. 5809 5737 */ 5810 5738 if (!btf_type_is_scalar(ref_t) && 5811 - !__btf_type_is_scalar_struct(log, btf, ref_t, 0)) { 5739 + !__btf_type_is_scalar_struct(log, btf, ref_t, 0) && 5740 + (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) { 5812 5741 bpf_log(log, 5813 - "arg#%d pointer type %s %s must point to scalar or struct with scalar\n", 5814 - i, btf_type_str(ref_t), ref_tname); 5742 + "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n", 5743 + i, btf_type_str(ref_t), ref_tname, arg_mem_size ? 
"void, " : ""); 5815 5744 return -EINVAL; 5745 + } 5746 + 5747 + /* Check for mem, len pair */ 5748 + if (arg_mem_size) { 5749 + if (check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1)) { 5750 + bpf_log(log, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", 5751 + i, i + 1); 5752 + return -EINVAL; 5753 + } 5754 + i++; 5755 + continue; 5816 5756 } 5817 5757 } 5818 5758 ··· 5850 5758 } 5851 5759 } 5852 5760 5853 - return 0; 5761 + /* Either both are set, or neither */ 5762 + WARN_ON_ONCE((ref_obj_id && !ref_regno) || (!ref_obj_id && ref_regno)); 5763 + if (is_kfunc) { 5764 + rel = btf_kfunc_id_set_contains(btf, resolve_prog_type(env->prog), 5765 + BTF_KFUNC_TYPE_RELEASE, func_id); 5766 + /* We already made sure ref_obj_id is set only for one argument */ 5767 + if (rel && !ref_obj_id) { 5768 + bpf_log(log, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n", 5769 + func_name); 5770 + return -EINVAL; 5771 + } 5772 + /* Allow (!rel && ref_obj_id), so that passing such referenced PTR_TO_BTF_ID to 5773 + * other kfuncs works 5774 + */ 5775 + } 5776 + /* returns argument register number > 0 in case of reference release kfunc */ 5777 + return rel ? ref_regno : 0; 5854 5778 } 5855 5779 5856 5780 /* Compare BTF of a function with given bpf_reg_state. 
··· 6308 6200 return bsearch(&id, set->ids, set->cnt, sizeof(u32), btf_id_cmp_func) != NULL; 6309 6201 } 6310 6202 6203 + enum { 6204 + BTF_MODULE_F_LIVE = (1 << 0), 6205 + }; 6206 + 6311 6207 #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6312 6208 struct btf_module { 6313 6209 struct list_head list; 6314 6210 struct module *module; 6315 6211 struct btf *btf; 6316 6212 struct bin_attribute *sysfs_attr; 6213 + int flags; 6317 6214 }; 6318 6215 6319 6216 static LIST_HEAD(btf_modules); ··· 6346 6233 int err = 0; 6347 6234 6348 6235 if (mod->btf_data_size == 0 || 6349 - (op != MODULE_STATE_COMING && op != MODULE_STATE_GOING)) 6236 + (op != MODULE_STATE_COMING && op != MODULE_STATE_LIVE && 6237 + op != MODULE_STATE_GOING)) 6350 6238 goto out; 6351 6239 6352 6240 switch (op) { ··· 6406 6292 } 6407 6293 6408 6294 break; 6295 + case MODULE_STATE_LIVE: 6296 + mutex_lock(&btf_module_mutex); 6297 + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { 6298 + if (btf_mod->module != module) 6299 + continue; 6300 + 6301 + btf_mod->flags |= BTF_MODULE_F_LIVE; 6302 + break; 6303 + } 6304 + mutex_unlock(&btf_module_mutex); 6305 + break; 6409 6306 case MODULE_STATE_GOING: 6410 6307 mutex_lock(&btf_module_mutex); 6411 6308 list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { ··· 6463 6338 if (btf_mod->btf != btf) 6464 6339 continue; 6465 6340 6466 - if (try_module_get(btf_mod->module)) 6341 + /* We must only consider module whose __init routine has 6342 + * finished, hence we must check for BTF_MODULE_F_LIVE flag, 6343 + * which is set from the notifier callback for 6344 + * MODULE_STATE_LIVE. 6345 + */ 6346 + if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module)) 6467 6347 res = btf_mod->module; 6468 6348 6469 6349 break; ··· 6477 6347 #endif 6478 6348 6479 6349 return res; 6350 + } 6351 + 6352 + /* Returns struct btf corresponding to the struct module 6353 + * 6354 + * This function can return NULL or ERR_PTR. 
Note that caller must 6355 + * release reference for struct btf iff btf_is_module is true. 6356 + */ 6357 + static struct btf *btf_get_module_btf(const struct module *module) 6358 + { 6359 + struct btf *btf = NULL; 6360 + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6361 + struct btf_module *btf_mod, *tmp; 6362 + #endif 6363 + 6364 + if (!module) 6365 + return bpf_get_btf_vmlinux(); 6366 + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6367 + mutex_lock(&btf_module_mutex); 6368 + list_for_each_entry_safe(btf_mod, tmp, &btf_modules, list) { 6369 + if (btf_mod->module != module) 6370 + continue; 6371 + 6372 + btf_get(btf_mod->btf); 6373 + btf = btf_mod->btf; 6374 + break; 6375 + } 6376 + mutex_unlock(&btf_module_mutex); 6377 + #endif 6378 + 6379 + return btf; 6480 6380 } 6481 6381 6482 6382 BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags) ··· 6576 6416 BTF_TRACING_TYPE_xxx 6577 6417 #undef BTF_TRACING_TYPE 6578 6418 6579 - /* BTF ID set registration API for modules */ 6419 + /* Kernel Function (kfunc) BTF ID set registration API */ 6580 6420 6581 - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6582 - 6583 - void register_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 6584 - struct kfunc_btf_id_set *s) 6421 + static int __btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, 6422 + enum btf_kfunc_type type, 6423 + struct btf_id_set *add_set, bool vmlinux_set) 6585 6424 { 6586 - mutex_lock(&l->mutex); 6587 - list_add(&s->list, &l->list); 6588 - mutex_unlock(&l->mutex); 6589 - } 6590 - EXPORT_SYMBOL_GPL(register_kfunc_btf_id_set); 6425 + struct btf_kfunc_set_tab *tab; 6426 + struct btf_id_set *set; 6427 + u32 set_cnt; 6428 + int ret; 6591 6429 6592 - void unregister_kfunc_btf_id_set(struct kfunc_btf_id_list *l, 6593 - struct kfunc_btf_id_set *s) 6594 - { 6595 - mutex_lock(&l->mutex); 6596 - list_del_init(&s->list); 6597 - mutex_unlock(&l->mutex); 6598 - } 6599 - EXPORT_SYMBOL_GPL(unregister_kfunc_btf_id_set); 6600 - 6601 - bool 
bpf_check_mod_kfunc_call(struct kfunc_btf_id_list *klist, u32 kfunc_id, 6602 - struct module *owner) 6603 - { 6604 - struct kfunc_btf_id_set *s; 6605 - 6606 - mutex_lock(&klist->mutex); 6607 - list_for_each_entry(s, &klist->list, list) { 6608 - if (s->owner == owner && btf_id_set_contains(s->set, kfunc_id)) { 6609 - mutex_unlock(&klist->mutex); 6610 - return true; 6611 - } 6430 + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) { 6431 + ret = -EINVAL; 6432 + goto end; 6612 6433 } 6613 - mutex_unlock(&klist->mutex); 6614 - return false; 6434 + 6435 + if (!add_set->cnt) 6436 + return 0; 6437 + 6438 + tab = btf->kfunc_set_tab; 6439 + if (!tab) { 6440 + tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); 6441 + if (!tab) 6442 + return -ENOMEM; 6443 + btf->kfunc_set_tab = tab; 6444 + } 6445 + 6446 + set = tab->sets[hook][type]; 6447 + /* Warn when register_btf_kfunc_id_set is called twice for the same hook 6448 + * for module sets. 6449 + */ 6450 + if (WARN_ON_ONCE(set && !vmlinux_set)) { 6451 + ret = -EINVAL; 6452 + goto end; 6453 + } 6454 + 6455 + /* We don't need to allocate, concatenate, and sort module sets, because 6456 + * only one is allowed per hook. Hence, we can directly assign the 6457 + * pointer and return. 6458 + */ 6459 + if (!vmlinux_set) { 6460 + tab->sets[hook][type] = add_set; 6461 + return 0; 6462 + } 6463 + 6464 + /* In case of vmlinux sets, there may be more than one set being 6465 + * registered per hook. To create a unified set, we allocate a new set 6466 + * and concatenate all individual sets being registered. While each set 6467 + * is individually sorted, they may become unsorted when concatenated, 6468 + * hence re-sorting the final set again is required to make binary 6469 + * searching the set using btf_id_set_contains function work. 6470 + */ 6471 + set_cnt = set ? 
set->cnt : 0; 6472 + 6473 + if (set_cnt > U32_MAX - add_set->cnt) { 6474 + ret = -EOVERFLOW; 6475 + goto end; 6476 + } 6477 + 6478 + if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) { 6479 + ret = -E2BIG; 6480 + goto end; 6481 + } 6482 + 6483 + /* Grow set */ 6484 + set = krealloc(tab->sets[hook][type], 6485 + offsetof(struct btf_id_set, ids[set_cnt + add_set->cnt]), 6486 + GFP_KERNEL | __GFP_NOWARN); 6487 + if (!set) { 6488 + ret = -ENOMEM; 6489 + goto end; 6490 + } 6491 + 6492 + /* For newly allocated set, initialize set->cnt to 0 */ 6493 + if (!tab->sets[hook][type]) 6494 + set->cnt = 0; 6495 + tab->sets[hook][type] = set; 6496 + 6497 + /* Concatenate the two sets */ 6498 + memcpy(set->ids + set->cnt, add_set->ids, add_set->cnt * sizeof(set->ids[0])); 6499 + set->cnt += add_set->cnt; 6500 + 6501 + sort(set->ids, set->cnt, sizeof(set->ids[0]), btf_id_cmp_func, NULL); 6502 + 6503 + return 0; 6504 + end: 6505 + btf_free_kfunc_set_tab(btf); 6506 + return ret; 6615 6507 } 6616 6508 6617 - #define DEFINE_KFUNC_BTF_ID_LIST(name) \ 6618 - struct kfunc_btf_id_list name = { LIST_HEAD_INIT(name.list), \ 6619 - __MUTEX_INITIALIZER(name.mutex) }; \ 6620 - EXPORT_SYMBOL_GPL(name) 6509 + static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, 6510 + const struct btf_kfunc_id_set *kset) 6511 + { 6512 + bool vmlinux_set = !btf_is_module(btf); 6513 + int type, ret; 6621 6514 6622 - DEFINE_KFUNC_BTF_ID_LIST(bpf_tcp_ca_kfunc_list); 6623 - DEFINE_KFUNC_BTF_ID_LIST(prog_test_kfunc_list); 6515 + for (type = 0; type < ARRAY_SIZE(kset->sets); type++) { 6516 + if (!kset->sets[type]) 6517 + continue; 6624 6518 6625 - #endif 6519 + ret = __btf_populate_kfunc_set(btf, hook, type, kset->sets[type], vmlinux_set); 6520 + if (ret) 6521 + break; 6522 + } 6523 + return ret; 6524 + } 6525 + 6526 + static bool __btf_kfunc_id_set_contains(const struct btf *btf, 6527 + enum btf_kfunc_hook hook, 6528 + enum btf_kfunc_type type, 6529 + u32 kfunc_btf_id) 6530 + { 6531 + struct 
btf_id_set *set; 6532 + 6533 + if (hook >= BTF_KFUNC_HOOK_MAX || type >= BTF_KFUNC_TYPE_MAX) 6534 + return false; 6535 + if (!btf->kfunc_set_tab) 6536 + return false; 6537 + set = btf->kfunc_set_tab->sets[hook][type]; 6538 + if (!set) 6539 + return false; 6540 + return btf_id_set_contains(set, kfunc_btf_id); 6541 + } 6542 + 6543 + static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) 6544 + { 6545 + switch (prog_type) { 6546 + case BPF_PROG_TYPE_XDP: 6547 + return BTF_KFUNC_HOOK_XDP; 6548 + case BPF_PROG_TYPE_SCHED_CLS: 6549 + return BTF_KFUNC_HOOK_TC; 6550 + case BPF_PROG_TYPE_STRUCT_OPS: 6551 + return BTF_KFUNC_HOOK_STRUCT_OPS; 6552 + default: 6553 + return BTF_KFUNC_HOOK_MAX; 6554 + } 6555 + } 6556 + 6557 + /* Caution: 6558 + * Reference to the module (obtained using btf_try_get_module) corresponding to 6559 + * the struct btf *MUST* be held when calling this function from verifier 6560 + * context. This is usually true as we stash references in prog's kfunc_btf_tab; 6561 + * keeping the reference for the duration of the call provides the necessary 6562 + * protection for looking up a well-formed btf->kfunc_set_tab. 6563 + */ 6564 + bool btf_kfunc_id_set_contains(const struct btf *btf, 6565 + enum bpf_prog_type prog_type, 6566 + enum btf_kfunc_type type, u32 kfunc_btf_id) 6567 + { 6568 + enum btf_kfunc_hook hook; 6569 + 6570 + hook = bpf_prog_type_to_kfunc_hook(prog_type); 6571 + return __btf_kfunc_id_set_contains(btf, hook, type, kfunc_btf_id); 6572 + } 6573 + 6574 + /* This function must be invoked only from initcalls/module init functions */ 6575 + int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 6576 + const struct btf_kfunc_id_set *kset) 6577 + { 6578 + enum btf_kfunc_hook hook; 6579 + struct btf *btf; 6580 + int ret; 6581 + 6582 + btf = btf_get_module_btf(kset->owner); 6583 + if (IS_ERR_OR_NULL(btf)) 6584 + return btf ? 
PTR_ERR(btf) : -ENOENT; 6585 + 6586 + hook = bpf_prog_type_to_kfunc_hook(prog_type); 6587 + ret = btf_populate_kfunc_set(btf, hook, kset); 6588 + /* reference is only taken for module BTF */ 6589 + if (btf_is_module(btf)) 6590 + btf_put(btf); 6591 + return ret; 6592 + } 6593 + EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set); 6626 6594 6627 6595 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, 6628 6596 const struct btf *targ_btf, __u32 targ_id)
+94 -55
kernel/bpf/cgroup.c
··· 1044 1044 * NET_XMIT_DROP (1) - drop packet and notify TCP to call cwr 1045 1045 * NET_XMIT_CN (2) - continue with packet output and notify TCP 1046 1046 * to call cwr 1047 - * -EPERM - drop packet 1047 + * -err - drop packet 1048 1048 * 1049 1049 * For ingress packets, this function will return -EPERM if any 1050 1050 * attached program was found and if it returned != 1 during execution. ··· 1079 1079 cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb); 1080 1080 } else { 1081 1081 ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb, 1082 - __bpf_prog_run_save_cb); 1083 - ret = (ret == 1 ? 0 : -EPERM); 1082 + __bpf_prog_run_save_cb, 0); 1083 + if (ret && !IS_ERR_VALUE((long)ret)) 1084 + ret = -EFAULT; 1084 1085 } 1085 1086 bpf_restore_data_end(skb, saved_data_end); 1086 1087 __skb_pull(skb, offset); ··· 1108 1107 enum cgroup_bpf_attach_type atype) 1109 1108 { 1110 1109 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1111 - int ret; 1112 1110 1113 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, bpf_prog_run); 1114 - return ret == 1 ? 0 : -EPERM; 1111 + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk, 1112 + bpf_prog_run, 0); 1115 1113 } 1116 1114 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); 1117 1115 ··· 1142 1142 }; 1143 1143 struct sockaddr_storage unspec; 1144 1144 struct cgroup *cgrp; 1145 - int ret; 1146 1145 1147 1146 /* Check socket family since not all sockets represent network 1148 1147 * endpoint (e.g. AF_UNIX). ··· 1155 1156 } 1156 1157 1157 1158 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1158 - ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, 1159 - bpf_prog_run, flags); 1160 - 1161 - return ret == 1 ? 
0 : -EPERM; 1159 + return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx, 1160 + bpf_prog_run, 0, flags); 1162 1161 } 1163 1162 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); 1164 1163 ··· 1181 1184 enum cgroup_bpf_attach_type atype) 1182 1185 { 1183 1186 struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); 1184 - int ret; 1185 1187 1186 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, 1187 - bpf_prog_run); 1188 - return ret == 1 ? 0 : -EPERM; 1188 + return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops, 1189 + bpf_prog_run, 0); 1189 1190 } 1190 1191 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); 1191 1192 ··· 1196 1201 .major = major, 1197 1202 .minor = minor, 1198 1203 }; 1199 - int allow; 1204 + int ret; 1200 1205 1201 1206 rcu_read_lock(); 1202 1207 cgrp = task_dfl_cgroup(current); 1203 - allow = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, 1204 - bpf_prog_run); 1208 + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, 1209 + bpf_prog_run, 0); 1205 1210 rcu_read_unlock(); 1206 1211 1207 - return !allow; 1212 + return ret; 1208 1213 } 1214 + 1215 + BPF_CALL_0(bpf_get_retval) 1216 + { 1217 + struct bpf_cg_run_ctx *ctx = 1218 + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1219 + 1220 + return ctx->retval; 1221 + } 1222 + 1223 + static const struct bpf_func_proto bpf_get_retval_proto = { 1224 + .func = bpf_get_retval, 1225 + .gpl_only = false, 1226 + .ret_type = RET_INTEGER, 1227 + }; 1228 + 1229 + BPF_CALL_1(bpf_set_retval, int, retval) 1230 + { 1231 + struct bpf_cg_run_ctx *ctx = 1232 + container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); 1233 + 1234 + ctx->retval = retval; 1235 + return 0; 1236 + } 1237 + 1238 + static const struct bpf_func_proto bpf_set_retval_proto = { 1239 + .func = bpf_set_retval, 1240 + .gpl_only = false, 1241 + .ret_type = RET_INTEGER, 1242 + .arg1_type = ARG_ANYTHING, 1243 + }; 1209 1244 1210 1245 static const struct bpf_func_proto * 
1211 1246 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) ··· 1249 1224 return &bpf_get_current_cgroup_id_proto; 1250 1225 case BPF_FUNC_perf_event_output: 1251 1226 return &bpf_event_output_data_proto; 1227 + case BPF_FUNC_get_retval: 1228 + return &bpf_get_retval_proto; 1229 + case BPF_FUNC_set_retval: 1230 + return &bpf_set_retval_proto; 1252 1231 default: 1253 1232 return bpf_base_func_proto(func_id); 1254 1233 } ··· 1366 1337 1367 1338 rcu_read_lock(); 1368 1339 cgrp = task_dfl_cgroup(current); 1369 - ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run); 1340 + ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, 1341 + bpf_prog_run, 0); 1370 1342 rcu_read_unlock(); 1371 1343 1372 1344 kfree(ctx.cur_val); ··· 1380 1350 kfree(ctx.new_val); 1381 1351 } 1382 1352 1383 - return ret == 1 ? 0 : -EPERM; 1353 + return ret; 1384 1354 } 1385 1355 1386 1356 #ifdef CONFIG_NET ··· 1482 1452 1483 1453 lock_sock(sk); 1484 1454 ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT], 1485 - &ctx, bpf_prog_run); 1455 + &ctx, bpf_prog_run, 0); 1486 1456 release_sock(sk); 1487 1457 1488 - if (!ret) { 1489 - ret = -EPERM; 1458 + if (ret) 1490 1459 goto out; 1491 - } 1492 1460 1493 1461 if (ctx.optlen == -1) { 1494 1462 /* optlen set to -1, bypass kernel */ ··· 1546 1518 .sk = sk, 1547 1519 .level = level, 1548 1520 .optname = optname, 1549 - .retval = retval, 1521 + .current_task = current, 1550 1522 }; 1551 1523 int ret; 1552 1524 ··· 1590 1562 1591 1563 lock_sock(sk); 1592 1564 ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], 1593 - &ctx, bpf_prog_run); 1565 + &ctx, bpf_prog_run, retval); 1594 1566 release_sock(sk); 1595 1567 1596 - if (!ret) { 1597 - ret = -EPERM; 1568 + if (ret < 0) 1598 1569 goto out; 1599 - } 1600 1570 1601 1571 if (ctx.optlen > max_optlen || ctx.optlen < 0) { 1602 - ret = -EFAULT; 1603 - goto out; 1604 - } 1605 - 1606 - /* BPF programs only allowed to set retval to 
0, not some 1607 - * arbitrary value. 1608 - */ 1609 - if (ctx.retval != 0 && ctx.retval != retval) { 1610 1572 ret = -EFAULT; 1611 1573 goto out; 1612 1574 } ··· 1608 1590 goto out; 1609 1591 } 1610 1592 } 1611 - 1612 - ret = ctx.retval; 1613 1593 1614 1594 out: 1615 1595 sockopt_free_buf(&ctx, &buf); ··· 1623 1607 .sk = sk, 1624 1608 .level = level, 1625 1609 .optname = optname, 1626 - .retval = retval, 1627 1610 .optlen = *optlen, 1628 1611 .optval = optval, 1629 1612 .optval_end = optval + *optlen, 1613 + .current_task = current, 1630 1614 }; 1631 1615 int ret; 1632 1616 ··· 1639 1623 */ 1640 1624 1641 1625 ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT], 1642 - &ctx, bpf_prog_run); 1643 - if (!ret) 1644 - return -EPERM; 1626 + &ctx, bpf_prog_run, retval); 1627 + if (ret < 0) 1628 + return ret; 1645 1629 1646 1630 if (ctx.optlen > *optlen) 1647 - return -EFAULT; 1648 - 1649 - /* BPF programs only allowed to set retval to 0, not some 1650 - * arbitrary value. 1651 - */ 1652 - if (ctx.retval != 0 && ctx.retval != retval) 1653 1631 return -EFAULT; 1654 1632 1655 1633 /* BPF programs can shrink the buffer, export the modifications. 
··· 1651 1641 if (ctx.optlen != 0) 1652 1642 *optlen = ctx.optlen; 1653 1643 1654 - return ctx.retval; 1644 + return ret; 1655 1645 } 1656 1646 #endif 1657 1647 ··· 2067 2057 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen); 2068 2058 break; 2069 2059 case offsetof(struct bpf_sockopt, retval): 2070 - if (type == BPF_WRITE) 2071 - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval); 2072 - else 2073 - *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval); 2060 + BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0); 2061 + 2062 + if (type == BPF_WRITE) { 2063 + int treg = BPF_REG_9; 2064 + 2065 + if (si->src_reg == treg || si->dst_reg == treg) 2066 + --treg; 2067 + if (si->src_reg == treg || si->dst_reg == treg) 2068 + --treg; 2069 + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg, 2070 + offsetof(struct bpf_sockopt_kern, tmp_reg)); 2071 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), 2072 + treg, si->dst_reg, 2073 + offsetof(struct bpf_sockopt_kern, current_task)); 2074 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), 2075 + treg, treg, 2076 + offsetof(struct task_struct, bpf_ctx)); 2077 + *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), 2078 + treg, si->src_reg, 2079 + offsetof(struct bpf_cg_run_ctx, retval)); 2080 + *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg, 2081 + offsetof(struct bpf_sockopt_kern, tmp_reg)); 2082 + } else { 2083 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task), 2084 + si->dst_reg, si->src_reg, 2085 + offsetof(struct bpf_sockopt_kern, current_task)); 2086 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx), 2087 + si->dst_reg, si->dst_reg, 2088 + offsetof(struct task_struct, bpf_ctx)); 2089 + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval), 2090 + si->dst_reg, si->dst_reg, 2091 + offsetof(struct bpf_cg_run_ctx, retval)); 2092 + } 2074 2093 break; 2075 2094 case offsetof(struct 
bpf_sockopt, optval): 2076 2095 *insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
+14 -14
kernel/bpf/core.c
··· 1829 1829 } 1830 1830 #endif 1831 1831 1832 - bool bpf_prog_array_compatible(struct bpf_array *array, 1833 - const struct bpf_prog *fp) 1832 + bool bpf_prog_map_compatible(struct bpf_map *map, 1833 + const struct bpf_prog *fp) 1834 1834 { 1835 1835 bool ret; 1836 1836 1837 1837 if (fp->kprobe_override) 1838 1838 return false; 1839 1839 1840 - spin_lock(&array->aux->owner.lock); 1841 - 1842 - if (!array->aux->owner.type) { 1840 + spin_lock(&map->owner.lock); 1841 + if (!map->owner.type) { 1843 1842 /* There's no owner yet where we could check for 1844 1843 * compatibility. 1845 1844 */ 1846 - array->aux->owner.type = fp->type; 1847 - array->aux->owner.jited = fp->jited; 1845 + map->owner.type = fp->type; 1846 + map->owner.jited = fp->jited; 1847 + map->owner.xdp_has_frags = fp->aux->xdp_has_frags; 1848 1848 ret = true; 1849 1849 } else { 1850 - ret = array->aux->owner.type == fp->type && 1851 - array->aux->owner.jited == fp->jited; 1850 + ret = map->owner.type == fp->type && 1851 + map->owner.jited == fp->jited && 1852 + map->owner.xdp_has_frags == fp->aux->xdp_has_frags; 1852 1853 } 1853 - spin_unlock(&array->aux->owner.lock); 1854 + spin_unlock(&map->owner.lock); 1855 + 1854 1856 return ret; 1855 1857 } 1856 1858 ··· 1864 1862 mutex_lock(&aux->used_maps_mutex); 1865 1863 for (i = 0; i < aux->used_map_cnt; i++) { 1866 1864 struct bpf_map *map = aux->used_maps[i]; 1867 - struct bpf_array *array; 1868 1865 1869 - if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) 1866 + if (!map_type_contains_progs(map)) 1870 1867 continue; 1871 1868 1872 - array = container_of(map, struct bpf_array, map); 1873 - if (!bpf_prog_array_compatible(array, fp)) { 1869 + if (!bpf_prog_map_compatible(map, fp)) { 1874 1870 ret = -EINVAL; 1875 1871 goto out; 1876 1872 }
+5 -3
kernel/bpf/cpumap.c
··· 397 397 return 0; 398 398 } 399 399 400 - static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd) 400 + static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, 401 + struct bpf_map *map, int fd) 401 402 { 402 403 struct bpf_prog *prog; 403 404 ··· 406 405 if (IS_ERR(prog)) 407 406 return PTR_ERR(prog); 408 407 409 - if (prog->expected_attach_type != BPF_XDP_CPUMAP) { 408 + if (prog->expected_attach_type != BPF_XDP_CPUMAP || 409 + !bpf_prog_map_compatible(map, prog)) { 410 410 bpf_prog_put(prog); 411 411 return -EINVAL; 412 412 } ··· 459 457 rcpu->map_id = map->id; 460 458 rcpu->value.qsize = value->qsize; 461 459 462 - if (fd > 0 && __cpu_map_load_bpf_program(rcpu, fd)) 460 + if (fd > 0 && __cpu_map_load_bpf_program(rcpu, map, fd)) 463 461 goto free_ptr_ring; 464 462 465 463 /* Setup kthread */
+2 -1
kernel/bpf/devmap.c
··· 858 858 BPF_PROG_TYPE_XDP, false); 859 859 if (IS_ERR(prog)) 860 860 goto err_put_dev; 861 - if (prog->expected_attach_type != BPF_XDP_DEVMAP) 861 + if (prog->expected_attach_type != BPF_XDP_DEVMAP || 862 + !bpf_prog_map_compatible(&dtab->map, prog)) 862 863 goto err_put_prog; 863 864 } 864 865
+15 -9
kernel/bpf/syscall.c
··· 556 556 557 557 static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) 558 558 { 559 - const struct bpf_map *map = filp->private_data; 560 - const struct bpf_array *array; 559 + struct bpf_map *map = filp->private_data; 561 560 u32 type = 0, jited = 0; 562 561 563 - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { 564 - array = container_of(map, struct bpf_array, map); 565 - spin_lock(&array->aux->owner.lock); 566 - type = array->aux->owner.type; 567 - jited = array->aux->owner.jited; 568 - spin_unlock(&array->aux->owner.lock); 562 + if (map_type_contains_progs(map)) { 563 + spin_lock(&map->owner.lock); 564 + type = map->owner.type; 565 + jited = map->owner.jited; 566 + spin_unlock(&map->owner.lock); 569 567 } 570 568 571 569 seq_printf(m, ··· 872 874 atomic64_set(&map->refcnt, 1); 873 875 atomic64_set(&map->usercnt, 1); 874 876 mutex_init(&map->freeze_mutex); 877 + spin_lock_init(&map->owner.lock); 875 878 876 879 map->spin_lock_off = -EINVAL; 877 880 map->timer_off = -EINVAL; ··· 2216 2217 BPF_F_ANY_ALIGNMENT | 2217 2218 BPF_F_TEST_STATE_FREQ | 2218 2219 BPF_F_SLEEPABLE | 2219 - BPF_F_TEST_RND_HI32)) 2220 + BPF_F_TEST_RND_HI32 | 2221 + BPF_F_XDP_HAS_FRAGS)) 2220 2222 return -EINVAL; 2221 2223 2222 2224 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && ··· 2303 2303 prog->aux->dst_prog = dst_prog; 2304 2304 prog->aux->offload_requested = !!attr->prog_ifindex; 2305 2305 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2306 + prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2306 2307 2307 2308 err = security_bpf_prog_alloc(prog->aux); 2308 2309 if (err) ··· 3319 3318 case BPF_FLOW_DISSECTOR: 3320 3319 case BPF_SK_LOOKUP: 3321 3320 return netns_bpf_prog_query(attr, uattr); 3321 + case BPF_SK_SKB_STREAM_PARSER: 3322 + case BPF_SK_SKB_STREAM_VERDICT: 3323 + case BPF_SK_MSG_VERDICT: 3324 + case BPF_SK_SKB_VERDICT: 3325 + return sock_map_bpf_prog_query(attr, uattr); 3322 3326 default: 3323 3327 return -EINVAL; 3324 
3328 }
+129 -67
kernel/bpf/verifier.c
··· 452 452 { 453 453 return base_type(type) == PTR_TO_SOCKET || 454 454 base_type(type) == PTR_TO_TCP_SOCK || 455 - base_type(type) == PTR_TO_MEM; 455 + base_type(type) == PTR_TO_MEM || 456 + base_type(type) == PTR_TO_BTF_ID; 456 457 } 457 458 458 459 static bool type_is_rdonly_mem(u32 type) ··· 1744 1743 } 1745 1744 1746 1745 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env, 1747 - s16 offset, struct module **btf_modp) 1746 + s16 offset) 1748 1747 { 1749 1748 struct bpf_kfunc_btf kf_btf = { .offset = offset }; 1750 1749 struct bpf_kfunc_btf_tab *tab; ··· 1798 1797 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]), 1799 1798 kfunc_btf_cmp_by_off, NULL); 1800 1799 } 1801 - if (btf_modp) 1802 - *btf_modp = b->module; 1803 1800 return b->btf; 1804 1801 } 1805 1802 ··· 1814 1815 } 1815 1816 1816 1817 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, 1817 - u32 func_id, s16 offset, 1818 - struct module **btf_modp) 1818 + u32 func_id, s16 offset) 1819 1819 { 1820 1820 if (offset) { 1821 1821 if (offset < 0) { ··· 1825 1827 return ERR_PTR(-EINVAL); 1826 1828 } 1827 1829 1828 - return __find_kfunc_desc_btf(env, offset, btf_modp); 1830 + return __find_kfunc_desc_btf(env, offset); 1829 1831 } 1830 1832 return btf_vmlinux ?: ERR_PTR(-ENOENT); 1831 1833 } ··· 1888 1890 prog_aux->kfunc_btf_tab = btf_tab; 1889 1891 } 1890 1892 1891 - desc_btf = find_kfunc_desc_btf(env, func_id, offset, NULL); 1893 + desc_btf = find_kfunc_desc_btf(env, func_id, offset); 1892 1894 if (IS_ERR(desc_btf)) { 1893 1895 verbose(env, "failed to find BTF for kernel function\n"); 1894 1896 return PTR_ERR(desc_btf); ··· 2349 2351 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL) 2350 2352 return NULL; 2351 2353 2352 - desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off, NULL); 2354 + desc_btf = find_kfunc_desc_btf(data, insn->imm, insn->off); 2353 2355 if (IS_ERR(desc_btf)) 2354 2356 return "<error>"; 2355 2357 ··· 3495 3497 } 3496 3498 3497 3499 #define 
MAX_PACKET_OFF 0xffff 3498 - 3499 - static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) 3500 - { 3501 - return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; 3502 - } 3503 3500 3504 3501 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, 3505 3502 const struct bpf_call_arg_meta *meta, ··· 4870 4877 } 4871 4878 } 4872 4879 4880 + static int check_mem_size_reg(struct bpf_verifier_env *env, 4881 + struct bpf_reg_state *reg, u32 regno, 4882 + bool zero_size_allowed, 4883 + struct bpf_call_arg_meta *meta) 4884 + { 4885 + int err; 4886 + 4887 + /* This is used to refine r0 return value bounds for helpers 4888 + * that enforce this value as an upper bound on return values. 4889 + * See do_refine_retval_range() for helpers that can refine 4890 + * the return value. C type of helper is u32 so we pull register 4891 + * bound from umax_value however, if negative verifier errors 4892 + * out. Only upper bounds can be learned because retval is an 4893 + * int type and negative retvals are allowed. 4894 + */ 4895 + if (meta) 4896 + meta->msize_max_value = reg->umax_value; 4897 + 4898 + /* The register is SCALAR_VALUE; the access check 4899 + * happens using its boundaries. 4900 + */ 4901 + if (!tnum_is_const(reg->var_off)) 4902 + /* For unprivileged variable accesses, disable raw 4903 + * mode so that the program is required to 4904 + * initialize all the memory that the helper could 4905 + * just partially fill up. 
4906 + */ 4907 + meta = NULL; 4908 + 4909 + if (reg->smin_value < 0) { 4910 + verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 4911 + regno); 4912 + return -EACCES; 4913 + } 4914 + 4915 + if (reg->umin_value == 0) { 4916 + err = check_helper_mem_access(env, regno - 1, 0, 4917 + zero_size_allowed, 4918 + meta); 4919 + if (err) 4920 + return err; 4921 + } 4922 + 4923 + if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 4924 + verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 4925 + regno); 4926 + return -EACCES; 4927 + } 4928 + err = check_helper_mem_access(env, regno - 1, 4929 + reg->umax_value, 4930 + zero_size_allowed, meta); 4931 + if (!err) 4932 + err = mark_chain_precision(env, regno); 4933 + return err; 4934 + } 4935 + 4873 4936 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 4874 4937 u32 regno, u32 mem_size) 4875 4938 { ··· 4947 4898 } 4948 4899 4949 4900 return check_helper_mem_access(env, regno, mem_size, true, NULL); 4901 + } 4902 + 4903 + int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg, 4904 + u32 regno) 4905 + { 4906 + struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1]; 4907 + bool may_be_null = type_may_be_null(mem_reg->type); 4908 + struct bpf_reg_state saved_reg; 4909 + int err; 4910 + 4911 + WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5); 4912 + 4913 + if (may_be_null) { 4914 + saved_reg = *mem_reg; 4915 + mark_ptr_not_null_reg(mem_reg); 4916 + } 4917 + 4918 + err = check_mem_size_reg(env, reg, regno, true, NULL); 4919 + 4920 + if (may_be_null) 4921 + *mem_reg = saved_reg; 4922 + return err; 4950 4923 } 4951 4924 4952 4925 /* Implementation details: ··· 5510 5439 } else if (arg_type_is_mem_size(arg_type)) { 5511 5440 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); 5512 5441 5513 - /* This is used to refine r0 return value bounds for helpers 5514 - * that enforce this value as an upper bound on 
return values. 5515 - * See do_refine_retval_range() for helpers that can refine 5516 - * the return value. C type of helper is u32 so we pull register 5517 - * bound from umax_value however, if negative verifier errors 5518 - * out. Only upper bounds can be learned because retval is an 5519 - * int type and negative retvals are allowed. 5520 - */ 5521 - meta->msize_max_value = reg->umax_value; 5522 - 5523 - /* The register is SCALAR_VALUE; the access check 5524 - * happens using its boundaries. 5525 - */ 5526 - if (!tnum_is_const(reg->var_off)) 5527 - /* For unprivileged variable accesses, disable raw 5528 - * mode so that the program is required to 5529 - * initialize all the memory that the helper could 5530 - * just partially fill up. 5531 - */ 5532 - meta = NULL; 5533 - 5534 - if (reg->smin_value < 0) { 5535 - verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", 5536 - regno); 5537 - return -EACCES; 5538 - } 5539 - 5540 - if (reg->umin_value == 0) { 5541 - err = check_helper_mem_access(env, regno - 1, 0, 5542 - zero_size_allowed, 5543 - meta); 5544 - if (err) 5545 - return err; 5546 - } 5547 - 5548 - if (reg->umax_value >= BPF_MAX_VAR_SIZ) { 5549 - verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", 5550 - regno); 5551 - return -EACCES; 5552 - } 5553 - err = check_helper_mem_access(env, regno - 1, 5554 - reg->umax_value, 5555 - zero_size_allowed, meta); 5556 - if (!err) 5557 - err = mark_chain_precision(env, regno); 5442 + err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta); 5558 5443 } else if (arg_type_is_alloc_size(arg_type)) { 5559 5444 if (!tnum_is_const(reg->var_off)) { 5560 5445 verbose(env, "R%d is not a known constant'\n", ··· 6869 6842 } 6870 6843 } 6871 6844 6872 - static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn) 6845 + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, 6846 + int *insn_idx_p) 6873 6847 { 
6874 6848 const struct btf_type *t, *func, *func_proto, *ptr_type; 6875 6849 struct bpf_reg_state *regs = cur_regs(env); 6876 6850 const char *func_name, *ptr_type_name; 6877 6851 u32 i, nargs, func_id, ptr_type_id; 6878 - struct module *btf_mod = NULL; 6852 + int err, insn_idx = *insn_idx_p; 6879 6853 const struct btf_param *args; 6880 6854 struct btf *desc_btf; 6881 - int err; 6855 + bool acq; 6882 6856 6883 6857 /* skip for now, but return error when we find this in fixup_kfunc_call */ 6884 6858 if (!insn->imm) 6885 6859 return 0; 6886 6860 6887 - desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off, &btf_mod); 6861 + desc_btf = find_kfunc_desc_btf(env, insn->imm, insn->off); 6888 6862 if (IS_ERR(desc_btf)) 6889 6863 return PTR_ERR(desc_btf); 6890 6864 ··· 6894 6866 func_name = btf_name_by_offset(desc_btf, func->name_off); 6895 6867 func_proto = btf_type_by_id(desc_btf, func->type); 6896 6868 6897 - if (!env->ops->check_kfunc_call || 6898 - !env->ops->check_kfunc_call(func_id, btf_mod)) { 6869 + if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), 6870 + BTF_KFUNC_TYPE_CHECK, func_id)) { 6899 6871 verbose(env, "calling kernel function %s is not allowed\n", 6900 6872 func_name); 6901 6873 return -EACCES; 6902 6874 } 6903 6875 6876 + acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), 6877 + BTF_KFUNC_TYPE_ACQUIRE, func_id); 6878 + 6904 6879 /* Check the arguments */ 6905 6880 err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs); 6906 - if (err) 6881 + if (err < 0) 6907 6882 return err; 6883 + /* In case of release function, we get register number of refcounted 6884 + * PTR_TO_BTF_ID back from btf_check_kfunc_arg_match, do the release now 6885 + */ 6886 + if (err) { 6887 + err = release_reference(env, regs[err].ref_obj_id); 6888 + if (err) { 6889 + verbose(env, "kfunc %s#%d reference has not been acquired before\n", 6890 + func_name, func_id); 6891 + return err; 6892 + } 6893 + } 6908 6894 6909 6895 for (i = 0; i 
< CALLER_SAVED_REGS; i++) 6910 6896 mark_reg_not_init(env, regs, caller_saved[i]); 6911 6897 6912 6898 /* Check return type */ 6913 6899 t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL); 6900 + 6901 + if (acq && !btf_type_is_ptr(t)) { 6902 + verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n"); 6903 + return -EINVAL; 6904 + } 6905 + 6914 6906 if (btf_type_is_scalar(t)) { 6915 6907 mark_reg_unknown(env, regs, BPF_REG_0); 6916 6908 mark_btf_func_reg_size(env, BPF_REG_0, t->size); ··· 6949 6901 regs[BPF_REG_0].btf = desc_btf; 6950 6902 regs[BPF_REG_0].type = PTR_TO_BTF_ID; 6951 6903 regs[BPF_REG_0].btf_id = ptr_type_id; 6904 + if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), 6905 + BTF_KFUNC_TYPE_RET_NULL, func_id)) { 6906 + regs[BPF_REG_0].type |= PTR_MAYBE_NULL; 6907 + /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */ 6908 + regs[BPF_REG_0].id = ++env->id_gen; 6909 + } 6952 6910 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *)); 6911 + if (acq) { 6912 + int id = acquire_reference_state(env, insn_idx); 6913 + 6914 + if (id < 0) 6915 + return id; 6916 + regs[BPF_REG_0].id = id; 6917 + regs[BPF_REG_0].ref_obj_id = id; 6918 + } 6953 6919 } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */ 6954 6920 6955 6921 nargs = btf_type_vlen(func_proto); ··· 11611 11549 if (insn->src_reg == BPF_PSEUDO_CALL) 11612 11550 err = check_func_call(env, insn, &env->insn_idx); 11613 11551 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) 11614 - err = check_kfunc_call(env, insn); 11552 + err = check_kfunc_call(env, insn, &env->insn_idx); 11615 11553 else 11616 11554 err = check_helper_call(env, insn, &env->insn_idx); 11617 11555 if (err)
+3
kernel/trace/bpf_trace.c
··· 1562 1562 1563 1563 extern const struct bpf_func_proto bpf_skb_output_proto; 1564 1564 extern const struct bpf_func_proto bpf_xdp_output_proto; 1565 + extern const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto; 1565 1566 1566 1567 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, 1567 1568 struct bpf_map *, map, u64, flags) ··· 1662 1661 return &bpf_sock_from_file_proto; 1663 1662 case BPF_FUNC_get_socket_cookie: 1664 1663 return &bpf_get_socket_ptr_cookie_proto; 1664 + case BPF_FUNC_xdp_get_buff_len: 1665 + return &bpf_xdp_get_buff_len_trace_proto; 1665 1666 #endif 1666 1667 case BPF_FUNC_seq_printf: 1667 1668 return prog->expected_attach_type == BPF_TRACE_ITER ?
+229 -34
net/bpf/test_run.c
··· 5 5 #include <linux/btf.h> 6 6 #include <linux/btf_ids.h> 7 7 #include <linux/slab.h> 8 + #include <linux/init.h> 8 9 #include <linux/vmalloc.h> 9 10 #include <linux/etherdevice.h> 10 11 #include <linux/filter.h> ··· 131 130 132 131 static int bpf_test_finish(const union bpf_attr *kattr, 133 132 union bpf_attr __user *uattr, const void *data, 134 - u32 size, u32 retval, u32 duration) 133 + struct skb_shared_info *sinfo, u32 size, 134 + u32 retval, u32 duration) 135 135 { 136 136 void __user *data_out = u64_to_user_ptr(kattr->test.data_out); 137 137 int err = -EFAULT; ··· 147 145 err = -ENOSPC; 148 146 } 149 147 150 - if (data_out && copy_to_user(data_out, data, copy_size)) 151 - goto out; 148 + if (data_out) { 149 + int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size; 150 + 151 + if (copy_to_user(data_out, data, len)) 152 + goto out; 153 + 154 + if (sinfo) { 155 + int i, offset = len, data_len; 156 + 157 + for (i = 0; i < sinfo->nr_frags; i++) { 158 + skb_frag_t *frag = &sinfo->frags[i]; 159 + 160 + if (offset >= copy_size) { 161 + err = -ENOSPC; 162 + break; 163 + } 164 + 165 + data_len = min_t(int, copy_size - offset, 166 + skb_frag_size(frag)); 167 + 168 + if (copy_to_user(data_out + offset, 169 + skb_frag_address(frag), 170 + data_len)) 171 + goto out; 172 + 173 + offset += data_len; 174 + } 175 + } 176 + } 177 + 152 178 if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size))) 153 179 goto out; 154 180 if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval))) ··· 201 171 { 202 172 return a + 1; 203 173 } 174 + EXPORT_SYMBOL_GPL(bpf_fentry_test1); 175 + ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); 204 176 205 177 int noinline bpf_fentry_test2(int a, u64 b) 206 178 { ··· 264 232 return sk; 265 233 } 266 234 235 + struct prog_test_ref_kfunc { 236 + int a; 237 + int b; 238 + struct prog_test_ref_kfunc *next; 239 + }; 240 + 241 + static struct prog_test_ref_kfunc prog_test_struct = { 242 + .a = 42, 243 + .b = 108, 244 + .next = 
&prog_test_struct, 245 + }; 246 + 247 + noinline struct prog_test_ref_kfunc * 248 + bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) 249 + { 250 + /* randomly return NULL */ 251 + if (get_jiffies_64() % 2) 252 + return NULL; 253 + return &prog_test_struct; 254 + } 255 + 256 + noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) 257 + { 258 + } 259 + 260 + struct prog_test_pass1 { 261 + int x0; 262 + struct { 263 + int x1; 264 + struct { 265 + int x2; 266 + struct { 267 + int x3; 268 + }; 269 + }; 270 + }; 271 + }; 272 + 273 + struct prog_test_pass2 { 274 + int len; 275 + short arr1[4]; 276 + struct { 277 + char arr2[4]; 278 + unsigned long arr3[8]; 279 + } x; 280 + }; 281 + 282 + struct prog_test_fail1 { 283 + void *p; 284 + int x; 285 + }; 286 + 287 + struct prog_test_fail2 { 288 + int x8; 289 + struct prog_test_pass1 x; 290 + }; 291 + 292 + struct prog_test_fail3 { 293 + int len; 294 + char arr1[2]; 295 + char arr2[]; 296 + }; 297 + 298 + noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) 299 + { 300 + } 301 + 302 + noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) 303 + { 304 + } 305 + 306 + noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) 307 + { 308 + } 309 + 310 + noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) 311 + { 312 + } 313 + 314 + noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) 315 + { 316 + } 317 + 318 + noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) 319 + { 320 + } 321 + 322 + noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) 323 + { 324 + } 325 + 326 + noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) 327 + { 328 + } 329 + 330 + noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) 331 + { 332 + } 333 + 267 334 __diag_pop(); 268 335 269 336 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO); 270 337 271 - BTF_SET_START(test_sk_kfunc_ids) 338 + 
BTF_SET_START(test_sk_check_kfunc_ids) 272 339 BTF_ID(func, bpf_kfunc_call_test1) 273 340 BTF_ID(func, bpf_kfunc_call_test2) 274 341 BTF_ID(func, bpf_kfunc_call_test3) 275 - BTF_SET_END(test_sk_kfunc_ids) 342 + BTF_ID(func, bpf_kfunc_call_test_acquire) 343 + BTF_ID(func, bpf_kfunc_call_test_release) 344 + BTF_ID(func, bpf_kfunc_call_test_pass_ctx) 345 + BTF_ID(func, bpf_kfunc_call_test_pass1) 346 + BTF_ID(func, bpf_kfunc_call_test_pass2) 347 + BTF_ID(func, bpf_kfunc_call_test_fail1) 348 + BTF_ID(func, bpf_kfunc_call_test_fail2) 349 + BTF_ID(func, bpf_kfunc_call_test_fail3) 350 + BTF_ID(func, bpf_kfunc_call_test_mem_len_pass1) 351 + BTF_ID(func, bpf_kfunc_call_test_mem_len_fail1) 352 + BTF_ID(func, bpf_kfunc_call_test_mem_len_fail2) 353 + BTF_SET_END(test_sk_check_kfunc_ids) 276 354 277 - bool bpf_prog_test_check_kfunc_call(u32 kfunc_id, struct module *owner) 278 - { 279 - if (btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id)) 280 - return true; 281 - return bpf_check_mod_kfunc_call(&prog_test_kfunc_list, kfunc_id, owner); 282 - } 355 + BTF_SET_START(test_sk_acquire_kfunc_ids) 356 + BTF_ID(func, bpf_kfunc_call_test_acquire) 357 + BTF_SET_END(test_sk_acquire_kfunc_ids) 283 358 284 - static void *bpf_test_init(const union bpf_attr *kattr, u32 size, 285 - u32 headroom, u32 tailroom) 359 + BTF_SET_START(test_sk_release_kfunc_ids) 360 + BTF_ID(func, bpf_kfunc_call_test_release) 361 + BTF_SET_END(test_sk_release_kfunc_ids) 362 + 363 + BTF_SET_START(test_sk_ret_null_kfunc_ids) 364 + BTF_ID(func, bpf_kfunc_call_test_acquire) 365 + BTF_SET_END(test_sk_ret_null_kfunc_ids) 366 + 367 + static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size, 368 + u32 size, u32 headroom, u32 tailroom) 286 369 { 287 370 void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 288 - u32 user_size = kattr->test.data_size_in; 289 371 void *data; 290 372 291 373 if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom) ··· 727 581 if (kattr->test.flags || kattr->test.cpu) 728 
582 return -EINVAL; 729 583 730 - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, 584 + data = bpf_test_init(kattr, kattr->test.data_size_in, 585 + size, NET_SKB_PAD + NET_IP_ALIGN, 731 586 SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); 732 587 if (IS_ERR(data)) 733 588 return PTR_ERR(data); ··· 830 683 /* bpf program can never convert linear skb to non-linear */ 831 684 if (WARN_ON_ONCE(skb_is_nonlinear(skb))) 832 685 size = skb_headlen(skb); 833 - ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration); 686 + ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval, 687 + duration); 834 688 if (!ret) 835 689 ret = bpf_ctx_finish(kattr, uattr, ctx, 836 690 sizeof(struct __sk_buff)); ··· 906 758 union bpf_attr __user *uattr) 907 759 { 908 760 u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 909 - u32 headroom = XDP_PACKET_HEADROOM; 910 761 u32 size = kattr->test.data_size_in; 762 + u32 headroom = XDP_PACKET_HEADROOM; 763 + u32 retval, duration, max_data_sz; 911 764 u32 repeat = kattr->test.repeat; 912 765 struct netdev_rx_queue *rxqueue; 766 + struct skb_shared_info *sinfo; 913 767 struct xdp_buff xdp = {}; 914 - u32 retval, duration; 768 + int i, ret = -EINVAL; 915 769 struct xdp_md *ctx; 916 - u32 max_data_sz; 917 770 void *data; 918 - int ret = -EINVAL; 919 771 920 772 if (prog->expected_attach_type == BPF_XDP_DEVMAP || 921 773 prog->expected_attach_type == BPF_XDP_CPUMAP) ··· 935 787 headroom -= ctx->data; 936 788 } 937 789 938 - /* XDP have extra tailroom as (most) drivers use full page */ 939 790 max_data_sz = 4096 - headroom - tailroom; 791 + size = min_t(u32, size, max_data_sz); 940 792 941 - data = bpf_test_init(kattr, max_data_sz, headroom, tailroom); 793 + data = bpf_test_init(kattr, size, max_data_sz, headroom, tailroom); 942 794 if (IS_ERR(data)) { 943 795 ret = PTR_ERR(data); 944 796 goto free_ctx; 945 797 } 946 798 947 799 rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); 
948 - xdp_init_buff(&xdp, headroom + max_data_sz + tailroom, 949 - &rxqueue->xdp_rxq); 800 + rxqueue->xdp_rxq.frag_size = headroom + max_data_sz + tailroom; 801 + xdp_init_buff(&xdp, rxqueue->xdp_rxq.frag_size, &rxqueue->xdp_rxq); 950 802 xdp_prepare_buff(&xdp, data, headroom, size, true); 803 + sinfo = xdp_get_shared_info_from_buff(&xdp); 951 804 952 805 ret = xdp_convert_md_to_buff(ctx, &xdp); 953 806 if (ret) 954 807 goto free_data; 955 808 809 + if (unlikely(kattr->test.data_size_in > size)) { 810 + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); 811 + 812 + while (size < kattr->test.data_size_in) { 813 + struct page *page; 814 + skb_frag_t *frag; 815 + int data_len; 816 + 817 + page = alloc_page(GFP_KERNEL); 818 + if (!page) { 819 + ret = -ENOMEM; 820 + goto out; 821 + } 822 + 823 + frag = &sinfo->frags[sinfo->nr_frags++]; 824 + __skb_frag_set_page(frag, page); 825 + 826 + data_len = min_t(int, kattr->test.data_size_in - size, 827 + PAGE_SIZE); 828 + skb_frag_size_set(frag, data_len); 829 + 830 + if (copy_from_user(page_address(page), data_in + size, 831 + data_len)) { 832 + ret = -EFAULT; 833 + goto out; 834 + } 835 + sinfo->xdp_frags_size += data_len; 836 + size += data_len; 837 + } 838 + xdp_buff_set_frags_flag(&xdp); 839 + } 840 + 956 841 if (repeat > 1) 957 842 bpf_prog_change_xdp(NULL, prog); 843 + 958 844 ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); 959 845 /* We convert the xdp_buff back to an xdp_md before checking the return 960 846 * code so the reference count of any held netdevice will be decremented ··· 998 816 if (ret) 999 817 goto out; 1000 818 1001 - if (xdp.data_meta != data + headroom || 1002 - xdp.data_end != xdp.data_meta + size) 1003 - size = xdp.data_end - xdp.data_meta; 1004 - 1005 - ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval, 1006 - duration); 819 + size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size; 820 + ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, 821 
+ retval, duration); 1007 822 if (!ret) 1008 823 ret = bpf_ctx_finish(kattr, uattr, ctx, 1009 824 sizeof(struct xdp_md)); ··· 1009 830 if (repeat > 1) 1010 831 bpf_prog_change_xdp(prog, NULL); 1011 832 free_data: 833 + for (i = 0; i < sinfo->nr_frags; i++) 834 + __free_page(skb_frag_page(&sinfo->frags[i])); 1012 835 kfree(data); 1013 836 free_ctx: 1014 837 kfree(ctx); ··· 1057 876 if (size < ETH_HLEN) 1058 877 return -EINVAL; 1059 878 1060 - data = bpf_test_init(kattr, size, 0, 0); 879 + data = bpf_test_init(kattr, kattr->test.data_size_in, size, 0, 0); 1061 880 if (IS_ERR(data)) 1062 881 return PTR_ERR(data); 1063 882 ··· 1092 911 if (ret < 0) 1093 912 goto out; 1094 913 1095 - ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), 1096 - retval, duration); 914 + ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL, 915 + sizeof(flow_keys), retval, duration); 1097 916 if (!ret) 1098 917 ret = bpf_ctx_finish(kattr, uattr, user_ctx, 1099 918 sizeof(struct bpf_flow_keys)); ··· 1197 1016 user_ctx->cookie = sock_gen_cookie(ctx.selected_sk); 1198 1017 } 1199 1018 1200 - ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration); 1019 + ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration); 1201 1020 if (!ret) 1202 1021 ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx)); 1203 1022 ··· 1248 1067 kfree(ctx); 1249 1068 return err; 1250 1069 } 1070 + 1071 + static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = { 1072 + .owner = THIS_MODULE, 1073 + .check_set = &test_sk_check_kfunc_ids, 1074 + .acquire_set = &test_sk_acquire_kfunc_ids, 1075 + .release_set = &test_sk_release_kfunc_ids, 1076 + .ret_null_set = &test_sk_ret_null_kfunc_ids, 1077 + }; 1078 + 1079 + static int __init bpf_prog_test_run_init(void) 1080 + { 1081 + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set); 1082 + } 1083 + late_initcall(bpf_prog_test_run_init);
+240 -6
net/core/filter.c
··· 3783 3783 .arg2_type = ARG_ANYTHING, 3784 3784 .arg3_type = ARG_ANYTHING, 3785 3785 }; 3786 + 3787 + BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp) 3788 + { 3789 + return xdp_get_buff_len(xdp); 3790 + } 3791 + 3792 + static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = { 3793 + .func = bpf_xdp_get_buff_len, 3794 + .gpl_only = false, 3795 + .ret_type = RET_INTEGER, 3796 + .arg1_type = ARG_PTR_TO_CTX, 3797 + }; 3798 + 3799 + BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff) 3800 + 3801 + const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = { 3802 + .func = bpf_xdp_get_buff_len, 3803 + .gpl_only = false, 3804 + .arg1_type = ARG_PTR_TO_BTF_ID, 3805 + .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0], 3806 + }; 3807 + 3786 3808 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) 3787 3809 { 3788 3810 return xdp_data_meta_unsupported(xdp) ? 0 : ··· 3839 3817 .arg2_type = ARG_ANYTHING, 3840 3818 }; 3841 3819 3820 + static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, 3821 + void *buf, unsigned long len, bool flush) 3822 + { 3823 + unsigned long ptr_len, ptr_off = 0; 3824 + skb_frag_t *next_frag, *end_frag; 3825 + struct skb_shared_info *sinfo; 3826 + void *src, *dst; 3827 + u8 *ptr_buf; 3828 + 3829 + if (likely(xdp->data_end - xdp->data >= off + len)) { 3830 + src = flush ? buf : xdp->data + off; 3831 + dst = flush ? xdp->data + off : buf; 3832 + memcpy(dst, src, len); 3833 + return; 3834 + } 3835 + 3836 + sinfo = xdp_get_shared_info_from_buff(xdp); 3837 + end_frag = &sinfo->frags[sinfo->nr_frags]; 3838 + next_frag = &sinfo->frags[0]; 3839 + 3840 + ptr_len = xdp->data_end - xdp->data; 3841 + ptr_buf = xdp->data; 3842 + 3843 + while (true) { 3844 + if (off < ptr_off + ptr_len) { 3845 + unsigned long copy_off = off - ptr_off; 3846 + unsigned long copy_len = min(len, ptr_len - copy_off); 3847 + 3848 + src = flush ? buf : ptr_buf + copy_off; 3849 + dst = flush ? 
ptr_buf + copy_off : buf; 3850 + memcpy(dst, src, copy_len); 3851 + 3852 + off += copy_len; 3853 + len -= copy_len; 3854 + buf += copy_len; 3855 + } 3856 + 3857 + if (!len || next_frag == end_frag) 3858 + break; 3859 + 3860 + ptr_off += ptr_len; 3861 + ptr_buf = skb_frag_address(next_frag); 3862 + ptr_len = skb_frag_size(next_frag); 3863 + next_frag++; 3864 + } 3865 + } 3866 + 3867 + static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) 3868 + { 3869 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3870 + u32 size = xdp->data_end - xdp->data; 3871 + void *addr = xdp->data; 3872 + int i; 3873 + 3874 + if (unlikely(offset > 0xffff || len > 0xffff)) 3875 + return ERR_PTR(-EFAULT); 3876 + 3877 + if (offset + len > xdp_get_buff_len(xdp)) 3878 + return ERR_PTR(-EINVAL); 3879 + 3880 + if (offset < size) /* linear area */ 3881 + goto out; 3882 + 3883 + offset -= size; 3884 + for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */ 3885 + u32 frag_size = skb_frag_size(&sinfo->frags[i]); 3886 + 3887 + if (offset < frag_size) { 3888 + addr = skb_frag_address(&sinfo->frags[i]); 3889 + size = frag_size; 3890 + break; 3891 + } 3892 + offset -= frag_size; 3893 + } 3894 + out: 3895 + return offset + len < size ? 
addr + offset : NULL; 3896 + } 3897 + 3898 + BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, 3899 + void *, buf, u32, len) 3900 + { 3901 + void *ptr; 3902 + 3903 + ptr = bpf_xdp_pointer(xdp, offset, len); 3904 + if (IS_ERR(ptr)) 3905 + return PTR_ERR(ptr); 3906 + 3907 + if (!ptr) 3908 + bpf_xdp_copy_buf(xdp, offset, buf, len, false); 3909 + else 3910 + memcpy(buf, ptr, len); 3911 + 3912 + return 0; 3913 + } 3914 + 3915 + static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { 3916 + .func = bpf_xdp_load_bytes, 3917 + .gpl_only = false, 3918 + .ret_type = RET_INTEGER, 3919 + .arg1_type = ARG_PTR_TO_CTX, 3920 + .arg2_type = ARG_ANYTHING, 3921 + .arg3_type = ARG_PTR_TO_UNINIT_MEM, 3922 + .arg4_type = ARG_CONST_SIZE, 3923 + }; 3924 + 3925 + BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, 3926 + void *, buf, u32, len) 3927 + { 3928 + void *ptr; 3929 + 3930 + ptr = bpf_xdp_pointer(xdp, offset, len); 3931 + if (IS_ERR(ptr)) 3932 + return PTR_ERR(ptr); 3933 + 3934 + if (!ptr) 3935 + bpf_xdp_copy_buf(xdp, offset, buf, len, true); 3936 + else 3937 + memcpy(ptr, buf, len); 3938 + 3939 + return 0; 3940 + } 3941 + 3942 + static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { 3943 + .func = bpf_xdp_store_bytes, 3944 + .gpl_only = false, 3945 + .ret_type = RET_INTEGER, 3946 + .arg1_type = ARG_PTR_TO_CTX, 3947 + .arg2_type = ARG_ANYTHING, 3948 + .arg3_type = ARG_PTR_TO_UNINIT_MEM, 3949 + .arg4_type = ARG_CONST_SIZE, 3950 + }; 3951 + 3952 + static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) 3953 + { 3954 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3955 + skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1]; 3956 + struct xdp_rxq_info *rxq = xdp->rxq; 3957 + unsigned int tailroom; 3958 + 3959 + if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz) 3960 + return -EOPNOTSUPP; 3961 + 3962 + tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag); 3963 + if 
(unlikely(offset > tailroom)) 3964 + return -EINVAL; 3965 + 3966 + memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset); 3967 + skb_frag_size_add(frag, offset); 3968 + sinfo->xdp_frags_size += offset; 3969 + 3970 + return 0; 3971 + } 3972 + 3973 + static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) 3974 + { 3975 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); 3976 + int i, n_frags_free = 0, len_free = 0; 3977 + 3978 + if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN)) 3979 + return -EINVAL; 3980 + 3981 + for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) { 3982 + skb_frag_t *frag = &sinfo->frags[i]; 3983 + int shrink = min_t(int, offset, skb_frag_size(frag)); 3984 + 3985 + len_free += shrink; 3986 + offset -= shrink; 3987 + 3988 + if (skb_frag_size(frag) == shrink) { 3989 + struct page *page = skb_frag_page(frag); 3990 + 3991 + __xdp_return(page_address(page), &xdp->rxq->mem, 3992 + false, NULL); 3993 + n_frags_free++; 3994 + } else { 3995 + skb_frag_size_sub(frag, shrink); 3996 + break; 3997 + } 3998 + } 3999 + sinfo->nr_frags -= n_frags_free; 4000 + sinfo->xdp_frags_size -= len_free; 4001 + 4002 + if (unlikely(!sinfo->nr_frags)) { 4003 + xdp_buff_clear_frags_flag(xdp); 4004 + xdp->data_end -= offset; 4005 + } 4006 + 4007 + return 0; 4008 + } 4009 + 3842 4010 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset) 3843 4011 { 3844 4012 void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */ 3845 4013 void *data_end = xdp->data_end + offset; 4014 + 4015 + if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */ 4016 + if (offset < 0) 4017 + return bpf_xdp_frags_shrink_tail(xdp, -offset); 4018 + 4019 + return bpf_xdp_frags_increase_tail(xdp, offset); 4020 + } 3846 4021 3847 4022 /* Notice that xdp_data_hard_end have reserved some tailroom */ 3848 4023 if (unlikely(data_end > data_hard_end)) ··· 4265 4046 { 4266 4047 struct bpf_redirect_info *ri = 
this_cpu_ptr(&bpf_redirect_info); 4267 4048 enum bpf_map_type map_type = ri->map_type; 4049 + 4050 + /* XDP_REDIRECT is not fully supported yet for xdp frags since 4051 + * not all XDP capable drivers can map non-linear xdp_frame in 4052 + * ndo_xdp_xmit. 4053 + */ 4054 + if (unlikely(xdp_buff_has_frags(xdp) && 4055 + map_type != BPF_MAP_TYPE_CPUMAP)) 4056 + return -EOPNOTSUPP; 4268 4057 4269 4058 if (map_type == BPF_MAP_TYPE_XSKMAP) 4270 4059 return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); ··· 4817 4590 }; 4818 4591 #endif 4819 4592 4820 - static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, 4593 + static unsigned long bpf_xdp_copy(void *dst, const void *ctx, 4821 4594 unsigned long off, unsigned long len) 4822 4595 { 4823 - memcpy(dst_buff, src_buff + off, len); 4596 + struct xdp_buff *xdp = (struct xdp_buff *)ctx; 4597 + 4598 + bpf_xdp_copy_buf(xdp, off, dst, len, false); 4824 4599 return 0; 4825 4600 } 4826 4601 ··· 4833 4604 4834 4605 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) 4835 4606 return -EINVAL; 4836 - if (unlikely(!xdp || 4837 - xdp_size > (unsigned long)(xdp->data_end - xdp->data))) 4607 + 4608 + if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp))) 4838 4609 return -EFAULT; 4839 4610 4840 - return bpf_event_output(map, flags, meta, meta_size, xdp->data, 4611 + return bpf_event_output(map, flags, meta, meta_size, xdp, 4841 4612 xdp_size, bpf_xdp_copy); 4842 4613 } 4843 4614 ··· 7762 7533 return &bpf_xdp_redirect_map_proto; 7763 7534 case BPF_FUNC_xdp_adjust_tail: 7764 7535 return &bpf_xdp_adjust_tail_proto; 7536 + case BPF_FUNC_xdp_get_buff_len: 7537 + return &bpf_xdp_get_buff_len_proto; 7538 + case BPF_FUNC_xdp_load_bytes: 7539 + return &bpf_xdp_load_bytes_proto; 7540 + case BPF_FUNC_xdp_store_bytes: 7541 + return &bpf_xdp_store_bytes_proto; 7765 7542 case BPF_FUNC_fib_lookup: 7766 7543 return &bpf_xdp_fib_lookup_proto; 7767 7544 case BPF_FUNC_check_mtu: ··· 10297 10062 .convert_ctx_access = 
tc_cls_act_convert_ctx_access, 10298 10063 .gen_prologue = tc_cls_act_prologue, 10299 10064 .gen_ld_abs = bpf_gen_ld_abs, 10300 - .check_kfunc_call = bpf_prog_test_check_kfunc_call, 10301 10065 }; 10302 10066 10303 10067 const struct bpf_prog_ops tc_cls_act_prog_ops = {
+1
net/core/net_namespace.c
··· 301 301 302 302 return peer; 303 303 } 304 + EXPORT_SYMBOL_GPL(get_net_ns_by_id); 304 305 305 306 /* 306 307 * setup_net runs the initializers for the network namespace object.
+70 -7
net/core/sock_map.c
··· 1416 1416 return NULL; 1417 1417 } 1418 1418 1419 - static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1420 - struct bpf_prog *old, u32 which) 1419 + static int sock_map_prog_lookup(struct bpf_map *map, struct bpf_prog ***pprog, 1420 + u32 which) 1421 1421 { 1422 1422 struct sk_psock_progs *progs = sock_map_progs(map); 1423 - struct bpf_prog **pprog; 1424 1423 1425 1424 if (!progs) 1426 1425 return -EOPNOTSUPP; 1427 1426 1428 1427 switch (which) { 1429 1428 case BPF_SK_MSG_VERDICT: 1430 - pprog = &progs->msg_parser; 1429 + *pprog = &progs->msg_parser; 1431 1430 break; 1432 1431 #if IS_ENABLED(CONFIG_BPF_STREAM_PARSER) 1433 1432 case BPF_SK_SKB_STREAM_PARSER: 1434 - pprog = &progs->stream_parser; 1433 + *pprog = &progs->stream_parser; 1435 1434 break; 1436 1435 #endif 1437 1436 case BPF_SK_SKB_STREAM_VERDICT: 1438 1437 if (progs->skb_verdict) 1439 1438 return -EBUSY; 1440 - pprog = &progs->stream_verdict; 1439 + *pprog = &progs->stream_verdict; 1441 1440 break; 1442 1441 case BPF_SK_SKB_VERDICT: 1443 1442 if (progs->stream_verdict) 1444 1443 return -EBUSY; 1445 - pprog = &progs->skb_verdict; 1444 + *pprog = &progs->skb_verdict; 1446 1445 break; 1447 1446 default: 1448 1447 return -EOPNOTSUPP; 1449 1448 } 1449 + 1450 + return 0; 1451 + } 1452 + 1453 + static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, 1454 + struct bpf_prog *old, u32 which) 1455 + { 1456 + struct bpf_prog **pprog; 1457 + int ret; 1458 + 1459 + ret = sock_map_prog_lookup(map, &pprog, which); 1460 + if (ret) 1461 + return ret; 1450 1462 1451 1463 if (old) 1452 1464 return psock_replace_prog(pprog, prog, old); 1453 1465 1454 1466 psock_set_prog(pprog, prog); 1455 1467 return 0; 1468 + } 1469 + 1470 + int sock_map_bpf_prog_query(const union bpf_attr *attr, 1471 + union bpf_attr __user *uattr) 1472 + { 1473 + __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); 1474 + u32 prog_cnt = 0, flags = 0, ufd = attr->target_fd; 1475 + struct bpf_prog 
**pprog; 1476 + struct bpf_prog *prog; 1477 + struct bpf_map *map; 1478 + struct fd f; 1479 + u32 id = 0; 1480 + int ret; 1481 + 1482 + if (attr->query.query_flags) 1483 + return -EINVAL; 1484 + 1485 + f = fdget(ufd); 1486 + map = __bpf_map_get(f); 1487 + if (IS_ERR(map)) 1488 + return PTR_ERR(map); 1489 + 1490 + rcu_read_lock(); 1491 + 1492 + ret = sock_map_prog_lookup(map, &pprog, attr->query.attach_type); 1493 + if (ret) 1494 + goto end; 1495 + 1496 + prog = *pprog; 1497 + prog_cnt = !prog ? 0 : 1; 1498 + 1499 + if (!attr->query.prog_cnt || !prog_ids || !prog_cnt) 1500 + goto end; 1501 + 1502 + /* we do not hold the refcnt, the bpf prog may be released 1503 + * asynchronously and the id would be set to 0. 1504 + */ 1505 + id = data_race(prog->aux->id); 1506 + if (id == 0) 1507 + prog_cnt = 0; 1508 + 1509 + end: 1510 + rcu_read_unlock(); 1511 + 1512 + if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)) || 1513 + (id != 0 && copy_to_user(prog_ids, &id, sizeof(u32))) || 1514 + copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt))) 1515 + ret = -EFAULT; 1516 + 1517 + fdput(f); 1518 + return ret; 1456 1519 } 1457 1520 1458 1521 static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
+72 -6
net/core/xdp.c
··· 162 162 } 163 163 164 164 /* Returns 0 on success, negative on failure */ 165 - int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 166 - struct net_device *dev, u32 queue_index, unsigned int napi_id) 165 + int __xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, 166 + struct net_device *dev, u32 queue_index, 167 + unsigned int napi_id, u32 frag_size) 167 168 { 168 169 if (!dev) { 169 170 WARN(1, "Missing net_device from driver"); ··· 186 185 xdp_rxq->dev = dev; 187 186 xdp_rxq->queue_index = queue_index; 188 187 xdp_rxq->napi_id = napi_id; 188 + xdp_rxq->frag_size = frag_size; 189 189 190 190 xdp_rxq->reg_state = REG_STATE_REGISTERED; 191 191 return 0; 192 192 } 193 - EXPORT_SYMBOL_GPL(xdp_rxq_info_reg); 193 + EXPORT_SYMBOL_GPL(__xdp_rxq_info_reg); 194 194 195 195 void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq) 196 196 { ··· 371 369 * is used for those calls sites. Thus, allowing for faster recycling 372 370 * of xdp_frames/pages in those cases. 373 371 */ 374 - static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, 375 - struct xdp_buff *xdp) 372 + void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, 373 + struct xdp_buff *xdp) 376 374 { 377 375 struct xdp_mem_allocator *xa; 378 376 struct page *page; ··· 408 406 409 407 void xdp_return_frame(struct xdp_frame *xdpf) 410 408 { 409 + struct skb_shared_info *sinfo; 410 + int i; 411 + 412 + if (likely(!xdp_frame_has_frags(xdpf))) 413 + goto out; 414 + 415 + sinfo = xdp_get_shared_info_from_frame(xdpf); 416 + for (i = 0; i < sinfo->nr_frags; i++) { 417 + struct page *page = skb_frag_page(&sinfo->frags[i]); 418 + 419 + __xdp_return(page_address(page), &xdpf->mem, false, NULL); 420 + } 421 + out: 411 422 __xdp_return(xdpf->data, &xdpf->mem, false, NULL); 412 423 } 413 424 EXPORT_SYMBOL_GPL(xdp_return_frame); 414 425 415 426 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) 416 427 { 428 + struct skb_shared_info *sinfo; 429 + int i; 430 + 431 + if 
(likely(!xdp_frame_has_frags(xdpf))) 432 + goto out; 433 + 434 + sinfo = xdp_get_shared_info_from_frame(xdpf); 435 + for (i = 0; i < sinfo->nr_frags; i++) { 436 + struct page *page = skb_frag_page(&sinfo->frags[i]); 437 + 438 + __xdp_return(page_address(page), &xdpf->mem, true, NULL); 439 + } 440 + out: 417 441 __xdp_return(xdpf->data, &xdpf->mem, true, NULL); 418 442 } 419 443 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); ··· 475 447 struct xdp_mem_allocator *xa; 476 448 477 449 if (mem->type != MEM_TYPE_PAGE_POOL) { 478 - __xdp_return(xdpf->data, &xdpf->mem, false, NULL); 450 + xdp_return_frame(xdpf); 479 451 return; 480 452 } 481 453 ··· 494 466 bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); 495 467 } 496 468 469 + if (unlikely(xdp_frame_has_frags(xdpf))) { 470 + struct skb_shared_info *sinfo; 471 + int i; 472 + 473 + sinfo = xdp_get_shared_info_from_frame(xdpf); 474 + for (i = 0; i < sinfo->nr_frags; i++) { 475 + skb_frag_t *frag = &sinfo->frags[i]; 476 + 477 + bq->q[bq->count++] = skb_frag_address(frag); 478 + if (bq->count == XDP_BULK_QUEUE_SIZE) 479 + xdp_flush_frame_bulk(bq); 480 + } 481 + } 497 482 bq->q[bq->count++] = xdpf->data; 498 483 } 499 484 EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); 500 485 501 486 void xdp_return_buff(struct xdp_buff *xdp) 502 487 { 488 + struct skb_shared_info *sinfo; 489 + int i; 490 + 491 + if (likely(!xdp_buff_has_frags(xdp))) 492 + goto out; 493 + 494 + sinfo = xdp_get_shared_info_from_buff(xdp); 495 + for (i = 0; i < sinfo->nr_frags; i++) { 496 + struct page *page = skb_frag_page(&sinfo->frags[i]); 497 + 498 + __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp); 499 + } 500 + out: 503 501 __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); 504 502 } 505 503 ··· 615 561 struct sk_buff *skb, 616 562 struct net_device *dev) 617 563 { 564 + struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf); 618 565 unsigned int headroom, frame_size; 619 566 void *hard_start; 567 + u8 nr_frags; 568 + 
569 + /* xdp frags frame */ 570 + if (unlikely(xdp_frame_has_frags(xdpf))) 571 + nr_frags = sinfo->nr_frags; 620 572 621 573 /* Part of headroom was reserved to xdpf */ 622 574 headroom = sizeof(*xdpf) + xdpf->headroom; ··· 641 581 __skb_put(skb, xdpf->len); 642 582 if (xdpf->metasize) 643 583 skb_metadata_set(skb, xdpf->metasize); 584 + 585 + if (unlikely(xdp_frame_has_frags(xdpf))) 586 + xdp_update_skb_shared_info(skb, nr_frags, 587 + sinfo->xdp_frags_size, 588 + nr_frags * xdpf->frame_sz, 589 + xdp_frame_is_frag_pfmemalloc(xdpf)); 644 590 645 591 /* Essential SKB info: protocol and skb->dev */ 646 592 skb->protocol = eth_type_trans(skb, dev);
+13 -9
net/ipv4/bpf_tcp_ca.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 4 + #include <linux/init.h> 4 5 #include <linux/types.h> 5 6 #include <linux/bpf_verifier.h> 6 7 #include <linux/bpf.h> ··· 213 212 } 214 213 } 215 214 216 - BTF_SET_START(bpf_tcp_ca_kfunc_ids) 215 + BTF_SET_START(bpf_tcp_ca_check_kfunc_ids) 217 216 BTF_ID(func, tcp_reno_ssthresh) 218 217 BTF_ID(func, tcp_reno_cong_avoid) 219 218 BTF_ID(func, tcp_reno_undo_cwnd) 220 219 BTF_ID(func, tcp_slow_start) 221 220 BTF_ID(func, tcp_cong_avoid_ai) 222 - BTF_SET_END(bpf_tcp_ca_kfunc_ids) 221 + BTF_SET_END(bpf_tcp_ca_check_kfunc_ids) 223 222 224 - static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id, struct module *owner) 225 - { 226 - if (btf_id_set_contains(&bpf_tcp_ca_kfunc_ids, kfunc_btf_id)) 227 - return true; 228 - return bpf_check_mod_kfunc_call(&bpf_tcp_ca_kfunc_list, kfunc_btf_id, owner); 229 - } 223 + static const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { 224 + .owner = THIS_MODULE, 225 + .check_set = &bpf_tcp_ca_check_kfunc_ids, 226 + }; 230 227 231 228 static const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { 232 229 .get_func_proto = bpf_tcp_ca_get_func_proto, 233 230 .is_valid_access = bpf_tcp_ca_is_valid_access, 234 231 .btf_struct_access = bpf_tcp_ca_btf_struct_access, 235 - .check_kfunc_call = bpf_tcp_ca_check_kfunc_call, 236 232 }; 237 233 238 234 static int bpf_tcp_ca_init_member(const struct btf_type *t, ··· 298 300 .init = bpf_tcp_ca_init, 299 301 .name = "tcp_congestion_ops", 300 302 }; 303 + 304 + static int __init bpf_tcp_ca_kfunc_init(void) 305 + { 306 + return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); 307 + } 308 + late_initcall(bpf_tcp_ca_kfunc_init);
+10 -8
net/ipv4/tcp_bbr.c
··· 1154 1154 .set_state = bbr_set_state, 1155 1155 }; 1156 1156 1157 - BTF_SET_START(tcp_bbr_kfunc_ids) 1157 + BTF_SET_START(tcp_bbr_check_kfunc_ids) 1158 1158 #ifdef CONFIG_X86 1159 1159 #ifdef CONFIG_DYNAMIC_FTRACE 1160 1160 BTF_ID(func, bbr_init) ··· 1167 1167 BTF_ID(func, bbr_set_state) 1168 1168 #endif 1169 1169 #endif 1170 - BTF_SET_END(tcp_bbr_kfunc_ids) 1170 + BTF_SET_END(tcp_bbr_check_kfunc_ids) 1171 1171 1172 - static DEFINE_KFUNC_BTF_ID_SET(&tcp_bbr_kfunc_ids, tcp_bbr_kfunc_btf_set); 1172 + static const struct btf_kfunc_id_set tcp_bbr_kfunc_set = { 1173 + .owner = THIS_MODULE, 1174 + .check_set = &tcp_bbr_check_kfunc_ids, 1175 + }; 1173 1176 1174 1177 static int __init bbr_register(void) 1175 1178 { 1176 1179 int ret; 1177 1180 1178 1181 BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); 1179 - ret = tcp_register_congestion_control(&tcp_bbr_cong_ops); 1180 - if (ret) 1182 + 1183 + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_bbr_kfunc_set); 1184 + if (ret < 0) 1181 1185 return ret; 1182 - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); 1183 - return 0; 1186 + return tcp_register_congestion_control(&tcp_bbr_cong_ops); 1184 1187 } 1185 1188 1186 1189 static void __exit bbr_unregister(void) 1187 1190 { 1188 - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_bbr_kfunc_btf_set); 1189 1191 tcp_unregister_congestion_control(&tcp_bbr_cong_ops); 1190 1192 } 1191 1193
+9 -8
net/ipv4/tcp_cubic.c
··· 485 485 .name = "cubic", 486 486 }; 487 487 488 - BTF_SET_START(tcp_cubic_kfunc_ids) 488 + BTF_SET_START(tcp_cubic_check_kfunc_ids) 489 489 #ifdef CONFIG_X86 490 490 #ifdef CONFIG_DYNAMIC_FTRACE 491 491 BTF_ID(func, cubictcp_init) ··· 496 496 BTF_ID(func, cubictcp_acked) 497 497 #endif 498 498 #endif 499 - BTF_SET_END(tcp_cubic_kfunc_ids) 499 + BTF_SET_END(tcp_cubic_check_kfunc_ids) 500 500 501 - static DEFINE_KFUNC_BTF_ID_SET(&tcp_cubic_kfunc_ids, tcp_cubic_kfunc_btf_set); 501 + static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { 502 + .owner = THIS_MODULE, 503 + .check_set = &tcp_cubic_check_kfunc_ids, 504 + }; 502 505 503 506 static int __init cubictcp_register(void) 504 507 { ··· 537 534 /* divide by bic_scale and by constant Srtt (100ms) */ 538 535 do_div(cube_factor, bic_scale * 10); 539 536 540 - ret = tcp_register_congestion_control(&cubictcp); 541 - if (ret) 537 + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); 538 + if (ret < 0) 542 539 return ret; 543 - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); 544 - return 0; 540 + return tcp_register_congestion_control(&cubictcp); 545 541 } 546 542 547 543 static void __exit cubictcp_unregister(void) 548 544 { 549 - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_cubic_kfunc_btf_set); 550 545 tcp_unregister_congestion_control(&cubictcp); 551 546 } 552 547
+10 -8
net/ipv4/tcp_dctcp.c
··· 238 238 .name = "dctcp-reno", 239 239 }; 240 240 241 - BTF_SET_START(tcp_dctcp_kfunc_ids) 241 + BTF_SET_START(tcp_dctcp_check_kfunc_ids) 242 242 #ifdef CONFIG_X86 243 243 #ifdef CONFIG_DYNAMIC_FTRACE 244 244 BTF_ID(func, dctcp_init) ··· 249 249 BTF_ID(func, dctcp_state) 250 250 #endif 251 251 #endif 252 - BTF_SET_END(tcp_dctcp_kfunc_ids) 252 + BTF_SET_END(tcp_dctcp_check_kfunc_ids) 253 253 254 - static DEFINE_KFUNC_BTF_ID_SET(&tcp_dctcp_kfunc_ids, tcp_dctcp_kfunc_btf_set); 254 + static const struct btf_kfunc_id_set tcp_dctcp_kfunc_set = { 255 + .owner = THIS_MODULE, 256 + .check_set = &tcp_dctcp_check_kfunc_ids, 257 + }; 255 258 256 259 static int __init dctcp_register(void) 257 260 { 258 261 int ret; 259 262 260 263 BUILD_BUG_ON(sizeof(struct dctcp) > ICSK_CA_PRIV_SIZE); 261 - ret = tcp_register_congestion_control(&dctcp); 262 - if (ret) 264 + 265 + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_dctcp_kfunc_set); 266 + if (ret < 0) 263 267 return ret; 264 - register_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); 265 - return 0; 268 + return tcp_register_congestion_control(&dctcp); 266 269 } 267 270 268 271 static void __exit dctcp_unregister(void) 269 272 { 270 - unregister_kfunc_btf_id_set(&bpf_tcp_ca_kfunc_list, &tcp_dctcp_kfunc_btf_set); 271 273 tcp_unregister_congestion_control(&dctcp); 272 274 } 273 275
+5
net/netfilter/Makefile
··· 14 14 nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o 15 15 nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o 16 16 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o 17 + ifeq ($(CONFIG_NF_CONNTRACK),m) 18 + nf_conntrack-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_conntrack_bpf.o 19 + else ifeq ($(CONFIG_NF_CONNTRACK),y) 20 + nf_conntrack-$(CONFIG_DEBUG_INFO_BTF) += nf_conntrack_bpf.o 21 + endif 17 22 18 23 obj-$(CONFIG_NETFILTER) = netfilter.o 19 24
+257
net/netfilter/nf_conntrack_bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Unstable Conntrack Helpers for XDP and TC-BPF hook 3 + * 4 + * These are called from the XDP and SCHED_CLS BPF programs. Note that it is 5 + * allowed to break compatibility for these functions since the interface they 6 + * are exposed through to BPF programs is explicitly unstable. 7 + */ 8 + 9 + #include <linux/bpf.h> 10 + #include <linux/btf.h> 11 + #include <linux/types.h> 12 + #include <linux/btf_ids.h> 13 + #include <linux/net_namespace.h> 14 + #include <net/netfilter/nf_conntrack.h> 15 + #include <net/netfilter/nf_conntrack_core.h> 16 + 17 + /* bpf_ct_opts - Options for CT lookup helpers 18 + * 19 + * Members: 20 + * @netns_id - Specify the network namespace for lookup 21 + * Values: 22 + * BPF_F_CURRENT_NETNS (-1) 23 + * Use namespace associated with ctx (xdp_md, __sk_buff) 24 + * [0, S32_MAX] 25 + * Network Namespace ID 26 + * @error - Out parameter, set for any errors encountered 27 + * Values: 28 + * -EINVAL - Passed NULL for bpf_tuple pointer 29 + * -EINVAL - opts->reserved is not 0 30 + * -EINVAL - netns_id is less than -1 31 + * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12) 32 + * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP 33 + * -ENONET - No network namespace found for netns_id 34 + * -ENOENT - Conntrack lookup could not find entry for tuple 35 + * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4) 36 + * or sizeof(tuple->ipv6) 37 + * @l4proto - Layer 4 protocol 38 + * Values: 39 + * IPPROTO_TCP, IPPROTO_UDP 40 + * @reserved - Reserved member, will be reused for more options in future 41 + * Values: 42 + * 0 43 + */ 44 + struct bpf_ct_opts { 45 + s32 netns_id; 46 + s32 error; 47 + u8 l4proto; 48 + u8 reserved[3]; 49 + }; 50 + 51 + enum { 52 + NF_BPF_CT_OPTS_SZ = 12, 53 + }; 54 + 55 + static struct nf_conn *__bpf_nf_ct_lookup(struct net *net, 56 + struct bpf_sock_tuple *bpf_tuple, 57 + u32 tuple_len, u8 protonum, 58 + s32 netns_id) 59 + { 60 + struct 
nf_conntrack_tuple_hash *hash; 61 + struct nf_conntrack_tuple tuple; 62 + 63 + if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP)) 64 + return ERR_PTR(-EPROTO); 65 + if (unlikely(netns_id < BPF_F_CURRENT_NETNS)) 66 + return ERR_PTR(-EINVAL); 67 + 68 + memset(&tuple, 0, sizeof(tuple)); 69 + switch (tuple_len) { 70 + case sizeof(bpf_tuple->ipv4): 71 + tuple.src.l3num = AF_INET; 72 + tuple.src.u3.ip = bpf_tuple->ipv4.saddr; 73 + tuple.src.u.tcp.port = bpf_tuple->ipv4.sport; 74 + tuple.dst.u3.ip = bpf_tuple->ipv4.daddr; 75 + tuple.dst.u.tcp.port = bpf_tuple->ipv4.dport; 76 + break; 77 + case sizeof(bpf_tuple->ipv6): 78 + tuple.src.l3num = AF_INET6; 79 + memcpy(tuple.src.u3.ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr)); 80 + tuple.src.u.tcp.port = bpf_tuple->ipv6.sport; 81 + memcpy(tuple.dst.u3.ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr)); 82 + tuple.dst.u.tcp.port = bpf_tuple->ipv6.dport; 83 + break; 84 + default: 85 + return ERR_PTR(-EAFNOSUPPORT); 86 + } 87 + 88 + tuple.dst.protonum = protonum; 89 + 90 + if (netns_id >= 0) { 91 + net = get_net_ns_by_id(net, netns_id); 92 + if (unlikely(!net)) 93 + return ERR_PTR(-ENONET); 94 + } 95 + 96 + hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple); 97 + if (netns_id >= 0) 98 + put_net(net); 99 + if (!hash) 100 + return ERR_PTR(-ENOENT); 101 + return nf_ct_tuplehash_to_ctrack(hash); 102 + } 103 + 104 + __diag_push(); 105 + __diag_ignore(GCC, 8, "-Wmissing-prototypes", 106 + "Global functions as their definitions will be in nf_conntrack BTF"); 107 + 108 + /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a 109 + * reference to it 110 + * 111 + * Parameters: 112 + * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program 113 + * Cannot be NULL 114 + * @bpf_tuple - Pointer to memory representing the tuple to look up 115 + * Cannot be NULL 116 + * @tuple__sz - Length of the tuple structure 117 + * Must be one of sizeof(bpf_tuple->ipv4) or 118 + * 
sizeof(bpf_tuple->ipv6) 119 + * @opts - Additional options for lookup (documented above) 120 + * Cannot be NULL 121 + * @opts__sz - Length of the bpf_ct_opts structure 122 + * Must be NF_BPF_CT_OPTS_SZ (12) 123 + */ 124 + struct nf_conn * 125 + bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple, 126 + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 127 + { 128 + struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx; 129 + struct net *caller_net; 130 + struct nf_conn *nfct; 131 + 132 + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); 133 + 134 + if (!opts) 135 + return NULL; 136 + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || 137 + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { 138 + opts->error = -EINVAL; 139 + return NULL; 140 + } 141 + caller_net = dev_net(ctx->rxq->dev); 142 + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, 143 + opts->netns_id); 144 + if (IS_ERR(nfct)) { 145 + opts->error = PTR_ERR(nfct); 146 + return NULL; 147 + } 148 + return nfct; 149 + } 150 + 151 + /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a 152 + * reference to it 153 + * 154 + * Parameters: 155 + * @skb_ctx - Pointer to ctx (__sk_buff) in TC program 156 + * Cannot be NULL 157 + * @bpf_tuple - Pointer to memory representing the tuple to look up 158 + * Cannot be NULL 159 + * @tuple__sz - Length of the tuple structure 160 + * Must be one of sizeof(bpf_tuple->ipv4) or 161 + * sizeof(bpf_tuple->ipv6) 162 + * @opts - Additional options for lookup (documented above) 163 + * Cannot be NULL 164 + * @opts__sz - Length of the bpf_ct_opts structure 165 + * Must be NF_BPF_CT_OPTS_SZ (12) 166 + */ 167 + struct nf_conn * 168 + bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple, 169 + u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz) 170 + { 171 + struct sk_buff *skb = (struct sk_buff *)skb_ctx; 172 + struct net *caller_net; 173 + struct nf_conn *nfct; 
174 + 175 + BUILD_BUG_ON(sizeof(struct bpf_ct_opts) != NF_BPF_CT_OPTS_SZ); 176 + 177 + if (!opts) 178 + return NULL; 179 + if (!bpf_tuple || opts->reserved[0] || opts->reserved[1] || 180 + opts->reserved[2] || opts__sz != NF_BPF_CT_OPTS_SZ) { 181 + opts->error = -EINVAL; 182 + return NULL; 183 + } 184 + caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk); 185 + nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts->l4proto, 186 + opts->netns_id); 187 + if (IS_ERR(nfct)) { 188 + opts->error = PTR_ERR(nfct); 189 + return NULL; 190 + } 191 + return nfct; 192 + } 193 + 194 + /* bpf_ct_release - Release acquired nf_conn object 195 + * 196 + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects 197 + * the program if any references remain in the program in all of the explored 198 + * states. 199 + * 200 + * Parameters: 201 + * @nf_conn - Pointer to referenced nf_conn object, obtained using 202 + * bpf_xdp_ct_lookup or bpf_skb_ct_lookup. 203 + */ 204 + void bpf_ct_release(struct nf_conn *nfct) 205 + { 206 + if (!nfct) 207 + return; 208 + nf_ct_put(nfct); 209 + } 210 + 211 + __diag_pop() 212 + 213 + BTF_SET_START(nf_ct_xdp_check_kfunc_ids) 214 + BTF_ID(func, bpf_xdp_ct_lookup) 215 + BTF_ID(func, bpf_ct_release) 216 + BTF_SET_END(nf_ct_xdp_check_kfunc_ids) 217 + 218 + BTF_SET_START(nf_ct_tc_check_kfunc_ids) 219 + BTF_ID(func, bpf_skb_ct_lookup) 220 + BTF_ID(func, bpf_ct_release) 221 + BTF_SET_END(nf_ct_tc_check_kfunc_ids) 222 + 223 + BTF_SET_START(nf_ct_acquire_kfunc_ids) 224 + BTF_ID(func, bpf_xdp_ct_lookup) 225 + BTF_ID(func, bpf_skb_ct_lookup) 226 + BTF_SET_END(nf_ct_acquire_kfunc_ids) 227 + 228 + BTF_SET_START(nf_ct_release_kfunc_ids) 229 + BTF_ID(func, bpf_ct_release) 230 + BTF_SET_END(nf_ct_release_kfunc_ids) 231 + 232 + /* Both sets are identical */ 233 + #define nf_ct_ret_null_kfunc_ids nf_ct_acquire_kfunc_ids 234 + 235 + static const struct btf_kfunc_id_set nf_conntrack_xdp_kfunc_set = { 236 + .owner = THIS_MODULE, 237 + 
.check_set = &nf_ct_xdp_check_kfunc_ids, 238 + .acquire_set = &nf_ct_acquire_kfunc_ids, 239 + .release_set = &nf_ct_release_kfunc_ids, 240 + .ret_null_set = &nf_ct_ret_null_kfunc_ids, 241 + }; 242 + 243 + static const struct btf_kfunc_id_set nf_conntrack_tc_kfunc_set = { 244 + .owner = THIS_MODULE, 245 + .check_set = &nf_ct_tc_check_kfunc_ids, 246 + .acquire_set = &nf_ct_acquire_kfunc_ids, 247 + .release_set = &nf_ct_release_kfunc_ids, 248 + .ret_null_set = &nf_ct_ret_null_kfunc_ids, 249 + }; 250 + 251 + int register_nf_conntrack_bpf(void) 252 + { 253 + int ret; 254 + 255 + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_xdp_kfunc_set); 256 + return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_tc_kfunc_set); 257 + }
+8
net/netfilter/nf_conntrack_core.c
··· 34 34 #include <linux/rculist_nulls.h> 35 35 36 36 #include <net/netfilter/nf_conntrack.h> 37 + #include <net/netfilter/nf_conntrack_bpf.h> 37 38 #include <net/netfilter/nf_conntrack_l4proto.h> 38 39 #include <net/netfilter/nf_conntrack_expect.h> 39 40 #include <net/netfilter/nf_conntrack_helper.h> ··· 2751 2750 conntrack_gc_work_init(&conntrack_gc_work); 2752 2751 queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); 2753 2752 2753 + ret = register_nf_conntrack_bpf(); 2754 + if (ret < 0) 2755 + goto err_kfunc; 2756 + 2754 2757 return 0; 2755 2758 2759 + err_kfunc: 2760 + cancel_delayed_work_sync(&conntrack_gc_work.dwork); 2761 + nf_conntrack_proto_fini(); 2756 2762 err_proto: 2757 2763 nf_conntrack_seqadj_fini(); 2758 2764 err_seqadj:
+222 -28
net/unix/af_unix.c
··· 3240 3240 return sk; 3241 3241 } 3242 3242 3243 - static struct sock *unix_next_socket(struct seq_file *seq, 3244 - struct sock *sk, 3245 - loff_t *pos) 3243 + static struct sock *unix_get_first(struct seq_file *seq, loff_t *pos) 3246 3244 { 3247 3245 unsigned long bucket = get_bucket(*pos); 3246 + struct sock *sk; 3248 3247 3249 - while (sk > (struct sock *)SEQ_START_TOKEN) { 3250 - sk = sk_next(sk); 3251 - if (!sk) 3252 - goto next_bucket; 3253 - if (sock_net(sk) == seq_file_net(seq)) 3254 - return sk; 3255 - } 3256 - 3257 - do { 3248 + while (bucket < ARRAY_SIZE(unix_socket_table)) { 3258 3249 spin_lock(&unix_table_locks[bucket]); 3250 + 3259 3251 sk = unix_from_bucket(seq, pos); 3260 3252 if (sk) 3261 3253 return sk; 3262 3254 3263 - next_bucket: 3264 - spin_unlock(&unix_table_locks[bucket++]); 3265 - *pos = set_bucket_offset(bucket, 1); 3266 - } while (bucket < ARRAY_SIZE(unix_socket_table)); 3255 + spin_unlock(&unix_table_locks[bucket]); 3256 + 3257 + *pos = set_bucket_offset(++bucket, 1); 3258 + } 3267 3259 3268 3260 return NULL; 3261 + } 3262 + 3263 + static struct sock *unix_get_next(struct seq_file *seq, struct sock *sk, 3264 + loff_t *pos) 3265 + { 3266 + unsigned long bucket = get_bucket(*pos); 3267 + 3268 + for (sk = sk_next(sk); sk; sk = sk_next(sk)) 3269 + if (sock_net(sk) == seq_file_net(seq)) 3270 + return sk; 3271 + 3272 + spin_unlock(&unix_table_locks[bucket]); 3273 + 3274 + *pos = set_bucket_offset(++bucket, 1); 3275 + 3276 + return unix_get_first(seq, pos); 3269 3277 } 3270 3278 3271 3279 static void *unix_seq_start(struct seq_file *seq, loff_t *pos) ··· 3281 3273 if (!*pos) 3282 3274 return SEQ_START_TOKEN; 3283 3275 3284 - if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table)) 3285 - return NULL; 3286 - 3287 - return unix_next_socket(seq, NULL, pos); 3276 + return unix_get_first(seq, pos); 3288 3277 } 3289 3278 3290 3279 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3291 3280 { 3292 3281 ++*pos; 3293 - return 
unix_next_socket(seq, v, pos); 3282 + 3283 + if (v == SEQ_START_TOKEN) 3284 + return unix_get_first(seq, pos); 3285 + 3286 + return unix_get_next(seq, v, pos); 3294 3287 } 3295 3288 3296 3289 static void unix_seq_stop(struct seq_file *seq, void *v) ··· 3356 3347 }; 3357 3348 3358 3349 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL) 3350 + struct bpf_unix_iter_state { 3351 + struct seq_net_private p; 3352 + unsigned int cur_sk; 3353 + unsigned int end_sk; 3354 + unsigned int max_sk; 3355 + struct sock **batch; 3356 + bool st_bucket_done; 3357 + }; 3358 + 3359 3359 struct bpf_iter__unix { 3360 3360 __bpf_md_ptr(struct bpf_iter_meta *, meta); 3361 3361 __bpf_md_ptr(struct unix_sock *, unix_sk); ··· 3383 3365 return bpf_iter_run_prog(prog, &ctx); 3384 3366 } 3385 3367 3368 + static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk) 3369 + 3370 + { 3371 + struct bpf_unix_iter_state *iter = seq->private; 3372 + unsigned int expected = 1; 3373 + struct sock *sk; 3374 + 3375 + sock_hold(start_sk); 3376 + iter->batch[iter->end_sk++] = start_sk; 3377 + 3378 + for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) { 3379 + if (sock_net(sk) != seq_file_net(seq)) 3380 + continue; 3381 + 3382 + if (iter->end_sk < iter->max_sk) { 3383 + sock_hold(sk); 3384 + iter->batch[iter->end_sk++] = sk; 3385 + } 3386 + 3387 + expected++; 3388 + } 3389 + 3390 + spin_unlock(&unix_table_locks[start_sk->sk_hash]); 3391 + 3392 + return expected; 3393 + } 3394 + 3395 + static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter) 3396 + { 3397 + while (iter->cur_sk < iter->end_sk) 3398 + sock_put(iter->batch[iter->cur_sk++]); 3399 + } 3400 + 3401 + static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter, 3402 + unsigned int new_batch_sz) 3403 + { 3404 + struct sock **new_batch; 3405 + 3406 + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, 3407 + GFP_USER | __GFP_NOWARN); 3408 + if (!new_batch) 3409 + return -ENOMEM; 3410 + 3411 + 
bpf_iter_unix_put_batch(iter); 3412 + kvfree(iter->batch); 3413 + iter->batch = new_batch; 3414 + iter->max_sk = new_batch_sz; 3415 + 3416 + return 0; 3417 + } 3418 + 3419 + static struct sock *bpf_iter_unix_batch(struct seq_file *seq, 3420 + loff_t *pos) 3421 + { 3422 + struct bpf_unix_iter_state *iter = seq->private; 3423 + unsigned int expected; 3424 + bool resized = false; 3425 + struct sock *sk; 3426 + 3427 + if (iter->st_bucket_done) 3428 + *pos = set_bucket_offset(get_bucket(*pos) + 1, 1); 3429 + 3430 + again: 3431 + /* Get a new batch */ 3432 + iter->cur_sk = 0; 3433 + iter->end_sk = 0; 3434 + 3435 + sk = unix_get_first(seq, pos); 3436 + if (!sk) 3437 + return NULL; /* Done */ 3438 + 3439 + expected = bpf_iter_unix_hold_batch(seq, sk); 3440 + 3441 + if (iter->end_sk == expected) { 3442 + iter->st_bucket_done = true; 3443 + return sk; 3444 + } 3445 + 3446 + if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) { 3447 + resized = true; 3448 + goto again; 3449 + } 3450 + 3451 + return sk; 3452 + } 3453 + 3454 + static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos) 3455 + { 3456 + if (!*pos) 3457 + return SEQ_START_TOKEN; 3458 + 3459 + /* bpf iter does not support lseek, so it always 3460 + * continue from where it was stop()-ped. 3461 + */ 3462 + return bpf_iter_unix_batch(seq, pos); 3463 + } 3464 + 3465 + static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3466 + { 3467 + struct bpf_unix_iter_state *iter = seq->private; 3468 + struct sock *sk; 3469 + 3470 + /* Whenever seq_next() is called, the iter->cur_sk is 3471 + * done with seq_show(), so advance to the next sk in 3472 + * the batch. 
3473 + */ 3474 + if (iter->cur_sk < iter->end_sk) 3475 + sock_put(iter->batch[iter->cur_sk++]); 3476 + 3477 + ++*pos; 3478 + 3479 + if (iter->cur_sk < iter->end_sk) 3480 + sk = iter->batch[iter->cur_sk]; 3481 + else 3482 + sk = bpf_iter_unix_batch(seq, pos); 3483 + 3484 + return sk; 3485 + } 3486 + 3386 3487 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v) 3387 3488 { 3388 3489 struct bpf_iter_meta meta; 3389 3490 struct bpf_prog *prog; 3390 3491 struct sock *sk = v; 3391 3492 uid_t uid; 3493 + bool slow; 3494 + int ret; 3392 3495 3393 3496 if (v == SEQ_START_TOKEN) 3394 3497 return 0; 3395 3498 3499 + slow = lock_sock_fast(sk); 3500 + 3501 + if (unlikely(sk_unhashed(sk))) { 3502 + ret = SEQ_SKIP; 3503 + goto unlock; 3504 + } 3505 + 3396 3506 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3397 3507 meta.seq = seq; 3398 3508 prog = bpf_iter_get_info(&meta, false); 3399 - return unix_prog_seq_show(prog, &meta, v, uid); 3509 + ret = unix_prog_seq_show(prog, &meta, v, uid); 3510 + unlock: 3511 + unlock_sock_fast(sk, slow); 3512 + return ret; 3400 3513 } 3401 3514 3402 3515 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v) 3403 3516 { 3517 + struct bpf_unix_iter_state *iter = seq->private; 3404 3518 struct bpf_iter_meta meta; 3405 3519 struct bpf_prog *prog; 3406 3520 ··· 3543 3393 (void)unix_prog_seq_show(prog, &meta, v, 0); 3544 3394 } 3545 3395 3546 - unix_seq_stop(seq, v); 3396 + if (iter->cur_sk < iter->end_sk) 3397 + bpf_iter_unix_put_batch(iter); 3547 3398 } 3548 3399 3549 3400 static const struct seq_operations bpf_iter_unix_seq_ops = { 3550 - .start = unix_seq_start, 3551 - .next = unix_seq_next, 3401 + .start = bpf_iter_unix_seq_start, 3402 + .next = bpf_iter_unix_seq_next, 3552 3403 .stop = bpf_iter_unix_seq_stop, 3553 3404 .show = bpf_iter_unix_seq_show, 3554 3405 }; ··· 3598 3447 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta, 3599 3448 struct unix_sock *unix_sk, uid_t uid) 3600 3449 3450 + #define 
INIT_BATCH_SZ 16 3451 + 3452 + static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux) 3453 + { 3454 + struct bpf_unix_iter_state *iter = priv_data; 3455 + int err; 3456 + 3457 + err = bpf_iter_init_seq_net(priv_data, aux); 3458 + if (err) 3459 + return err; 3460 + 3461 + err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ); 3462 + if (err) { 3463 + bpf_iter_fini_seq_net(priv_data); 3464 + return err; 3465 + } 3466 + 3467 + return 0; 3468 + } 3469 + 3470 + static void bpf_iter_fini_unix(void *priv_data) 3471 + { 3472 + struct bpf_unix_iter_state *iter = priv_data; 3473 + 3474 + bpf_iter_fini_seq_net(priv_data); 3475 + kvfree(iter->batch); 3476 + } 3477 + 3601 3478 static const struct bpf_iter_seq_info unix_seq_info = { 3602 3479 .seq_ops = &bpf_iter_unix_seq_ops, 3603 - .init_seq_private = bpf_iter_init_seq_net, 3604 - .fini_seq_private = bpf_iter_fini_seq_net, 3605 - .seq_priv_size = sizeof(struct seq_net_private), 3480 + .init_seq_private = bpf_iter_init_unix, 3481 + .fini_seq_private = bpf_iter_fini_unix, 3482 + .seq_priv_size = sizeof(struct bpf_unix_iter_state), 3606 3483 }; 3484 + 3485 + static const struct bpf_func_proto * 3486 + bpf_iter_unix_get_func_proto(enum bpf_func_id func_id, 3487 + const struct bpf_prog *prog) 3488 + { 3489 + switch (func_id) { 3490 + case BPF_FUNC_setsockopt: 3491 + return &bpf_sk_setsockopt_proto; 3492 + case BPF_FUNC_getsockopt: 3493 + return &bpf_sk_getsockopt_proto; 3494 + default: 3495 + return NULL; 3496 + } 3497 + } 3607 3498 3608 3499 static struct bpf_iter_reg unix_reg_info = { 3609 3500 .target = "unix", ··· 3654 3461 { offsetof(struct bpf_iter__unix, unix_sk), 3655 3462 PTR_TO_BTF_ID_OR_NULL }, 3656 3463 }, 3464 + .get_func_proto = bpf_iter_unix_get_func_proto, 3657 3465 .seq_info = &unix_seq_info, 3658 3466 }; 3659 3467
+4 -4
samples/bpf/xdp1_user.c
··· 26 26 { 27 27 __u32 curr_prog_id = 0; 28 28 29 - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { 30 - printf("bpf_get_link_xdp_id failed\n"); 29 + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 30 + printf("bpf_xdp_query_id failed\n"); 31 31 exit(1); 32 32 } 33 33 if (prog_id == curr_prog_id) 34 - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 34 + bpf_xdp_detach(ifindex, xdp_flags, NULL); 35 35 else if (!curr_prog_id) 36 36 printf("couldn't find a prog id on a given interface\n"); 37 37 else ··· 143 143 signal(SIGINT, int_exit); 144 144 signal(SIGTERM, int_exit); 145 145 146 - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 146 + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 147 147 printf("link set xdp fd failed\n"); 148 148 return 1; 149 149 }
+4 -4
samples/bpf/xdp_adjust_tail_user.c
··· 34 34 __u32 curr_prog_id = 0; 35 35 36 36 if (ifindex > -1) { 37 - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { 38 - printf("bpf_get_link_xdp_id failed\n"); 37 + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 38 + printf("bpf_xdp_query_id failed\n"); 39 39 exit(1); 40 40 } 41 41 if (prog_id == curr_prog_id) 42 - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 42 + bpf_xdp_detach(ifindex, xdp_flags, NULL); 43 43 else if (!curr_prog_id) 44 44 printf("couldn't find a prog id on a given iface\n"); 45 45 else ··· 173 173 signal(SIGINT, int_exit); 174 174 signal(SIGTERM, int_exit); 175 175 176 - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 176 + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 177 177 printf("link set xdp fd failed\n"); 178 178 return 1; 179 179 }
+2 -2
samples/bpf/xdp_fwd_user.c
··· 33 33 { 34 34 int err; 35 35 36 - err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags); 36 + err = bpf_xdp_attach(idx, prog_fd, xdp_flags, NULL); 37 37 if (err < 0) { 38 38 printf("ERROR: failed to attach program to %s\n", name); 39 39 return err; ··· 51 51 { 52 52 int err; 53 53 54 - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); 54 + err = bpf_xdp_detach(idx, xdp_flags, NULL); 55 55 if (err < 0) 56 56 printf("ERROR: failed to detach program from %s\n", name); 57 57
+5 -5
samples/bpf/xdp_router_ipv4_user.c
··· 43 43 int i = 0; 44 44 45 45 for (i = 0; i < total_ifindex; i++) { 46 - if (bpf_get_link_xdp_id(ifindex_list[i], &prog_id, flags)) { 47 - printf("bpf_get_link_xdp_id on iface %d failed\n", 46 + if (bpf_xdp_query_id(ifindex_list[i], flags, &prog_id)) { 47 + printf("bpf_xdp_query_id on iface %d failed\n", 48 48 ifindex_list[i]); 49 49 exit(1); 50 50 } 51 51 if (prog_id_list[i] == prog_id) 52 - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 52 + bpf_xdp_detach(ifindex_list[i], flags, NULL); 53 53 else if (!prog_id) 54 54 printf("couldn't find a prog id on iface %d\n", 55 55 ifindex_list[i]); ··· 716 716 } 717 717 prog_id_list = (__u32 *)calloc(total_ifindex, sizeof(__u32 *)); 718 718 for (i = 0; i < total_ifindex; i++) { 719 - if (bpf_set_link_xdp_fd(ifindex_list[i], prog_fd, flags) < 0) { 719 + if (bpf_xdp_attach(ifindex_list[i], prog_fd, flags, NULL) < 0) { 720 720 printf("link set xdp fd failed\n"); 721 721 int recovery_index = i; 722 722 723 723 for (i = 0; i < recovery_index; i++) 724 - bpf_set_link_xdp_fd(ifindex_list[i], -1, flags); 724 + bpf_xdp_detach(ifindex_list[i], flags, NULL); 725 725 726 726 return 1; 727 727 }
+9 -9
samples/bpf/xdp_rxq_info_user.c
··· 62 62 __u32 curr_prog_id = 0; 63 63 64 64 if (ifindex > -1) { 65 - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { 66 - printf("bpf_get_link_xdp_id failed\n"); 65 + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 66 + printf("bpf_xdp_query_id failed\n"); 67 67 exit(EXIT_FAIL); 68 68 } 69 69 if (prog_id == curr_prog_id) { 70 70 fprintf(stderr, 71 71 "Interrupted: Removing XDP program on ifindex:%d device:%s\n", 72 72 ifindex, ifname); 73 - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 73 + bpf_xdp_detach(ifindex, xdp_flags, NULL); 74 74 } else if (!curr_prog_id) { 75 75 printf("couldn't find a prog id on a given iface\n"); 76 76 } else { ··· 209 209 210 210 static struct record *alloc_record_per_rxq(void) 211 211 { 212 - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; 212 + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 213 213 struct record *array; 214 214 215 215 array = calloc(nr_rxqs, sizeof(struct record)); ··· 222 222 223 223 static struct stats_record *alloc_stats_record(void) 224 224 { 225 - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; 225 + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 226 226 struct stats_record *rec; 227 227 int i; 228 228 ··· 241 241 242 242 static void free_stats_record(struct stats_record *r) 243 243 { 244 - unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; 244 + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 245 245 int i; 246 246 247 247 for (i = 0; i < nr_rxqs; i++) ··· 289 289 map_collect_percpu(fd, 0, &rec->stats); 290 290 291 291 fd = bpf_map__fd(rx_queue_index_map); 292 - max_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; 292 + max_rxqs = bpf_map__max_entries(rx_queue_index_map); 293 293 for (i = 0; i < max_rxqs; i++) 294 294 map_collect_percpu(fd, i, &rec->rxq[i]); 295 295 } ··· 335 335 struct stats_record *stats_prev, 336 336 int action, __u32 cfg_opt) 337 337 { 338 - 
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; 338 + unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 339 339 unsigned int nr_cpus = bpf_num_possible_cpus(); 340 340 double pps = 0, err = 0; 341 341 struct record *rec, *prev; ··· 582 582 signal(SIGINT, int_exit); 583 583 signal(SIGTERM, int_exit); 584 584 585 - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 585 + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 586 586 fprintf(stderr, "link set xdp fd failed\n"); 587 587 return EXIT_FAIL_XDP; 588 588 }
+4 -4
samples/bpf/xdp_sample_pkts_user.c
··· 30 30 __u32 info_len = sizeof(info); 31 31 int err; 32 32 33 - err = bpf_set_link_xdp_fd(idx, fd, xdp_flags); 33 + err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); 34 34 if (err < 0) { 35 35 printf("ERROR: failed to attach program to %s\n", name); 36 36 return err; ··· 51 51 __u32 curr_prog_id = 0; 52 52 int err = 0; 53 53 54 - err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags); 54 + err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); 55 55 if (err) { 56 - printf("bpf_get_link_xdp_id failed\n"); 56 + printf("bpf_xdp_query_id failed\n"); 57 57 return err; 58 58 } 59 59 if (prog_id == curr_prog_id) { 60 - err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); 60 + err = bpf_xdp_detach(idx, xdp_flags, NULL); 61 61 if (err < 0) 62 62 printf("ERROR: failed to detach prog from %s\n", name); 63 63 } else if (!curr_prog_id) {
+4 -5
samples/bpf/xdp_sample_user.c
··· 1265 1265 int ret; 1266 1266 1267 1267 if (prog_id) { 1268 - ret = bpf_get_link_xdp_id(ifindex, &cur_prog_id, xdp_flags); 1268 + ret = bpf_xdp_query_id(ifindex, xdp_flags, &cur_prog_id); 1269 1269 if (ret < 0) 1270 1270 return -errno; 1271 1271 ··· 1278 1278 } 1279 1279 } 1280 1280 1281 - return bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 1281 + return bpf_xdp_detach(ifindex, xdp_flags, NULL); 1282 1282 } 1283 1283 1284 1284 int sample_install_xdp(struct bpf_program *xdp_prog, int ifindex, bool generic, ··· 1295 1295 1296 1296 xdp_flags |= !force ? XDP_FLAGS_UPDATE_IF_NOEXIST : 0; 1297 1297 xdp_flags |= generic ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; 1298 - ret = bpf_set_link_xdp_fd(ifindex, bpf_program__fd(xdp_prog), 1299 - xdp_flags); 1298 + ret = bpf_xdp_attach(ifindex, bpf_program__fd(xdp_prog), xdp_flags, NULL); 1300 1299 if (ret < 0) { 1301 1300 ret = -errno; 1302 1301 fprintf(stderr, ··· 1307 1308 return ret; 1308 1309 } 1309 1310 1310 - ret = bpf_get_link_xdp_id(ifindex, &prog_id, xdp_flags); 1311 + ret = bpf_xdp_query_id(ifindex, xdp_flags, &prog_id); 1311 1312 if (ret < 0) { 1312 1313 ret = -errno; 1313 1314 fprintf(stderr,
+5 -5
samples/bpf/xdp_tx_iptunnel_user.c
··· 32 32 __u32 curr_prog_id = 0; 33 33 34 34 if (ifindex > -1) { 35 - if (bpf_get_link_xdp_id(ifindex, &curr_prog_id, xdp_flags)) { 36 - printf("bpf_get_link_xdp_id failed\n"); 35 + if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 36 + printf("bpf_xdp_query_id failed\n"); 37 37 exit(1); 38 38 } 39 39 if (prog_id == curr_prog_id) 40 - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 40 + bpf_xdp_detach(ifindex, xdp_flags, NULL); 41 41 else if (!curr_prog_id) 42 42 printf("couldn't find a prog id on a given iface\n"); 43 43 else ··· 288 288 } 289 289 } 290 290 291 - if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) { 291 + if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 292 292 printf("link set xdp fd failed\n"); 293 293 return 1; 294 294 } ··· 302 302 303 303 poll_stats(kill_after_s); 304 304 305 - bpf_set_link_xdp_fd(ifindex, -1, xdp_flags); 305 + bpf_xdp_detach(ifindex, xdp_flags, NULL); 306 306 307 307 return 0; 308 308 }
+1 -1
samples/bpf/xdpsock_ctrl_proc.c
··· 173 173 unlink(SOCKET_NAME); 174 174 175 175 /* Unset fd for given ifindex */ 176 - err = bpf_set_link_xdp_fd(ifindex, -1, 0); 176 + err = bpf_xdp_detach(ifindex, 0, NULL); 177 177 if (err) { 178 178 fprintf(stderr, "Error when unsetting bpf prog_fd for ifindex(%d)\n", ifindex); 179 179 return err;
+5 -5
samples/bpf/xdpsock_user.c
··· 571 571 { 572 572 u32 curr_prog_id = 0; 573 573 574 - if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { 575 - printf("bpf_get_link_xdp_id failed\n"); 574 + if (bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &curr_prog_id)) { 575 + printf("bpf_xdp_query_id failed\n"); 576 576 exit(EXIT_FAILURE); 577 577 } 578 578 579 579 if (prog_id == curr_prog_id) 580 - bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); 580 + bpf_xdp_detach(opt_ifindex, opt_xdp_flags, NULL); 581 581 else if (!curr_prog_id) 582 582 printf("couldn't find a prog id on a given interface\n"); 583 583 else ··· 1027 1027 if (ret) 1028 1028 exit_with_error(-ret); 1029 1029 1030 - ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); 1030 + ret = bpf_xdp_query_id(opt_ifindex, opt_xdp_flags, &prog_id); 1031 1031 if (ret) 1032 1032 exit_with_error(-ret); 1033 1033 ··· 1760 1760 exit(EXIT_FAILURE); 1761 1761 } 1762 1762 1763 - if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { 1763 + if (bpf_xdp_attach(opt_ifindex, prog_fd, opt_xdp_flags, NULL) < 0) { 1764 1764 fprintf(stderr, "ERROR: link set xdp fd failed\n"); 1765 1765 exit(EXIT_FAILURE); 1766 1766 }
+2 -2
samples/bpf/xsk_fwd.c
··· 974 974 int i; 975 975 976 976 for (i = 0 ; i < n_ports; i++) 977 - bpf_set_link_xdp_fd(if_nametoindex(port_params[i].iface), -1, 978 - port_params[i].xsk_cfg.xdp_flags); 977 + bpf_xdp_detach(if_nametoindex(port_params[i].iface), 978 + port_params[i].xsk_cfg.xdp_flags, NULL); 979 979 } 980 980 981 981 int main(int argc, char **argv)
+104 -20
scripts/bpf_doc.py
··· 87 87 self.line = '' 88 88 self.helpers = [] 89 89 self.commands = [] 90 + self.desc_unique_helpers = set() 91 + self.define_unique_helpers = [] 92 + self.desc_syscalls = [] 93 + self.enum_syscalls = [] 90 94 91 95 def parse_element(self): 92 96 proto = self.parse_symbol() 93 - desc = self.parse_desc() 94 - ret = self.parse_ret() 97 + desc = self.parse_desc(proto) 98 + ret = self.parse_ret(proto) 95 99 return APIElement(proto=proto, desc=desc, ret=ret) 96 100 97 101 def parse_helper(self): 98 102 proto = self.parse_proto() 99 - desc = self.parse_desc() 100 - ret = self.parse_ret() 103 + desc = self.parse_desc(proto) 104 + ret = self.parse_ret(proto) 101 105 return Helper(proto=proto, desc=desc, ret=ret) 102 106 103 107 def parse_symbol(self): 104 - p = re.compile(' \* ?(.+)$') 108 + p = re.compile(' \* ?(BPF\w+)$') 105 109 capture = p.match(self.line) 106 110 if not capture: 107 111 raise NoSyscallCommandFound ··· 131 127 self.line = self.reader.readline() 132 128 return capture.group(1) 133 129 134 - def parse_desc(self): 130 + def parse_desc(self, proto): 135 131 p = re.compile(' \* ?(?:\t| {5,8})Description$') 136 132 capture = p.match(self.line) 137 133 if not capture: 138 - # Helper can have empty description and we might be parsing another 139 - # attribute: return but do not consume. 140 - return '' 134 + raise Exception("No description section found for " + proto) 141 135 # Description can be several lines, some of them possibly empty, and it 142 136 # stops when another subsection title is met. 
143 137 desc = '' 138 + desc_present = False 144 139 while True: 145 140 self.line = self.reader.readline() 146 141 if self.line == ' *\n': ··· 148 145 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') 149 146 capture = p.match(self.line) 150 147 if capture: 148 + desc_present = True 151 149 desc += capture.group(1) + '\n' 152 150 else: 153 151 break 152 + 153 + if not desc_present: 154 + raise Exception("No description found for " + proto) 154 155 return desc 155 156 156 - def parse_ret(self): 157 + def parse_ret(self, proto): 157 158 p = re.compile(' \* ?(?:\t| {5,8})Return$') 158 159 capture = p.match(self.line) 159 160 if not capture: 160 - # Helper can have empty retval and we might be parsing another 161 - # attribute: return but do not consume. 162 - return '' 161 + raise Exception("No return section found for " + proto) 163 162 # Return value description can be several lines, some of them possibly 164 163 # empty, and it stops when another subsection title is met. 165 164 ret = '' 165 + ret_present = False 166 166 while True: 167 167 self.line = self.reader.readline() 168 168 if self.line == ' *\n': ··· 174 168 p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') 175 169 capture = p.match(self.line) 176 170 if capture: 171 + ret_present = True 177 172 ret += capture.group(1) + '\n' 178 173 else: 179 174 break 175 + 176 + if not ret_present: 177 + raise Exception("No return found for " + proto) 180 178 return ret 181 179 182 - def seek_to(self, target, help_message): 180 + def seek_to(self, target, help_message, discard_lines = 1): 183 181 self.reader.seek(0) 184 182 offset = self.reader.read().find(target) 185 183 if offset == -1: 186 184 raise Exception(help_message) 187 185 self.reader.seek(offset) 188 186 self.reader.readline() 189 - self.reader.readline() 187 + for _ in range(discard_lines): 188 + self.reader.readline() 190 189 self.line = self.reader.readline() 191 190 192 - def parse_syscall(self): 191 + def parse_desc_syscall(self): 193 192 
self.seek_to('* DOC: eBPF Syscall Commands', 194 193 'Could not find start of eBPF syscall descriptions list') 195 194 while True: 196 195 try: 197 196 command = self.parse_element() 198 197 self.commands.append(command) 198 + self.desc_syscalls.append(command.proto) 199 + 199 200 except NoSyscallCommandFound: 200 201 break 201 202 202 - def parse_helpers(self): 203 + def parse_enum_syscall(self): 204 + self.seek_to('enum bpf_cmd {', 205 + 'Could not find start of bpf_cmd enum', 0) 206 + # Searches for either one or more BPF\w+ enums 207 + bpf_p = re.compile('\s*(BPF\w+)+') 208 + # Searches for an enum entry assigned to another entry, 209 + # for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is 210 + # not documented hence should be skipped in check to 211 + # determine if the right number of syscalls are documented 212 + assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)') 213 + bpf_cmd_str = '' 214 + while True: 215 + capture = assign_p.match(self.line) 216 + if capture: 217 + # Skip line if an enum entry is assigned to another entry 218 + self.line = self.reader.readline() 219 + continue 220 + capture = bpf_p.match(self.line) 221 + if capture: 222 + bpf_cmd_str += self.line 223 + else: 224 + break 225 + self.line = self.reader.readline() 226 + # Find the number of occurences of BPF\w+ 227 + self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str) 228 + 229 + def parse_desc_helpers(self): 203 230 self.seek_to('* Start of BPF helper function descriptions:', 204 231 'Could not find start of eBPF helper descriptions list') 205 232 while True: 206 233 try: 207 234 helper = self.parse_helper() 208 235 self.helpers.append(helper) 236 + proto = helper.proto_break_down() 237 + self.desc_unique_helpers.add(proto['name']) 209 238 except NoHelperFound: 210 239 break 211 240 241 + def parse_define_helpers(self): 242 + # Parse the number of FN(...) in #define __BPF_FUNC_MAPPER to compare 243 + # later with the number of unique function names present in description. 
244 + # Note: seek_to(..) discards the first line below the target search text, 245 + # resulting in FN(unspec) being skipped and not added to self.define_unique_helpers. 246 + self.seek_to('#define __BPF_FUNC_MAPPER(FN)', 247 + 'Could not find start of eBPF helper definition list') 248 + # Searches for either one or more FN(\w+) defines or a backslash for newline 249 + p = re.compile('\s*(FN\(\w+\))+|\\\\') 250 + fn_defines_str = '' 251 + while True: 252 + capture = p.match(self.line) 253 + if capture: 254 + fn_defines_str += self.line 255 + else: 256 + break 257 + self.line = self.reader.readline() 258 + # Find the number of occurences of FN(\w+) 259 + self.define_unique_helpers = re.findall('FN\(\w+\)', fn_defines_str) 260 + 212 261 def run(self): 213 - self.parse_syscall() 214 - self.parse_helpers() 262 + self.parse_desc_syscall() 263 + self.parse_enum_syscall() 264 + self.parse_desc_helpers() 265 + self.parse_define_helpers() 215 266 self.reader.close() 216 267 217 268 ############################################################################### ··· 298 235 self.print_one(elem) 299 236 self.print_footer() 300 237 238 + def elem_number_check(self, desc_unique_elem, define_unique_elem, type, instance): 239 + """ 240 + Checks the number of helpers/syscalls documented within the header file 241 + description with those defined as part of enum/macro and raise an 242 + Exception if they don't match. 243 + """ 244 + nr_desc_unique_elem = len(desc_unique_elem) 245 + nr_define_unique_elem = len(define_unique_elem) 246 + if nr_desc_unique_elem != nr_define_unique_elem: 247 + exception_msg = ''' 248 + The number of unique %s in description (%d) doesn\'t match the number of unique %s defined in %s (%d) 249 + ''' % (type, nr_desc_unique_elem, type, instance, nr_define_unique_elem) 250 + if nr_desc_unique_elem < nr_define_unique_elem: 251 + # Function description is parsed until no helper is found (which can be due to 252 + # misformatting). 
Hence, only print the first missing/misformatted helper/enum. 253 + exception_msg += ''' 254 + The description for %s is not present or formatted correctly. 255 + ''' % (define_unique_elem[nr_desc_unique_elem]) 256 + raise Exception(exception_msg) 301 257 302 258 class PrinterRST(Printer): 303 259 """ ··· 377 295 378 296 print('') 379 297 380 - 381 298 class PrinterHelpersRST(PrinterRST): 382 299 """ 383 300 A printer for dumping collected information about helpers as a ReStructured ··· 386 305 """ 387 306 def __init__(self, parser): 388 307 self.elements = parser.helpers 308 + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') 389 309 390 310 def print_header(self): 391 311 header = '''\ ··· 560 478 """ 561 479 def __init__(self, parser): 562 480 self.elements = parser.commands 481 + self.elem_number_check(parser.desc_syscalls, parser.enum_syscalls, 'syscall', 'bpf_cmd') 563 482 564 483 def print_header(self): 565 484 header = '''\ ··· 592 509 """ 593 510 def __init__(self, parser): 594 511 self.elements = parser.helpers 512 + self.elem_number_check(parser.desc_unique_helpers, parser.define_unique_helpers, 'helper', '__BPF_FUNC_MAPPER') 595 513 596 514 type_fwds = [ 597 515 'struct bpf_fib_lookup',
+1 -1
security/device_cgroup.c
··· 838 838 int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); 839 839 840 840 if (rc) 841 - return -EPERM; 841 + return rc; 842 842 843 843 #ifdef CONFIG_CGROUP_DEVICE 844 844 return devcgroup_legacy_check_permission(type, major, minor, access);
+1 -1
tools/bpf/bpftool/btf.c
··· 902 902 equal_fn_for_key_as_id, NULL); 903 903 btf_map_table = hashmap__new(hash_fn_for_key_as_id, 904 904 equal_fn_for_key_as_id, NULL); 905 - if (!btf_prog_table || !btf_map_table) { 905 + if (IS_ERR(btf_prog_table) || IS_ERR(btf_map_table)) { 906 906 hashmap__free(btf_prog_table); 907 907 hashmap__free(btf_map_table); 908 908 if (fd >= 0)
+4 -2
tools/bpf/bpftool/cgroup.c
··· 50 50 const char *attach_flags_str, 51 51 int level) 52 52 { 53 + char prog_name[MAX_PROG_FULL_NAME]; 53 54 struct bpf_prog_info info = {}; 54 55 __u32 info_len = sizeof(info); 55 56 int prog_fd; ··· 64 63 return -1; 65 64 } 66 65 66 + get_prog_full_name(&info, prog_fd, prog_name, sizeof(prog_name)); 67 67 if (json_output) { 68 68 jsonw_start_object(json_wtr); 69 69 jsonw_uint_field(json_wtr, "id", info.id); ··· 75 73 jsonw_uint_field(json_wtr, "attach_type", attach_type); 76 74 jsonw_string_field(json_wtr, "attach_flags", 77 75 attach_flags_str); 78 - jsonw_string_field(json_wtr, "name", info.name); 76 + jsonw_string_field(json_wtr, "name", prog_name); 79 77 jsonw_end_object(json_wtr); 80 78 } else { 81 79 printf("%s%-8u ", level ? " " : "", info.id); ··· 83 81 printf("%-15s", attach_type_name[attach_type]); 84 82 else 85 83 printf("type %-10u", attach_type); 86 - printf(" %-15s %-15s\n", attach_flags_str, info.name); 84 + printf(" %-15s %-15s\n", attach_flags_str, prog_name); 87 85 } 88 86 89 87 close(prog_fd);
+44
tools/bpf/bpftool/common.c
··· 24 24 #include <bpf/bpf.h> 25 25 #include <bpf/hashmap.h> 26 26 #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ 27 + #include <bpf/btf.h> 27 28 28 29 #include "main.h" 29 30 ··· 303 302 return names[BPF_OBJ_UNKNOWN]; 304 303 305 304 return names[type]; 305 + } 306 + 307 + void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, 308 + char *name_buff, size_t buff_len) 309 + { 310 + const char *prog_name = prog_info->name; 311 + const struct btf_type *func_type; 312 + const struct bpf_func_info finfo; 313 + struct bpf_prog_info info = {}; 314 + __u32 info_len = sizeof(info); 315 + struct btf *prog_btf = NULL; 316 + 317 + if (buff_len <= BPF_OBJ_NAME_LEN || 318 + strlen(prog_info->name) < BPF_OBJ_NAME_LEN - 1) 319 + goto copy_name; 320 + 321 + if (!prog_info->btf_id || prog_info->nr_func_info == 0) 322 + goto copy_name; 323 + 324 + info.nr_func_info = 1; 325 + info.func_info_rec_size = prog_info->func_info_rec_size; 326 + if (info.func_info_rec_size > sizeof(finfo)) 327 + info.func_info_rec_size = sizeof(finfo); 328 + info.func_info = ptr_to_u64(&finfo); 329 + 330 + if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) 331 + goto copy_name; 332 + 333 + prog_btf = btf__load_from_kernel_by_id(info.btf_id); 334 + if (!prog_btf) 335 + goto copy_name; 336 + 337 + func_type = btf__type_by_id(prog_btf, finfo.type_id); 338 + if (!func_type || !btf_is_func(func_type)) 339 + goto copy_name; 340 + 341 + prog_name = btf__name_by_offset(prog_btf, func_type->name_off); 342 + 343 + copy_name: 344 + snprintf(name_buff, buff_len, "%s", prog_name); 345 + 346 + if (prog_btf) 347 + btf__free(prog_btf); 306 348 } 307 349 308 350 int get_fd_type(int fd)
+7 -7
tools/bpf/bpftool/gen.c
··· 227 227 /* only generate definitions for memory-mapped internal maps */ 228 228 if (!bpf_map__is_internal(map)) 229 229 continue; 230 - if (!(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) 230 + if (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) 231 231 continue; 232 232 233 233 if (!get_map_ident(map, map_ident, sizeof(map_ident))) ··· 468 468 if (!get_map_ident(map, ident, sizeof(ident))) 469 469 continue; 470 470 if (bpf_map__is_internal(map) && 471 - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) 471 + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) 472 472 printf("\tmunmap(skel->%1$s, %2$zd);\n", 473 473 ident, bpf_map_mmap_sz(map)); 474 474 codegen("\ ··· 536 536 continue; 537 537 538 538 if (!bpf_map__is_internal(map) || 539 - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) 539 + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) 540 540 continue; 541 541 542 542 codegen("\ ··· 600 600 continue; 601 601 602 602 if (!bpf_map__is_internal(map) || 603 - !(bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) 603 + !(bpf_map__map_flags(map) & BPF_F_MMAPABLE)) 604 604 continue; 605 605 606 - if (bpf_map__def(map)->map_flags & BPF_F_RDONLY_PROG) 606 + if (bpf_map__map_flags(map) & BPF_F_RDONLY_PROG) 607 607 mmap_flags = "PROT_READ"; 608 608 else 609 609 mmap_flags = "PROT_READ | PROT_WRITE"; ··· 927 927 s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\ 928 928 if (!s) \n\ 929 929 goto err; \n\ 930 - obj->skeleton = s; \n\ 931 930 \n\ 932 931 s->sz = sizeof(*s); \n\ 933 932 s->name = \"%1$s\"; \n\ ··· 961 962 i, bpf_map__name(map), i, ident); 962 963 /* memory-mapped internal maps */ 963 964 if (bpf_map__is_internal(map) && 964 - (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { 965 + (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) { 965 966 printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", 966 967 i, ident); 967 968 } ··· 999 1000 \n\ 1000 1001 s->data = (void *)%2$s__elf_bytes(&s->data_sz); \n\ 1001 1002 \n\ 1003 + obj->skeleton = s; \n\ 1002 1004 return 0; 
\n\ 1003 1005 err: \n\ 1004 1006 bpf_object__destroy_skeleton(s); \n\
+2 -1
tools/bpf/bpftool/link.c
··· 2 2 /* Copyright (C) 2020 Facebook */ 3 3 4 4 #include <errno.h> 5 + #include <linux/err.h> 5 6 #include <net/if.h> 6 7 #include <stdio.h> 7 8 #include <unistd.h> ··· 307 306 if (show_pinned) { 308 307 link_table = hashmap__new(hash_fn_for_key_as_id, 309 308 equal_fn_for_key_as_id, NULL); 310 - if (!link_table) { 309 + if (IS_ERR(link_table)) { 311 310 p_err("failed to create hashmap for pinned paths"); 312 311 return -1; 313 312 }
+8 -1
tools/bpf/bpftool/main.c
··· 478 478 } 479 479 480 480 if (!legacy_libbpf) { 481 - ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 481 + enum libbpf_strict_mode mode; 482 + 483 + /* Allow legacy map definitions for skeleton generation. 484 + * It will still be rejected if users use LIBBPF_STRICT_ALL 485 + * mode for loading generated skeleton. 486 + */ 487 + mode = (__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS; 488 + ret = libbpf_set_strict_mode(mode); 482 489 if (ret) 483 490 p_err("failed to enable libbpf strict mode: %d", ret); 484 491 }
+4
tools/bpf/bpftool/main.h
··· 140 140 int cmd_select(const struct cmd *cmds, int argc, char **argv, 141 141 int (*help)(int argc, char **argv)); 142 142 143 + #define MAX_PROG_FULL_NAME 128 144 + void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, 145 + char *name_buff, size_t buff_len); 146 + 143 147 int get_fd_type(int fd); 144 148 const char *get_fd_type_name(enum bpf_obj_type type); 145 149 char *get_fdinfo(int fd, const char *key);
+1 -1
tools/bpf/bpftool/map.c
··· 699 699 if (show_pinned) { 700 700 map_table = hashmap__new(hash_fn_for_key_as_id, 701 701 equal_fn_for_key_as_id, NULL); 702 - if (!map_table) { 702 + if (IS_ERR(map_table)) { 703 703 p_err("failed to create hashmap for pinned paths"); 704 704 return -1; 705 705 }
+1 -1
tools/bpf/bpftool/net.c
··· 551 551 if (attach_type == NET_ATTACH_TYPE_XDP_OFFLOAD) 552 552 flags |= XDP_FLAGS_HW_MODE; 553 553 554 - return bpf_set_link_xdp_fd(ifindex, progfd, flags); 554 + return bpf_xdp_attach(ifindex, progfd, flags, NULL); 555 555 } 556 556 557 557 static int do_attach(int argc, char **argv)
+2 -1
tools/bpf/bpftool/pids.c
··· 1 1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 2 /* Copyright (C) 2020 Facebook */ 3 3 #include <errno.h> 4 + #include <linux/err.h> 4 5 #include <stdbool.h> 5 6 #include <stdio.h> 6 7 #include <stdlib.h> ··· 102 101 libbpf_print_fn_t default_print; 103 102 104 103 *map = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); 105 - if (!*map) { 104 + if (IS_ERR(*map)) { 106 105 p_err("failed to create hashmap for PID references"); 107 106 return -1; 108 107 }
+19 -11
tools/bpf/bpftool/prog.c
··· 424 424 free(value); 425 425 } 426 426 427 - static void print_prog_header_json(struct bpf_prog_info *info) 427 + static void print_prog_header_json(struct bpf_prog_info *info, int fd) 428 428 { 429 + char prog_name[MAX_PROG_FULL_NAME]; 430 + 429 431 jsonw_uint_field(json_wtr, "id", info->id); 430 432 if (info->type < ARRAY_SIZE(prog_type_name)) 431 433 jsonw_string_field(json_wtr, "type", ··· 435 433 else 436 434 jsonw_uint_field(json_wtr, "type", info->type); 437 435 438 - if (*info->name) 439 - jsonw_string_field(json_wtr, "name", info->name); 436 + if (*info->name) { 437 + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); 438 + jsonw_string_field(json_wtr, "name", prog_name); 439 + } 440 440 441 441 jsonw_name(json_wtr, "tag"); 442 442 jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"", ··· 459 455 char *memlock; 460 456 461 457 jsonw_start_object(json_wtr); 462 - print_prog_header_json(info); 458 + print_prog_header_json(info, fd); 463 459 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 464 460 465 461 if (info->load_time) { ··· 511 507 jsonw_end_object(json_wtr); 512 508 } 513 509 514 - static void print_prog_header_plain(struct bpf_prog_info *info) 510 + static void print_prog_header_plain(struct bpf_prog_info *info, int fd) 515 511 { 512 + char prog_name[MAX_PROG_FULL_NAME]; 513 + 516 514 printf("%u: ", info->id); 517 515 if (info->type < ARRAY_SIZE(prog_type_name)) 518 516 printf("%s ", prog_type_name[info->type]); 519 517 else 520 518 printf("type %u ", info->type); 521 519 522 - if (*info->name) 523 - printf("name %s ", info->name); 520 + if (*info->name) { 521 + get_prog_full_name(info, fd, prog_name, sizeof(prog_name)); 522 + printf("name %s ", prog_name); 523 + } 524 524 525 525 printf("tag "); 526 526 fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); ··· 542 534 { 543 535 char *memlock; 544 536 545 - print_prog_header_plain(info); 537 + print_prog_header_plain(info, fd); 546 538 547 539 if (info->load_time) { 548 540 char 
buf[32]; ··· 649 641 if (show_pinned) { 650 642 prog_table = hashmap__new(hash_fn_for_key_as_id, 651 643 equal_fn_for_key_as_id, NULL); 652 - if (!prog_table) { 644 + if (IS_ERR(prog_table)) { 653 645 p_err("failed to create hashmap for pinned paths"); 654 646 return -1; 655 647 } ··· 980 972 981 973 if (json_output && nb_fds > 1) { 982 974 jsonw_start_object(json_wtr); /* prog object */ 983 - print_prog_header_json(&info); 975 + print_prog_header_json(&info, fds[i]); 984 976 jsonw_name(json_wtr, "insns"); 985 977 } else if (nb_fds > 1) { 986 - print_prog_header_plain(&info); 978 + print_prog_header_plain(&info, fds[i]); 987 979 } 988 980 989 981 err = prog_dump(&info, mode, filepath, opcodes, visual, linum);
+1 -3
tools/bpf/bpftool/struct_ops.c
··· 480 480 static int do_register(int argc, char **argv) 481 481 { 482 482 LIBBPF_OPTS(bpf_object_open_opts, open_opts); 483 - const struct bpf_map_def *def; 484 483 struct bpf_map_info info = {}; 485 484 __u32 info_len = sizeof(info); 486 485 int nr_errs = 0, nr_maps = 0; ··· 509 510 } 510 511 511 512 bpf_object__for_each_map(map, obj) { 512 - def = bpf_map__def(map); 513 - if (def->type != BPF_MAP_TYPE_STRUCT_OPS) 513 + if (bpf_map__type(map) != BPF_MAP_TYPE_STRUCT_OPS) 514 514 continue; 515 515 516 516 link = bpf_map__attach_struct_ops(map);
+4 -2
tools/bpf/resolve_btfids/Makefile
··· 20 20 ARCH = $(HOSTARCH) 21 21 RM ?= rm 22 22 CROSS_COMPILE = 23 + CFLAGS := $(KBUILD_HOSTCFLAGS) 24 + LDFLAGS := $(KBUILD_HOSTLDFLAGS) 23 25 24 26 OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ 25 27 ··· 49 47 50 48 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT) 51 49 $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \ 52 - DESTDIR=$(LIBBPF_DESTDIR) prefix= \ 50 + DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \ 53 51 $(abspath $@) install_headers 54 52 55 - CFLAGS := -g \ 53 + CFLAGS += -g \ 56 54 -I$(srctree)/tools/include \ 57 55 -I$(srctree)/tools/include/uapi \ 58 56 -I$(LIBBPF_INCLUDE) \
+63
tools/include/uapi/linux/bpf.h
··· 330 330 * *ctx_out*, *data_in* and *data_out* must be NULL. 331 331 * *repeat* must be zero. 332 332 * 333 + * BPF_PROG_RUN is an alias for BPF_PROG_TEST_RUN. 334 + * 333 335 * Return 334 336 * Returns zero on success. On error, -1 is returned and *errno* 335 337 * is set appropriately. ··· 1113 1111 */ 1114 1112 #define BPF_F_SLEEPABLE (1U << 4) 1115 1113 1114 + /* If BPF_F_XDP_HAS_FRAGS is used in BPF_PROG_LOAD command, the loaded program 1115 + * fully support xdp frags. 1116 + */ 1117 + #define BPF_F_XDP_HAS_FRAGS (1U << 5) 1118 + 1116 1119 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have 1117 1120 * the following extensions: 1118 1121 * ··· 1782 1775 * 0 on success, or a negative error in case of failure. 1783 1776 * 1784 1777 * u64 bpf_get_current_pid_tgid(void) 1778 + * Description 1779 + * Get the current pid and tgid. 1785 1780 * Return 1786 1781 * A 64-bit integer containing the current tgid and pid, and 1787 1782 * created as such: ··· 1791 1782 * *current_task*\ **->pid**. 1792 1783 * 1793 1784 * u64 bpf_get_current_uid_gid(void) 1785 + * Description 1786 + * Get the current uid and gid. 1794 1787 * Return 1795 1788 * A 64-bit integer containing the current GID and UID, and 1796 1789 * created as such: *current_gid* **<< 32 \|** *current_uid*. ··· 2267 2256 * The 32-bit hash. 2268 2257 * 2269 2258 * u64 bpf_get_current_task(void) 2259 + * Description 2260 + * Get the current task. 2270 2261 * Return 2271 2262 * A pointer to the current task struct. 2272 2263 * ··· 2382 2369 * indicate that the hash is outdated and to trigger a 2383 2370 * recalculation the next time the kernel tries to access this 2384 2371 * hash or when the **bpf_get_hash_recalc**\ () helper is called. 2372 + * Return 2373 + * void. 2385 2374 * 2386 2375 * long bpf_get_numa_node_id(void) 2387 2376 * Description ··· 2481 2466 * A 8-byte long unique number or 0 if *sk* is NULL. 
2482 2467 * 2483 2468 * u32 bpf_get_socket_uid(struct sk_buff *skb) 2469 + * Description 2470 + * Get the owner UID of the socked associated to *skb*. 2484 2471 * Return 2485 2472 * The owner UID of the socket associated to *skb*. If the socket 2486 2473 * is **NULL**, or if it is not a full socket (i.e. if it is a ··· 3257 3240 * The id is returned or 0 in case the id could not be retrieved. 3258 3241 * 3259 3242 * u64 bpf_get_current_cgroup_id(void) 3243 + * Description 3244 + * Get the current cgroup id based on the cgroup within which 3245 + * the current task is running. 3260 3246 * Return 3261 3247 * A 64-bit integer containing the current cgroup id based 3262 3248 * on the cgroup within which the current task is running. ··· 5038 5018 * 5039 5019 * Return 5040 5020 * The number of arguments of the traced function. 5021 + * 5022 + * int bpf_get_retval(void) 5023 + * Description 5024 + * Get the syscall's return value that will be returned to userspace. 5025 + * 5026 + * This helper is currently supported by cgroup programs only. 5027 + * Return 5028 + * The syscall's return value. 5029 + * 5030 + * int bpf_set_retval(int retval) 5031 + * Description 5032 + * Set the syscall's return value that will be returned to userspace. 5033 + * 5034 + * This helper is currently supported by cgroup programs only. 5035 + * Return 5036 + * 0 on success, or a negative error in case of failure. 5037 + * 5038 + * u64 bpf_xdp_get_buff_len(struct xdp_buff *xdp_md) 5039 + * Description 5040 + * Get the total size of a given xdp buff (linear and paged area) 5041 + * Return 5042 + * The total size of a given xdp buffer. 5043 + * 5044 + * long bpf_xdp_load_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) 5045 + * Description 5046 + * This helper is provided as an easy way to load data from a 5047 + * xdp buffer. It can be used to load *len* bytes from *offset* from 5048 + * the frame associated to *xdp_md*, into the buffer pointed by 5049 + * *buf*. 
5050 + * Return 5051 + * 0 on success, or a negative error in case of failure. 5052 + * 5053 + * long bpf_xdp_store_bytes(struct xdp_buff *xdp_md, u32 offset, void *buf, u32 len) 5054 + * Description 5055 + * Store *len* bytes from buffer *buf* into the frame 5056 + * associated to *xdp_md*, at *offset*. 5057 + * Return 5058 + * 0 on success, or a negative error in case of failure. 5041 5059 */ 5042 5060 #define __BPF_FUNC_MAPPER(FN) \ 5043 5061 FN(unspec), \ ··· 5264 5206 FN(get_func_arg), \ 5265 5207 FN(get_func_ret), \ 5266 5208 FN(get_func_arg_cnt), \ 5209 + FN(get_retval), \ 5210 + FN(set_retval), \ 5211 + FN(xdp_get_buff_len), \ 5212 + FN(xdp_load_bytes), \ 5213 + FN(xdp_store_bytes), \ 5267 5214 /* */ 5268 5215 5269 5216 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
+7 -2
tools/lib/bpf/bpf.c
··· 754 754 .flags = flags, 755 755 ); 756 756 757 - return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); 757 + return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts); 758 758 } 759 759 760 - int bpf_prog_attach_xattr(int prog_fd, int target_fd, 760 + int bpf_prog_attach_opts(int prog_fd, int target_fd, 761 761 enum bpf_attach_type type, 762 762 const struct bpf_prog_attach_opts *opts) 763 763 { ··· 777 777 ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); 778 778 return libbpf_err_errno(ret); 779 779 } 780 + 781 + __attribute__((alias("bpf_prog_attach_opts"))) 782 + int bpf_prog_attach_xattr(int prog_fd, int target_fd, 783 + enum bpf_attach_type type, 784 + const struct bpf_prog_attach_opts *opts); 780 785 781 786 int bpf_prog_detach(int target_fd, enum bpf_attach_type type) 782 787 {
+4
tools/lib/bpf/bpf.h
··· 391 391 392 392 LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, 393 393 enum bpf_attach_type type, unsigned int flags); 394 + LIBBPF_API int bpf_prog_attach_opts(int prog_fd, int attachable_fd, 395 + enum bpf_attach_type type, 396 + const struct bpf_prog_attach_opts *opts); 397 + LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_prog_attach_opts() instead") 394 398 LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, 395 399 enum bpf_attach_type type, 396 400 const struct bpf_prog_attach_opts *opts);
+1 -1
tools/lib/bpf/bpf_helpers.h
··· 133 133 unsigned int value_size; 134 134 unsigned int max_entries; 135 135 unsigned int map_flags; 136 - }; 136 + } __attribute__((deprecated("use BTF-defined maps in .maps section"))); 137 137 138 138 enum libbpf_pin_type { 139 139 LIBBPF_PIN_NONE,
+30 -1
tools/lib/bpf/btf.c
··· 1620 1620 struct btf_pipe { 1621 1621 const struct btf *src; 1622 1622 struct btf *dst; 1623 + struct hashmap *str_off_map; /* map string offsets from src to dst */ 1623 1624 }; 1624 1625 1625 1626 static int btf_rewrite_str(__u32 *str_off, void *ctx) 1626 1627 { 1627 1628 struct btf_pipe *p = ctx; 1628 - int off; 1629 + void *mapped_off; 1630 + int off, err; 1629 1631 1630 1632 if (!*str_off) /* nothing to do for empty strings */ 1631 1633 return 0; 1632 1634 1635 + if (p->str_off_map && 1636 + hashmap__find(p->str_off_map, (void *)(long)*str_off, &mapped_off)) { 1637 + *str_off = (__u32)(long)mapped_off; 1638 + return 0; 1639 + } 1640 + 1633 1641 off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off)); 1634 1642 if (off < 0) 1635 1643 return off; 1644 + 1645 + /* Remember string mapping from src to dst. It avoids 1646 + * performing expensive string comparisons. 1647 + */ 1648 + if (p->str_off_map) { 1649 + err = hashmap__append(p->str_off_map, (void *)(long)*str_off, (void *)(long)off); 1650 + if (err) 1651 + return err; 1652 + } 1636 1653 1637 1654 *str_off = off; 1638 1655 return 0; ··· 1697 1680 return 0; 1698 1681 } 1699 1682 1683 + static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx); 1684 + static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx); 1685 + 1700 1686 int btf__add_btf(struct btf *btf, const struct btf *src_btf) 1701 1687 { 1702 1688 struct btf_pipe p = { .src = src_btf, .dst = btf }; ··· 1731 1711 /* pre-allocate enough memory for type offset index for new types */ 1732 1712 off = btf_add_type_offs_mem(btf, cnt); 1733 1713 if (!off) 1714 + return libbpf_err(-ENOMEM); 1715 + 1716 + /* Map the string offsets from src_btf to the offsets from btf to improve performance */ 1717 + p.str_off_map = hashmap__new(btf_dedup_identity_hash_fn, btf_dedup_equal_fn, NULL); 1718 + if (IS_ERR(p.str_off_map)) 1734 1719 return libbpf_err(-ENOMEM); 1735 1720 1736 1721 /* bulk copy types data for all types from src_btf 
*/ ··· 1779 1754 btf->hdr->str_off += data_sz; 1780 1755 btf->nr_types += cnt; 1781 1756 1757 + hashmap__free(p.str_off_map); 1758 + 1782 1759 /* return type ID of the first added BTF type */ 1783 1760 return btf->start_id + btf->nr_types - cnt; 1784 1761 err_out: ··· 1793 1766 /* and now restore original strings section size; types data size 1794 1767 * wasn't modified, so doesn't need restoring, see big comment above */ 1795 1768 btf->hdr->str_len = old_strs_len; 1769 + 1770 + hashmap__free(p.str_off_map); 1796 1771 1797 1772 return libbpf_err(err); 1798 1773 }
+21 -1
tools/lib/bpf/btf.h
··· 375 375 const struct btf_dump_type_data_opts *opts); 376 376 377 377 /* 378 - * A set of helpers for easier BTF types handling 378 + * A set of helpers for easier BTF types handling. 379 + * 380 + * The inline functions below rely on constants from the kernel headers which 381 + * may not be available for applications including this header file. To avoid 382 + * compilation errors, we define all the constants here that were added after 383 + * the initial introduction of the BTF_KIND* constants. 379 384 */ 385 + #ifndef BTF_KIND_FUNC 386 + #define BTF_KIND_FUNC 12 /* Function */ 387 + #define BTF_KIND_FUNC_PROTO 13 /* Function Proto */ 388 + #endif 389 + #ifndef BTF_KIND_VAR 390 + #define BTF_KIND_VAR 14 /* Variable */ 391 + #define BTF_KIND_DATASEC 15 /* Section */ 392 + #endif 393 + #ifndef BTF_KIND_FLOAT 394 + #define BTF_KIND_FLOAT 16 /* Floating point */ 395 + #endif 396 + /* The kernel header switched to enums, so these two were never #defined */ 397 + #define BTF_KIND_DECL_TAG 17 /* Decl Tag */ 398 + #define BTF_KIND_TYPE_TAG 18 /* Type Tag */ 399 + 380 400 static inline __u16 btf_kind(const struct btf_type *t) 381 401 { 382 402 return BTF_INFO_KIND(t->info);
+1 -2
tools/lib/bpf/hashmap.c
··· 75 75 76 76 void hashmap__free(struct hashmap *map) 77 77 { 78 - if (!map) 78 + if (IS_ERR_OR_NULL(map)) 79 79 return; 80 80 81 81 hashmap__clear(map); ··· 238 238 239 239 return true; 240 240 } 241 -
+19
tools/lib/bpf/libbpf.c
··· 235 235 SEC_SLEEPABLE = 8, 236 236 /* allow non-strict prefix matching */ 237 237 SEC_SLOPPY_PFX = 16, 238 + /* BPF program support non-linear XDP buffer */ 239 + SEC_XDP_FRAGS = 32, 238 240 }; 239 241 240 242 struct bpf_sec_def { ··· 1939 1937 if (obj->efile.maps_shndx < 0) 1940 1938 return 0; 1941 1939 1940 + if (libbpf_mode & LIBBPF_STRICT_MAP_DEFINITIONS) { 1941 + pr_warn("legacy map definitions in SEC(\"maps\") are not supported\n"); 1942 + return -EOPNOTSUPP; 1943 + } 1944 + 1942 1945 if (!symbols) 1943 1946 return -EINVAL; 1944 1947 ··· 2005 1998 i, obj->path); 2006 1999 return -LIBBPF_ERRNO__FORMAT; 2007 2000 } 2001 + 2002 + pr_warn("map '%s' (legacy): legacy map definitions are deprecated, use BTF-defined maps instead\n", map_name); 2008 2003 2009 2004 if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) { 2010 2005 pr_warn("map '%s' (legacy): static maps are not supported\n", map_name); ··· 4199 4190 return 0; 4200 4191 4201 4192 if (!bpf_map__is_internal(map)) { 4193 + pr_warn("Use of BPF_ANNOTATE_KV_PAIR is deprecated, use BTF-defined maps in .maps section instead\n"); 4202 4194 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size, 4203 4195 def->value_size, &key_type_id, 4204 4196 &value_type_id); ··· 6572 6562 if (def & SEC_SLEEPABLE) 6573 6563 opts->prog_flags |= BPF_F_SLEEPABLE; 6574 6564 6565 + if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 6566 + opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 6567 + 6575 6568 if ((prog->type == BPF_PROG_TYPE_TRACING || 6576 6569 prog->type == BPF_PROG_TYPE_LSM || 6577 6570 prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { ··· 8613 8600 SEC_DEF("lsm.s/", LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm), 8614 8601 SEC_DEF("iter/", TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter), 8615 8602 SEC_DEF("syscall", SYSCALL, 0, SEC_SLEEPABLE), 8603 + SEC_DEF("xdp.frags/devmap", XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS), 8616 8604 SEC_DEF("xdp_devmap/", XDP, BPF_XDP_DEVMAP, 
SEC_ATTACHABLE), 8605 + SEC_DEF("xdp.frags/cpumap", XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS), 8617 8606 SEC_DEF("xdp_cpumap/", XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE), 8607 + SEC_DEF("xdp.frags", XDP, BPF_XDP, SEC_XDP_FRAGS), 8618 8608 SEC_DEF("xdp", XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX), 8619 8609 SEC_DEF("perf_event", PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX), 8620 8610 SEC_DEF("lwt_in", LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX), ··· 11811 11795 11812 11796 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) 11813 11797 { 11798 + if (!s) 11799 + return; 11800 + 11814 11801 if (s->progs) 11815 11802 bpf_object__detach_skeleton(s); 11816 11803 if (s->obj)
+31 -1
tools/lib/bpf/libbpf.h
··· 706 706 LIBBPF_API int bpf_map__fd(const struct bpf_map *map); 707 707 LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); 708 708 /* get map definition */ 709 - LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); 709 + LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use appropriate getters or setters instead") 710 + const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); 710 711 /* get map name */ 711 712 LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); 712 713 /* get/set map type */ ··· 833 832 }; 834 833 #define bpf_xdp_set_link_opts__last_field old_fd 835 834 835 + LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") 836 836 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); 837 + LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_attach() instead") 837 838 LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, 838 839 const struct bpf_xdp_set_link_opts *opts); 840 + LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query_id() instead") 839 841 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); 842 + LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_xdp_query() instead") 840 843 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, 841 844 size_t info_size, __u32 flags); 845 + 846 + struct bpf_xdp_attach_opts { 847 + size_t sz; 848 + int old_prog_fd; 849 + size_t :0; 850 + }; 851 + #define bpf_xdp_attach_opts__last_field old_prog_fd 852 + 853 + struct bpf_xdp_query_opts { 854 + size_t sz; 855 + __u32 prog_id; /* output */ 856 + __u32 drv_prog_id; /* output */ 857 + __u32 hw_prog_id; /* output */ 858 + __u32 skb_prog_id; /* output */ 859 + __u8 attach_mode; /* output */ 860 + size_t :0; 861 + }; 862 + #define bpf_xdp_query_opts__last_field attach_mode 863 + 864 + LIBBPF_API int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, 865 + const struct bpf_xdp_attach_opts *opts); 866 + LIBBPF_API int bpf_xdp_detach(int ifindex, __u32 
flags, 867 + const struct bpf_xdp_attach_opts *opts); 868 + LIBBPF_API int bpf_xdp_query(int ifindex, int flags, struct bpf_xdp_query_opts *opts); 869 + LIBBPF_API int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id); 842 870 843 871 /* TC related API */ 844 872 enum bpf_tc_attach_point {
+5
tools/lib/bpf/libbpf.map
··· 247 247 bpf_link_create; 248 248 bpf_link_update; 249 249 bpf_map__set_initial_value; 250 + bpf_prog_attach_opts; 250 251 bpf_program__attach_cgroup; 251 252 bpf_program__attach_lsm; 252 253 bpf_program__is_lsm; ··· 428 427 bpf_program__log_level; 429 428 bpf_program__set_log_buf; 430 429 bpf_program__set_log_level; 430 + bpf_xdp_attach; 431 + bpf_xdp_detach; 432 + bpf_xdp_query; 433 + bpf_xdp_query_id; 431 434 libbpf_probe_bpf_helper; 432 435 libbpf_probe_bpf_map_type; 433 436 libbpf_probe_bpf_prog_type;
+5
tools/lib/bpf/libbpf_legacy.h
··· 73 73 * operation. 74 74 */ 75 75 LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10, 76 + /* 77 + * Error out on any SEC("maps") map definition, which are deprecated 78 + * in favor of BTF-defined map definitions in SEC(".maps"). 79 + */ 80 + LIBBPF_STRICT_MAP_DEFINITIONS = 0x20, 76 81 77 82 __LIBBPF_STRICT_LAST, 78 83 };
+89 -38
tools/lib/bpf/netlink.c
··· 217 217 return libbpf_netlink_send_recv(&req, NULL, NULL, NULL); 218 218 } 219 219 220 + int bpf_xdp_attach(int ifindex, int prog_fd, __u32 flags, const struct bpf_xdp_attach_opts *opts) 221 + { 222 + int old_prog_fd, err; 223 + 224 + if (!OPTS_VALID(opts, bpf_xdp_attach_opts)) 225 + return libbpf_err(-EINVAL); 226 + 227 + old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); 228 + if (old_prog_fd) 229 + flags |= XDP_FLAGS_REPLACE; 230 + else 231 + old_prog_fd = -1; 232 + 233 + err = __bpf_set_link_xdp_fd_replace(ifindex, prog_fd, old_prog_fd, flags); 234 + return libbpf_err(err); 235 + } 236 + 237 + int bpf_xdp_detach(int ifindex, __u32 flags, const struct bpf_xdp_attach_opts *opts) 238 + { 239 + return bpf_xdp_attach(ifindex, -1, flags, opts); 240 + } 241 + 220 242 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags, 221 243 const struct bpf_xdp_set_link_opts *opts) 222 244 { ··· 325 303 return 0; 326 304 } 327 305 328 - int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, 329 - size_t info_size, __u32 flags) 306 + int bpf_xdp_query(int ifindex, int xdp_flags, struct bpf_xdp_query_opts *opts) 330 307 { 331 - struct xdp_id_md xdp_id = {}; 332 - __u32 mask; 333 - int ret; 334 308 struct libbpf_nla_req req = { 335 309 .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)), 336 310 .nh.nlmsg_type = RTM_GETLINK, 337 311 .nh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST, 338 312 .ifinfo.ifi_family = AF_PACKET, 339 313 }; 314 + struct xdp_id_md xdp_id = {}; 315 + int err; 340 316 341 - if (flags & ~XDP_FLAGS_MASK || !info_size) 317 + if (!OPTS_VALID(opts, bpf_xdp_query_opts)) 318 + return libbpf_err(-EINVAL); 319 + 320 + if (xdp_flags & ~XDP_FLAGS_MASK) 342 321 return libbpf_err(-EINVAL); 343 322 344 323 /* Check whether the single {HW,DRV,SKB} mode is set */ 345 - flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); 346 - mask = flags - 1; 347 - if (flags && flags & mask) 324 + xdp_flags &= XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | 
XDP_FLAGS_HW_MODE; 325 + if (xdp_flags & (xdp_flags - 1)) 348 326 return libbpf_err(-EINVAL); 349 327 350 328 xdp_id.ifindex = ifindex; 351 - xdp_id.flags = flags; 329 + xdp_id.flags = xdp_flags; 352 330 353 - ret = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, 331 + err = libbpf_netlink_send_recv(&req, __dump_link_nlmsg, 354 332 get_xdp_info, &xdp_id); 355 - if (!ret) { 356 - size_t sz = min(info_size, sizeof(xdp_id.info)); 333 + if (err) 334 + return libbpf_err(err); 357 335 358 - memcpy(info, &xdp_id.info, sz); 359 - memset((void *) info + sz, 0, info_size - sz); 360 - } 361 - 362 - return libbpf_err(ret); 363 - } 364 - 365 - static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) 366 - { 367 - flags &= XDP_FLAGS_MODES; 368 - 369 - if (info->attach_mode != XDP_ATTACHED_MULTI && !flags) 370 - return info->prog_id; 371 - if (flags & XDP_FLAGS_DRV_MODE) 372 - return info->drv_prog_id; 373 - if (flags & XDP_FLAGS_HW_MODE) 374 - return info->hw_prog_id; 375 - if (flags & XDP_FLAGS_SKB_MODE) 376 - return info->skb_prog_id; 336 + OPTS_SET(opts, prog_id, xdp_id.info.prog_id); 337 + OPTS_SET(opts, drv_prog_id, xdp_id.info.drv_prog_id); 338 + OPTS_SET(opts, hw_prog_id, xdp_id.info.hw_prog_id); 339 + OPTS_SET(opts, skb_prog_id, xdp_id.info.skb_prog_id); 340 + OPTS_SET(opts, attach_mode, xdp_id.info.attach_mode); 377 341 378 342 return 0; 379 343 } 380 344 381 - int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) 345 + int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, 346 + size_t info_size, __u32 flags) 382 347 { 383 - struct xdp_link_info info; 348 + LIBBPF_OPTS(bpf_xdp_query_opts, opts); 349 + size_t sz; 350 + int err; 351 + 352 + if (!info_size) 353 + return libbpf_err(-EINVAL); 354 + 355 + err = bpf_xdp_query(ifindex, flags, &opts); 356 + if (err) 357 + return libbpf_err(err); 358 + 359 + /* struct xdp_link_info field layout matches struct bpf_xdp_query_opts 360 + * layout after sz field 361 + */ 362 + sz = min(info_size, 
offsetofend(struct xdp_link_info, attach_mode)); 363 + memcpy(info, &opts.prog_id, sz); 364 + memset((void *)info + sz, 0, info_size - sz); 365 + 366 + return 0; 367 + } 368 + 369 + int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id) 370 + { 371 + LIBBPF_OPTS(bpf_xdp_query_opts, opts); 384 372 int ret; 385 373 386 - ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); 387 - if (!ret) 388 - *prog_id = get_xdp_id(&info, flags); 374 + ret = bpf_xdp_query(ifindex, flags, &opts); 375 + if (ret) 376 + return libbpf_err(ret); 389 377 390 - return libbpf_err(ret); 378 + flags &= XDP_FLAGS_MODES; 379 + 380 + if (opts.attach_mode != XDP_ATTACHED_MULTI && !flags) 381 + *prog_id = opts.prog_id; 382 + else if (flags & XDP_FLAGS_DRV_MODE) 383 + *prog_id = opts.drv_prog_id; 384 + else if (flags & XDP_FLAGS_HW_MODE) 385 + *prog_id = opts.hw_prog_id; 386 + else if (flags & XDP_FLAGS_SKB_MODE) 387 + *prog_id = opts.skb_prog_id; 388 + else 389 + *prog_id = 0; 390 + 391 + return 0; 392 + } 393 + 394 + 395 + int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) 396 + { 397 + return bpf_xdp_query_id(ifindex, flags, prog_id); 391 398 } 392 399 393 400 typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+27 -37
tools/perf/util/bpf-loader.c
··· 1005 1005 { 1006 1006 struct bpf_map_op *op; 1007 1007 const char *map_name = bpf_map__name(map); 1008 - const struct bpf_map_def *def = bpf_map__def(map); 1009 1008 1010 - if (IS_ERR(def)) { 1011 - pr_debug("Unable to get map definition from '%s'\n", 1012 - map_name); 1009 + if (!map) { 1010 + pr_debug("Map '%s' is invalid\n", map_name); 1013 1011 return -BPF_LOADER_ERRNO__INTERNAL; 1014 1012 } 1015 1013 1016 - if (def->type != BPF_MAP_TYPE_ARRAY) { 1014 + if (bpf_map__type(map) != BPF_MAP_TYPE_ARRAY) { 1017 1015 pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n", 1018 1016 map_name); 1019 1017 return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; 1020 1018 } 1021 - if (def->key_size < sizeof(unsigned int)) { 1019 + if (bpf_map__key_size(map) < sizeof(unsigned int)) { 1022 1020 pr_debug("Map %s has incorrect key size\n", map_name); 1023 1021 return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE; 1024 1022 } 1025 - switch (def->value_size) { 1023 + switch (bpf_map__value_size(map)) { 1026 1024 case 1: 1027 1025 case 2: 1028 1026 case 4: ··· 1062 1064 struct parse_events_term *term, 1063 1065 struct evlist *evlist) 1064 1066 { 1065 - const struct bpf_map_def *def; 1066 1067 struct bpf_map_op *op; 1067 1068 const char *map_name = bpf_map__name(map); 1068 1069 struct evsel *evsel = evlist__find_evsel_by_str(evlist, term->val.str); ··· 1072 1075 return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT; 1073 1076 } 1074 1077 1075 - def = bpf_map__def(map); 1076 - if (IS_ERR(def)) { 1077 - pr_debug("Unable to get map definition from '%s'\n", 1078 - map_name); 1079 - return PTR_ERR(def); 1078 + if (!map) { 1079 + pr_debug("Map '%s' is invalid\n", map_name); 1080 + return PTR_ERR(map); 1080 1081 } 1081 1082 1082 1083 /* 1083 1084 * No need to check key_size and value_size: 1084 1085 * kernel has already checked them. 
1085 1086 */ 1086 - if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 1087 + if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { 1087 1088 pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", 1088 1089 map_name); 1089 1090 return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE; ··· 1130 1135 const char *map_name) 1131 1136 { 1132 1137 struct parse_events_array *array = &term->array; 1133 - const struct bpf_map_def *def; 1134 1138 unsigned int i; 1135 1139 1136 1140 if (!array->nr_ranges) ··· 1140 1146 return -BPF_LOADER_ERRNO__INTERNAL; 1141 1147 } 1142 1148 1143 - def = bpf_map__def(map); 1144 - if (IS_ERR(def)) { 1145 - pr_debug("ERROR: Unable to get map definition from '%s'\n", 1146 - map_name); 1149 + if (!map) { 1150 + pr_debug("Map '%s' is invalid\n", map_name); 1147 1151 return -BPF_LOADER_ERRNO__INTERNAL; 1148 1152 } 1149 1153 ··· 1150 1158 size_t length = array->ranges[i].length; 1151 1159 unsigned int idx = start + length - 1; 1152 1160 1153 - if (idx >= def->max_entries) { 1161 + if (idx >= bpf_map__max_entries(map)) { 1154 1162 pr_debug("ERROR: index %d too large\n", idx); 1155 1163 return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG; 1156 1164 } ··· 1243 1251 } 1244 1252 1245 1253 typedef int (*map_config_func_t)(const char *name, int map_fd, 1246 - const struct bpf_map_def *pdef, 1254 + const struct bpf_map *map, 1247 1255 struct bpf_map_op *op, 1248 1256 void *pkey, void *arg); 1249 1257 1250 1258 static int 1251 1259 foreach_key_array_all(map_config_func_t func, 1252 1260 void *arg, const char *name, 1253 - int map_fd, const struct bpf_map_def *pdef, 1261 + int map_fd, const struct bpf_map *map, 1254 1262 struct bpf_map_op *op) 1255 1263 { 1256 1264 unsigned int i; 1257 1265 int err; 1258 1266 1259 - for (i = 0; i < pdef->max_entries; i++) { 1260 - err = func(name, map_fd, pdef, op, &i, arg); 1267 + for (i = 0; i < bpf_map__max_entries(map); i++) { 1268 + err = func(name, map_fd, map, op, &i, arg); 1261 1269 if (err) { 1262 1270 pr_debug("ERROR: 
failed to insert value to %s[%u]\n", 1263 1271 name, i); ··· 1270 1278 static int 1271 1279 foreach_key_array_ranges(map_config_func_t func, void *arg, 1272 1280 const char *name, int map_fd, 1273 - const struct bpf_map_def *pdef, 1281 + const struct bpf_map *map, 1274 1282 struct bpf_map_op *op) 1275 1283 { 1276 1284 unsigned int i, j; ··· 1283 1291 for (j = 0; j < length; j++) { 1284 1292 unsigned int idx = start + j; 1285 1293 1286 - err = func(name, map_fd, pdef, op, &idx, arg); 1294 + err = func(name, map_fd, map, op, &idx, arg); 1287 1295 if (err) { 1288 1296 pr_debug("ERROR: failed to insert value to %s[%u]\n", 1289 1297 name, idx); ··· 1299 1307 map_config_func_t func, 1300 1308 void *arg) 1301 1309 { 1302 - int err, map_fd; 1310 + int err, map_fd, type; 1303 1311 struct bpf_map_op *op; 1304 - const struct bpf_map_def *def; 1305 1312 const char *name = bpf_map__name(map); 1306 1313 struct bpf_map_priv *priv = bpf_map__priv(map); 1307 1314 ··· 1313 1322 return 0; 1314 1323 } 1315 1324 1316 - def = bpf_map__def(map); 1317 - if (IS_ERR(def)) { 1318 - pr_debug("ERROR: failed to get definition from map %s\n", name); 1325 + if (!map) { 1326 + pr_debug("Map '%s' is invalid\n", name); 1319 1327 return -BPF_LOADER_ERRNO__INTERNAL; 1320 1328 } 1321 1329 map_fd = bpf_map__fd(map); ··· 1323 1333 return map_fd; 1324 1334 } 1325 1335 1336 + type = bpf_map__type(map); 1326 1337 list_for_each_entry(op, &priv->ops_list, list) { 1327 - switch (def->type) { 1338 + switch (type) { 1328 1339 case BPF_MAP_TYPE_ARRAY: 1329 1340 case BPF_MAP_TYPE_PERF_EVENT_ARRAY: 1330 1341 switch (op->key_type) { 1331 1342 case BPF_MAP_KEY_ALL: 1332 1343 err = foreach_key_array_all(func, arg, name, 1333 - map_fd, def, op); 1344 + map_fd, map, op); 1334 1345 break; 1335 1346 case BPF_MAP_KEY_RANGES: 1336 1347 err = foreach_key_array_ranges(func, arg, name, 1337 - map_fd, def, 1338 - op); 1348 + map_fd, map, op); 1339 1349 break; 1340 1350 default: 1341 1351 pr_debug("ERROR: keytype for map '%s' 
invalid\n", ··· 1444 1454 1445 1455 static int 1446 1456 apply_obj_config_map_for_key(const char *name, int map_fd, 1447 - const struct bpf_map_def *pdef, 1457 + const struct bpf_map *map, 1448 1458 struct bpf_map_op *op, 1449 1459 void *pkey, void *arg __maybe_unused) 1450 1460 { ··· 1453 1463 switch (op->op_type) { 1454 1464 case BPF_MAP_OP_SET_VALUE: 1455 1465 err = apply_config_value_for_key(map_fd, pkey, 1456 - pdef->value_size, 1466 + bpf_map__value_size(map), 1457 1467 op->v.value); 1458 1468 break; 1459 1469 case BPF_MAP_OP_SET_EVSEL:
+14 -14
tools/perf/util/bpf_map.c
··· 9 9 #include <stdlib.h> 10 10 #include <unistd.h> 11 11 12 - static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def) 12 + static bool bpf_map__is_per_cpu(enum bpf_map_type type) 13 13 { 14 - return def->type == BPF_MAP_TYPE_PERCPU_HASH || 15 - def->type == BPF_MAP_TYPE_PERCPU_ARRAY || 16 - def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 17 - def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; 14 + return type == BPF_MAP_TYPE_PERCPU_HASH || 15 + type == BPF_MAP_TYPE_PERCPU_ARRAY || 16 + type == BPF_MAP_TYPE_LRU_PERCPU_HASH || 17 + type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; 18 18 } 19 19 20 - static void *bpf_map_def__alloc_value(const struct bpf_map_def *def) 20 + static void *bpf_map__alloc_value(const struct bpf_map *map) 21 21 { 22 - if (bpf_map_def__is_per_cpu(def)) 23 - return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF)); 22 + if (bpf_map__is_per_cpu(bpf_map__type(map))) 23 + return malloc(round_up(bpf_map__value_size(map), 8) * 24 + sysconf(_SC_NPROCESSORS_CONF)); 24 25 25 - return malloc(def->value_size); 26 + return malloc(bpf_map__value_size(map)); 26 27 } 27 28 28 29 int bpf_map__fprintf(struct bpf_map *map, FILE *fp) 29 30 { 30 - const struct bpf_map_def *def = bpf_map__def(map); 31 31 void *prev_key = NULL, *key, *value; 32 32 int fd = bpf_map__fd(map), err; 33 33 int printed = 0; ··· 35 35 if (fd < 0) 36 36 return fd; 37 37 38 - if (IS_ERR(def)) 39 - return PTR_ERR(def); 38 + if (!map) 39 + return PTR_ERR(map); 40 40 41 41 err = -ENOMEM; 42 - key = malloc(def->key_size); 42 + key = malloc(bpf_map__key_size(map)); 43 43 if (key == NULL) 44 44 goto out; 45 45 46 - value = bpf_map_def__alloc_value(def); 46 + value = bpf_map__alloc_value(map); 47 47 if (value == NULL) 48 48 goto out_free_key; 49 49
+2 -2
tools/testing/selftests/bpf/Makefile
··· 21 21 22 22 BPF_GCC ?= $(shell command -v bpf-gcc;) 23 23 SAN_CFLAGS ?= 24 - CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \ 24 + CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ 25 25 -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ 26 26 -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) 27 27 LDFLAGS += $(SAN_CFLAGS) ··· 292 292 MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) 293 293 294 294 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) 295 - BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ 295 + BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ 296 296 -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ 297 297 -I$(abspath $(OUTPUT)/../usr/include) 298 298
+13 -8
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
··· 109 109 .write = bpf_testmod_test_write, 110 110 }; 111 111 112 - BTF_SET_START(bpf_testmod_kfunc_ids) 112 + BTF_SET_START(bpf_testmod_check_kfunc_ids) 113 113 BTF_ID(func, bpf_testmod_test_mod_kfunc) 114 - BTF_SET_END(bpf_testmod_kfunc_ids) 114 + BTF_SET_END(bpf_testmod_check_kfunc_ids) 115 115 116 - static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set); 116 + static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { 117 + .owner = THIS_MODULE, 118 + .check_set = &bpf_testmod_check_kfunc_ids, 119 + }; 120 + 121 + extern int bpf_fentry_test1(int a); 117 122 118 123 static int bpf_testmod_init(void) 119 124 { 120 125 int ret; 121 126 122 - ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); 123 - if (ret) 127 + ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); 128 + if (ret < 0) 124 129 return ret; 125 - register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); 126 - return 0; 130 + if (bpf_fentry_test1(0) < 0) 131 + return -EINVAL; 132 + return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); 127 133 } 128 134 129 135 static void bpf_testmod_exit(void) 130 136 { 131 - unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set); 132 137 return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); 133 138 } 134 139
+5
tools/testing/selftests/bpf/config
··· 48 48 CONFIG_BLK_DEV_LOOP=y 49 49 CONFIG_FUNCTION_TRACER=y 50 50 CONFIG_DYNAMIC_FTRACE=y 51 + CONFIG_NETFILTER=y 52 + CONFIG_NF_DEFRAG_IPV4=y 53 + CONFIG_NF_DEFRAG_IPV6=y 54 + CONFIG_NF_CONNTRACK=y 55 + CONFIG_USERFAULTFD=y
+17 -3
tools/testing/selftests/bpf/prog_tests/bind_perm.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 - #include <test_progs.h> 3 - #include "bind_perm.skel.h" 4 - 2 + #define _GNU_SOURCE 3 + #include <sched.h> 4 + #include <stdlib.h> 5 5 #include <sys/types.h> 6 6 #include <sys/socket.h> 7 7 #include <sys/capability.h> 8 8 9 + #include "test_progs.h" 10 + #include "bind_perm.skel.h" 11 + 9 12 static int duration; 13 + 14 + static int create_netns(void) 15 + { 16 + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) 17 + return -1; 18 + 19 + return 0; 20 + } 10 21 11 22 void try_bind(int family, int port, int expected_errno) 12 23 { ··· 85 74 bool cap_was_effective; 86 75 struct bind_perm *skel; 87 76 int cgroup_fd; 77 + 78 + if (create_netns()) 79 + return; 88 80 89 81 cgroup_fd = test__join_cgroup("/bind_perm"); 90 82 if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+100
tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright Amazon.com Inc. or its affiliates. */ 3 + #include <sys/socket.h> 4 + #include <sys/un.h> 5 + #include <test_progs.h> 6 + #include "bpf_iter_setsockopt_unix.skel.h" 7 + 8 + #define NR_CASES 5 9 + 10 + static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel) 11 + { 12 + struct sockaddr_un addr = { 13 + .sun_family = AF_UNIX, 14 + .sun_path = "", 15 + }; 16 + socklen_t len; 17 + int fd, err; 18 + 19 + fd = socket(AF_UNIX, SOCK_STREAM, 0); 20 + if (!ASSERT_NEQ(fd, -1, "socket")) 21 + return -1; 22 + 23 + len = offsetof(struct sockaddr_un, sun_path); 24 + err = bind(fd, (struct sockaddr *)&addr, len); 25 + if (!ASSERT_OK(err, "bind")) 26 + return -1; 27 + 28 + len = sizeof(addr); 29 + err = getsockname(fd, (struct sockaddr *)&addr, &len); 30 + if (!ASSERT_OK(err, "getsockname")) 31 + return -1; 32 + 33 + memcpy(&skel->bss->sun_path, &addr.sun_path, 34 + len - offsetof(struct sockaddr_un, sun_path)); 35 + 36 + return fd; 37 + } 38 + 39 + static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd) 40 + { 41 + socklen_t optlen; 42 + int i, err; 43 + 44 + for (i = 0; i < NR_CASES; i++) { 45 + if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1, 46 + "bpf_(get|set)sockopt")) 47 + return; 48 + 49 + err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 50 + &(skel->data->sndbuf_setsockopt[i]), 51 + sizeof(skel->data->sndbuf_setsockopt[i])); 52 + if (!ASSERT_OK(err, "setsockopt")) 53 + return; 54 + 55 + optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]); 56 + err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF, 57 + &(skel->bss->sndbuf_getsockopt_expected[i]), 58 + &optlen); 59 + if (!ASSERT_OK(err, "getsockopt")) 60 + return; 61 + 62 + if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i], 63 + skel->bss->sndbuf_getsockopt_expected[i], 64 + "bpf_(get|set)sockopt")) 65 + return; 66 + } 67 + } 68 + 69 + void test_bpf_iter_setsockopt_unix(void) 70 + { 71 + struct bpf_iter_setsockopt_unix *skel; 72 + int err, 
unix_fd, iter_fd; 73 + char buf; 74 + 75 + skel = bpf_iter_setsockopt_unix__open_and_load(); 76 + if (!ASSERT_OK_PTR(skel, "open_and_load")) 77 + return; 78 + 79 + unix_fd = create_unix_socket(skel); 80 + if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server")) 81 + goto destroy; 82 + 83 + skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL); 84 + if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter")) 85 + goto destroy; 86 + 87 + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf)); 88 + if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create")) 89 + goto destroy; 90 + 91 + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && 92 + errno == EAGAIN) 93 + ; 94 + if (!ASSERT_OK(err, "read iter error")) 95 + goto destroy; 96 + 97 + test_sndbuf(skel, unix_fd); 98 + destroy: 99 + bpf_iter_setsockopt_unix__destroy(skel); 100 + }
+230
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <unistd.h> 3 + #include <pthread.h> 4 + #include <sys/mman.h> 5 + #include <stdatomic.h> 6 + #include <test_progs.h> 7 + #include <sys/syscall.h> 8 + #include <linux/module.h> 9 + #include <linux/userfaultfd.h> 10 + 11 + #include "ksym_race.skel.h" 12 + #include "bpf_mod_race.skel.h" 13 + #include "kfunc_call_race.skel.h" 14 + 15 + /* This test crafts a race between btf_try_get_module and do_init_module, and 16 + * checks whether btf_try_get_module handles the invocation for a well-formed 17 + * but uninitialized module correctly. Unless the module has completed its 18 + * initcalls, the verifier should fail the program load and return ENXIO. 19 + * 20 + * userfaultfd is used to trigger a fault in an fmod_ret program, and make it 21 + * sleep, then the BPF program is loaded and the return value from verifier is 22 + * inspected. After this, the userfaultfd is closed so that the module loading 23 + * thread makes forward progress, and fmod_ret injects an error so that the 24 + * module load fails and it is freed. 25 + * 26 + * If the verifier succeeded in loading the supplied program, it will end up 27 + * taking reference to freed module, and trigger a crash when the program fd 28 + * is closed later. This is true for both kfuncs and ksyms. In both cases, 29 + * the crash is triggered inside bpf_prog_free_deferred, when module reference 30 + * is finally released. 
31 + */ 32 + 33 + struct test_config { 34 + const char *str_open; 35 + void *(*bpf_open_and_load)(); 36 + void (*bpf_destroy)(void *); 37 + }; 38 + 39 + enum test_state { 40 + _TS_INVALID, 41 + TS_MODULE_LOAD, 42 + TS_MODULE_LOAD_FAIL, 43 + }; 44 + 45 + static _Atomic enum test_state state = _TS_INVALID; 46 + 47 + static int sys_finit_module(int fd, const char *param_values, int flags) 48 + { 49 + return syscall(__NR_finit_module, fd, param_values, flags); 50 + } 51 + 52 + static int sys_delete_module(const char *name, unsigned int flags) 53 + { 54 + return syscall(__NR_delete_module, name, flags); 55 + } 56 + 57 + static int load_module(const char *mod) 58 + { 59 + int ret, fd; 60 + 61 + fd = open("bpf_testmod.ko", O_RDONLY); 62 + if (fd < 0) 63 + return fd; 64 + 65 + ret = sys_finit_module(fd, "", 0); 66 + close(fd); 67 + if (ret < 0) 68 + return ret; 69 + return 0; 70 + } 71 + 72 + static void *load_module_thread(void *p) 73 + { 74 + 75 + if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) 76 + atomic_store(&state, TS_MODULE_LOAD); 77 + else 78 + atomic_store(&state, TS_MODULE_LOAD_FAIL); 79 + return p; 80 + } 81 + 82 + static int sys_userfaultfd(int flags) 83 + { 84 + return syscall(__NR_userfaultfd, flags); 85 + } 86 + 87 + static int test_setup_uffd(void *fault_addr) 88 + { 89 + struct uffdio_register uffd_register = {}; 90 + struct uffdio_api uffd_api = {}; 91 + int uffd; 92 + 93 + uffd = sys_userfaultfd(O_CLOEXEC); 94 + if (uffd < 0) 95 + return -errno; 96 + 97 + uffd_api.api = UFFD_API; 98 + uffd_api.features = 0; 99 + if (ioctl(uffd, UFFDIO_API, &uffd_api)) { 100 + close(uffd); 101 + return -1; 102 + } 103 + 104 + uffd_register.range.start = (unsigned long)fault_addr; 105 + uffd_register.range.len = 4096; 106 + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; 107 + if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { 108 + close(uffd); 109 + return -1; 110 + } 111 + return uffd; 112 + } 113 + 114 + static void 
test_bpf_mod_race_config(const struct test_config *config) 115 + { 116 + void *fault_addr, *skel_fail; 117 + struct bpf_mod_race *skel; 118 + struct uffd_msg uffd_msg; 119 + pthread_t load_mod_thrd; 120 + _Atomic int *blockingp; 121 + int uffd, ret; 122 + 123 + fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 124 + if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) 125 + return; 126 + 127 + if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) 128 + goto end_mmap; 129 + 130 + skel = bpf_mod_race__open(); 131 + if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open")) 132 + goto end_module; 133 + 134 + skel->rodata->bpf_mod_race_config.tgid = getpid(); 135 + skel->rodata->bpf_mod_race_config.inject_error = -4242; 136 + skel->rodata->bpf_mod_race_config.fault_addr = fault_addr; 137 + if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load")) 138 + goto end_destroy; 139 + blockingp = (_Atomic int *)&skel->bss->bpf_blocking; 140 + 141 + if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach")) 142 + goto end_destroy; 143 + 144 + uffd = test_setup_uffd(fault_addr); 145 + if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address")) 146 + goto end_destroy; 147 + 148 + if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL), 149 + "load module thread")) 150 + goto end_uffd; 151 + 152 + /* Now, we either fail loading module, or block in bpf prog, spin to find out */ 153 + while (!atomic_load(&state) && !atomic_load(blockingp)) 154 + ; 155 + if (!ASSERT_EQ(state, _TS_INVALID, "module load should block")) 156 + goto end_join; 157 + if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) { 158 + pthread_kill(load_mod_thrd, SIGKILL); 159 + goto end_uffd; 160 + } 161 + 162 + /* We might have set bpf_blocking to 1, but may have not blocked in 163 + * bpf_copy_from_user. Read userfaultfd descriptor to verify that. 
164 + */ 165 + if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg), 166 + "read uffd block event")) 167 + goto end_join; 168 + if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault")) 169 + goto end_join; 170 + 171 + /* We know that load_mod_thrd is blocked in the fmod_ret program, the 172 + * module state is still MODULE_STATE_COMING because mod->init hasn't 173 + * returned. This is the time we try to load a program calling kfunc and 174 + * check if we get ENXIO from verifier. 175 + */ 176 + skel_fail = config->bpf_open_and_load(); 177 + ret = errno; 178 + if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) { 179 + /* Close uffd to unblock load_mod_thrd */ 180 + close(uffd); 181 + uffd = -1; 182 + while (atomic_load(blockingp) != 2) 183 + ; 184 + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 185 + config->bpf_destroy(skel_fail); 186 + goto end_join; 187 + 188 + } 189 + ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO"); 190 + ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false"); 191 + 192 + close(uffd); 193 + uffd = -1; 194 + end_join: 195 + pthread_join(load_mod_thrd, NULL); 196 + if (uffd < 0) 197 + ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success"); 198 + end_uffd: 199 + if (uffd >= 0) 200 + close(uffd); 201 + end_destroy: 202 + bpf_mod_race__destroy(skel); 203 + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 204 + end_module: 205 + sys_delete_module("bpf_testmod", 0); 206 + ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); 207 + end_mmap: 208 + munmap(fault_addr, 4096); 209 + atomic_store(&state, _TS_INVALID); 210 + } 211 + 212 + static const struct test_config ksym_config = { 213 + .str_open = "ksym_race__open_and_load", 214 + .bpf_open_and_load = (void *)ksym_race__open_and_load, 215 + .bpf_destroy = (void *)ksym_race__destroy, 216 + }; 217 + 218 + static const struct test_config kfunc_config = { 219 + .str_open = "kfunc_call_race__open_and_load", 
220 + .bpf_open_and_load = (void *)kfunc_call_race__open_and_load, 221 + .bpf_destroy = (void *)kfunc_call_race__destroy, 222 + }; 223 + 224 + void serial_test_bpf_mod_race(void) 225 + { 226 + if (test__start_subtest("ksym (used_btfs UAF)")) 227 + test_bpf_mod_race_config(&ksym_config); 228 + if (test__start_subtest("kfunc (kfunc_btf_tab UAF)")) 229 + test_bpf_mod_race_config(&kfunc_config); 230 + }
+48
tools/testing/selftests/bpf/prog_tests/bpf_nf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <network_helpers.h> 4 + #include "test_bpf_nf.skel.h" 5 + 6 + enum { 7 + TEST_XDP, 8 + TEST_TC_BPF, 9 + }; 10 + 11 + void test_bpf_nf_ct(int mode) 12 + { 13 + struct test_bpf_nf *skel; 14 + int prog_fd, err, retval; 15 + 16 + skel = test_bpf_nf__open_and_load(); 17 + if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) 18 + return; 19 + 20 + if (mode == TEST_XDP) 21 + prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); 22 + else 23 + prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test); 24 + 25 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL, 26 + (__u32 *)&retval, NULL); 27 + if (!ASSERT_OK(err, "bpf_prog_test_run")) 28 + goto end; 29 + 30 + ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple"); 31 + ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0"); 32 + ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1"); 33 + ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ"); 34 + ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP"); 35 + ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); 36 + ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); 37 + ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); 38 + end: 39 + test_bpf_nf__destroy(skel); 40 + } 41 + 42 + void test_bpf_nf(void) 43 + { 44 + if (test__start_subtest("xdp-ct")) 45 + test_bpf_nf_ct(TEST_XDP); 46 + if (test__start_subtest("tc-bpf-ct")) 47 + test_bpf_nf_ct(TEST_TC_BPF); 48 + }
+4
tools/testing/selftests/bpf/prog_tests/btf.c
··· 4560 4560 has_btf_ext = btf_ext != NULL; 4561 4561 btf_ext__free(btf_ext); 4562 4562 4563 + /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */ 4564 + libbpf_set_strict_mode((__LIBBPF_STRICT_LAST - 1) & ~LIBBPF_STRICT_MAP_DEFINITIONS); 4563 4565 obj = bpf_object__open(test->file); 4564 4566 err = libbpf_get_error(obj); 4565 4567 if (CHECK(err, "obj: %d", err)) ··· 4686 4684 fprintf(stderr, "OK"); 4687 4685 4688 4686 done: 4687 + libbpf_set_strict_mode(LIBBPF_STRICT_ALL); 4688 + 4689 4689 btf__free(btf); 4690 4690 free(func_info); 4691 4691 bpf_object__close(obj);
+6 -6
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
··· 194 194 195 195 attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE; 196 196 attach_opts.replace_prog_fd = allow_prog[0]; 197 - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, 197 + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, 198 198 BPF_CGROUP_INET_EGRESS, &attach_opts), 199 199 "fail_prog_replace_override", "unexpected success\n")) 200 200 goto err; 201 201 CHECK_FAIL(errno != EINVAL); 202 202 203 203 attach_opts.flags = BPF_F_REPLACE; 204 - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, 204 + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, 205 205 BPF_CGROUP_INET_EGRESS, &attach_opts), 206 206 "fail_prog_replace_no_multi", "unexpected success\n")) 207 207 goto err; ··· 209 209 210 210 attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE; 211 211 attach_opts.replace_prog_fd = -1; 212 - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, 212 + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, 213 213 BPF_CGROUP_INET_EGRESS, &attach_opts), 214 214 "fail_prog_replace_bad_fd", "unexpected success\n")) 215 215 goto err; ··· 217 217 218 218 /* replacing a program that is not attached to cgroup should fail */ 219 219 attach_opts.replace_prog_fd = allow_prog[3]; 220 - if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, 220 + if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1, 221 221 BPF_CGROUP_INET_EGRESS, &attach_opts), 222 222 "fail_prog_replace_no_ent", "unexpected success\n")) 223 223 goto err; ··· 225 225 226 226 /* replace 1st from the top program */ 227 227 attach_opts.replace_prog_fd = allow_prog[0]; 228 - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, 228 + if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, 229 229 BPF_CGROUP_INET_EGRESS, &attach_opts), 230 230 "prog_replace", "errno=%d\n", errno)) 231 231 goto err; 232 232 233 233 /* replace program with itself */ 234 234 attach_opts.replace_prog_fd = allow_prog[6]; 235 - if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, 235 + if 
(CHECK(bpf_prog_attach_opts(allow_prog[6], cg1, 236 236 BPF_CGROUP_INET_EGRESS, &attach_opts), 237 237 "prog_replace", "errno=%d\n", errno)) 238 238 goto err;
+481
tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* 4 + * Copyright 2021 Google LLC. 5 + */ 6 + 7 + #include <test_progs.h> 8 + #include <cgroup_helpers.h> 9 + #include <network_helpers.h> 10 + 11 + #include "cgroup_getset_retval_setsockopt.skel.h" 12 + #include "cgroup_getset_retval_getsockopt.skel.h" 13 + 14 + #define SOL_CUSTOM 0xdeadbeef 15 + 16 + static int zero; 17 + 18 + static void test_setsockopt_set(int cgroup_fd, int sock_fd) 19 + { 20 + struct cgroup_getset_retval_setsockopt *obj; 21 + struct bpf_link *link_set_eunatch = NULL; 22 + 23 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 24 + if (!ASSERT_OK_PTR(obj, "skel-load")) 25 + return; 26 + 27 + /* Attach setsockopt that sets EUNATCH, assert that 28 + * we actually get that error when we run setsockopt() 29 + */ 30 + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, 31 + cgroup_fd); 32 + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) 33 + goto close_bpf_object; 34 + 35 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 36 + &zero, sizeof(int)), "setsockopt")) 37 + goto close_bpf_object; 38 + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) 39 + goto close_bpf_object; 40 + 41 + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) 42 + goto close_bpf_object; 43 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 44 + goto close_bpf_object; 45 + 46 + close_bpf_object: 47 + bpf_link__destroy(link_set_eunatch); 48 + 49 + cgroup_getset_retval_setsockopt__destroy(obj); 50 + } 51 + 52 + static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd) 53 + { 54 + struct cgroup_getset_retval_setsockopt *obj; 55 + struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL; 56 + 57 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 58 + if (!ASSERT_OK_PTR(obj, "skel-load")) 59 + return; 60 + 61 + /* Attach setsockopt that sets EUNATCH, and one that gets the 62 + * previously set errno. 
Assert that we get the same errno back. 63 + */ 64 + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, 65 + cgroup_fd); 66 + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) 67 + goto close_bpf_object; 68 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 69 + cgroup_fd); 70 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 71 + goto close_bpf_object; 72 + 73 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 74 + &zero, sizeof(int)), "setsockopt")) 75 + goto close_bpf_object; 76 + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) 77 + goto close_bpf_object; 78 + 79 + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) 80 + goto close_bpf_object; 81 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 82 + goto close_bpf_object; 83 + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) 84 + goto close_bpf_object; 85 + 86 + close_bpf_object: 87 + bpf_link__destroy(link_set_eunatch); 88 + bpf_link__destroy(link_get_retval); 89 + 90 + cgroup_getset_retval_setsockopt__destroy(obj); 91 + } 92 + 93 + static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd) 94 + { 95 + struct cgroup_getset_retval_setsockopt *obj; 96 + struct bpf_link *link_get_retval = NULL; 97 + 98 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 99 + if (!ASSERT_OK_PTR(obj, "skel-load")) 100 + return; 101 + 102 + /* Attach setsockopt that gets the previously set errno. 103 + * Assert that, without anything setting one, we get 0. 
104 + */ 105 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 106 + cgroup_fd); 107 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 108 + goto close_bpf_object; 109 + 110 + if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 111 + &zero, sizeof(int)), "setsockopt")) 112 + goto close_bpf_object; 113 + 114 + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) 115 + goto close_bpf_object; 116 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 117 + goto close_bpf_object; 118 + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) 119 + goto close_bpf_object; 120 + 121 + close_bpf_object: 122 + bpf_link__destroy(link_get_retval); 123 + 124 + cgroup_getset_retval_setsockopt__destroy(obj); 125 + } 126 + 127 + static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd) 128 + { 129 + struct cgroup_getset_retval_setsockopt *obj; 130 + struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL; 131 + 132 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 133 + if (!ASSERT_OK_PTR(obj, "skel-load")) 134 + return; 135 + 136 + /* Attach setsockopt that gets the previously set errno, and then 137 + * one that sets the errno to EUNATCH. Assert that the get does not 138 + * see EUNATCH set later, and does not prevent EUNATCH from being set. 
139 + */ 140 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 141 + cgroup_fd); 142 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 143 + goto close_bpf_object; 144 + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, 145 + cgroup_fd); 146 + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) 147 + goto close_bpf_object; 148 + 149 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 150 + &zero, sizeof(int)), "setsockopt")) 151 + goto close_bpf_object; 152 + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) 153 + goto close_bpf_object; 154 + 155 + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) 156 + goto close_bpf_object; 157 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 158 + goto close_bpf_object; 159 + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) 160 + goto close_bpf_object; 161 + 162 + close_bpf_object: 163 + bpf_link__destroy(link_get_retval); 164 + bpf_link__destroy(link_set_eunatch); 165 + 166 + cgroup_getset_retval_setsockopt__destroy(obj); 167 + } 168 + 169 + static void test_setsockopt_override(int cgroup_fd, int sock_fd) 170 + { 171 + struct cgroup_getset_retval_setsockopt *obj; 172 + struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL; 173 + struct bpf_link *link_get_retval = NULL; 174 + 175 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 176 + if (!ASSERT_OK_PTR(obj, "skel-load")) 177 + return; 178 + 179 + /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN, 180 + * and then one that gets the exported errno. Assert both the syscall 181 + * and the helper sees the last set errno. 
182 + */ 183 + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, 184 + cgroup_fd); 185 + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) 186 + goto close_bpf_object; 187 + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, 188 + cgroup_fd); 189 + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) 190 + goto close_bpf_object; 191 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 192 + cgroup_fd); 193 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 194 + goto close_bpf_object; 195 + 196 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 197 + &zero, sizeof(int)), "setsockopt")) 198 + goto close_bpf_object; 199 + if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno")) 200 + goto close_bpf_object; 201 + 202 + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) 203 + goto close_bpf_object; 204 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 205 + goto close_bpf_object; 206 + if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value")) 207 + goto close_bpf_object; 208 + 209 + close_bpf_object: 210 + bpf_link__destroy(link_set_eunatch); 211 + bpf_link__destroy(link_set_eisconn); 212 + bpf_link__destroy(link_get_retval); 213 + 214 + cgroup_getset_retval_setsockopt__destroy(obj); 215 + } 216 + 217 + static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd) 218 + { 219 + struct cgroup_getset_retval_setsockopt *obj; 220 + struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL; 221 + 222 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 223 + if (!ASSERT_OK_PTR(obj, "skel-load")) 224 + return; 225 + 226 + /* Attach setsockopt that return a reject without setting errno 227 + * (legacy reject), and one that gets the errno. Assert that for 228 + * backward compatibility the syscall result in EPERM, and this 229 + * is also visible to the helper. 
230 + */ 231 + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, 232 + cgroup_fd); 233 + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) 234 + goto close_bpf_object; 235 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 236 + cgroup_fd); 237 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 238 + goto close_bpf_object; 239 + 240 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 241 + &zero, sizeof(int)), "setsockopt")) 242 + goto close_bpf_object; 243 + if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno")) 244 + goto close_bpf_object; 245 + 246 + if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations")) 247 + goto close_bpf_object; 248 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 249 + goto close_bpf_object; 250 + if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value")) 251 + goto close_bpf_object; 252 + 253 + close_bpf_object: 254 + bpf_link__destroy(link_legacy_eperm); 255 + bpf_link__destroy(link_get_retval); 256 + 257 + cgroup_getset_retval_setsockopt__destroy(obj); 258 + } 259 + 260 + static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd) 261 + { 262 + struct cgroup_getset_retval_setsockopt *obj; 263 + struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL; 264 + struct bpf_link *link_get_retval = NULL; 265 + 266 + obj = cgroup_getset_retval_setsockopt__open_and_load(); 267 + if (!ASSERT_OK_PTR(obj, "skel-load")) 268 + return; 269 + 270 + /* Attach setsockopt that sets EUNATCH, then one that return a reject 271 + * without setting errno, and then one that gets the exported errno. 272 + * Assert both the syscall and the helper's errno are unaffected by 273 + * the second prog (i.e. legacy rejects does not override the errno 274 + * to EPERM). 
275 + */ 276 + link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch, 277 + cgroup_fd); 278 + if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch")) 279 + goto close_bpf_object; 280 + link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm, 281 + cgroup_fd); 282 + if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm")) 283 + goto close_bpf_object; 284 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 285 + cgroup_fd); 286 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 287 + goto close_bpf_object; 288 + 289 + if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR, 290 + &zero, sizeof(int)), "setsockopt")) 291 + goto close_bpf_object; 292 + if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno")) 293 + goto close_bpf_object; 294 + 295 + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) 296 + goto close_bpf_object; 297 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 298 + goto close_bpf_object; 299 + if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value")) 300 + goto close_bpf_object; 301 + 302 + close_bpf_object: 303 + bpf_link__destroy(link_set_eunatch); 304 + bpf_link__destroy(link_legacy_eperm); 305 + bpf_link__destroy(link_get_retval); 306 + 307 + cgroup_getset_retval_setsockopt__destroy(obj); 308 + } 309 + 310 + static void test_getsockopt_get(int cgroup_fd, int sock_fd) 311 + { 312 + struct cgroup_getset_retval_getsockopt *obj; 313 + struct bpf_link *link_get_retval = NULL; 314 + int buf; 315 + socklen_t optlen = sizeof(buf); 316 + 317 + obj = cgroup_getset_retval_getsockopt__open_and_load(); 318 + if (!ASSERT_OK_PTR(obj, "skel-load")) 319 + return; 320 + 321 + /* Attach getsockopt that gets previously set errno. Assert that the 322 + * error from kernel is in both ctx_retval_value and retval_value. 
323 + */ 324 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 325 + cgroup_fd); 326 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 327 + goto close_bpf_object; 328 + 329 + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, 330 + &buf, &optlen), "getsockopt")) 331 + goto close_bpf_object; 332 + if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno")) 333 + goto close_bpf_object; 334 + 335 + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) 336 + goto close_bpf_object; 337 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 338 + goto close_bpf_object; 339 + if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value")) 340 + goto close_bpf_object; 341 + if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value")) 342 + goto close_bpf_object; 343 + 344 + close_bpf_object: 345 + bpf_link__destroy(link_get_retval); 346 + 347 + cgroup_getset_retval_getsockopt__destroy(obj); 348 + } 349 + 350 + static void test_getsockopt_override(int cgroup_fd, int sock_fd) 351 + { 352 + struct cgroup_getset_retval_getsockopt *obj; 353 + struct bpf_link *link_set_eisconn = NULL; 354 + int buf; 355 + socklen_t optlen = sizeof(buf); 356 + 357 + obj = cgroup_getset_retval_getsockopt__open_and_load(); 358 + if (!ASSERT_OK_PTR(obj, "skel-load")) 359 + return; 360 + 361 + /* Attach getsockopt that sets retval to -EISCONN. Assert that this 362 + * overrides the value from kernel. 
363 + */ 364 + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, 365 + cgroup_fd); 366 + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) 367 + goto close_bpf_object; 368 + 369 + if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0, 370 + &buf, &optlen), "getsockopt")) 371 + goto close_bpf_object; 372 + if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno")) 373 + goto close_bpf_object; 374 + 375 + if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations")) 376 + goto close_bpf_object; 377 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 378 + goto close_bpf_object; 379 + 380 + close_bpf_object: 381 + bpf_link__destroy(link_set_eisconn); 382 + 383 + cgroup_getset_retval_getsockopt__destroy(obj); 384 + } 385 + 386 + static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd) 387 + { 388 + struct cgroup_getset_retval_getsockopt *obj; 389 + struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL; 390 + struct bpf_link *link_get_retval = NULL; 391 + int buf; 392 + socklen_t optlen = sizeof(buf); 393 + 394 + obj = cgroup_getset_retval_getsockopt__open_and_load(); 395 + if (!ASSERT_OK_PTR(obj, "skel-load")) 396 + return; 397 + 398 + /* Attach getsockopt that sets retval to -EISCONN, and one that clears 399 + * ctx retval. Assert that the clearing ctx retval is synced to helper 400 + * and clears any errors both from kernel and BPF.. 
401 + */ 402 + link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn, 403 + cgroup_fd); 404 + if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn")) 405 + goto close_bpf_object; 406 + link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval, 407 + cgroup_fd); 408 + if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval")) 409 + goto close_bpf_object; 410 + link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval, 411 + cgroup_fd); 412 + if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval")) 413 + goto close_bpf_object; 414 + 415 + if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0, 416 + &buf, &optlen), "getsockopt")) 417 + goto close_bpf_object; 418 + 419 + if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations")) 420 + goto close_bpf_object; 421 + if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error")) 422 + goto close_bpf_object; 423 + if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value")) 424 + goto close_bpf_object; 425 + if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value")) 426 + goto close_bpf_object; 427 + 428 + close_bpf_object: 429 + bpf_link__destroy(link_set_eisconn); 430 + bpf_link__destroy(link_clear_retval); 431 + bpf_link__destroy(link_get_retval); 432 + 433 + cgroup_getset_retval_getsockopt__destroy(obj); 434 + } 435 + 436 + void test_cgroup_getset_retval(void) 437 + { 438 + int cgroup_fd = -1; 439 + int sock_fd = -1; 440 + 441 + cgroup_fd = test__join_cgroup("/cgroup_getset_retval"); 442 + if (!ASSERT_GE(cgroup_fd, 0, "cg-create")) 443 + goto close_fd; 444 + 445 + sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0); 446 + if (!ASSERT_GE(sock_fd, 0, "start-server")) 447 + goto close_fd; 448 + 449 + if (test__start_subtest("setsockopt-set")) 450 + test_setsockopt_set(cgroup_fd, sock_fd); 451 + 452 + if (test__start_subtest("setsockopt-set_and_get")) 453 + test_setsockopt_set_and_get(cgroup_fd, sock_fd); 454 + 455 + if 
(test__start_subtest("setsockopt-default_zero")) 456 + test_setsockopt_default_zero(cgroup_fd, sock_fd); 457 + 458 + if (test__start_subtest("setsockopt-default_zero_and_set")) 459 + test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd); 460 + 461 + if (test__start_subtest("setsockopt-override")) 462 + test_setsockopt_override(cgroup_fd, sock_fd); 463 + 464 + if (test__start_subtest("setsockopt-legacy_eperm")) 465 + test_setsockopt_legacy_eperm(cgroup_fd, sock_fd); 466 + 467 + if (test__start_subtest("setsockopt-legacy_no_override")) 468 + test_setsockopt_legacy_no_override(cgroup_fd, sock_fd); 469 + 470 + if (test__start_subtest("getsockopt-get")) 471 + test_getsockopt_get(cgroup_fd, sock_fd); 472 + 473 + if (test__start_subtest("getsockopt-override")) 474 + test_getsockopt_override(cgroup_fd, sock_fd); 475 + 476 + if (test__start_subtest("getsockopt-retval_sync")) 477 + test_getsockopt_retval_sync(cgroup_fd, sock_fd); 478 + 479 + close_fd: 480 + close(cgroup_fd); 481 + }
+1 -1
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
··· 457 457 if (map_fd < 0) 458 458 return -1; 459 459 460 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 460 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 461 461 snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i); 462 462 463 463 prog = bpf_object__find_program_by_name(obj, prog_name);
+1 -1
tools/testing/selftests/bpf/prog_tests/global_data.c
··· 121 121 if (CHECK_FAIL(map_fd < 0)) 122 122 return; 123 123 124 - buff = malloc(bpf_map__def(map)->value_size); 124 + buff = malloc(bpf_map__value_size(map)); 125 125 if (buff) 126 126 err = bpf_map_update_elem(map_fd, &zero, buff, 0); 127 127 free(buff);
+1 -1
tools/testing/selftests/bpf/prog_tests/global_data_init.c
··· 20 20 if (CHECK_FAIL(!map || !bpf_map__is_internal(map))) 21 21 goto out; 22 22 23 - sz = bpf_map__def(map)->value_size; 23 + sz = bpf_map__value_size(map); 24 24 newval = malloc(sz); 25 25 if (CHECK_FAIL(!newval)) 26 26 goto out;
+6
tools/testing/selftests/bpf/prog_tests/kfunc_call.c
··· 27 27 ASSERT_OK(err, "bpf_prog_test_run(test2)"); 28 28 ASSERT_EQ(retval, 3, "test2-retval"); 29 29 30 + prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd; 31 + err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 32 + NULL, NULL, (__u32 *)&retval, NULL); 33 + ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)"); 34 + ASSERT_EQ(retval, 0, "test_ref_btf_id-retval"); 35 + 30 36 kfunc_call_test_lskel__destroy(skel); 31 37 } 32 38
+66
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
··· 8 8 #include "test_sockmap_update.skel.h" 9 9 #include "test_sockmap_invalid_update.skel.h" 10 10 #include "test_sockmap_skb_verdict_attach.skel.h" 11 + #include "test_sockmap_progs_query.skel.h" 11 12 #include "bpf_iter_sockmap.skel.h" 12 13 13 14 #define TCP_REPAIR 19 /* TCP sock is under repair right now */ ··· 316 315 test_sockmap_skb_verdict_attach__destroy(skel); 317 316 } 318 317 318 + static __u32 query_prog_id(int prog_fd) 319 + { 320 + struct bpf_prog_info info = {}; 321 + __u32 info_len = sizeof(info); 322 + int err; 323 + 324 + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); 325 + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd") || 326 + !ASSERT_EQ(info_len, sizeof(info), "bpf_obj_get_info_by_fd")) 327 + return 0; 328 + 329 + return info.id; 330 + } 331 + 332 + static void test_sockmap_progs_query(enum bpf_attach_type attach_type) 333 + { 334 + struct test_sockmap_progs_query *skel; 335 + int err, map_fd, verdict_fd; 336 + __u32 attach_flags = 0; 337 + __u32 prog_ids[3] = {}; 338 + __u32 prog_cnt = 3; 339 + 340 + skel = test_sockmap_progs_query__open_and_load(); 341 + if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load")) 342 + return; 343 + 344 + map_fd = bpf_map__fd(skel->maps.sock_map); 345 + 346 + if (attach_type == BPF_SK_MSG_VERDICT) 347 + verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict); 348 + else 349 + verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict); 350 + 351 + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, 352 + &attach_flags, prog_ids, &prog_cnt); 353 + ASSERT_OK(err, "bpf_prog_query failed"); 354 + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); 355 + ASSERT_EQ(prog_cnt, 0, "wrong program count on query"); 356 + 357 + err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0); 358 + if (!ASSERT_OK(err, "bpf_prog_attach failed")) 359 + goto out; 360 + 361 + prog_cnt = 1; 362 + err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */, 363 + &attach_flags, prog_ids, 
&prog_cnt); 364 + ASSERT_OK(err, "bpf_prog_query failed"); 365 + ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query"); 366 + ASSERT_EQ(prog_cnt, 1, "wrong program count on query"); 367 + ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd), 368 + "wrong prog_ids on query"); 369 + 370 + bpf_prog_detach2(verdict_fd, map_fd, attach_type); 371 + out: 372 + test_sockmap_progs_query__destroy(skel); 373 + } 374 + 319 375 void test_sockmap_basic(void) 320 376 { 321 377 if (test__start_subtest("sockmap create_update_free")) ··· 399 341 test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT, 400 342 BPF_SK_SKB_VERDICT); 401 343 } 344 + if (test__start_subtest("sockmap msg_verdict progs query")) 345 + test_sockmap_progs_query(BPF_SK_MSG_VERDICT); 346 + if (test__start_subtest("sockmap stream_parser progs query")) 347 + test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER); 348 + if (test__start_subtest("sockmap stream_verdict progs query")) 349 + test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT); 350 + if (test__start_subtest("sockmap skb_verdict progs query")) 351 + test_sockmap_progs_query(BPF_SK_SKB_VERDICT); 402 352 }
+5 -7
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
··· 1413 1413 1414 1414 static void test_ops_cleanup(const struct bpf_map *map) 1415 1415 { 1416 - const struct bpf_map_def *def; 1417 1416 int err, mapfd; 1418 1417 u32 key; 1419 1418 1420 - def = bpf_map__def(map); 1421 1419 mapfd = bpf_map__fd(map); 1422 1420 1423 - for (key = 0; key < def->max_entries; key++) { 1421 + for (key = 0; key < bpf_map__max_entries(map); key++) { 1424 1422 err = bpf_map_delete_elem(mapfd, &key); 1425 1423 if (err && errno != EINVAL && errno != ENOENT) 1426 1424 FAIL_ERRNO("map_delete: expected EINVAL/ENOENT"); ··· 1441 1443 1442 1444 static const char *map_type_str(const struct bpf_map *map) 1443 1445 { 1444 - const struct bpf_map_def *def; 1446 + int type; 1445 1447 1446 - def = bpf_map__def(map); 1447 - if (IS_ERR(def)) 1448 + if (!map) 1448 1449 return "invalid"; 1450 + type = bpf_map__type(map); 1449 1451 1450 - switch (def->type) { 1452 + switch (type) { 1451 1453 case BPF_MAP_TYPE_SOCKMAP: 1452 1454 return "sockmap"; 1453 1455 case BPF_MAP_TYPE_SOCKHASH:
+2 -2
tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
··· 173 173 } 174 174 175 175 memset(&buf, 0, sizeof(buf)); 176 - buf.zc.address = 12345; /* rejected by BPF */ 176 + buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */ 177 177 optlen = sizeof(buf.zc); 178 178 errno = 0; 179 179 err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen); 180 - if (errno != EPERM) { 180 + if (errno != EINVAL) { 181 181 log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d", 182 182 err, errno); 183 183 goto err;
+18 -18
tools/testing/selftests/bpf/prog_tests/tailcalls.c
··· 37 37 if (CHECK_FAIL(map_fd < 0)) 38 38 goto out; 39 39 40 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 40 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 41 41 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 42 42 43 43 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 53 53 goto out; 54 54 } 55 55 56 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 56 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 57 57 err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, 58 58 &duration, &retval, NULL); 59 59 CHECK(err || retval != i, "tailcall", ··· 69 69 CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", 70 70 err, errno, retval); 71 71 72 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 72 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 73 73 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 74 74 75 75 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 90 90 CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", 91 91 err, errno, retval); 92 92 93 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 94 - j = bpf_map__def(prog_array)->max_entries - 1 - i; 93 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 94 + j = bpf_map__max_entries(prog_array) - 1 - i; 95 95 snprintf(prog_name, sizeof(prog_name), "classifier_%d", j); 96 96 97 97 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 107 107 goto out; 108 108 } 109 109 110 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 111 - j = bpf_map__def(prog_array)->max_entries - 1 - i; 110 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 111 + j = bpf_map__max_entries(prog_array) - 1 - i; 112 112 113 113 err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, 114 114 &duration, &retval, NULL); ··· 125 125 CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n", 126 126 err, errno, retval); 127 127 128 
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 128 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 129 129 err = bpf_map_delete_elem(map_fd, &i); 130 130 if (CHECK_FAIL(err >= 0 || errno != ENOENT)) 131 131 goto out; ··· 175 175 if (CHECK_FAIL(map_fd < 0)) 176 176 goto out; 177 177 178 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 178 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 179 179 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 180 180 181 181 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 353 353 if (CHECK_FAIL(map_fd < 0)) 354 354 return; 355 355 356 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 356 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 357 357 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 358 358 359 359 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 369 369 goto out; 370 370 } 371 371 372 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 372 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 373 373 err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); 374 374 if (CHECK_FAIL(err)) 375 375 goto out; ··· 380 380 "err %d errno %d retval %d\n", err, errno, retval); 381 381 } 382 382 383 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 383 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 384 384 err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY); 385 385 if (CHECK_FAIL(err)) 386 386 goto out; ··· 441 441 if (CHECK_FAIL(map_fd < 0)) 442 442 return; 443 443 444 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 444 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 445 445 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 446 446 447 447 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 457 457 goto out; 458 458 } 459 459 460 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 460 + for (i = 0; i < 
bpf_map__max_entries(prog_array); i++) { 461 461 err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); 462 462 if (CHECK_FAIL(err)) 463 463 goto out; ··· 468 468 "err %d errno %d retval %d\n", err, errno, retval); 469 469 } 470 470 471 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 471 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 472 472 err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY); 473 473 if (CHECK_FAIL(err)) 474 474 goto out; ··· 520 520 goto out; 521 521 522 522 /* nop -> jmp */ 523 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 523 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 524 524 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 525 525 526 526 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 681 681 if (CHECK_FAIL(map_fd < 0)) 682 682 goto out; 683 683 684 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 684 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 685 685 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 686 686 687 687 prog = bpf_object__find_program_by_name(obj, prog_name); ··· 778 778 if (CHECK_FAIL(map_fd < 0)) 779 779 goto out; 780 780 781 - for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { 781 + for (i = 0; i < bpf_map__max_entries(prog_array); i++) { 782 782 snprintf(prog_name, sizeof(prog_name), "classifier_%d", i); 783 783 784 784 prog = bpf_object__find_program_by_name(obj, prog_name);
+104
tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <network_helpers.h> 4 + 5 + void test_xdp_update_frags(void) 6 + { 7 + const char *file = "./test_xdp_update_frags.o"; 8 + __u32 duration, retval, size; 9 + struct bpf_program *prog; 10 + struct bpf_object *obj; 11 + int err, prog_fd; 12 + __u32 *offset; 13 + __u8 *buf; 14 + 15 + obj = bpf_object__open(file); 16 + if (libbpf_get_error(obj)) 17 + return; 18 + 19 + prog = bpf_object__next_program(obj, NULL); 20 + if (bpf_object__load(obj)) 21 + return; 22 + 23 + prog_fd = bpf_program__fd(prog); 24 + 25 + buf = malloc(128); 26 + if (!ASSERT_OK_PTR(buf, "alloc buf 128b")) 27 + goto out; 28 + 29 + memset(buf, 0, 128); 30 + offset = (__u32 *)buf; 31 + *offset = 16; 32 + buf[*offset] = 0xaa; /* marker at offset 16 (head) */ 33 + buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */ 34 + 35 + err = bpf_prog_test_run(prog_fd, 1, buf, 128, 36 + buf, &size, &retval, &duration); 37 + 38 + /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */ 39 + ASSERT_OK(err, "xdp_update_frag"); 40 + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); 41 + ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]"); 42 + ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]"); 43 + 44 + free(buf); 45 + 46 + buf = malloc(9000); 47 + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) 48 + goto out; 49 + 50 + memset(buf, 0, 9000); 51 + offset = (__u32 *)buf; 52 + *offset = 5000; 53 + buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */ 54 + buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */ 55 + 56 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 57 + buf, &size, &retval, &duration); 58 + 59 + /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */ 60 + ASSERT_OK(err, "xdp_update_frag"); 61 + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); 62 + ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]"); 63 + ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]"); 64 + 65 + memset(buf, 0, 9000); 66 + 
offset = (__u32 *)buf; 67 + *offset = 3510; 68 + buf[*offset] = 0xaa; /* marker at offset 3510 (head) */ 69 + buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */ 70 + 71 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 72 + buf, &size, &retval, &duration); 73 + 74 + /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */ 75 + ASSERT_OK(err, "xdp_update_frag"); 76 + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); 77 + ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]"); 78 + ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]"); 79 + 80 + memset(buf, 0, 9000); 81 + offset = (__u32 *)buf; 82 + *offset = 7606; 83 + buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */ 84 + buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */ 85 + 86 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 87 + buf, &size, &retval, &duration); 88 + 89 + /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */ 90 + ASSERT_OK(err, "xdp_update_frag"); 91 + ASSERT_EQ(retval, XDP_PASS, "xdp_update_frag retval"); 92 + ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]"); 93 + ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); 94 + 95 + free(buf); 96 + out: 97 + bpf_object__close(obj); 98 + } 99 + 100 + void test_xdp_adjust_frags(void) 101 + { 102 + if (test__start_subtest("xdp_adjust_frags")) 103 + test_xdp_update_frags(); 104 + }
+153 -40
tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
··· 11 11 char buf[128]; 12 12 13 13 err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); 14 - if (CHECK_FAIL(err)) 14 + if (ASSERT_OK(err, "test_xdp_adjust_tail_shrink")) 15 15 return; 16 16 17 17 err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 18 18 buf, &size, &retval, &duration); 19 - 20 - CHECK(err || retval != XDP_DROP, 21 - "ipv4", "err %d errno %d retval %d size %d\n", 22 - err, errno, retval, size); 19 + ASSERT_OK(err, "ipv4"); 20 + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); 23 21 24 22 expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */ 25 23 err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6), 26 24 buf, &size, &retval, &duration); 27 - CHECK(err || retval != XDP_TX || size != expect_sz, 28 - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", 29 - err, errno, retval, size, expect_sz); 25 + ASSERT_OK(err, "ipv6"); 26 + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); 27 + ASSERT_EQ(size, expect_sz, "ipv6 size"); 28 + 30 29 bpf_object__close(obj); 31 30 } 32 31 ··· 38 39 int err, prog_fd; 39 40 40 41 err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); 41 - if (CHECK_FAIL(err)) 42 + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) 42 43 return; 43 44 44 45 err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4), 45 46 buf, &size, &retval, &duration); 46 - CHECK(err || retval != XDP_DROP, 47 - "ipv4", "err %d errno %d retval %d size %d\n", 48 - err, errno, retval, size); 47 + ASSERT_OK(err, "ipv4"); 48 + ASSERT_EQ(retval, XDP_DROP, "ipv4 retval"); 49 49 50 50 expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */ 51 51 err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */, 52 52 buf, &size, &retval, &duration); 53 - CHECK(err || retval != XDP_TX || size != expect_sz, 54 - "ipv6", "err %d errno %d retval %d size %d expect-size %d\n", 55 - err, errno, retval, size, expect_sz); 53 + ASSERT_OK(err, "ipv6"); 54 + ASSERT_EQ(retval, XDP_TX, "ipv6 retval"); 55 + 
ASSERT_EQ(size, expect_sz, "ipv6 size"); 56 56 57 57 bpf_object__close(obj); 58 58 } ··· 74 76 }; 75 77 76 78 err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd); 77 - if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno)) 79 + if (ASSERT_OK(err, "test_xdp_adjust_tail_grow")) 78 80 return; 79 81 80 82 /* Test case-64 */ ··· 84 86 /* Kernel side alloc packet memory area that is zero init */ 85 87 err = bpf_prog_test_run_xattr(&tattr); 86 88 87 - CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */ 88 - || tattr.retval != XDP_TX 89 - || tattr.data_size_out != 192, /* Expected grow size */ 90 - "case-64", 91 - "err %d errno %d retval %d size %d\n", 92 - err, errno, tattr.retval, tattr.data_size_out); 89 + ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */ 90 + ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval"); 91 + ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */ 93 92 94 93 /* Extra checks for data contents */ 95 - CHECK_ATTR(tattr.data_size_out != 192 96 - || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */ 97 - || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */ 98 - || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */ 99 - "case-64-data", 100 - "err %d errno %d retval %d size %d\n", 101 - err, errno, tattr.retval, tattr.data_size_out); 94 + ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */ 95 + ASSERT_EQ(buf[63], 1, "case-64-data buf[63]"); 96 + ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */ 97 + ASSERT_EQ(buf[127], 0, "case-64-data buf[127]"); 98 + ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */ 99 + ASSERT_EQ(buf[191], 1, "case-64-data buf[191]"); 102 100 103 101 /* Test case-128 */ 104 102 memset(buf, 2, sizeof(buf)); ··· 103 109 err = bpf_prog_test_run_xattr(&tattr); 104 110 105 111 max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */ 106 - 
CHECK_ATTR(err 107 - || tattr.retval != XDP_TX 108 - || tattr.data_size_out != max_grow,/* Expect max grow size */ 109 - "case-128", 110 - "err %d errno %d retval %d size %d expect-size %d\n", 111 - err, errno, tattr.retval, tattr.data_size_out, max_grow); 112 + ASSERT_OK(err, "case-128"); 113 + ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval"); 114 + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */ 112 115 113 116 /* Extra checks for data content: Count grow size, will contain zeros */ 114 117 for (i = 0, cnt = 0; i < sizeof(buf); i++) { 115 118 if (buf[i] == 0) 116 119 cnt++; 117 120 } 118 - CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */ 119 - || tattr.data_size_out != max_grow, /* Total grow size */ 120 - "case-128-data", 121 - "err %d errno %d retval %d size %d grow-size %d\n", 122 - err, errno, tattr.retval, tattr.data_size_out, cnt); 121 + ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */ 122 + ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */ 123 123 124 + bpf_object__close(obj); 125 + } 126 + 127 + void test_xdp_adjust_frags_tail_shrink(void) 128 + { 129 + const char *file = "./test_xdp_adjust_tail_shrink.o"; 130 + __u32 duration, retval, size, exp_size; 131 + struct bpf_program *prog; 132 + struct bpf_object *obj; 133 + int err, prog_fd; 134 + __u8 *buf; 135 + 136 + /* For the individual test cases, the first byte in the packet 137 + * indicates which test will be run. 
138 + */ 139 + obj = bpf_object__open(file); 140 + if (libbpf_get_error(obj)) 141 + return; 142 + 143 + prog = bpf_object__next_program(obj, NULL); 144 + if (bpf_object__load(obj)) 145 + return; 146 + 147 + prog_fd = bpf_program__fd(prog); 148 + 149 + buf = malloc(9000); 150 + if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb")) 151 + goto out; 152 + 153 + memset(buf, 0, 9000); 154 + 155 + /* Test case removing 10 bytes from last frag, NOT freeing it */ 156 + exp_size = 8990; /* 9000 - 10 */ 157 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 158 + buf, &size, &retval, &duration); 159 + 160 + ASSERT_OK(err, "9Kb-10b"); 161 + ASSERT_EQ(retval, XDP_TX, "9Kb-10b retval"); 162 + ASSERT_EQ(size, exp_size, "9Kb-10b size"); 163 + 164 + /* Test case removing one of two pages, assuming 4K pages */ 165 + buf[0] = 1; 166 + exp_size = 4900; /* 9000 - 4100 */ 167 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 168 + buf, &size, &retval, &duration); 169 + 170 + ASSERT_OK(err, "9Kb-4Kb"); 171 + ASSERT_EQ(retval, XDP_TX, "9Kb-4Kb retval"); 172 + ASSERT_EQ(size, exp_size, "9Kb-4Kb size"); 173 + 174 + /* Test case removing two pages resulting in a linear xdp_buff */ 175 + buf[0] = 2; 176 + exp_size = 800; /* 9000 - 8200 */ 177 + err = bpf_prog_test_run(prog_fd, 1, buf, 9000, 178 + buf, &size, &retval, &duration); 179 + 180 + ASSERT_OK(err, "9Kb-9Kb"); 181 + ASSERT_EQ(retval, XDP_TX, "9Kb-9Kb retval"); 182 + ASSERT_EQ(size, exp_size, "9Kb-9Kb size"); 183 + 184 + free(buf); 185 + out: 186 + bpf_object__close(obj); 187 + } 188 + 189 + void test_xdp_adjust_frags_tail_grow(void) 190 + { 191 + const char *file = "./test_xdp_adjust_tail_grow.o"; 192 + __u32 duration, retval, size, exp_size; 193 + struct bpf_program *prog; 194 + struct bpf_object *obj; 195 + int err, i, prog_fd; 196 + __u8 *buf; 197 + 198 + obj = bpf_object__open(file); 199 + if (libbpf_get_error(obj)) 200 + return; 201 + 202 + prog = bpf_object__next_program(obj, NULL); 203 + if (bpf_object__load(obj)) 204 + return; 205 + 206 + 
prog_fd = bpf_program__fd(prog); 207 + 208 + buf = malloc(16384); 209 + if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb")) 210 + goto out; 211 + 212 + /* Test case add 10 bytes to last frag */ 213 + memset(buf, 1, 16384); 214 + size = 9000; 215 + exp_size = size + 10; 216 + err = bpf_prog_test_run(prog_fd, 1, buf, size, 217 + buf, &size, &retval, &duration); 218 + 219 + ASSERT_OK(err, "9Kb+10b"); 220 + ASSERT_EQ(retval, XDP_TX, "9Kb+10b retval"); 221 + ASSERT_EQ(size, exp_size, "9Kb+10b size"); 222 + 223 + for (i = 0; i < 9000; i++) 224 + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); 225 + 226 + for (i = 9000; i < 9010; i++) 227 + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); 228 + 229 + for (i = 9010; i < 16384; i++) 230 + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); 231 + 232 + /* Test a too large grow */ 233 + memset(buf, 1, 16384); 234 + size = 9001; 235 + exp_size = size; 236 + err = bpf_prog_test_run(prog_fd, 1, buf, size, 237 + buf, &size, &retval, &duration); 238 + 239 + ASSERT_OK(err, "9Kb+10b"); 240 + ASSERT_EQ(retval, XDP_DROP, "9Kb+10b retval"); 241 + ASSERT_EQ(size, exp_size, "9Kb+10b size"); 242 + 243 + free(buf); 244 + out: 124 245 bpf_object__close(obj); 125 246 } 126 247 ··· 247 138 test_xdp_adjust_tail_grow(); 248 139 if (test__start_subtest("xdp_adjust_tail_grow2")) 249 140 test_xdp_adjust_tail_grow2(); 141 + if (test__start_subtest("xdp_adjust_frags_tail_shrink")) 142 + test_xdp_adjust_frags_tail_shrink(); 143 + if (test__start_subtest("xdp_adjust_frags_tail_grow")) 144 + test_xdp_adjust_frags_tail_grow(); 250 145 }
+84 -53
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
··· 10 10 int pkt_len; 11 11 }; 12 12 13 + struct test_ctx_s { 14 + bool passed; 15 + int pkt_size; 16 + }; 17 + 18 + struct test_ctx_s test_ctx; 19 + 13 20 static void on_sample(void *ctx, int cpu, void *data, __u32 size) 14 21 { 15 - int duration = 0; 16 22 struct meta *meta = (struct meta *)data; 17 23 struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); 24 + unsigned char *raw_pkt = data + sizeof(*meta); 25 + struct test_ctx_s *tst_ctx = ctx; 18 26 19 - if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), 20 - "check_size", "size %u < %zu\n", 21 - size, sizeof(pkt_v4) + sizeof(*meta))) 27 + ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size"); 28 + ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex"); 29 + ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len"); 30 + ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0, 31 + "check_packet_content"); 32 + 33 + if (meta->pkt_len > sizeof(pkt_v4)) { 34 + for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++) 35 + ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i, 36 + "check_packet_content"); 37 + } 38 + 39 + tst_ctx->passed = true; 40 + } 41 + 42 + #define BUF_SZ 9000 43 + 44 + static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb, 45 + struct test_xdp_bpf2bpf *ftrace_skel, 46 + int pkt_size) 47 + { 48 + __u32 duration = 0, retval, size; 49 + __u8 *buf, *buf_in; 50 + int err; 51 + 52 + if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") || 53 + !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size")) 22 54 return; 23 55 24 - if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", 25 - "meta->ifindex = %d\n", meta->ifindex)) 56 + buf_in = malloc(BUF_SZ); 57 + if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()")) 26 58 return; 27 59 28 - if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", 29 - "meta->pkt_len = %zd\n", sizeof(pkt_v4))) 60 + buf = malloc(BUF_SZ); 61 + if (!ASSERT_OK_PTR(buf, "buf malloc()")) { 62 + free(buf_in); 30 63 return; 
64 + } 31 65 32 - if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 33 - "check_packet_content", "content not the same\n")) 34 - return; 66 + test_ctx.passed = false; 67 + test_ctx.pkt_size = pkt_size; 35 68 36 - *(bool *)ctx = true; 69 + memcpy(buf_in, &pkt_v4, sizeof(pkt_v4)); 70 + if (pkt_size > sizeof(pkt_v4)) { 71 + for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++) 72 + buf_in[i + sizeof(pkt_v4)] = i; 73 + } 74 + 75 + /* Run test program */ 76 + err = bpf_prog_test_run(pkt_fd, 1, buf_in, pkt_size, 77 + buf, &size, &retval, &duration); 78 + 79 + ASSERT_OK(err, "ipv4"); 80 + ASSERT_EQ(retval, XDP_PASS, "ipv4 retval"); 81 + ASSERT_EQ(size, pkt_size, "ipv4 size"); 82 + 83 + /* Make sure bpf_xdp_output() was triggered and it sent the expected 84 + * data to the perf ring buffer. 85 + */ 86 + err = perf_buffer__poll(pb, 100); 87 + 88 + ASSERT_GE(err, 0, "perf_buffer__poll"); 89 + ASSERT_TRUE(test_ctx.passed, "test passed"); 90 + /* Verify test results */ 91 + ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"), 92 + "fentry result"); 93 + ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result"); 94 + 95 + free(buf); 96 + free(buf_in); 37 97 } 38 98 39 99 void test_xdp_bpf2bpf(void) 40 100 { 41 - __u32 duration = 0, retval, size; 42 - char buf[128]; 43 101 int err, pkt_fd, map_fd; 44 - bool passed = false; 45 - struct iphdr iph; 46 - struct iptnl_info value4 = {.family = AF_INET}; 102 + int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200}; 103 + struct iptnl_info value4 = {.family = AF_INET6}; 47 104 struct test_xdp *pkt_skel = NULL; 48 105 struct test_xdp_bpf2bpf *ftrace_skel = NULL; 49 106 struct vip key4 = {.protocol = 6, .family = AF_INET}; ··· 109 52 110 53 /* Load XDP program to introspect */ 111 54 pkt_skel = test_xdp__open_and_load(); 112 - if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n")) 55 + if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load")) 113 56 return; 114 57 115 58 pkt_fd = 
bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel); ··· 119 62 120 63 /* Load trace program */ 121 64 ftrace_skel = test_xdp_bpf2bpf__open(); 122 - if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n")) 65 + if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open")) 123 66 goto out; 124 67 125 68 /* Demonstrate the bpf_program__set_attach_target() API rather than ··· 134 77 bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel"); 135 78 136 79 err = test_xdp_bpf2bpf__load(ftrace_skel); 137 - if (CHECK(err, "__load", "ftrace skeleton failed\n")) 80 + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load")) 138 81 goto out; 139 82 140 83 err = test_xdp_bpf2bpf__attach(ftrace_skel); 141 - if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) 84 + if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach")) 142 85 goto out; 143 86 144 87 /* Set up perf buffer */ 145 - pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 1, 146 - on_sample, NULL, &passed, NULL); 88 + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8, 89 + on_sample, NULL, &test_ctx, NULL); 147 90 if (!ASSERT_OK_PTR(pb, "perf_buf__new")) 148 91 goto out; 149 92 150 - /* Run test program */ 151 - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), 152 - buf, &size, &retval, &duration); 153 - memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph)); 154 - if (CHECK(err || retval != XDP_TX || size != 74 || 155 - iph.protocol != IPPROTO_IPIP, "ipv4", 156 - "err %d errno %d retval %d size %d\n", 157 - err, errno, retval, size)) 158 - goto out; 159 - 160 - /* Make sure bpf_xdp_output() was triggered and it sent the expected 161 - * data to the perf ring buffer. 
162 - */ 163 - err = perf_buffer__poll(pb, 100); 164 - if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) 165 - goto out; 166 - 167 - CHECK_FAIL(!passed); 168 - 169 - /* Verify test results */ 170 - if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), 171 - "result", "fentry failed err %llu\n", 172 - ftrace_skel->bss->test_result_fentry)) 173 - goto out; 174 - 175 - CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result", 176 - "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); 177 - 93 + for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++) 94 + run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel, 95 + pkt_sizes[i]); 178 96 out: 179 - if (pb) 180 - perf_buffer__free(pb); 97 + perf_buffer__free(pb); 181 98 test_xdp__destroy(pkt_skel); 182 99 test_xdp_bpf2bpf__destroy(ftrace_skel); 183 100 }
+63 -1
tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
··· 3 3 #include <linux/if_link.h> 4 4 #include <test_progs.h> 5 5 6 + #include "test_xdp_with_cpumap_frags_helpers.skel.h" 6 7 #include "test_xdp_with_cpumap_helpers.skel.h" 7 8 8 9 #define IFINDEX_LO 1 9 10 10 - void serial_test_xdp_cpumap_attach(void) 11 + void test_xdp_with_cpumap_helpers(void) 11 12 { 12 13 struct test_xdp_with_cpumap_helpers *skel; 13 14 struct bpf_prog_info info = {}; ··· 55 54 err = bpf_map_update_elem(map_fd, &idx, &val, 0); 56 55 ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry"); 57 56 57 + /* Try to attach BPF_XDP program with frags to cpumap when we have 58 + * already loaded a BPF_XDP program on the map 59 + */ 60 + idx = 1; 61 + val.qsize = 192; 62 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); 63 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 64 + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); 65 + 58 66 out_close: 59 67 test_xdp_with_cpumap_helpers__destroy(skel); 68 + } 69 + 70 + void test_xdp_with_cpumap_frags_helpers(void) 71 + { 72 + struct test_xdp_with_cpumap_frags_helpers *skel; 73 + struct bpf_prog_info info = {}; 74 + __u32 len = sizeof(info); 75 + struct bpf_cpumap_val val = { 76 + .qsize = 192, 77 + }; 78 + int err, frags_prog_fd, map_fd; 79 + __u32 idx = 0; 80 + 81 + skel = test_xdp_with_cpumap_frags_helpers__open_and_load(); 82 + if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) 83 + return; 84 + 85 + frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags); 86 + map_fd = bpf_map__fd(skel->maps.cpu_map); 87 + err = bpf_obj_get_info_by_fd(frags_prog_fd, &info, &len); 88 + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) 89 + goto out_close; 90 + 91 + val.bpf_prog.fd = frags_prog_fd; 92 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 93 + ASSERT_OK(err, "Add program to cpumap entry"); 94 + 95 + err = bpf_map_lookup_elem(map_fd, &idx, &val); 96 + ASSERT_OK(err, "Read cpumap entry"); 97 + ASSERT_EQ(info.id, val.bpf_prog.id, 
98 + "Match program id to cpumap entry prog_id"); 99 + 100 + /* Try to attach BPF_XDP program to cpumap when we have 101 + * already loaded a BPF_XDP program with frags on the map 102 + */ 103 + idx = 1; 104 + val.qsize = 192; 105 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm); 106 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 107 + ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry"); 108 + 109 + out_close: 110 + test_xdp_with_cpumap_frags_helpers__destroy(skel); 111 + } 112 + 113 + void serial_test_xdp_cpumap_attach(void) 114 + { 115 + if (test__start_subtest("CPUMAP with programs in entries")) 116 + test_xdp_with_cpumap_helpers(); 117 + 118 + if (test__start_subtest("CPUMAP with frags programs in entries")) 119 + test_xdp_with_cpumap_frags_helpers(); 60 120 }
+55
tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
··· 4 4 #include <test_progs.h> 5 5 6 6 #include "test_xdp_devmap_helpers.skel.h" 7 + #include "test_xdp_with_devmap_frags_helpers.skel.h" 7 8 #include "test_xdp_with_devmap_helpers.skel.h" 8 9 9 10 #define IFINDEX_LO 1 ··· 57 56 err = bpf_map_update_elem(map_fd, &idx, &val, 0); 58 57 ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry"); 59 58 59 + /* Try to attach BPF_XDP program with frags to devmap when we have 60 + * already loaded a BPF_XDP program on the map 61 + */ 62 + idx = 1; 63 + val.ifindex = 1; 64 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); 65 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 66 + ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry"); 67 + 60 68 out_close: 61 69 test_xdp_with_devmap_helpers__destroy(skel); 62 70 } ··· 81 71 } 82 72 } 83 73 74 + void test_xdp_with_devmap_frags_helpers(void) 75 + { 76 + struct test_xdp_with_devmap_frags_helpers *skel; 77 + struct bpf_prog_info info = {}; 78 + struct bpf_devmap_val val = { 79 + .ifindex = IFINDEX_LO, 80 + }; 81 + __u32 len = sizeof(info); 82 + int err, dm_fd_frags, map_fd; 83 + __u32 idx = 0; 84 + 85 + skel = test_xdp_with_devmap_frags_helpers__open_and_load(); 86 + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) 87 + return; 88 + 89 + dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags); 90 + map_fd = bpf_map__fd(skel->maps.dm_ports); 91 + err = bpf_obj_get_info_by_fd(dm_fd_frags, &info, &len); 92 + if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd")) 93 + goto out_close; 94 + 95 + val.bpf_prog.fd = dm_fd_frags; 96 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 97 + ASSERT_OK(err, "Add frags program to devmap entry"); 98 + 99 + err = bpf_map_lookup_elem(map_fd, &idx, &val); 100 + ASSERT_OK(err, "Read devmap entry"); 101 + ASSERT_EQ(info.id, val.bpf_prog.id, 102 + "Match program id to devmap entry prog_id"); 103 + 104 + /* Try to attach BPF_XDP program to devmap when we have 105 + * already loaded a 
BPF_XDP program with frags on the map 106 + */ 107 + idx = 1; 108 + val.ifindex = 1; 109 + val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm); 110 + err = bpf_map_update_elem(map_fd, &idx, &val, 0); 111 + ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry"); 112 + 113 + out_close: 114 + test_xdp_with_devmap_frags_helpers__destroy(skel); 115 + } 84 116 85 117 void serial_test_xdp_devmap_attach(void) 86 118 { 87 119 if (test__start_subtest("DEVMAP with programs in entries")) 88 120 test_xdp_with_devmap_helpers(); 121 + 122 + if (test__start_subtest("DEVMAP with frags programs in entries")) 123 + test_xdp_with_devmap_frags_helpers(); 89 124 90 125 if (test__start_subtest("Verifier check of DEVMAP programs")) 91 126 test_neg_xdp_devmap_helpers();
+60
tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright Amazon.com Inc. or its affiliates. */ 3 + #include "bpf_iter.h" 4 + #include "bpf_tracing_net.h" 5 + #include <bpf/bpf_helpers.h> 6 + #include <limits.h> 7 + 8 + #define AUTOBIND_LEN 6 9 + char sun_path[AUTOBIND_LEN]; 10 + 11 + #define NR_CASES 5 12 + int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX}; 13 + int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1}; 14 + int sndbuf_getsockopt_expected[NR_CASES]; 15 + 16 + static inline int cmpname(struct unix_sock *unix_sk) 17 + { 18 + int i; 19 + 20 + for (i = 0; i < AUTOBIND_LEN; i++) { 21 + if (unix_sk->addr->name->sun_path[i] != sun_path[i]) 22 + return -1; 23 + } 24 + 25 + return 0; 26 + } 27 + 28 + SEC("iter/unix") 29 + int change_sndbuf(struct bpf_iter__unix *ctx) 30 + { 31 + struct unix_sock *unix_sk = ctx->unix_sk; 32 + int i, err; 33 + 34 + if (!unix_sk || !unix_sk->addr) 35 + return 0; 36 + 37 + if (unix_sk->addr->name->sun_path[0]) 38 + return 0; 39 + 40 + if (cmpname(unix_sk)) 41 + return 0; 42 + 43 + for (i = 0; i < NR_CASES; i++) { 44 + err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, 45 + &sndbuf_setsockopt[i], 46 + sizeof(sndbuf_setsockopt[i])); 47 + if (err) 48 + break; 49 + 50 + err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF, 51 + &sndbuf_getsockopt[i], 52 + sizeof(sndbuf_getsockopt[i])); 53 + if (err) 54 + break; 55 + } 56 + 57 + return 0; 58 + } 59 + 60 + char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/bpf_iter_unix.c
··· 63 63 BPF_SEQ_PRINTF(seq, " @"); 64 64 65 65 for (i = 1; i < len; i++) { 66 - /* unix_mkname() tests this upper bound. */ 66 + /* unix_validate_addr() tests this upper bound. */ 67 67 if (i >= sizeof(struct sockaddr_un)) 68 68 break; 69 69
+100
tools/testing/selftests/bpf/progs/bpf_mod_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + 6 + const volatile struct { 7 + /* thread to activate trace programs for */ 8 + pid_t tgid; 9 + /* return error from __init function */ 10 + int inject_error; 11 + /* uffd monitored range start address */ 12 + void *fault_addr; 13 + } bpf_mod_race_config = { -1 }; 14 + 15 + int bpf_blocking = 0; 16 + int res_try_get_module = -1; 17 + 18 + static __always_inline bool check_thread_id(void) 19 + { 20 + struct task_struct *task = bpf_get_current_task_btf(); 21 + 22 + return task->tgid == bpf_mod_race_config.tgid; 23 + } 24 + 25 + /* The trace of execution is something like this: 26 + * 27 + * finit_module() 28 + * load_module() 29 + * prepare_coming_module() 30 + * notifier_call(MODULE_STATE_COMING) 31 + * btf_parse_module() 32 + * btf_alloc_id() // Visible to userspace at this point 33 + * list_add(btf_mod->list, &btf_modules) 34 + * do_init_module() 35 + * freeinit = kmalloc() 36 + * ret = mod->init() 37 + * bpf_prog_widen_race() 38 + * bpf_copy_from_user() 39 + * ...<sleep>... 40 + * if (ret < 0) 41 + * ... 42 + * free_module() 43 + * return ret 44 + * 45 + * At this point, module loading thread is blocked, we now load the program: 46 + * 47 + * bpf_check 48 + * add_kfunc_call/check_pseudo_btf_id 49 + * btf_try_get_module 50 + * try_get_module_live == false 51 + * return -ENXIO 52 + * 53 + * Without the fix (try_get_module_live in btf_try_get_module): 54 + * 55 + * bpf_check 56 + * add_kfunc_call/check_pseudo_btf_id 57 + * btf_try_get_module 58 + * try_get_module == true 59 + * <store module reference in btf_kfunc_tab or used_btf array> 60 + * ... 61 + * return fd 62 + * 63 + * Now, if we inject an error in the blocked program, our module will be freed 64 + * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING). 65 + * Later, when bpf program is freed, it will try to module_put already freed 66 + * module. 
This is why try_get_module_live returns false if mod->state is not 67 + * MODULE_STATE_LIVE. 68 + */ 69 + 70 + SEC("fmod_ret.s/bpf_fentry_test1") 71 + int BPF_PROG(widen_race, int a, int ret) 72 + { 73 + char dst; 74 + 75 + if (!check_thread_id()) 76 + return 0; 77 + /* Indicate that we will attempt to block */ 78 + bpf_blocking = 1; 79 + bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr); 80 + return bpf_mod_race_config.inject_error; 81 + } 82 + 83 + SEC("fexit/do_init_module") 84 + int BPF_PROG(fexit_init_module, struct module *mod, int ret) 85 + { 86 + if (!check_thread_id()) 87 + return 0; 88 + /* Indicate that we finished blocking */ 89 + bpf_blocking = 2; 90 + return 0; 91 + } 92 + 93 + SEC("fexit/btf_try_get_module") 94 + int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod) 95 + { 96 + res_try_get_module = !!mod; 97 + return 0; 98 + } 99 + 100 + char _license[] SEC("license") = "GPL";
+2
tools/testing/selftests/bpf/progs/bpf_tracing_net.h
··· 5 5 #define AF_INET 2 6 6 #define AF_INET6 10 7 7 8 + #define SOL_SOCKET 1 9 + #define SO_SNDBUF 7 8 10 #define __SO_ACCEPTCON (1 << 16) 9 11 10 12 #define SOL_TCP 6
+45
tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* 4 + * Copyright 2021 Google LLC. 5 + */ 6 + 7 + #include <errno.h> 8 + #include <linux/bpf.h> 9 + #include <bpf/bpf_helpers.h> 10 + 11 + __u32 invocations = 0; 12 + __u32 assertion_error = 0; 13 + __u32 retval_value = 0; 14 + __u32 ctx_retval_value = 0; 15 + 16 + SEC("cgroup/getsockopt") 17 + int get_retval(struct bpf_sockopt *ctx) 18 + { 19 + retval_value = bpf_get_retval(); 20 + ctx_retval_value = ctx->retval; 21 + __sync_fetch_and_add(&invocations, 1); 22 + 23 + return 1; 24 + } 25 + 26 + SEC("cgroup/getsockopt") 27 + int set_eisconn(struct bpf_sockopt *ctx) 28 + { 29 + __sync_fetch_and_add(&invocations, 1); 30 + 31 + if (bpf_set_retval(-EISCONN)) 32 + assertion_error = 1; 33 + 34 + return 1; 35 + } 36 + 37 + SEC("cgroup/getsockopt") 38 + int clear_retval(struct bpf_sockopt *ctx) 39 + { 40 + __sync_fetch_and_add(&invocations, 1); 41 + 42 + ctx->retval = 0; 43 + 44 + return 1; 45 + }
+52
tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + 3 + /* 4 + * Copyright 2021 Google LLC. 5 + */ 6 + 7 + #include <errno.h> 8 + #include <linux/bpf.h> 9 + #include <bpf/bpf_helpers.h> 10 + 11 + __u32 invocations = 0; 12 + __u32 assertion_error = 0; 13 + __u32 retval_value = 0; 14 + 15 + SEC("cgroup/setsockopt") 16 + int get_retval(struct bpf_sockopt *ctx) 17 + { 18 + retval_value = bpf_get_retval(); 19 + __sync_fetch_and_add(&invocations, 1); 20 + 21 + return 1; 22 + } 23 + 24 + SEC("cgroup/setsockopt") 25 + int set_eunatch(struct bpf_sockopt *ctx) 26 + { 27 + __sync_fetch_and_add(&invocations, 1); 28 + 29 + if (bpf_set_retval(-EUNATCH)) 30 + assertion_error = 1; 31 + 32 + return 0; 33 + } 34 + 35 + SEC("cgroup/setsockopt") 36 + int set_eisconn(struct bpf_sockopt *ctx) 37 + { 38 + __sync_fetch_and_add(&invocations, 1); 39 + 40 + if (bpf_set_retval(-EISCONN)) 41 + assertion_error = 1; 42 + 43 + return 0; 44 + } 45 + 46 + SEC("cgroup/setsockopt") 47 + int legacy_eperm(struct bpf_sockopt *ctx) 48 + { 49 + __sync_fetch_and_add(&invocations, 1); 50 + 51 + return 0; 52 + }
+6 -6
tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
··· 7 7 #include <bpf/bpf_endian.h> 8 8 #include <bpf/bpf_helpers.h> 9 9 10 - struct bpf_map_def SEC("maps") sock_map = { 11 - .type = BPF_MAP_TYPE_SOCKMAP, 12 - .key_size = sizeof(int), 13 - .value_size = sizeof(int), 14 - .max_entries = 2, 15 - }; 10 + struct { 11 + __uint(type, BPF_MAP_TYPE_SOCKMAP); 12 + __type(key, int); 13 + __type(value, int); 14 + __uint(max_entries, 2); 15 + } sock_map SEC(".maps"); 16 16 17 17 SEC("freplace/cls_redirect") 18 18 int freplace_cls_redirect_test(struct __sk_buff *skb)
+14
tools/testing/selftests/bpf/progs/kfunc_call_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + 5 + extern void bpf_testmod_test_mod_kfunc(int i) __ksym; 6 + 7 + SEC("tc") 8 + int kfunc_call_fail(struct __sk_buff *ctx) 9 + { 10 + bpf_testmod_test_mod_kfunc(0); 11 + return 0; 12 + } 13 + 14 + char _license[] SEC("license") = "GPL";
+50 -2
tools/testing/selftests/bpf/progs/kfunc_call_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2021 Facebook */ 3 - #include <linux/bpf.h> 3 + #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 - #include "bpf_tcp_helpers.h" 6 5 7 6 extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 8 7 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 8 __u32 c, __u64 d) __ksym; 9 + 10 + extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 11 + extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 12 + extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; 13 + extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; 14 + extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; 15 + extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 16 + extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; 10 17 11 18 SEC("tc") 12 19 int kfunc_call_test2(struct __sk_buff *skb) ··· 49 42 ret += (__u32)a; /* ret should be 12 */ 50 43 51 44 return ret; 45 + } 46 + 47 + SEC("tc") 48 + int kfunc_call_test_ref_btf_id(struct __sk_buff *skb) 49 + { 50 + struct prog_test_ref_kfunc *pt; 51 + unsigned long s = 0; 52 + int ret = 0; 53 + 54 + pt = bpf_kfunc_call_test_acquire(&s); 55 + if (pt) { 56 + if (pt->a != 42 || pt->b != 108) 57 + ret = -1; 58 + bpf_kfunc_call_test_release(pt); 59 + } 60 + return ret; 61 + } 62 + 63 + SEC("tc") 64 + int kfunc_call_test_pass(struct __sk_buff *skb) 65 + { 66 + struct prog_test_pass1 p1 = {}; 67 + struct prog_test_pass2 p2 = {}; 68 + short a = 0; 69 + __u64 b = 0; 70 + long c = 0; 71 + char d = 0; 72 + int e = 0; 73 + 74 + bpf_kfunc_call_test_pass_ctx(skb); 75 + bpf_kfunc_call_test_pass1(&p1); 76 + bpf_kfunc_call_test_pass2(&p2); 77 + 78 + bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a)); 79 + bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b)); 80 + bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c)); 81 + 
bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d)); 82 + bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e)); 83 + bpf_kfunc_call_test_mem_len_fail2(&b, -1); 84 + 85 + return 0; 52 86 } 53 87 54 88 char _license[] SEC("license") = "GPL";
+13
tools/testing/selftests/bpf/progs/ksym_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + 5 + extern int bpf_testmod_ksym_percpu __ksym; 6 + 7 + SEC("tc") 8 + int ksym_fail(struct __sk_buff *ctx) 9 + { 10 + return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); 11 + } 12 + 13 + char _license[] SEC("license") = "GPL";
+12 -12
tools/testing/selftests/bpf/progs/sample_map_ret0.c
··· 2 2 #include <linux/bpf.h> 3 3 #include <bpf/bpf_helpers.h> 4 4 5 - struct bpf_map_def SEC("maps") htab = { 6 - .type = BPF_MAP_TYPE_HASH, 7 - .key_size = sizeof(__u32), 8 - .value_size = sizeof(long), 9 - .max_entries = 2, 10 - }; 5 + struct { 6 + __uint(type, BPF_MAP_TYPE_HASH); 7 + __type(key, __u32); 8 + __type(value, long); 9 + __uint(max_entries, 2); 10 + } htab SEC(".maps"); 11 11 12 - struct bpf_map_def SEC("maps") array = { 13 - .type = BPF_MAP_TYPE_ARRAY, 14 - .key_size = sizeof(__u32), 15 - .value_size = sizeof(long), 16 - .max_entries = 2, 17 - }; 12 + struct { 13 + __uint(type, BPF_MAP_TYPE_ARRAY); 14 + __type(key, __u32); 15 + __type(value, long); 16 + __uint(max_entries, 2); 17 + } array SEC(".maps"); 18 18 19 19 /* Sample program which should always load for testing control paths. */ 20 20 SEC(".text") int func()
-2
tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
··· 7 7 { 8 8 void *data_end = (void *)(long) skb->data_end; 9 9 void *data = (void *)(long) skb->data; 10 - __u32 lport = skb->local_port; 11 - __u32 rport = skb->remote_port; 12 10 __u8 *d = data; 13 11 int err; 14 12
+16 -16
tools/testing/selftests/bpf/progs/sockopt_sk.c
··· 73 73 */ 74 74 75 75 if (optval + sizeof(struct tcp_zerocopy_receive) > optval_end) 76 - return 0; /* EPERM, bounds check */ 76 + return 0; /* bounds check */ 77 77 78 78 if (((struct tcp_zerocopy_receive *)optval)->address != 0) 79 - return 0; /* EPERM, unexpected data */ 79 + return 0; /* unexpected data */ 80 80 81 81 return 1; 82 82 } 83 83 84 84 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 85 85 if (optval + 1 > optval_end) 86 - return 0; /* EPERM, bounds check */ 86 + return 0; /* bounds check */ 87 87 88 88 ctx->retval = 0; /* Reset system call return value to zero */ 89 89 ··· 96 96 * bytes of data. 97 97 */ 98 98 if (optval_end - optval != page_size) 99 - return 0; /* EPERM, unexpected data size */ 99 + return 0; /* unexpected data size */ 100 100 101 101 return 1; 102 102 } 103 103 104 104 if (ctx->level != SOL_CUSTOM) 105 - return 0; /* EPERM, deny everything except custom level */ 105 + return 0; /* deny everything except custom level */ 106 106 107 107 if (optval + 1 > optval_end) 108 - return 0; /* EPERM, bounds check */ 108 + return 0; /* bounds check */ 109 109 110 110 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 111 111 BPF_SK_STORAGE_GET_F_CREATE); 112 112 if (!storage) 113 - return 0; /* EPERM, couldn't get sk storage */ 113 + return 0; /* couldn't get sk storage */ 114 114 115 115 if (!ctx->retval) 116 - return 0; /* EPERM, kernel should not have handled 116 + return 0; /* kernel should not have handled 117 117 * SOL_CUSTOM, something is wrong! 
118 118 */ 119 119 ctx->retval = 0; /* Reset system call return value to zero */ ··· 152 152 /* Overwrite SO_SNDBUF value */ 153 153 154 154 if (optval + sizeof(__u32) > optval_end) 155 - return 0; /* EPERM, bounds check */ 155 + return 0; /* bounds check */ 156 156 157 157 *(__u32 *)optval = 0x55AA; 158 158 ctx->optlen = 4; ··· 164 164 /* Always use cubic */ 165 165 166 166 if (optval + 5 > optval_end) 167 - return 0; /* EPERM, bounds check */ 167 + return 0; /* bounds check */ 168 168 169 169 memcpy(optval, "cubic", 5); 170 170 ctx->optlen = 5; ··· 175 175 if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { 176 176 /* Original optlen is larger than PAGE_SIZE. */ 177 177 if (ctx->optlen != page_size * 2) 178 - return 0; /* EPERM, unexpected data size */ 178 + return 0; /* unexpected data size */ 179 179 180 180 if (optval + 1 > optval_end) 181 - return 0; /* EPERM, bounds check */ 181 + return 0; /* bounds check */ 182 182 183 183 /* Make sure we can trim the buffer. */ 184 184 optval[0] = 0; ··· 189 189 * bytes of data. 190 190 */ 191 191 if (optval_end - optval != page_size) 192 - return 0; /* EPERM, unexpected data size */ 192 + return 0; /* unexpected data size */ 193 193 194 194 return 1; 195 195 } 196 196 197 197 if (ctx->level != SOL_CUSTOM) 198 - return 0; /* EPERM, deny everything except custom level */ 198 + return 0; /* deny everything except custom level */ 199 199 200 200 if (optval + 1 > optval_end) 201 - return 0; /* EPERM, bounds check */ 201 + return 0; /* bounds check */ 202 202 203 203 storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0, 204 204 BPF_SK_STORAGE_GET_F_CREATE); 205 205 if (!storage) 206 - return 0; /* EPERM, couldn't get sk storage */ 206 + return 0; /* couldn't get sk storage */ 207 207 208 208 storage->val = optval[0]; 209 209 ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
+118
tools/testing/selftests/bpf/progs/test_bpf_nf.c
// SPDX-License-Identifier: GPL-2.0
/* BPF-side of the bpf_nf selftest: exercises the unstable conntrack
 * lookup kfuncs (bpf_xdp_ct_lookup / bpf_skb_ct_lookup) with a series
 * of deliberately invalid arguments and records the error code each
 * failure produces.  The userspace part of the selftest reads the
 * test_* globals and asserts the expected errno for each case.
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* errno values duplicated here because BPF programs cannot include
 * <errno.h>; each matches the test_* global of the same name below.
 */
#define EAFNOSUPPORT 97
#define EPROTO 71
#define ENONET 64
#define EINVAL 22
#define ENOENT 2

/* One result slot per test case; written with opts.error on lookup
 * failure, left 0 on (unexpected) success.  Read from userspace.
 */
int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
int test_einval_netns_id = 0;
int test_einval_len_opts = 0;
int test_eproto_l4proto = 0;
int test_enonet_netns_id = 0;
int test_enoent_lookup = 0;
int test_eafnosupport = 0;

struct nf_conn;

/* Local mirror of the kernel's bpf_ct_opts; preserve_access_index lets
 * CO-RE relocate field accesses against the running kernel's layout.
 */
struct bpf_ct_opts___local {
	s32 netns_id;
	s32 error;
	u8 l4proto;
	u8 reserved[3];
} __attribute__((preserve_access_index));

/* Unstable kfunc prototypes, resolved against kernel BTF via __ksym. */
struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
				  struct bpf_ct_opts___local *, u32) __ksym;
struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
				  struct bpf_ct_opts___local *, u32) __ksym;
void bpf_ct_release(struct nf_conn *) __ksym;

/* Shared driver for both program types: 'func' is either the XDP or the
 * skb lookup kfunc (cast to a common signature), 'ctx' the matching
 * program context.  Each case below mangles exactly one argument,
 * performs the lookup, then restores the default before the next case.
 * A returned ct must always be released to balance the acquire.
 */
static __always_inline void
nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32,
				   struct bpf_ct_opts___local *, u32),
	   void *ctx)
{
	struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
	struct bpf_sock_tuple bpf_tuple;
	struct nf_conn *ct;

	/* Only the ipv4 part of the tuple union is used by these cases. */
	__builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));

	/* Case 1: NULL tuple pointer -> expected EINVAL. */
	ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_bpf_tuple = opts_def.error;

	/* Case 2: non-zero reserved byte in opts -> expected EINVAL. */
	opts_def.reserved[0] = 1;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.reserved[0] = 0;
	opts_def.l4proto = IPPROTO_TCP;
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_reserved = opts_def.error;

	/* Case 3: netns_id below the -1 "current netns" sentinel
	 * -> expected EINVAL.
	 */
	opts_def.netns_id = -2;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.netns_id = -1;
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_netns_id = opts_def.error;

	/* Case 4: truncated opts length -> expected EINVAL. */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1);
	if (ct)
		bpf_ct_release(ct);
	else
		test_einval_len_opts = opts_def.error;

	/* Case 5: unsupported l4proto (ICMP) -> expected EPROTO. */
	opts_def.l4proto = IPPROTO_ICMP;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.l4proto = IPPROTO_TCP;
	if (ct)
		bpf_ct_release(ct);
	else
		test_eproto_l4proto = opts_def.error;

	/* Case 6: nonexistent netns id -> expected ENONET. */
	opts_def.netns_id = 0xf00f;
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	opts_def.netns_id = -1;
	if (ct)
		bpf_ct_release(ct);
	else
		test_enonet_netns_id = opts_def.error;

	/* Case 7: valid arguments but an all-zero tuple that matches no
	 * conntrack entry -> expected ENOENT.
	 */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_enoent_lookup = opts_def.error;

	/* Case 8: tuple length matching neither ipv4 nor ipv6 size
	 * -> expected EAFNOSUPPORT.
	 */
	ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def));
	if (ct)
		bpf_ct_release(ct);
	else
		test_eafnosupport = opts_def.error;
}

/* XDP entry point: runs the shared test battery via the XDP kfunc. */
SEC("xdp")
int nf_xdp_ct_test(struct xdp_md *ctx)
{
	nf_ct_test((void *)bpf_xdp_ct_lookup, ctx);
	return 0;
}

/* TC (skb) entry point: same battery via the skb kfunc. */
SEC("tc")
int nf_skb_ct_test(struct __sk_buff *ctx)
{
	nf_ct_test((void *)bpf_skb_ct_lookup, ctx);
	return 0;
}

char _license[] SEC("license") = "GPL";
+3
tools/testing/selftests/bpf/progs/test_btf_haskv.c
··· 9 9 unsigned int v6; 10 10 }; 11 11 12 + #pragma GCC diagnostic push 13 + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 12 14 struct bpf_map_def SEC("maps") btf_map = { 13 15 .type = BPF_MAP_TYPE_ARRAY, 14 16 .key_size = sizeof(int), 15 17 .value_size = sizeof(struct ipv_counts), 16 18 .max_entries = 4, 17 19 }; 20 + #pragma GCC diagnostic pop 18 21 19 22 BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); 20 23
+3
tools/testing/selftests/bpf/progs/test_btf_newkv.c
··· 9 9 unsigned int v6; 10 10 }; 11 11 12 + #pragma GCC diagnostic push 13 + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" 12 14 /* just to validate we can handle maps in multiple sections */ 13 15 struct bpf_map_def SEC("maps") btf_map_legacy = { 14 16 .type = BPF_MAP_TYPE_ARRAY, ··· 18 16 .value_size = sizeof(long long), 19 17 .max_entries = 4, 20 18 }; 19 + #pragma GCC diagnostic pop 21 20 22 21 BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); 23 22
+6 -6
tools/testing/selftests/bpf/progs/test_btf_nokv.c
··· 8 8 unsigned int v6; 9 9 }; 10 10 11 - struct bpf_map_def SEC("maps") btf_map = { 12 - .type = BPF_MAP_TYPE_ARRAY, 13 - .key_size = sizeof(int), 14 - .value_size = sizeof(struct ipv_counts), 15 - .max_entries = 4, 16 - }; 11 + struct { 12 + __uint(type, BPF_MAP_TYPE_ARRAY); 13 + __uint(key_size, sizeof(int)); 14 + __uint(value_size, sizeof(struct ipv_counts)); 15 + __uint(max_entries, 4); 16 + } btf_map SEC(".maps"); 17 17 18 18 __attribute__((noinline)) 19 19 int test_long_fname_2(void)
+6 -6
tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
··· 10 10 11 11 #define NUM_CGROUP_LEVELS 4 12 12 13 - struct bpf_map_def SEC("maps") cgroup_ids = { 14 - .type = BPF_MAP_TYPE_ARRAY, 15 - .key_size = sizeof(__u32), 16 - .value_size = sizeof(__u64), 17 - .max_entries = NUM_CGROUP_LEVELS, 18 - }; 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_ARRAY); 15 + __type(key, __u32); 16 + __type(value, __u64); 17 + __uint(max_entries, NUM_CGROUP_LEVELS); 18 + } cgroup_ids SEC(".maps"); 19 19 20 20 static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level) 21 21 {
+24
tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
// SPDX-License-Identifier: GPL-2.0
/* BPF-side of the sockmap BPF_PROG_QUERY selftest.  Provides a sockmap
 * plus minimal verdict programs of each attach type; the userspace part
 * attaches them and verifies they are reported back by BPF_PROG_QUERY.
 * Both programs are pass-through stubs — only their attachment matters.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* Single-slot sockmap the verdict programs are attached to. */
struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} sock_map SEC(".maps");

/* sk_skb verdict stub: admit every skb. */
SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
	return SK_PASS;
}

/* sk_msg verdict stub: admit every msg. */
SEC("sk_msg")
int prog_skmsg_verdict(struct sk_msg_md *msg)
{
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
+6 -6
tools/testing/selftests/bpf/progs/test_tc_edt.c
··· 17 17 #define THROTTLE_RATE_BPS (5 * 1000 * 1000) 18 18 19 19 /* flow_key => last_tstamp timestamp used */ 20 - struct bpf_map_def SEC("maps") flow_map = { 21 - .type = BPF_MAP_TYPE_HASH, 22 - .key_size = sizeof(uint32_t), 23 - .value_size = sizeof(uint64_t), 24 - .max_entries = 1, 25 - }; 20 + struct { 21 + __uint(type, BPF_MAP_TYPE_HASH); 22 + __type(key, uint32_t); 23 + __type(value, uint64_t); 24 + __uint(max_entries, 1); 25 + } flow_map SEC(".maps"); 26 26 27 27 static inline int throttle_flow(struct __sk_buff *skb) 28 28 {
+6 -6
tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
··· 16 16 #include <bpf/bpf_helpers.h> 17 17 #include <bpf/bpf_endian.h> 18 18 19 - struct bpf_map_def SEC("maps") results = { 20 - .type = BPF_MAP_TYPE_ARRAY, 21 - .key_size = sizeof(__u32), 22 - .value_size = sizeof(__u32), 23 - .max_entries = 3, 24 - }; 19 + struct { 20 + __uint(type, BPF_MAP_TYPE_ARRAY); 21 + __type(key, __u32); 22 + __type(value, __u32); 23 + __uint(max_entries, 3); 24 + } results SEC(".maps"); 25 25 26 26 static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, 27 27 void *iph, __u32 ip_size,
+7 -3
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
··· 7 7 { 8 8 void *data_end = (void *)(long)xdp->data_end; 9 9 void *data = (void *)(long)xdp->data; 10 - unsigned int data_len; 10 + int data_len = bpf_xdp_get_buff_len(xdp); 11 11 int offset = 0; 12 12 13 13 /* Data length determine test case */ 14 - data_len = data_end - data; 15 14 16 15 if (data_len == 54) { /* sizeof(pkt_v4) */ 17 16 offset = 4096; /* test too large offset */ ··· 19 20 } else if (data_len == 64) { 20 21 offset = 128; 21 22 } else if (data_len == 128) { 22 - offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */ 23 + /* Max tail grow 3520 */ 24 + offset = 4096 - 256 - 320 - data_len; 25 + } else if (data_len == 9000) { 26 + offset = 10; 27 + } else if (data_len == 9001) { 28 + offset = 4096; 23 29 } else { 24 30 return XDP_ABORTED; /* No matching test */ 25 31 }
+28 -4
tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
··· 12 12 SEC("xdp") 13 13 int _xdp_adjust_tail_shrink(struct xdp_md *xdp) 14 14 { 15 - void *data_end = (void *)(long)xdp->data_end; 16 - void *data = (void *)(long)xdp->data; 15 + __u8 *data_end = (void *)(long)xdp->data_end; 16 + __u8 *data = (void *)(long)xdp->data; 17 17 int offset = 0; 18 18 19 - if (data_end - data == 54) /* sizeof(pkt_v4) */ 19 + switch (bpf_xdp_get_buff_len(xdp)) { 20 + case 54: 21 + /* sizeof(pkt_v4) */ 20 22 offset = 256; /* shrink too much */ 21 - else 23 + break; 24 + case 9000: 25 + /* non-linear buff test cases */ 26 + if (data + 1 > data_end) 27 + return XDP_DROP; 28 + 29 + switch (data[0]) { 30 + case 0: 31 + offset = 10; 32 + break; 33 + case 1: 34 + offset = 4100; 35 + break; 36 + case 2: 37 + offset = 8200; 38 + break; 39 + default: 40 + return XDP_DROP; 41 + } 42 + break; 43 + default: 22 44 offset = 20; 45 + break; 46 + } 23 47 if (bpf_xdp_adjust_tail(xdp, 0 - offset)) 24 48 return XDP_DROP; 25 49 return XDP_TX;
+1 -1
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
··· 49 49 void *data = (void *)(long)xdp->data; 50 50 51 51 meta.ifindex = xdp->rxq->dev->ifindex; 52 - meta.pkt_len = data_end - data; 52 + meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp); 53 53 bpf_xdp_output(xdp, &perf_buf_map, 54 54 ((__u64) meta.pkt_len << 32) | 55 55 BPF_F_CURRENT_CPU,
+42
tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
// SPDX-License-Identifier: GPL-2.0
/*
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
/* XDP multi-buffer selftest: reads/updates a 16-byte marker that may
 * live in a non-linear fragment, using the bpf_xdp_load_bytes /
 * bpf_xdp_store_bytes helpers (which work across frags, unlike direct
 * data/data_end access).  The first 4 bytes of the linear area encode
 * the offset of the marker; userspace sizes the packet so that offset
 * lands in a fragment and then checks the marker was rewritten.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

/* NOTE(review): SEC("version") is a legacy libbpf section and ignored
 * by modern loaders — candidate for removal; confirm against the
 * selftest loader before dropping it.
 */
int _version SEC("version") = 1;

/* "xdp.frags" marks the program as multi-buffer (frags) aware. */
SEC("xdp.frags")
int xdp_adjust_frags(struct xdp_md *xdp)
{
	__u8 *data_end = (void *)(long)xdp->data_end;
	__u8 *data = (void *)(long)xdp->data;
	__u8 val[16] = {};
	__u32 offset;
	int err;

	/* Bounds check required by the verifier before the direct read. */
	if (data + sizeof(__u32) > data_end)
		return XDP_DROP;

	/* Packet's first word tells us where the marker lives. */
	offset = *(__u32 *)data;
	err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
	if (err < 0)
		return XDP_DROP;

	if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
		return XDP_DROP;

	val[0] = 0xbb; /* update the marker */
	val[15] = 0xbb;
	err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
	if (err < 0)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
+27
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
// SPDX-License-Identifier: GPL-2.0
/* Helper programs for the CPUMAP + XDP frags attach selftest: provides
 * both a plain and a frags-aware CPUMAP entry program so userspace can
 * verify which combinations the kernel accepts when populating the map.
 * Both bodies are pass-through stubs — only the section type matters.
 */

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define IFINDEX_LO 1

/* CPUMAP whose values may carry an attached XDP program fd. */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, 4);
} cpu_map SEC(".maps");

/* Plain (non-frags) CPUMAP entry program. */
SEC("xdp_cpumap/dummy_cm")
int xdp_dummy_cm(struct xdp_md *ctx)
{
	return XDP_PASS;
}

/* Multi-buffer (frags) aware CPUMAP entry program. */
SEC("xdp.frags/cpumap")
int xdp_dummy_cm_frags(struct xdp_md *ctx)
{
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
+6
tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
··· 33 33 return XDP_PASS; 34 34 } 35 35 36 + SEC("xdp.frags/cpumap") 37 + int xdp_dummy_cm_frags(struct xdp_md *ctx) 38 + { 39 + return XDP_PASS; 40 + } 41 + 36 42 char _license[] SEC("license") = "GPL";
+27
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
// SPDX-License-Identifier: GPL-2.0
/* Helper programs for the DEVMAP + XDP frags attach selftest: provides
 * both a plain and a frags-aware DEVMAP entry program so userspace can
 * verify which combinations the kernel accepts when populating the map.
 * Both bodies are pass-through stubs — only the section type matters.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* DEVMAP whose values may carry an attached XDP program fd. */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 4);
} dm_ports SEC(".maps");

/* valid program on DEVMAP entry via SEC name;
 * has access to egress and ingress ifindex
 */
SEC("xdp_devmap/map_prog")
int xdp_dummy_dm(struct xdp_md *ctx)
{
	return XDP_PASS;
}

/* Multi-buffer (frags) aware DEVMAP entry program. */
SEC("xdp.frags/devmap")
int xdp_dummy_dm_frags(struct xdp_md *ctx)
{
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
+7
tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
··· 40 40 41 41 return XDP_PASS; 42 42 } 43 + 44 + SEC("xdp.frags/devmap") 45 + int xdp_dummy_dm_frags(struct xdp_md *ctx) 46 + { 47 + return XDP_PASS; 48 + } 49 + 43 50 char _license[] SEC("license") = "GPL";
+28
tools/testing/selftests/bpf/test_verifier.c
··· 31 31 #include <linux/if_ether.h> 32 32 #include <linux/btf.h> 33 33 34 + #include <bpf/btf.h> 34 35 #include <bpf/bpf.h> 35 36 #include <bpf/libbpf.h> 36 37 ··· 67 66 static int skips; 68 67 static bool verbose = false; 69 68 69 + struct kfunc_btf_id_pair { 70 + const char *kfunc; 71 + int insn_idx; 72 + }; 73 + 70 74 struct bpf_test { 71 75 const char *descr; 72 76 struct bpf_insn insns[MAX_INSNS]; ··· 98 92 int fixup_map_reuseport_array[MAX_FIXUPS]; 99 93 int fixup_map_ringbuf[MAX_FIXUPS]; 100 94 int fixup_map_timer[MAX_FIXUPS]; 95 + struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS]; 101 96 /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT. 102 97 * Can be a tab-separated sequence of expected strings. An empty string 103 98 * means no log verification. ··· 751 744 int *fixup_map_reuseport_array = test->fixup_map_reuseport_array; 752 745 int *fixup_map_ringbuf = test->fixup_map_ringbuf; 753 746 int *fixup_map_timer = test->fixup_map_timer; 747 + struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; 754 748 755 749 if (test->fill_helper) { 756 750 test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); ··· 943 935 prog[*fixup_map_timer].imm = map_fds[21]; 944 936 fixup_map_timer++; 945 937 } while (*fixup_map_timer); 938 + } 939 + 940 + /* Patch in kfunc BTF IDs */ 941 + if (fixup_kfunc_btf_id->kfunc) { 942 + struct btf *btf; 943 + int btf_id; 944 + 945 + do { 946 + btf_id = 0; 947 + btf = btf__load_vmlinux_btf(); 948 + if (btf) { 949 + btf_id = btf__find_by_name_kind(btf, 950 + fixup_kfunc_btf_id->kfunc, 951 + BTF_KIND_FUNC); 952 + btf_id = btf_id < 0 ? 0 : btf_id; 953 + } 954 + btf__free(btf); 955 + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; 956 + fixup_kfunc_btf_id++; 957 + } while (fixup_kfunc_btf_id->kfunc); 946 958 } 947 959 } 948 960
+75
tools/testing/selftests/bpf/verifier/calls.c
··· 22 22 .result = ACCEPT, 23 23 }, 24 24 { 25 + "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar", 26 + .insns = { 27 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 28 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 29 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 30 + BPF_EXIT_INSN(), 31 + }, 32 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 33 + .result = REJECT, 34 + .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar", 35 + .fixup_kfunc_btf_id = { 36 + { "bpf_kfunc_call_test_fail1", 2 }, 37 + }, 38 + }, 39 + { 40 + "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4", 41 + .insns = { 42 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 43 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 44 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 45 + BPF_EXIT_INSN(), 46 + }, 47 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 48 + .result = REJECT, 49 + .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2", 50 + .fixup_kfunc_btf_id = { 51 + { "bpf_kfunc_call_test_fail2", 2 }, 52 + }, 53 + }, 54 + { 55 + "calls: invalid kfunc call: ptr_to_mem to struct with FAM", 56 + .insns = { 57 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 58 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 59 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 60 + BPF_EXIT_INSN(), 61 + }, 62 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 63 + .result = REJECT, 64 + .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar", 65 + .fixup_kfunc_btf_id = { 66 + { "bpf_kfunc_call_test_fail3", 2 }, 67 + }, 68 + }, 69 + { 70 + "calls: invalid kfunc call: reg->type != PTR_TO_CTX", 71 + .insns = { 72 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 73 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 74 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 75 + BPF_EXIT_INSN(), 76 + }, 77 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 78 + .result = REJECT, 79 + .errstr = "arg#0 expected pointer to ctx, but got PTR", 80 
+ .fixup_kfunc_btf_id = { 81 + { "bpf_kfunc_call_test_pass_ctx", 2 }, 82 + }, 83 + }, 84 + { 85 + "calls: invalid kfunc call: void * not allowed in func proto without mem size arg", 86 + .insns = { 87 + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), 88 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), 89 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), 90 + BPF_EXIT_INSN(), 91 + }, 92 + .prog_type = BPF_PROG_TYPE_SCHED_CLS, 93 + .result = REJECT, 94 + .errstr = "arg#0 pointer type UNKNOWN must point to scalar", 95 + .fixup_kfunc_btf_id = { 96 + { "bpf_kfunc_call_test_mem_len_fail1", 2 }, 97 + }, 98 + }, 99 + { 25 100 "calls: basic sanity", 26 101 .insns = { 27 102 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+4 -1
tools/testing/selftests/bpf/xdpxceiver.c
··· 906 906 return true; 907 907 case STAT_TEST_RX_FULL: 908 908 xsk_stat = stats.rx_ring_full; 909 - expected_stat -= RX_FULL_RXQSIZE; 909 + if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) 910 + expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE; 911 + else 912 + expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE; 910 913 break; 911 914 case STAT_TEST_RX_FILL_EMPTY: 912 915 xsk_stat = stats.rx_fill_ring_empty_descs;