Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Introduce device-bound XDP programs

New flag BPF_F_XDP_DEV_BOUND_ONLY plus all the infra to have a way
to associate a netdev with a BPF program at load time.

netdevsim checks are dropped in favor of generic check in dev_xdp_attach.

Cc: John Fastabend <john.fastabend@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Anatoly Burakov <anatoly.burakov@intel.com>
Cc: Alexander Lobakin <alexandr.lobakin@intel.com>
Cc: Magnus Karlsson <magnus.karlsson@gmail.com>
Cc: Maryam Tahhan <mtahhan@redhat.com>
Cc: xdp-hints@xdp-project.net
Cc: netdev@vger.kernel.org
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20230119221536.3349901-6-sdf@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>

Authored by Stanislav Fomichev; committed by Martin KaFai Lau.
2b3486bc 89bbc53a

+113 -38
-4
drivers/net/netdevsim/bpf.c
··· 315 315 NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); 316 316 return -EINVAL; 317 317 } 318 - if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) { 319 - NSIM_EA(bpf->extack, "program bound to different dev"); 320 - return -EINVAL; 321 - } 322 318 323 319 state = bpf->prog->aux->offload->dev_priv; 324 320 if (WARN_ON(strcmp(state->state, "xlated"))) {
+20 -4
include/linux/bpf.h
··· 1261 1261 enum bpf_prog_type saved_dst_prog_type; 1262 1262 enum bpf_attach_type saved_dst_attach_type; 1263 1263 bool verifier_zext; /* Zero extensions has been inserted by verifier. */ 1264 - bool offload_requested; 1264 + bool dev_bound; /* Program is bound to the netdev. */ 1265 + bool offload_requested; /* Program is bound and offloaded to the netdev. */ 1265 1266 bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ 1266 1267 bool func_proto_unreliable; 1267 1268 bool sleepable; ··· 2452 2451 bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); 2453 2452 2454 2453 int bpf_prog_offload_compile(struct bpf_prog *prog); 2455 - void bpf_prog_offload_destroy(struct bpf_prog *prog); 2454 + void bpf_prog_dev_bound_destroy(struct bpf_prog *prog); 2456 2455 int bpf_prog_offload_info_fill(struct bpf_prog_info *info, 2457 2456 struct bpf_prog *prog); 2458 2457 ··· 2480 2479 void unpriv_ebpf_notify(int new_state); 2481 2480 2482 2481 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) 2483 - int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); 2482 + int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr); 2483 + void bpf_dev_bound_netdev_unregister(struct net_device *dev); 2484 + 2485 + static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) 2486 + { 2487 + return aux->dev_bound; 2488 + } 2484 2489 2485 2490 static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux) 2486 2491 { ··· 2514 2507 void sock_map_destroy(struct sock *sk); 2515 2508 void sock_map_close(struct sock *sk, long timeout); 2516 2509 #else 2517 - static inline int bpf_prog_offload_init(struct bpf_prog *prog, 2510 + static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog, 2518 2511 union bpf_attr *attr) 2519 2512 { 2520 2513 return -EOPNOTSUPP; 2514 + } 2515 + 2516 + static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev) 2517 + { 2518 + } 2519 + 2520 + static inline bool 
bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux) 2521 + { 2522 + return false; 2521 2523 } 2522 2524 2523 2525 static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
+5
include/uapi/linux/bpf.h
··· 1156 1156 */ 1157 1157 #define BPF_F_XDP_HAS_FRAGS (1U << 5) 1158 1158 1159 + /* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded 1160 + * program becomes device-bound but can access XDP metadata. 1161 + */ 1162 + #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) 1163 + 1159 1164 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1160 1165 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1161 1166 */
+2 -2
kernel/bpf/core.c
··· 2553 2553 #endif 2554 2554 bpf_free_used_maps(aux); 2555 2555 bpf_free_used_btfs(aux); 2556 - if (bpf_prog_is_offloaded(aux)) 2557 - bpf_prog_offload_destroy(aux->prog); 2556 + if (bpf_prog_is_dev_bound(aux)) 2557 + bpf_prog_dev_bound_destroy(aux->prog); 2558 2558 #ifdef CONFIG_PERF_EVENTS 2559 2559 if (aux->prog->has_callchain_buf) 2560 2560 put_callchain_buffers();
+71 -24
kernel/bpf/offload.c
··· 41 41 struct bpf_offload_netdev { 42 42 struct rhash_head l; 43 43 struct net_device *netdev; 44 - struct bpf_offload_dev *offdev; 44 + struct bpf_offload_dev *offdev; /* NULL when bound-only */ 45 45 struct list_head progs; 46 46 struct list_head maps; 47 47 struct list_head offdev_netdevs; ··· 89 89 INIT_LIST_HEAD(&ondev->progs); 90 90 INIT_LIST_HEAD(&ondev->maps); 91 91 92 - down_write(&bpf_devs_lock); 93 92 err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params); 94 93 if (err) { 95 94 netdev_warn(netdev, "failed to register for BPF offload\n"); 96 - goto err_unlock_free; 95 + goto err_free; 97 96 } 98 97 99 - list_add(&ondev->offdev_netdevs, &offdev->netdevs); 100 - up_write(&bpf_devs_lock); 98 + if (offdev) 99 + list_add(&ondev->offdev_netdevs, &offdev->netdevs); 101 100 return 0; 102 101 103 - err_unlock_free: 104 - up_write(&bpf_devs_lock); 102 + err_free: 105 103 kfree(ondev); 106 104 return err; 107 105 } ··· 147 149 static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, 148 150 struct net_device *netdev) 149 151 { 150 - struct bpf_offload_netdev *ondev, *altdev; 152 + struct bpf_offload_netdev *ondev, *altdev = NULL; 151 153 struct bpf_offloaded_map *offmap, *mtmp; 152 154 struct bpf_prog_offload *offload, *ptmp; 153 155 154 156 ASSERT_RTNL(); 155 157 156 - down_write(&bpf_devs_lock); 157 158 ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params); 158 159 if (WARN_ON(!ondev)) 159 - goto unlock; 160 + return; 160 161 161 162 WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params)); 162 - list_del(&ondev->offdev_netdevs); 163 163 164 164 /* Try to move the objects to another netdev of the device */ 165 - altdev = list_first_entry_or_null(&offdev->netdevs, 166 - struct bpf_offload_netdev, 167 - offdev_netdevs); 165 + if (offdev) { 166 + list_del(&ondev->offdev_netdevs); 167 + altdev = list_first_entry_or_null(&offdev->netdevs, 168 + struct bpf_offload_netdev, 169 + offdev_netdevs); 170 + } 171 + 
168 172 if (altdev) { 169 173 list_for_each_entry(offload, &ondev->progs, offloads) 170 174 offload->netdev = altdev->netdev; ··· 185 185 WARN_ON(!list_empty(&ondev->progs)); 186 186 WARN_ON(!list_empty(&ondev->maps)); 187 187 kfree(ondev); 188 - unlock: 189 - up_write(&bpf_devs_lock); 190 188 } 191 189 192 - int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr) 190 + int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr) 193 191 { 194 192 struct bpf_offload_netdev *ondev; 195 193 struct bpf_prog_offload *offload; ··· 197 199 attr->prog_type != BPF_PROG_TYPE_XDP) 198 200 return -EINVAL; 199 201 200 - if (attr->prog_flags) 202 + if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY) 203 + return -EINVAL; 204 + 205 + if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS && 206 + attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY) 201 207 return -EINVAL; 202 208 203 209 offload = kzalloc(sizeof(*offload), GFP_USER); ··· 216 214 if (err) 217 215 goto err_maybe_put; 218 216 217 + prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY); 218 + 219 219 down_write(&bpf_devs_lock); 220 220 ondev = bpf_offload_find_netdev(offload->netdev); 221 221 if (!ondev) { 222 - err = -EINVAL; 223 - goto err_unlock; 222 + if (bpf_prog_is_offloaded(prog->aux)) { 223 + err = -EINVAL; 224 + goto err_unlock; 225 + } 226 + 227 + /* When only binding to the device, explicitly 228 + * create an entry in the hashtable. 
229 + */ 230 + err = __bpf_offload_dev_netdev_register(NULL, offload->netdev); 231 + if (err) 232 + goto err_unlock; 233 + ondev = bpf_offload_find_netdev(offload->netdev); 224 234 } 225 235 offload->offdev = ondev->offdev; 226 236 prog->aux->offload = offload; ··· 335 321 up_read(&bpf_devs_lock); 336 322 } 337 323 338 - void bpf_prog_offload_destroy(struct bpf_prog *prog) 324 + void bpf_prog_dev_bound_destroy(struct bpf_prog *prog) 339 325 { 326 + struct bpf_offload_netdev *ondev; 327 + struct net_device *netdev; 328 + 329 + rtnl_lock(); 340 330 down_write(&bpf_devs_lock); 341 - if (prog->aux->offload) 331 + if (prog->aux->offload) { 332 + list_del_init(&prog->aux->offload->offloads); 333 + 334 + netdev = prog->aux->offload->netdev; 342 335 __bpf_prog_offload_destroy(prog); 336 + 337 + ondev = bpf_offload_find_netdev(netdev); 338 + if (!ondev->offdev && list_empty(&ondev->progs)) 339 + __bpf_offload_dev_netdev_unregister(NULL, netdev); 340 + } 343 341 up_write(&bpf_devs_lock); 342 + rtnl_unlock(); 344 343 } 345 344 346 345 static int bpf_prog_offload_translate(struct bpf_prog *prog) ··· 648 621 struct bpf_offload_netdev *ondev1, *ondev2; 649 622 struct bpf_prog_offload *offload; 650 623 651 - if (!bpf_prog_is_offloaded(prog->aux)) 624 + if (!bpf_prog_is_dev_bound(prog->aux)) 652 625 return false; 653 626 654 627 offload = prog->aux->offload; ··· 694 667 int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev, 695 668 struct net_device *netdev) 696 669 { 697 - return __bpf_offload_dev_netdev_register(offdev, netdev); 670 + int err; 671 + 672 + down_write(&bpf_devs_lock); 673 + err = __bpf_offload_dev_netdev_register(offdev, netdev); 674 + up_write(&bpf_devs_lock); 675 + return err; 698 676 } 699 677 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register); 700 678 701 679 void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev, 702 680 struct net_device *netdev) 703 681 { 682 + down_write(&bpf_devs_lock); 704 683 
__bpf_offload_dev_netdev_unregister(offdev, netdev); 684 + up_write(&bpf_devs_lock); 705 685 } 706 686 EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister); 707 687 ··· 741 707 return offdev->priv; 742 708 } 743 709 EXPORT_SYMBOL_GPL(bpf_offload_dev_priv); 710 + 711 + void bpf_dev_bound_netdev_unregister(struct net_device *dev) 712 + { 713 + struct bpf_offload_netdev *ondev; 714 + 715 + ASSERT_RTNL(); 716 + 717 + down_write(&bpf_devs_lock); 718 + ondev = bpf_offload_find_netdev(dev); 719 + if (ondev && !ondev->offdev) 720 + __bpf_offload_dev_netdev_unregister(NULL, ondev->netdev); 721 + up_write(&bpf_devs_lock); 722 + } 744 723 745 724 static int __init bpf_offload_init(void) 746 725 {
+5 -4
kernel/bpf/syscall.c
··· 2491 2491 BPF_F_TEST_STATE_FREQ | 2492 2492 BPF_F_SLEEPABLE | 2493 2493 BPF_F_TEST_RND_HI32 | 2494 - BPF_F_XDP_HAS_FRAGS)) 2494 + BPF_F_XDP_HAS_FRAGS | 2495 + BPF_F_XDP_DEV_BOUND_ONLY)) 2495 2496 return -EINVAL; 2496 2497 2497 2498 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && ··· 2576 2575 prog->aux->attach_btf = attach_btf; 2577 2576 prog->aux->attach_btf_id = attr->attach_btf_id; 2578 2577 prog->aux->dst_prog = dst_prog; 2579 - prog->aux->offload_requested = !!attr->prog_ifindex; 2578 + prog->aux->dev_bound = !!attr->prog_ifindex; 2580 2579 prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE; 2581 2580 prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS; 2582 2581 ··· 2599 2598 atomic64_set(&prog->aux->refcnt, 1); 2600 2599 prog->gpl_compatible = is_gpl ? 1 : 0; 2601 2600 2602 - if (bpf_prog_is_offloaded(prog->aux)) { 2603 - err = bpf_prog_offload_init(prog, attr); 2601 + if (bpf_prog_is_dev_bound(prog->aux)) { 2602 + err = bpf_prog_dev_bound_init(prog, attr); 2604 2603 if (err) 2605 2604 goto free_prog_sec; 2606 2605 }
+5
net/core/dev.c
··· 9228 9228 NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported"); 9229 9229 return -EINVAL; 9230 9230 } 9231 + if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) { 9232 + NL_SET_ERR_MSG(extack, "Program bound to different device"); 9233 + return -EINVAL; 9234 + } 9231 9235 if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) { 9232 9236 NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device"); 9233 9237 return -EINVAL; ··· 10834 10830 dev_shutdown(dev); 10835 10831 10836 10832 dev_xdp_uninstall(dev); 10833 + bpf_dev_bound_netdev_unregister(dev); 10837 10834 10838 10835 netdev_offload_xstats_disable_all(dev); 10839 10836
+5
tools/include/uapi/linux/bpf.h
··· 1156 1156 */ 1157 1157 #define BPF_F_XDP_HAS_FRAGS (1U << 5) 1158 1158 1159 + /* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded 1160 + * program becomes device-bound but can access XDP metadata. 1161 + */ 1162 + #define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6) 1163 + 1159 1164 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1160 1165 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1161 1166 */