Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Revert "Merge branch 'netkit-support-for-io_uring-zero-copy-and-af_xdp'"

This reverts commit 77b9c4a438fc66e2ab004c411056b3fb71a54f2c, reversing
changes made to 4515ec4ad58a37e70a9e1256c0b993958c9b7497:

931420a2fc36 ("selftests/net: Add netkit container tests")
ab771c938d9a ("selftests/net: Make NetDrvContEnv support queue leasing")
6be87fbb2776 ("selftests/net: Add env for container based tests")
61d99ce3dfc2 ("selftests/net: Add bpf skb forwarding program")
920da3634194 ("netkit: Add xsk support for af_xdp applications")
eef51113f8af ("netkit: Add netkit notifier to check for unregistering devices")
b5ef109d22d4 ("netkit: Implement rtnl_link_ops->alloc and ndo_queue_create")
b5c3fa4a0b16 ("netkit: Add single device mode for netkit")
0073d2fd679d ("xsk: Proxy pool management for leased queues")
1ecea95dd3b5 ("xsk: Extend xsk_rcv_check validation")
804bf334d08a ("net: Proxy netdev_queue_get_dma_dev for leased queues")
0caa9a8ddec3 ("net: Proxy net_mp_{open,close}_rxq for leased queues")
ff8889ff9107 ("net, ethtool: Disallow leased real rxqs to be resized")
9e2103f36110 ("net: Add lease info to queue-get response")
31127deddef4 ("net: Implement netdev_nl_queue_create_doit")
a5546e18f77c ("net: Add queue-create operation")

The series will conflict with io_uring work, and the code needs more
polish.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+117 -1233
-44
Documentation/netlink/specs/netdev.yaml
··· 339 339 doc: XSK information for this queue, if any. 340 340 type: nest 341 341 nested-attributes: xsk-info 342 - - 343 - name: lease 344 - doc: | 345 - A queue from a virtual device can have a lease which refers to 346 - another queue from a physical device. This is useful for memory 347 - providers and AF_XDP operations which take an ifindex and queue id 348 - to allow applications to bind against virtual devices in containers. 349 - type: nest 350 - nested-attributes: lease 351 342 - 352 343 name: qstats 353 344 doc: | ··· 538 547 - 539 548 name: type 540 549 - 541 - name: lease 542 - attributes: 543 - - 544 - name: ifindex 545 - doc: The netdev ifindex to lease the queue from. 546 - type: u32 547 - checks: 548 - min: 1 549 - - 550 - name: queue 551 - doc: The netdev queue to lease from. 552 - type: nest 553 - nested-attributes: queue-id 554 - - 555 - name: netns-id 556 - doc: The network namespace id of the netdev. 557 - type: s32 558 - - 559 550 name: dmabuf 560 551 attributes: 561 552 - ··· 686 713 - dmabuf 687 714 - io-uring 688 715 - xsk 689 - - lease 690 716 dump: 691 717 request: 692 718 attributes: ··· 795 823 - ifindex 796 824 - fd 797 825 reply: 798 - attributes: 799 - - id 800 - - 801 - name: queue-create 802 - doc: | 803 - Create a new queue for the given netdevice. Whether this operation 804 - is supported depends on the device and the driver. 805 - attribute-set: queue 806 - flags: [admin-perm] 807 - do: 808 - request: 809 - attributes: 810 - - ifindex 811 - - type 812 - - lease 813 - reply: &queue-create-op 814 826 attributes: 815 827 - id 816 828
+54 -306
drivers/net/netkit.c
··· 9 9 #include <linux/bpf_mprog.h> 10 10 #include <linux/indirect_call_wrapper.h> 11 11 12 - #include <net/netdev_lock.h> 13 - #include <net/netdev_queues.h> 14 - #include <net/netdev_rx_queue.h> 15 - #include <net/xdp_sock_drv.h> 16 12 #include <net/netkit.h> 17 13 #include <net/dst.h> 18 14 #include <net/tcx.h> 19 15 20 - #define NETKIT_DRV_NAME "netkit" 21 - 22 - #define NETKIT_NUM_RX_QUEUES_MAX 1024 23 - #define NETKIT_NUM_TX_QUEUES_MAX 1 24 - 25 - #define NETKIT_NUM_RX_QUEUES_REAL 1 26 - #define NETKIT_NUM_TX_QUEUES_REAL 1 16 + #define DRV_NAME "netkit" 27 17 28 18 struct netkit { 29 19 __cacheline_group_begin(netkit_fastpath); ··· 26 36 27 37 __cacheline_group_begin(netkit_slowpath); 28 38 enum netkit_mode mode; 29 - enum netkit_pairing pair; 30 39 bool primary; 31 40 u32 headroom; 32 41 __cacheline_group_end(netkit_slowpath); ··· 35 46 struct bpf_link link; 36 47 struct net_device *dev; 37 48 }; 38 - 39 - static struct rtnl_link_ops netkit_link_ops; 40 49 41 50 static __always_inline int 42 51 netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb, ··· 135 148 struct netkit *nk = netkit_priv(dev); 136 149 struct net_device *peer = rtnl_dereference(nk->peer); 137 150 138 - if (nk->pair == NETKIT_DEVICE_SINGLE) { 139 - netif_carrier_on(dev); 140 - return 0; 141 - } 142 151 if (!peer) 143 152 return -ENOTCONN; 144 153 if (peer->flags & IFF_UP) { ··· 219 236 stats->tx_dropped = DEV_STATS_READ(dev, tx_dropped); 220 237 } 221 238 222 - static bool netkit_xsk_supported_at_phys(const struct net_device *dev) 223 - { 224 - if (!dev->netdev_ops->ndo_bpf || 225 - !dev->netdev_ops->ndo_xdp_xmit || 226 - !dev->netdev_ops->ndo_xsk_wakeup) 227 - return false; 228 - if ((dev->xdp_features & NETDEV_XDP_ACT_XSK) != NETDEV_XDP_ACT_XSK) 229 - return false; 230 - return true; 231 - } 232 - 233 - static int netkit_xsk(struct net_device *dev, struct netdev_bpf *xdp) 234 - { 235 - struct netkit *nk = netkit_priv(dev); 236 - struct netdev_bpf xdp_lower; 237 - struct netdev_rx_queue *rxq; 238 - struct net_device *phys; 239 - int ret = -EBUSY; 240 - 241 - switch (xdp->command) { 242 - case XDP_SETUP_XSK_POOL: 243 - if (nk->pair == NETKIT_DEVICE_PAIR) 244 - return -EOPNOTSUPP; 245 - if (xdp->xsk.queue_id >= dev->real_num_rx_queues) 246 - return -EINVAL; 247 - 248 - rxq = __netif_get_rx_queue(dev, xdp->xsk.queue_id); 249 - if (!rxq->lease) 250 - return -EOPNOTSUPP; 251 - 252 - phys = rxq->lease->dev; 253 - if (!netkit_xsk_supported_at_phys(phys)) 254 - return -EOPNOTSUPP; 255 - 256 - memcpy(&xdp_lower, xdp, sizeof(xdp_lower)); 257 - xdp_lower.xsk.queue_id = get_netdev_rx_queue_index(rxq->lease); 258 - break; 259 - case XDP_SETUP_PROG: 260 - return -EPERM; 261 - default: 262 - return -EINVAL; 263 - } 264 - 265 - netdev_lock(phys); 266 - if (!dev_get_min_mp_channel_count(phys)) 267 - ret = phys->netdev_ops->ndo_bpf(phys, &xdp_lower); 268 - netdev_unlock(phys); 269 - return ret; 270 - } 271 - 272 - static int netkit_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) 273 - { 274 - struct netdev_rx_queue *rxq; 275 - struct net_device *phys; 276 - 277 - if (queue_id >= dev->real_num_rx_queues) 278 - return -EINVAL; 279 - 280 - rxq = __netif_get_rx_queue(dev, queue_id); 281 - if (!rxq->lease) 282 - return -EOPNOTSUPP; 283 - 284 - phys = rxq->lease->dev; 285 - if (!netkit_xsk_supported_at_phys(phys)) 286 - return -EOPNOTSUPP; 287 - 288 - return phys->netdev_ops->ndo_xsk_wakeup(phys, 289 - get_netdev_rx_queue_index(rxq->lease), flags); 290 - } 291 - 292 - static int netkit_init(struct net_device *dev) 293 - { 294 - netdev_lockdep_set_classes(dev); 295 - return 0; 296 - } 297 - 298 239 static void netkit_uninit(struct net_device *dev); 299 240 300 241 static const struct net_device_ops netkit_netdev_ops = { 301 - .ndo_init = netkit_init, 302 242 .ndo_open = netkit_open, 303 243 .ndo_stop = netkit_close, 304 244 .ndo_start_xmit = netkit_xmit, ··· 232 326 .ndo_get_peer_dev = netkit_peer_dev, 233 327 .ndo_get_stats64 = netkit_get_stats, 234 328 .ndo_uninit = netkit_uninit, 235 - .ndo_bpf = netkit_xsk, 236 - .ndo_xsk_wakeup = netkit_xsk_wakeup, 237 329 .ndo_features_check = passthru_features_check, 238 330 }; 239 331 240 332 static void netkit_get_drvinfo(struct net_device *dev, 241 333 struct ethtool_drvinfo *info) 242 334 { 243 - strscpy(info->driver, NETKIT_DRV_NAME, sizeof(info->driver)); 335 + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); 244 336 } 245 337 246 338 static const struct ethtool_ops netkit_ethtool_ops = { 247 339 .get_drvinfo = netkit_get_drvinfo, 248 340 }; 249 - 250 - static int netkit_queue_create(struct net_device *dev) 251 - { 252 - struct netkit *nk = netkit_priv(dev); 253 - u32 rxq_count_old, rxq_count_new; 254 - int err; 255 - 256 - rxq_count_old = dev->real_num_rx_queues; 257 - rxq_count_new = rxq_count_old + 1; 258 - 259 - /* Only allow to lease a queue in single device mode or to 260 - * lease against the peer device which then ends up in the 261 - * target netns. 262 - */ 263 - if (nk->pair == NETKIT_DEVICE_PAIR && nk->primary) 264 - return -EOPNOTSUPP; 265 - 266 - if (netif_running(dev)) 267 - netif_carrier_off(dev); 268 - err = netif_set_real_num_rx_queues(dev, rxq_count_new); 269 - if (netif_running(dev)) 270 - netif_carrier_on(dev); 271 - 272 - return err ? : rxq_count_old; 273 - } 274 - 275 - static const struct netdev_queue_mgmt_ops netkit_queue_mgmt_ops = { 276 - .ndo_queue_create = netkit_queue_create, 277 - }; 278 - 279 - static struct net_device *netkit_alloc(struct nlattr *tb[], 280 - const char *ifname, 281 - unsigned char name_assign_type, 282 - unsigned int num_tx_queues, 283 - unsigned int num_rx_queues) 284 - { 285 - const struct rtnl_link_ops *ops = &netkit_link_ops; 286 - struct net_device *dev; 287 - 288 - if (num_tx_queues > NETKIT_NUM_TX_QUEUES_MAX || 289 - num_rx_queues > NETKIT_NUM_RX_QUEUES_MAX) 290 - return ERR_PTR(-EOPNOTSUPP); 291 - 292 - dev = alloc_netdev_mqs(ops->priv_size, ifname, 293 - name_assign_type, ops->setup, 294 - num_tx_queues, num_rx_queues); 295 - if (dev) { 296 - dev->real_num_tx_queues = NETKIT_NUM_TX_QUEUES_REAL; 297 - dev->real_num_rx_queues = NETKIT_NUM_RX_QUEUES_REAL; 298 - } 299 - return dev; 300 - } 301 - 302 - static void netkit_queue_unlease(struct net_device *dev) 303 - { 304 - struct netdev_rx_queue *rxq, *rxq_lease; 305 - struct net_device *dev_lease; 306 - int i; 307 - 308 - if (dev->real_num_rx_queues == 1) 309 - return; 310 - 311 - netdev_lock(dev); 312 - for (i = 1; i < dev->real_num_rx_queues; i++) { 313 - rxq = __netif_get_rx_queue(dev, i); 314 - rxq_lease = rxq->lease; 315 - dev_lease = rxq_lease->dev; 316 - 317 - netdev_lock(dev_lease); 318 - netdev_rx_queue_unlease(rxq, rxq_lease); 319 - netdev_unlock(dev_lease); 320 - } 321 - netdev_unlock(dev); 322 - } 323 341 324 342 static void netkit_setup(struct net_device *dev) 325 343 { ··· 275 445 dev->priv_flags |= IFF_DISABLE_NETPOLL; 276 446 dev->lltx = true; 277 447 278 - dev->netdev_ops = &netkit_netdev_ops; 279 - dev->ethtool_ops = &netkit_ethtool_ops; 280 - dev->queue_mgmt_ops = &netkit_queue_mgmt_ops; 448 + dev->ethtool_ops = &netkit_ethtool_ops; 449 + dev->netdev_ops = &netkit_netdev_ops; 281 450 282 451 dev->features |= netkit_features; 283 452 dev->hw_features = netkit_features; 284 453 dev->hw_enc_features = netkit_features; 285 454 dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; 286 455 dev->vlan_features = dev->features & ~netkit_features_hw_vlan; 456 + 287 457 dev->needs_free_netdev = true; 288 458 289 459 netif_set_tso_max_size(dev, GSO_MAX_SIZE); 290 - 291 - xdp_set_features_flag(dev, NETDEV_XDP_ACT_XSK); 292 460 } 293 461 294 462 static struct net *netkit_get_link_net(const struct net_device *dev) ··· 325 497 return 0; 326 498 } 327 499 500 + static struct rtnl_link_ops netkit_link_ops; 501 + 328 502 static int netkit_new_link(struct net_device *dev, 329 503 struct rtnl_newlink_params *params, 330 504 struct netlink_ext_ack *extack) ··· 335 505 enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT; 336 506 enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; 337 507 struct nlattr *peer_tb[IFLA_MAX + 1], **tbp, *attr; 338 - enum netkit_pairing pair = NETKIT_DEVICE_PAIR; 339 508 enum netkit_action policy_prim = NETKIT_PASS; 340 509 enum netkit_action policy_peer = NETKIT_PASS; 341 510 struct nlattr **data = params->data; ··· 343 514 struct nlattr **tb = params->tb; 344 515 u16 headroom = 0, tailroom = 0; 345 516 struct ifinfomsg *ifmp = NULL; 346 - struct net_device *peer = NULL; 347 - bool seen_peer = false; 517 + struct net_device *peer; 348 518 char ifname[IFNAMSIZ]; 349 519 struct netkit *nk; 350 520 int err; ··· 380 552 headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]); 381 553 if (data[IFLA_NETKIT_TAILROOM]) 382 554 tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]); 383 - if (data[IFLA_NETKIT_PAIRING]) 384 - pair = nla_get_u32(data[IFLA_NETKIT_PAIRING]); 385 - 386 - seen_peer = data[IFLA_NETKIT_PEER_INFO] || 387 - data[IFLA_NETKIT_PEER_SCRUB] || 388 - data[IFLA_NETKIT_PEER_POLICY]; 389 555 } 390 556 391 557 if (ifmp && tbp[IFLA_IFNAME]) { ··· 392 570 if (mode != NETKIT_L2 && 393 571 (tb[IFLA_ADDRESS] || tbp[IFLA_ADDRESS])) 394 572 return -EOPNOTSUPP; 395 - if (pair == NETKIT_DEVICE_SINGLE && 396 - (tb != tbp || seen_peer || policy_prim != NETKIT_PASS)) 397 - return -EOPNOTSUPP; 398 573 399 - if (pair == NETKIT_DEVICE_PAIR) { 400 - peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, 401 - &netkit_link_ops, tbp, extack); 402 - if (IS_ERR(peer)) 403 - return PTR_ERR(peer); 574 + peer = rtnl_create_link(peer_net, ifname, ifname_assign_type, 575 + &netkit_link_ops, tbp, extack); 576 + if (IS_ERR(peer)) 577 + return PTR_ERR(peer); 404 578 405 - netif_inherit_tso_max(peer, dev); 406 - if (headroom) 407 - peer->needed_headroom = headroom; 408 - if (tailroom) 409 - peer->needed_tailroom = tailroom; 410 - if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) 411 - eth_hw_addr_random(peer); 412 - if (ifmp && dev->ifindex) 413 - peer->ifindex = ifmp->ifi_index; 414 - 415 - nk = netkit_priv(peer); 416 - nk->primary = false; 417 - nk->policy = policy_peer; 418 - nk->scrub = scrub_peer; 419 - nk->mode = mode; 420 - nk->pair = pair; 421 - nk->headroom = headroom; 422 - bpf_mprog_bundle_init(&nk->bundle); 423 - 424 - err = register_netdevice(peer); 425 - if (err < 0) 426 - goto err_register_peer; 427 - netif_carrier_off(peer); 428 - if (mode == NETKIT_L2) 429 - dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); 430 - 431 - err = rtnl_configure_link(peer, NULL, 0, NULL); 432 - if (err < 0) 433 - goto err_configure_peer; 579 + netif_inherit_tso_max(peer, dev); 580 + if (headroom) { 581 + peer->needed_headroom = headroom; 582 + dev->needed_headroom = headroom; 434 583 } 584 + if (tailroom) { 585 + peer->needed_tailroom = tailroom; 586 + dev->needed_tailroom = tailroom; 587 + } 588 + 589 + if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) 590 + eth_hw_addr_random(peer); 591 + if (ifmp && dev->ifindex) 592 + peer->ifindex = ifmp->ifi_index; 593 + 594 + nk = netkit_priv(peer); 595 + nk->primary = false; 596 + nk->policy = policy_peer; 597 + nk->scrub = scrub_peer; 598 + nk->mode = mode; 599 + nk->headroom = headroom; 600 + bpf_mprog_bundle_init(&nk->bundle); 601 + 602 + err = register_netdevice(peer); 603 + if (err < 0) 604 + goto err_register_peer; 605 + netif_carrier_off(peer); 606 + if (mode == NETKIT_L2) 607 + dev_change_flags(peer, peer->flags & ~IFF_NOARP, NULL); 608 + 609 + err = rtnl_configure_link(peer, NULL, 0, NULL); 610 + if (err < 0) 611 + goto err_configure_peer; 435 612 436 613 if (mode == NETKIT_L2 && !tb[IFLA_ADDRESS]) 437 614 eth_hw_addr_random(dev); ··· 438 617 nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); 439 618 else 440 619 strscpy(dev->name, "nk%d", IFNAMSIZ); 441 - if (headroom) 442 - dev->needed_headroom = headroom; 443 - if (tailroom) 444 - dev->needed_tailroom = tailroom; 445 620 446 621 nk = netkit_priv(dev); 447 622 nk->primary = true; 448 623 nk->policy = policy_prim; 449 624 nk->scrub = scrub_prim; 450 625 nk->mode = mode; 451 - nk->pair = pair; 452 626 nk->headroom = headroom; 453 627 bpf_mprog_bundle_init(&nk->bundle); 454 628 ··· 455 639 dev_change_flags(dev, dev->flags & ~IFF_NOARP, NULL); 456 640 457 641 rcu_assign_pointer(netkit_priv(dev)->peer, peer); 458 - if (peer) 459 - rcu_assign_pointer(netkit_priv(peer)->peer, dev); 642 + rcu_assign_pointer(netkit_priv(peer)->peer, dev); 460 643 return 0; 461 644 err_configure_peer: 462 - if (peer) 463 - unregister_netdevice(peer); 645 + unregister_netdevice(peer); 464 646 return err; 465 647 err_register_peer: 466 648 free_netdev(peer); ··· 518 704 nk = netkit_priv(dev); 519 705 if (!nk->primary) 520 706 return ERR_PTR(-EACCES); 521 - if (nk->pair == NETKIT_DEVICE_SINGLE) 522 - return ERR_PTR(-EOPNOTSUPP); 523 707 if (which == BPF_NETKIT_PEER) { 524 708 dev = rcu_dereference_rtnl(nk->peer); 525 709 if (!dev) ··· 844 1032 static void netkit_uninit(struct net_device *dev) 845 1033 { 846 1034 netkit_release_all(dev); 847 - netkit_queue_unlease(dev); 848 1035 } 849 1036 850 1037 static void netkit_del_link(struct net_device *dev, struct list_head *head) ··· 879 1068 { IFLA_NETKIT_PEER_INFO, "peer info" }, 880 1069 { IFLA_NETKIT_HEADROOM, "headroom" }, 881 1070 { IFLA_NETKIT_TAILROOM, "tailroom" }, 882 - { IFLA_NETKIT_PAIRING, "pairing" }, 883 1071 }; 884 1072 885 1073 if (!nk->primary) { ··· 898 1088 } 899 1089 900 1090 if (data[IFLA_NETKIT_POLICY]) { 901 - err = -EOPNOTSUPP; 902 1091 attr = data[IFLA_NETKIT_POLICY]; 903 1092 policy = nla_get_u32(attr); 904 - if (nk->pair == NETKIT_DEVICE_PAIR) 905 - err = netkit_check_policy(policy, attr, extack); 1093 + err = netkit_check_policy(policy, attr, extack); 906 1094 if (err) 907 1095 return err; 908 1096 WRITE_ONCE(nk->policy, policy); ··· 921 1113 return 0; 922 1114 } 923 1115 924 - static void netkit_check_lease_unregister(struct net_device *dev) 925 - { 926 - LIST_HEAD(list_kill); 927 - u32 q_idx; 928 - 929 - if (READ_ONCE(dev->reg_state) != NETREG_UNREGISTERING || 930 - !dev->dev.parent) 931 - return; 932 - 933 - netdev_lock_ops(dev); 934 - for (q_idx = 0; q_idx < dev->real_num_rx_queues; q_idx++) { 935 - struct net_device *tmp = dev; 936 - u32 tmp_q_idx = q_idx; 937 - 938 - if (netif_rx_queue_lease_get_owner(&tmp, &tmp_q_idx)) { 939 - if (tmp->netdev_ops != &netkit_netdev_ops) 940 - continue; 941 - /* A single phys device can have multiple queues leased 942 - * to one netkit device. We can only queue that netkit 943 - * device once to the list_kill. Queues of that phys 944 - * device can be leased with different individual netkit 945 - * devices, hence we batch via list_kill. 946 - */ 947 - if (unregister_netdevice_queued(tmp)) 948 - continue; 949 - netkit_del_link(tmp, &list_kill); 950 - } 951 - } 952 - netdev_unlock_ops(dev); 953 - unregister_netdevice_many(&list_kill); 954 - } 955 - 956 - static int netkit_notifier(struct notifier_block *this, 957 - unsigned long event, void *ptr) 958 - { 959 - struct net_device *dev = netdev_notifier_info_to_dev(ptr); 960 - 961 - if (event == NETDEV_UNREGISTER) 962 - netkit_check_lease_unregister(dev); 963 - return NOTIFY_DONE; 964 - } 965 - 966 1116 static size_t netkit_get_size(const struct net_device *dev) 967 1117 { 968 1118 return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */ ··· 931 1165 nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ 932 1166 nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ 933 1167 nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ 934 - nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PAIRING */ 935 1168 0; 936 1169 } 937 1170 ··· 950 1185 if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom)) 951 1186 return -EMSGSIZE; 952 1187 if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom)) 953 - return -EMSGSIZE; 954 - if (nla_put_u32(skb, IFLA_NETKIT_PAIRING, nk->pair)) 955 1188 return -EMSGSIZE; 956 1189 957 1190 if (peer) { ··· 972 1209 [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, 973 1210 [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), 974 1211 [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), 975 - [IFLA_NETKIT_PAIRING] = NLA_POLICY_MAX(NLA_U32, NETKIT_DEVICE_SINGLE), 976 1212 [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, 977 1213 .reject_message = "Primary attribute is read-only" }, 978 1214 }; 979 1215 980 1216 static struct rtnl_link_ops netkit_link_ops = { 981 - .kind = NETKIT_DRV_NAME, 1217 + .kind = DRV_NAME, 982 1218 .priv_size = sizeof(struct netkit), 983 - .alloc = netkit_alloc, 984 1219 .setup = netkit_setup, 985 1220 .newlink = netkit_new_link, 986 1221 .dellink = netkit_del_link, ··· 992 1231 .maxtype = IFLA_NETKIT_MAX, 993 1232 }; 994 1233 995 - static struct notifier_block netkit_netdev_notifier = { 996 - .notifier_call = netkit_notifier, 997 - }; 998 - 999 - static __init int netkit_mod_init(void) 1234 + static __init int netkit_init(void) 1000 1235 { 1001 - int ret; 1002 - 1003 1236 BUILD_BUG_ON((int)NETKIT_NEXT != (int)TCX_NEXT || 1004 1237 (int)NETKIT_PASS != (int)TCX_PASS || 1005 1238 (int)NETKIT_DROP != (int)TCX_DROP || 1006 1239 (int)NETKIT_REDIRECT != (int)TCX_REDIRECT); 1007 1240 1008 - ret = rtnl_link_register(&netkit_link_ops); 1009 - if (ret) 1010 - return ret; 1011 - ret = register_netdevice_notifier(&netkit_netdev_notifier); 1012 - if (ret) 1013 - rtnl_link_unregister(&netkit_link_ops); 1014 - return ret; 1241 + return rtnl_link_register(&netkit_link_ops); 1015 1242 } 1016 1243 1017 - static __exit void netkit_mod_exit(void) 1244 + static __exit void netkit_exit(void) 1018 1245 { 1019 - unregister_netdevice_notifier(&netkit_netdev_notifier); 1020 1246 rtnl_link_unregister(&netkit_link_ops); 1021 1247 } 1022 1248 1023 - module_init(netkit_mod_init); 1024 - module_exit(netkit_mod_exit); 1249 + module_init(netkit_init); 1250 + module_exit(netkit_exit); 1025 1251 1026 1252 MODULE_DESCRIPTION("BPF-programmable network device"); 1027 1253 MODULE_AUTHOR("Daniel Borkmann <daniel@iogearbox.net>"); 1028 1254 MODULE_AUTHOR("Nikolay Aleksandrov <razor@blackwall.org>"); 1029 1255 MODULE_LICENSE("GPL"); 1030 - MODULE_ALIAS_RTNL_LINK(NETKIT_DRV_NAME); 1256 + MODULE_ALIAS_RTNL_LINK(DRV_NAME);
-6
include/linux/netdevice.h
··· 3400 3400 int register_netdevice(struct net_device *dev); 3401 3401 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); 3402 3402 void unregister_netdevice_many(struct list_head *head); 3403 - 3404 3403 static inline void unregister_netdevice(struct net_device *dev) 3405 3404 { 3406 3405 unregister_netdevice_queue(dev, NULL); 3407 - } 3408 - 3409 - static inline bool unregister_netdevice_queued(const struct net_device *dev) 3410 - { 3411 - return !list_empty(&dev->unreg_list); 3412 3406 } 3413 3407 3414 3408 int netdev_refcnt_read(const struct net_device *dev);
+3 -16
include/net/netdev_queues.h
··· 130 130 * @ndo_queue_get_dma_dev: Get dma device for zero-copy operations to be used 131 131 * for this queue. Return NULL on error. 132 132 * 133 - * @ndo_queue_create: Create a new RX queue which can be leased to another queue. 134 - * Ops on this queue are redirected to the leased queue e.g. 135 - * when opening a memory provider. Return the new queue id on 136 - * success. Return negative error code on failure. 137 - * 138 133 * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while 139 134 * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only 140 135 * be called for an interface which is open. ··· 149 154 int idx); 150 155 struct device * (*ndo_queue_get_dma_dev)(struct net_device *dev, 151 156 int idx); 152 - int (*ndo_queue_create)(struct net_device *dev); 153 157 }; 154 158 155 - bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx); 156 - bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); 157 - bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); 159 + bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx); 158 160 159 161 /** 160 162 * DOC: Lockless queue stopping / waking helpers. ··· 340 348 }) 341 349 342 350 struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx); 343 - bool netdev_can_create_queue(const struct net_device *dev, 344 - struct netlink_ext_ack *extack); 345 - bool netdev_can_lease_queue(const struct net_device *dev, 346 - struct netlink_ext_ack *extack); 347 - bool netdev_queue_busy(struct net_device *dev, int idx, 348 - struct netlink_ext_ack *extack); 349 - #endif /* _LINUX_NET_QUEUES_H */ 351 + 352 + #endif
+1 -20
include/net/netdev_rx_queue.h
··· 28 28 #endif 29 29 struct napi_struct *napi; 30 30 struct pp_memory_provider_params mp_params; 31 - struct netdev_rx_queue *lease; 32 - netdevice_tracker lease_tracker; 33 31 } ____cacheline_aligned_in_smp; 34 32 35 33 /* ··· 57 59 } 58 60 59 61 int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq); 60 - void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, 61 - struct netdev_rx_queue *rxq_src); 62 - void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, 63 - struct netdev_rx_queue *rxq_src); 64 - bool netif_rx_queue_lease_get_owner(struct net_device **dev, unsigned int *rxq); 65 62 66 - enum netif_lease_dir { 67 - NETIF_VIRT_TO_PHYS, 68 - NETIF_PHYS_TO_VIRT, 69 - }; 70 - 71 - struct netdev_rx_queue * 72 - __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq, 73 - enum netif_lease_dir dir); 74 - struct netdev_rx_queue * 75 - netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq); 76 - void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, 77 - struct net_device *dev); 78 - #endif /* _LINUX_NETDEV_RX_QUEUE_H */ 63 + #endif
+2 -2
include/net/page_pool/memory_provider.h
··· 23 23 void net_mp_niov_set_page_pool(struct page_pool *pool, struct net_iov *niov); 24 24 void net_mp_niov_clear_page_pool(struct net_iov *niov); 25 25 26 - int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, 26 + int net_mp_open_rxq(struct net_device *dev, unsigned ifq_idx, 27 27 struct pp_memory_provider_params *p); 28 28 int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, 29 29 const struct pp_memory_provider_params *p, 30 30 struct netlink_ext_ack *extack); 31 - void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, 31 + void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, 32 32 struct pp_memory_provider_params *old_p); 33 33 void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, 34 34 const struct pp_memory_provider_params *old_p);
+1 -1
include/net/xdp_sock_drv.h
··· 28 28 bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc); 29 29 u32 xsk_tx_peek_release_desc_batch(struct xsk_buff_pool *pool, u32 max); 30 30 void xsk_tx_release(struct xsk_buff_pool *pool); 31 - struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev, 31 + struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, 32 32 u16 queue_id); 33 33 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool); 34 34 void xsk_set_tx_need_wakeup(struct xsk_buff_pool *pool);
-6
include/uapi/linux/if_link.h
··· 1296 1296 NETKIT_L3, 1297 1297 }; 1298 1298 1299 - enum netkit_pairing { 1300 - NETKIT_DEVICE_PAIR, 1301 - NETKIT_DEVICE_SINGLE, 1302 - }; 1303 - 1304 1299 /* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to 1305 1300 * the BPF program if attached. This also means the latter can 1306 1301 * consume the two fields if they were populated earlier. ··· 1320 1325 IFLA_NETKIT_PEER_SCRUB, 1321 1326 IFLA_NETKIT_HEADROOM, 1322 1327 IFLA_NETKIT_TAILROOM, 1323 - IFLA_NETKIT_PAIRING, 1324 1328 __IFLA_NETKIT_MAX, 1325 1329 }; 1326 1330 #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
-11
include/uapi/linux/netdev.h
··· 160 160 NETDEV_A_QUEUE_DMABUF, 161 161 NETDEV_A_QUEUE_IO_URING, 162 162 NETDEV_A_QUEUE_XSK, 163 - NETDEV_A_QUEUE_LEASE, 164 163 165 164 __NETDEV_A_QUEUE_MAX, 166 165 NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) ··· 203 204 }; 204 205 205 206 enum { 206 - NETDEV_A_LEASE_IFINDEX = 1, 207 - NETDEV_A_LEASE_QUEUE, 208 - NETDEV_A_LEASE_NETNS_ID, 209 - 210 - __NETDEV_A_LEASE_MAX, 211 - NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) 212 - }; 213 - 214 - enum { 215 207 NETDEV_A_DMABUF_IFINDEX = 1, 216 208 NETDEV_A_DMABUF_QUEUES, 217 209 NETDEV_A_DMABUF_FD, ··· 228 238 NETDEV_CMD_BIND_RX, 229 239 NETDEV_CMD_NAPI_SET, 230 240 NETDEV_CMD_BIND_TX, 231 - NETDEV_CMD_QUEUE_CREATE, 232 241 233 242 __NETDEV_CMD_MAX, 234 243 NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
-7
net/core/dev.c
··· 1115 1115 } 1116 1116 1117 1117 struct net_device * 1118 - netdev_put_lock(struct net_device *dev, netdevice_tracker *tracker) 1119 - { 1120 - netdev_tracker_free(dev, tracker); 1121 - return __netdev_put_lock(dev, dev_net(dev)); 1122 - } 1123 - 1124 - struct net_device * 1125 1118 netdev_xa_find_lock(struct net *net, struct net_device *dev, 1126 1119 unsigned long *index) 1127 1120 {
-2
net/core/dev.h
··· 30 30 struct net_device *dev_get_by_napi_id(unsigned int napi_id); 31 31 32 32 struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); 33 - struct net_device *netdev_put_lock(struct net_device *dev, 34 - netdevice_tracker *tracker); 35 33 struct net_device * 36 34 netdev_xa_find_lock(struct net *net, struct net_device *dev, 37 35 unsigned long *index);
-20
net/core/netdev-genl-gen.c
··· 28 28 }; 29 29 30 30 /* Common nested types */ 31 - const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = { 32 - [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), 33 - [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), 34 - [NETDEV_A_LEASE_NETNS_ID] = { .type = NLA_S32, }, 35 - }; 36 - 37 31 const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { 38 32 [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), 39 33 [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), ··· 105 111 static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] = { 106 112 [NETDEV_A_DMABUF_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), 107 113 [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, 108 - }; 109 - 110 - /* NETDEV_CMD_QUEUE_CREATE - do */ 111 - static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = { 112 - [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), 113 - [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), 114 - [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy), 115 114 }; 116 115 117 116 /* Ops table for netdev */ ··· 204 217 .policy = netdev_bind_tx_nl_policy, 205 218 .maxattr = NETDEV_A_DMABUF_FD, 206 219 .flags = GENL_CMD_CAP_DO, 207 - }, 208 - { 209 - .cmd = NETDEV_CMD_QUEUE_CREATE, 210 - .doit = netdev_nl_queue_create_doit, 211 - .policy = netdev_queue_create_nl_policy, 212 - .maxattr = NETDEV_A_QUEUE_LEASE, 213 - .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, 214 220 }, 215 221 }; 216 222
-2
net/core/netdev-genl-gen.h
··· 14 14 #include <net/netdev_netlink.h> 15 15 16 16 /* Common nested types */ 17 - extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1]; 18 17 extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; 19 18 extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; 20 19 ··· 36 37 int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); 37 38 int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); 38 39 int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); 39 - int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info); 40 40 41 41 enum { 42 42 NETDEV_NLGRP_MGMT,
-185
net/core/netdev-genl.c
··· 391 391 u32 q_idx, u32 q_type, const struct genl_info *info) 392 392 { 393 393 struct pp_memory_provider_params *params; 394 - struct net_device *orig_netdev = netdev; 395 - struct nlattr *nest_lease, *nest_queue; 396 394 struct netdev_rx_queue *rxq; 397 395 struct netdev_queue *txq; 398 - u32 lease_q_idx = q_idx; 399 396 void *hdr; 400 397 401 398 hdr = genlmsg_iput(rsp, info); ··· 409 412 rxq = __netif_get_rx_queue(netdev, q_idx); 410 413 if (nla_put_napi_id(rsp, rxq->napi)) 411 414 goto nla_put_failure; 412 - 413 - if (netif_rx_queue_lease_get_owner(&netdev, &lease_q_idx)) { 414 - struct net *net, *peer_net; 415 - 416 - nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); 417 - if (!nest_lease) 418 - goto nla_put_failure; 419 - nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); 420 - if (!nest_queue) 421 - goto nla_put_failure; 422 - if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, lease_q_idx)) 423 - goto nla_put_failure; 424 - if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) 425 - goto nla_put_failure; 426 - nla_nest_end(rsp, nest_queue); 427 - if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, 428 - READ_ONCE(netdev->ifindex))) 429 - goto nla_put_failure; 430 - rcu_read_lock(); 431 - peer_net = dev_net_rcu(netdev); 432 - net = dev_net_rcu(orig_netdev); 433 - if (!net_eq(net, peer_net)) { 434 - s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); 435 - 436 - if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) 437 - goto nla_put_failure_unlock; 438 - } 439 - rcu_read_unlock(); 440 - nla_nest_end(rsp, nest_lease); 441 - netdev = orig_netdev; 442 - } 443 415 444 416 params = &rxq->mp_params; 445 417 if (params->mp_ops && ··· 437 471 438 472 return 0; 439 473 440 - nla_put_failure_unlock: 441 - rcu_read_unlock(); 442 474 nla_put_failure: 443 475 genlmsg_cancel(rsp, hdr); 444 476 return -EMSGSIZE; ··· 1115 1151 netdev_unlock(netdev); 1116 1152 err_unlock_sock: 1117 1153 mutex_unlock(&priv->lock); 1118 - err_genlmsg_free: 1119 - nlmsg_free(rsp); 1120 - return err; 1121 - } 1122 - 1123 - int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) 1124 - { 1125 - const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; 1126 - const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; 1127 - int err, ifindex, ifindex_lease, queue_id, queue_id_lease; 1128 - struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; 1129 - struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; 1130 - struct netdev_rx_queue *rxq, *rxq_lease; 1131 - struct net_device *dev, *dev_lease; 1132 - netdevice_tracker dev_tracker; 1133 - struct nlattr *nest; 1134 - struct sk_buff *rsp; 1135 - void *hdr; 1136 - 1137 - if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || 1138 - GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || 1139 - GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) 1140 - return -EINVAL; 1141 - if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != 1142 - NETDEV_QUEUE_TYPE_RX) { 1143 - NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); 1144 - return -EINVAL; 1145 - } 1146 - 1147 - ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); 1148 - 1149 - nest = info->attrs[NETDEV_A_QUEUE_LEASE]; 1150 - err = nla_parse_nested(ltb, lmaxtype, nest, 1151 - netdev_lease_nl_policy, info->extack); 1152 - if (err < 0) 1153 - return err; 1154 - if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || 1155 - NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) 1156 - return -EINVAL; 1157 - if (ltb[NETDEV_A_LEASE_NETNS_ID]) { 1158 - NL_SET_BAD_ATTR(info->extack, ltb[NETDEV_A_LEASE_NETNS_ID]); 1159 - return -EINVAL; 1160 - } 1161 - 1162 - ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); 1163 - 1164 - nest = ltb[NETDEV_A_LEASE_QUEUE]; 1165 - err = nla_parse_nested(qtb, qmaxtype, nest, 1166 - netdev_queue_id_nl_policy, info->extack); 1167 - if (err < 0) 1168 - return err; 1169 - if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || 1170 - NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_TYPE)) 1171 - return -EINVAL; 1172 - if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { 1173 - NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); 1174 - return -EINVAL; 1175 - } 1176 - if (ifindex == ifindex_lease) { 1177 - NL_SET_ERR_MSG(info->extack, 1178 - "Lease ifindex cannot be the same as queue creation ifindex"); 1179 - return -EINVAL; 1180 - } 1181 - 1182 - queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); 1183 - 1184 - rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); 1185 - if (!rsp) 1186 - return -ENOMEM; 1187 - 1188 - hdr = genlmsg_iput(rsp, info); 1189 - if (!hdr) { 1190 - err = -EMSGSIZE; 1191 - goto err_genlmsg_free; 1192 - } 1193 - 1194 - /* Locking order is always from the virtual to the physical device 1195 - * since this is also the same order when applications open the 1196 - * memory provider later on. 1197 - */ 1198 - dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); 1199 - if (!dev) { 1200 - err = -ENODEV; 1201 - goto err_genlmsg_free; 1202 - } 1203 - if (!netdev_can_create_queue(dev, info->extack)) { 1204 - err = -EINVAL; 1205 - goto err_unlock_dev; 1206 - } 1207 - 1208 - dev_lease = netdev_get_by_index(genl_info_net(info), ifindex_lease, 1209 - &dev_tracker, GFP_KERNEL); 1210 - if (!dev_lease) { 1211 - err = -ENODEV; 1212 - goto err_unlock_dev; 1213 - } 1214 - if (!netdev_can_lease_queue(dev_lease, info->extack)) { 1215 - netdev_put(dev_lease, &dev_tracker); 1216 - err = -EINVAL; 1217 - goto err_unlock_dev; 1218 - } 1219 - 1220 - dev_lease = netdev_put_lock(dev_lease, &dev_tracker); 1221 - if (!dev_lease) { 1222 - err = -ENODEV; 1223 - goto err_unlock_dev; 1224 - } 1225 - if (queue_id_lease >= dev_lease->real_num_rx_queues) { 1226 - err = -ERANGE; 1227 - NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); 1228 - goto err_unlock_dev_lease; 1229 - } 1230 - if (netdev_queue_busy(dev_lease, queue_id_lease, info->extack)) { 1231 - err = -EBUSY; 1232 - goto err_unlock_dev_lease; 1233 - } 1234 - 1235 - rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); 1236 - rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); 1237 - 1238 - if (rxq->lease && rxq->lease->dev != dev_lease) { 1239 - err = -EOPNOTSUPP; 1240 - NL_SET_ERR_MSG(info->extack, 1241 - "Leasing multiple queues from different devices not supported"); 1242 - goto err_unlock_dev_lease; 1243 - } 1244 - 1245 - err = queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev); 1246 - if (err < 0) { 1247 - NL_SET_ERR_MSG(info->extack, 1248 - "Device is unable to create a new queue"); 1249 - goto err_unlock_dev_lease; 1250 - } 1251 - 1252 - rxq = __netif_get_rx_queue(dev, queue_id); 1253 - netdev_rx_queue_lease(rxq, rxq_lease); 1254 - 1255 - nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); 1256 - genlmsg_end(rsp, hdr); 1257 - 1258 - netdev_unlock(dev_lease); 1259 - netdev_unlock(dev); 1260 - 1261 - return genlmsg_reply(rsp, info); 1262 - 1263 - err_unlock_dev_lease: 1264 - netdev_unlock(dev_lease); 1265 - err_unlock_dev: 1266 - netdev_unlock(dev); 1267 1154 err_genlmsg_free: 1268 1155 nlmsg_free(rsp); 1269 1156 return err;
+2 -72
net/core/netdev_queues.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-or-later 2 2 3 3 #include <net/netdev_queues.h> 4 - #include <net/netdev_rx_queue.h> 5 - #include <net/xdp_sock_drv.h> 6 4 7 5 /** 8 6 * netdev_queue_get_dma_dev() - get dma device for zero-copy operations 9 7 * @dev: net_device 10 8 * @idx: queue index 11 9 * 12 - * Get dma device for zero-copy operations to be used for this queue. If the 13 - * queue is leased to a physical queue, we retrieve the latter's dma device. 10 + * Get dma device for zero-copy operations to be used for this queue. 14 11 * When such device is not available or valid, the function will return NULL. 15 12 * 16 13 * Return: Device or NULL on error 17 14 */ 18 15 struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) 19 16 { 20 - const struct netdev_queue_mgmt_ops *queue_ops; 17 + const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; 21 18 struct device *dma_dev; 22 - 23 - if (idx < dev->real_num_rx_queues) { 24 - struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); 25 - 26 - if (rxq->lease) { 27 - rxq = rxq->lease; 28 - dev = rxq->dev; 29 - idx = get_netdev_rx_queue_index(rxq); 30 - } 31 - } 32 - 33 - queue_ops = dev->queue_mgmt_ops; 34 19 35 20 if (queue_ops && queue_ops->ndo_queue_get_dma_dev) 36 21 dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx); ··· 25 40 return dma_dev && dma_dev->dma_mask ? dma_dev : NULL; 26 41 } 27 42 28 - bool netdev_can_create_queue(const struct net_device *dev, 29 - struct netlink_ext_ack *extack) 30 - { 31 - if (dev->dev.parent) { 32 - NL_SET_ERR_MSG(extack, "Device is not a virtual device"); 33 - return false; 34 - } 35 - if (!dev->queue_mgmt_ops || 36 - !dev->queue_mgmt_ops->ndo_queue_create) { 37 - NL_SET_ERR_MSG(extack, "Device does not support queue creation"); 38 - return false; 39 - } 40 - if (dev->real_num_rx_queues < 1 || 41 - dev->real_num_tx_queues < 1) { 42 - NL_SET_ERR_MSG(extack, "Device must have at least one real queue"); 43 - return false; 44 - } 45 - return true; 46 - } 47 - 48 - bool netdev_can_lease_queue(const struct net_device *dev, 49 - struct netlink_ext_ack *extack) 50 - { 51 - if (!dev->dev.parent) { 52 - NL_SET_ERR_MSG(extack, "Lease device is a virtual device"); 53 - return false; 54 - } 55 - if (!netif_device_present(dev)) { 56 - NL_SET_ERR_MSG(extack, "Lease device has been removed from the system"); 57 - return false; 58 - } 59 - if (!dev->queue_mgmt_ops) { 60 - NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations"); 61 - return false; 62 - } 63 - return true; 64 - } 65 - 66 - bool netdev_queue_busy(struct net_device *dev, int idx, 67 - struct netlink_ext_ack *extack) 68 - { 69 - if (netif_rxq_is_leased(dev, idx)) { 70 - NL_SET_ERR_MSG(extack, "Lease device queue is already leased"); 71 - return true; 72 - } 73 - if (xsk_get_pool_from_qid(dev, idx)) { 74 - NL_SET_ERR_MSG(extack, "Lease device queue in use by AF_XDP"); 75 - return true; 76 - } 77 - if (netif_rxq_has_mp(dev, idx)) { 78 - NL_SET_ERR_MSG(extack, "Lease device queue in use by memory provider"); 79 - return true; 80 - } 81 - return false; 82 - }
+22 -147
net/core/netdev_rx_queue.c
··· 9 9 10 10 #include "page_pool_priv.h" 11 11 12 - void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, 13 - struct netdev_rx_queue *rxq_src) 14 - { 15 - netdev_assert_locked(rxq_src->dev); 16 - netdev_assert_locked(rxq_dst->dev); 17 - 18 - netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL); 19 - 20 - WRITE_ONCE(rxq_src->lease, rxq_dst); 21 - WRITE_ONCE(rxq_dst->lease, rxq_src); 22 - } 23 - 24 - void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, 25 - struct netdev_rx_queue *rxq_src) 26 - { 27 - netdev_assert_locked(rxq_dst->dev); 28 - netdev_assert_locked(rxq_src->dev); 29 - 30 - WRITE_ONCE(rxq_src->lease, NULL); 31 - WRITE_ONCE(rxq_dst->lease, NULL); 32 - 33 - netdev_put(rxq_src->dev, &rxq_src->lease_tracker); 34 - } 35 - 36 - bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx) 37 - { 38 - if (rxq_idx < dev->real_num_rx_queues) 39 - return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease); 40 - return false; 41 - } 42 - 43 - static bool netif_lease_dir_ok(const struct net_device *dev, 44 - enum netif_lease_dir dir) 45 - { 46 - if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent) 47 - return true; 48 - if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent) 49 - return true; 50 - return false; 51 - } 52 - 53 - struct netdev_rx_queue * 54 - __netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, 55 - enum netif_lease_dir dir) 56 - { 57 - struct net_device *orig_dev = *dev; 58 - struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx); 59 - 60 - if (rxq->lease) { 61 - if (!netif_lease_dir_ok(orig_dev, dir)) 62 - return NULL; 63 - rxq = rxq->lease; 64 - *rxq_idx = get_netdev_rx_queue_index(rxq); 65 - *dev = rxq->dev; 66 - } 67 - return rxq; 68 - } 69 - 70 - struct netdev_rx_queue * 71 - netif_get_rx_queue_lease_locked(struct net_device **dev, unsigned int *rxq_idx) 72 - { 73 - struct net_device *orig_dev = *dev; 74 - struct netdev_rx_queue *rxq; 75 - 76 - /* Locking order is always from the virtual to the physical device 77 - * see netdev_nl_queue_create_doit(). 78 - */ 79 - netdev_ops_assert_locked(orig_dev); 80 - rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_VIRT_TO_PHYS); 81 - if (rxq && orig_dev != *dev) 82 - netdev_lock(*dev); 83 - return rxq; 84 - } 85 - 86 - void netif_put_rx_queue_lease_locked(struct net_device *orig_dev, 87 - struct net_device *dev) 88 - { 89 - if (orig_dev != dev) 90 - netdev_unlock(dev); 91 - } 92 - 93 - bool netif_rx_queue_lease_get_owner(struct net_device **dev, 94 - unsigned int *rxq_idx) 95 - { 96 - struct net_device *orig_dev = *dev; 97 - struct netdev_rx_queue *rxq; 98 - 99 - /* The physical device needs to be locked. If there is indeed a lease, 100 - * then the virtual device holds a reference on the physical device 101 - * and the lease stays active until the virtual device is torn down. 102 - * When queues get {un,}leased both devices are always locked. 103 - */ 104 - netdev_ops_assert_locked(orig_dev); 105 - rxq = __netif_get_rx_queue_lease(dev, rxq_idx, NETIF_PHYS_TO_VIRT); 106 - if (rxq && orig_dev != *dev) 107 - return true; 108 - return false; 109 - } 110 - 111 12 /* See also page_pool_is_unreadable() */ 112 - bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) 13 + bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx) 113 14 { 114 - if (rxq_idx < dev->real_num_rx_queues) 115 - return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops; 116 - return false; 15 + struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); 16 + 17 + return !!rxq->mp_params.mp_ops; 117 18 } 118 19 EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); 119 - 120 - bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx) 121 - { 122 - if (rxq_idx < dev->real_num_rx_queues) 123 - return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv; 124 - return false; 125 - } 126 20 127 21 int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) 128 22 { ··· 100 206 const struct pp_memory_provider_params *p, 101 207 struct netlink_ext_ack *extack) 102 208 { 103 - struct net_device *orig_dev = dev; 104 209 struct netdev_rx_queue *rxq; 105 210 int ret; 106 211 107 212 if (!netdev_need_ops_lock(dev)) 108 213 return -EOPNOTSUPP; 214 + 109 215 if (rxq_idx >= dev->real_num_rx_queues) { 110 216 NL_SET_ERR_MSG(extack, "rx queue index out of range"); 111 217 return -ERANGE; 112 218 } 113 - 114 219 rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); 115 - rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx); 116 - if (!rxq) { 117 - NL_SET_ERR_MSG(extack, "rx queue peered to a virtual netdev"); 118 - return -EBUSY; 119 - } 120 - if (!dev->dev.parent) { 121 - NL_SET_ERR_MSG(extack, "rx queue is mapped to a virtual netdev"); 122 - ret = -EBUSY; 123 - goto out; 124 - } 220 + 125 221 if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { 126 222 NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); 127 - ret = -EINVAL; 128 - goto out; 223 + return -EINVAL; 129 224 } 130 225 if (dev->cfg->hds_thresh) { 131 226 NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); 132 - ret = -EINVAL; 133 - goto out; 227 + return -EINVAL; 134 228 } 135 229 if (dev_xdp_prog_count(dev)) { 136 230 NL_SET_ERR_MSG(extack, "unable to custom memory provider to device with XDP program attached"); 137 - ret = -EEXIST; 138 - goto out; 231 + return -EEXIST; 139 232 } 233 + 234 + rxq = __netif_get_rx_queue(dev, rxq_idx); 140 235 if (rxq->mp_params.mp_ops) { 141 236 NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); 142 - ret = -EEXIST; 143 - goto out; 237 + return -EEXIST; 144 238 } 145 239 #ifdef CONFIG_XDP_SOCKETS 146 240 if (rxq->pool) { 147 241 NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP"); 148 - ret = -EBUSY; 149 - goto out; 242 + return -EBUSY; 150 243 } 151 244 #endif 245 + 152 246 rxq->mp_params = *p; 153 247 ret = netdev_rx_queue_restart(dev, rxq_idx); 154 248 if (ret) { 155 249 rxq->mp_params.mp_ops = NULL; 156 250 rxq->mp_params.mp_priv = NULL; 157 251 } 158 - out: 159 - netif_put_rx_queue_lease_locked(orig_dev, dev); 160 252 return ret; 161 253 } 162 254 ··· 157 277 return ret; 158 278 } 159 279 160 - void __net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, 280 + void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, 161 281 const struct pp_memory_provider_params *old_p) 162 282 { 163 - struct net_device *orig_dev = dev; 164 283 struct netdev_rx_queue *rxq; 165 284 int err; 166 285 167 - if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues)) 286 + if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues)) 168 287 return; 169 288 170 - rxq = netif_get_rx_queue_lease_locked(&dev, &rxq_idx); 171 - if (WARN_ON_ONCE(!rxq)) 172 - return; 289 + rxq = __netif_get_rx_queue(dev, ifq_idx); 173 290 174 291 /* Callers holding a netdev ref may get here after we already 175 292 * went thru shutdown via dev_memory_provider_uninstall(). 176 293 */ 177 294 if (dev->reg_state > NETREG_REGISTERED && 178 295 !rxq->mp_params.mp_ops) 179 - goto out; 296 + return; 180 297 181 298 if (WARN_ON_ONCE(rxq->mp_params.mp_ops != old_p->mp_ops || 182 299 rxq->mp_params.mp_priv != old_p->mp_priv)) 183 - goto out; 300 + return; 184 301 185 302 rxq->mp_params.mp_ops = NULL; 186 303 rxq->mp_params.mp_priv = NULL; 187 - err = netdev_rx_queue_restart(dev, rxq_idx); 304 + err = netdev_rx_queue_restart(dev, ifq_idx); 188 305 WARN_ON(err && err != -ENETDOWN); 189 - out: 190 - netif_put_rx_queue_lease_locked(orig_dev, dev); 191 306 } 192 307 193 - void net_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, 308 + void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, 194 309 struct pp_memory_provider_params *old_p) 195 310 { 196 311 netdev_lock(dev); 197 - __net_mp_close_rxq(dev, rxq_idx, old_p); 312 + __net_mp_close_rxq(dev, ifq_idx, old_p); 198 313 netdev_unlock(dev); 199 314 }
+5 -7
net/ethtool/channels.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 3 - #include <net/netdev_queues.h> 3 + #include <net/xdp_sock_drv.h> 4 4 5 5 #include "netlink.h" 6 6 #include "common.h" ··· 169 169 if (ret) 170 170 return ret; 171 171 172 - /* ensure channels are not busy at the moment */ 172 + /* Disabling channels, query zero-copy AF_XDP sockets */ 173 173 from_channel = channels.combined_count + 174 174 min(channels.rx_count, channels.tx_count); 175 - for (i = from_channel; i < old_total; i++) { 176 - if (netdev_queue_busy(dev, i, NULL)) { 177 - GENL_SET_ERR_MSG(info, 178 - "requested channel counts are too low due to busy queues (AF_XDP or queue leasing)"); 175 + for (i = from_channel; i < old_total; i++) 176 + if (xsk_get_pool_from_qid(dev, i)) { 177 + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); 179 178 return -EINVAL; 180 179 } 181 - } 182 180 183 181 ret = dev->ethtool_ops->set_channels(dev, &channels); 184 182 return ret < 0 ? ret : 1;
+4 -5
net/ethtool/ioctl.c
··· 27 27 #include <linux/net.h> 28 28 #include <linux/pm_runtime.h> 29 29 #include <linux/utsname.h> 30 - #include <linux/ethtool_netlink.h> 31 30 #include <net/devlink.h> 32 31 #include <net/ipv6.h> 32 + #include <net/xdp_sock_drv.h> 33 33 #include <net/flow_offload.h> 34 34 #include <net/netdev_lock.h> 35 - #include <net/netdev_queues.h> 36 - 35 + #include <linux/ethtool_netlink.h> 37 36 #include "common.h" 38 37 39 38 /* State held across locks and calls for commands which have devlink fallback */ ··· 2282 2283 if (ret) 2283 2284 return ret; 2284 2285 2285 - /* Disabling channels, query busy queues (AF_XDP, queue leasing) */ 2286 + /* Disabling channels, query zero-copy AF_XDP sockets */ 2286 2287 from_channel = channels.combined_count + 2287 2288 min(channels.rx_count, channels.tx_count); 2288 2289 to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); 2289 2290 for (i = from_channel; i < to_channel; i++) 2290 - if (netdev_queue_busy(dev, i, NULL)) 2291 + if (xsk_get_pool_from_qid(dev, i)) 2291 2292 return -EINVAL; 2292 2293 2293 2294 ret = dev->ethtool_ops->set_channels(dev, &channels);
+17 -62
net/xdp/xsk.c
··· 23 23 #include <linux/netdevice.h> 24 24 #include <linux/rculist.h> 25 25 #include <linux/vmalloc.h> 26 - 27 - #include <net/netdev_queues.h> 28 26 #include <net/xdp_sock_drv.h> 29 27 #include <net/busy_poll.h> 30 28 #include <net/netdev_lock.h> ··· 103 105 } 104 106 EXPORT_SYMBOL(xsk_uses_need_wakeup); 105 107 106 - struct xsk_buff_pool *xsk_get_pool_from_qid(const struct net_device *dev, 108 + struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, 107 109 u16 queue_id) 108 110 { 109 111 if (queue_id < dev->real_num_rx_queues) ··· 117 119 118 120 void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) 119 121 { 120 - struct net_device *orig_dev = dev; 121 - unsigned int id = queue_id; 122 - 123 - if (id < dev->real_num_rx_queues) 124 - WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &id)); 125 - 126 - if (id < dev->real_num_rx_queues) 127 - dev->_rx[id].pool = NULL; 128 - if (id < dev->real_num_tx_queues) 129 - dev->_tx[id].pool = NULL; 130 - 131 - netif_put_rx_queue_lease_locked(orig_dev, dev); 122 + if (queue_id < dev->num_rx_queues) 123 + dev->_rx[queue_id].pool = NULL; 124 + if (queue_id < dev->num_tx_queues) 125 + dev->_tx[queue_id].pool = NULL; 132 126 } 133 127 134 128 /* The buffer pool is stored both in the _rx struct and the _tx struct as we do ··· 130 140 int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, 131 141 u16 queue_id) 132 142 { 133 - struct net_device *orig_dev = dev; 134 - unsigned int id = queue_id; 135 - int ret = 0; 136 - 137 - if (id >= max(dev->real_num_rx_queues, 138 - dev->real_num_tx_queues)) 143 + if (queue_id >= max_t(unsigned int, 144 + dev->real_num_rx_queues, 145 + dev->real_num_tx_queues)) 139 146 return -EINVAL; 140 - if (id < dev->real_num_rx_queues) { 141 - if (!netif_get_rx_queue_lease_locked(&dev, &id)) 142 - return -EBUSY; 143 - if (xsk_get_pool_from_qid(dev, id)) { 144 - ret = -EBUSY; 145 - goto out; 146 - } 147 - } 148 147 149 - if (id < dev->real_num_rx_queues) 150 - dev->_rx[id].pool = pool; 151 - if (id < dev->real_num_tx_queues) 152 - dev->_tx[id].pool = pool; 153 - out: 154 - netif_put_rx_queue_lease_locked(orig_dev, dev); 155 - return ret; 148 + if (queue_id < dev->real_num_rx_queues) 149 + dev->_rx[queue_id].pool = pool; 150 + if (queue_id < dev->real_num_tx_queues) 151 + dev->_tx[queue_id].pool = pool; 152 + 153 + return 0; 156 154 } 157 155 158 156 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, ··· 324 346 return false; 325 347 } 326 348 327 - static bool xsk_dev_queue_valid(const struct xdp_sock *xs, 328 - const struct xdp_rxq_info *info) 329 - { 330 - struct net_device *dev = xs->dev; 331 - u32 queue_index = xs->queue_id; 332 - struct netdev_rx_queue *rxq; 333 - 334 - if (info->dev == dev && 335 - info->queue_index == queue_index) 336 - return true; 337 - 338 - if (queue_index < dev->real_num_rx_queues) { 339 - rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease); 340 - if (!rxq) 341 - return false; 342 - 343 - dev = rxq->dev; 344 - queue_index = get_netdev_rx_queue_index(rxq); 345 - 346 - return info->dev == dev && 347 - info->queue_index == queue_index; 348 - } 349 - return false; 350 - } 351 - 352 349 static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) 353 350 { 354 351 if (!xsk_is_bound(xs)) 355 352 return -ENXIO; 356 - if (!xsk_dev_queue_valid(xs, xdp->rxq)) 353 + 354 + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) 357 355 return -EINVAL; 356 + 358 357 if (len > xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { 359 358 xs->rx_dropped++; 360 359 return -ENOSPC;
-11
tools/include/uapi/linux/netdev.h
··· 160 160 NETDEV_A_QUEUE_DMABUF, 161 161 NETDEV_A_QUEUE_IO_URING, 162 162 NETDEV_A_QUEUE_XSK, 163 - NETDEV_A_QUEUE_LEASE, 164 163 165 164 __NETDEV_A_QUEUE_MAX, 166 165 NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) ··· 203 204 }; 204 205 205 206 enum { 206 - NETDEV_A_LEASE_IFINDEX = 1, 207 - NETDEV_A_LEASE_QUEUE, 208 - NETDEV_A_LEASE_NETNS_ID, 209 - 210 - __NETDEV_A_LEASE_MAX, 211 - NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1) 212 - }; 213 - 214 - enum { 215 207 NETDEV_A_DMABUF_IFINDEX = 1, 216 208 NETDEV_A_DMABUF_QUEUES, 217 209 NETDEV_A_DMABUF_FD, ··· 228 238 NETDEV_CMD_BIND_RX, 229 239 NETDEV_CMD_NAPI_SET, 230 240 NETDEV_CMD_BIND_TX, 231 - NETDEV_CMD_QUEUE_CREATE, 232 241 233 242 __NETDEV_CMD_MAX, 234 243 NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
-7
tools/testing/selftests/drivers/net/README.rst
··· 62 62 63 63 Local and remote endpoint IP addresses. 64 64 65 - LOCAL_PREFIX_V4, LOCAL_PREFIX_V6 66 - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 67 - 68 - Local IP prefix/subnet which can be used to allocate extra IP addresses (for 69 - network name spaces behind macvlan, veth, netkit devices). DUT must be 70 - reachable using these addresses from the endpoint. 71 - 72 65 REMOTE_TYPE 73 66 ~~~~~~~~~~~ 74 67
-2
tools/testing/selftests/drivers/net/hw/Makefile
··· 32 32 irq.py \ 33 33 loopback.sh \ 34 34 nic_timestamp.py \ 35 - nk_netns.py \ 36 - nk_qlease.py \ 37 35 pp_alloc_fail.py \ 38 36 rss_api.py \ 39 37 rss_ctx.py \
+3 -4
tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
··· 3 3 """ 4 4 Driver test environment (hardware-only tests). 5 5 NetDrvEnv and NetDrvEpEnv are the main environment classes. 6 - NetDrvContEnv extends NetDrvEpEnv with netkit container support. 7 6 Former is for local host only tests, latter creates / connects 8 7 to a remote endpoint. See NIPA wiki for more information about 9 8 running and writing driver tests. ··· 29 30 from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ 30 31 ksft_ne, ksft_not_in, ksft_raises, ksft_true, ksft_gt, ksft_not_none 31 32 from drivers.net.lib.py import GenerateTraffic, Remote, Iperf3Runner 32 - from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv 33 + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv 33 34 34 35 __all__ = ["NetNS", "NetNSEnter", "NetdevSimDev", 35 36 "EthtoolFamily", "NetdevFamily", "NetshaperFamily", ··· 44 45 "ksft_eq", "ksft_ge", "ksft_in", "ksft_is", "ksft_lt", 45 46 "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", 46 47 "ksft_not_none", "ksft_not_none", 47 - "NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", 48 - "Remote", "Iperf3Runner"] 48 + "NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", 49 + "Iperf3Runner"] 49 50 except ModuleNotFoundError as e: 50 51 print("Failed importing `net` library from kernel sources") 51 52 print(str(e))
-49
tools/testing/selftests/drivers/net/hw/nk_forward.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/bpf.h> 3 - #include <linux/pkt_cls.h> 4 - #include <linux/if_ether.h> 5 - #include <linux/ipv6.h> 6 - #include <linux/in6.h> 7 - #include <bpf/bpf_endian.h> 8 - #include <bpf/bpf_helpers.h> 9 - 10 - #define TC_ACT_OK 0 11 - #define ETH_P_IPV6 0x86DD 12 - 13 - #define ctx_ptr(field) ((void *)(long)(field)) 14 - 15 - #define v6_p64_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ 16 - a.s6_addr32[1] == b.s6_addr32[1]) 17 - 18 - volatile __u32 netkit_ifindex; 19 - volatile __u8 ipv6_prefix[16]; 20 - 21 - SEC("tc/ingress") 22 - int tc_redirect_peer(struct __sk_buff *skb) 23 - { 24 - void *data_end = ctx_ptr(skb->data_end); 25 - void *data = ctx_ptr(skb->data); 26 - struct in6_addr *peer_addr; 27 - struct ipv6hdr *ip6h; 28 - struct ethhdr *eth; 29 - 30 - peer_addr = (struct in6_addr *)ipv6_prefix; 31 - 32 - if (skb->protocol != bpf_htons(ETH_P_IPV6)) 33 - return TC_ACT_OK; 34 - 35 - eth = data; 36 - if ((void *)(eth + 1) > data_end) 37 - return TC_ACT_OK; 38 - 39 - ip6h = data + sizeof(struct ethhdr); 40 - if ((void *)(ip6h + 1) > data_end) 41 - return TC_ACT_OK; 42 - 43 - if (!v6_p64_equal(ip6h->daddr, (*peer_addr))) 44 - return TC_ACT_OK; 45 - 46 - return bpf_redirect_peer(netkit_ifindex, 0); 47 - } 48 - 49 - char __license[] SEC("license") = "GPL";
-23
tools/testing/selftests/drivers/net/hw/nk_netns.py
··· 1 - #!/usr/bin/env python3 2 - # SPDX-License-Identifier: GPL-2.0 3 - 4 - from lib.py import ksft_run, ksft_exit 5 - from lib.py import NetDrvContEnv 6 - from lib.py import cmd 7 - 8 - 9 - def test_ping(cfg) -> None: 10 - cfg.require_ipver("6") 11 - 12 - cmd(f"ping -c 1 -W5 {cfg.nk_guest_ipv6}", host=cfg.remote) 13 - cmd(f"ping -c 1 -W5 {cfg.remote_addr_v['6']}", ns=cfg.netns) 14 - 15 - 16 - def main() -> None: 17 - with NetDrvContEnv(__file__) as cfg: 18 - ksft_run([test_ping], args=(cfg,)) 19 - ksft_exit() 20 - 21 - 22 - if __name__ == "__main__": 23 - main()
-55
tools/testing/selftests/drivers/net/hw/nk_qlease.py
··· 1 - #!/usr/bin/env python3 2 - # SPDX-License-Identifier: GPL-2.0 3 - 4 - import re 5 - from os import path 6 - from lib.py import ksft_run, ksft_exit 7 - from lib.py import NetDrvContEnv 8 - from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen 9 - 10 - 11 - def create_rss_ctx(cfg): 12 - output = ethtool(f"-X {cfg.ifname} context new start {cfg.src_queue} equal 1").stdout 13 - values = re.search(r'New RSS context is (\d+)', output).group(1) 14 - return int(values) 15 - 16 - 17 - def set_flow_rule(cfg): 18 - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} action {cfg.src_queue}").stdout 19 - values = re.search(r'ID (\d+)', output).group(1) 20 - return int(values) 21 - 22 - 23 - def set_flow_rule_rss(cfg, rss_ctx_id): 24 - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {cfg.port} context {rss_ctx_id}").stdout 25 - values = re.search(r'ID (\d+)', output).group(1) 26 - return int(values) 27 - 28 - 29 - def test_iou_zcrx(cfg) -> None: 30 - cfg.require_ipver('6') 31 - 32 - ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") 33 - defer(ethtool, f"-X {cfg.ifname} default") 34 - 35 - flow_rule_id = set_flow_rule(cfg) 36 - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") 37 - 38 - rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" 39 - tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840" 40 - with bkg(rx_cmd, exit_wait=True): 41 - wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) 42 - cmd(tx_cmd, host=cfg.remote) 43 - 44 - 45 - def main() -> None: 46 - with NetDrvContEnv(__file__, lease=True) as cfg: 47 - cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") 48 - cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) 49 - cfg.port = rand_port() 50 - ksft_run([test_iou_zcrx], args=(cfg,)) 51 - ksft_exit() 52 - 53 - 54 - if __name__ == "__main__": 55 - main()
+3 -4
tools/testing/selftests/drivers/net/lib/py/__init__.py
··· 3 3 """ 4 4 Driver test environment. 5 5 NetDrvEnv and NetDrvEpEnv are the main environment classes. 6 - NetDrvContEnv extends NetDrvEpEnv with netkit container support. 7 6 Former is for local host only tests, latter creates / connects 8 7 to a remote endpoint. See NIPA wiki for more information about 9 8 running and writing driver tests. ··· 43 44 "ksft_ne", "ksft_not_in", "ksft_raises", "ksft_true", "ksft_gt", 44 45 "ksft_not_none", "ksft_not_none"] 45 46 46 - from .env import NetDrvEnv, NetDrvEpEnv, NetDrvContEnv 47 + from .env import NetDrvEnv, NetDrvEpEnv 47 48 from .load import GenerateTraffic, Iperf3Runner 48 49 from .remote import Remote 49 50 50 - __all__ += ["NetDrvEnv", "NetDrvEpEnv", "NetDrvContEnv", "GenerateTraffic", 51 - "Remote", "Iperf3Runner"] 51 + __all__ += ["NetDrvEnv", "NetDrvEpEnv", "GenerateTraffic", "Remote", 52 + "Iperf3Runner"] 52 53 except ModuleNotFoundError as e: 53 54 print("Failed importing `net` library from kernel sources") 54 55 print(str(e))
-157
tools/testing/selftests/drivers/net/lib/py/env.py
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 3 - import ipaddress 4 3 import os 5 - import re 6 4 import time 7 5 from pathlib import Path 8 6 from lib.py import KsftSkipEx, KsftXfailEx 9 7 from lib.py import ksft_setup, wait_file 10 8 from lib.py import cmd, ethtool, ip, CmdExitFailure 11 9 from lib.py import NetNS, NetdevSimDev 12 - from lib.py import NetdevFamily, EthtoolFamily 13 10 from .remote import Remote 14 - from . import bpftool 15 11 16 12 17 13 class NetDrvEnvBase: ··· 289 293 data.get('stats-block-usecs', 0) / 1000 / 1000 290 294 291 295 time.sleep(self._stats_settle_time) 292 - 293 - 294 - class NetDrvContEnv(NetDrvEpEnv): 295 - """ 296 - Class for an environment with a netkit pair setup for forwarding traffic 297 - between the physical interface and a network namespace. 298 - """ 299 - 300 - def __init__(self, src_path, lease=False, **kwargs): 301 - super().__init__(src_path, **kwargs) 302 - 303 - self.require_ipver("6") 304 - local_prefix = self.env.get("LOCAL_PREFIX_V6") 305 - if not local_prefix: 306 - raise KsftSkipEx("LOCAL_PREFIX_V6 required") 307 - 308 - self.netdevnl = NetdevFamily() 309 - self.ethnl = EthtoolFamily() 310 - 311 - local_prefix = local_prefix.rstrip("/64").rstrip("::").rstrip(":") 312 - self.ipv6_prefix = f"{local_prefix}::" 313 - self.nk_host_ipv6 = f"{local_prefix}::2:1" 314 - self.nk_guest_ipv6 = f"{local_prefix}::2:2" 315 - 316 - self.netns = None 317 - self._nk_host_ifname = None 318 - self._nk_guest_ifname = None 319 - self._tc_attached = False 320 - self._bpf_prog_pref = None 321 - self._bpf_prog_id = None 322 - self._leased = False 323 - 324 - nk_rxqueues = 1 325 - if lease: 326 - nk_rxqueues = 2 327 - ip(f"link add type netkit mode l2 forward peer forward numrxqueues {nk_rxqueues}") 328 - 329 - all_links = ip("-d link show", json=True) 330 - netkit_links = [link for link in all_links 331 - if link.get('linkinfo', {}).get('info_kind') == 'netkit' 332 - and 'UP' not in link.get('flags', [])] 333 - 334 - if len(netkit_links) != 2: 335 - raise KsftSkipEx("Failed to create netkit pair") 336 - 337 - netkit_links.sort(key=lambda x: x['ifindex']) 338 - self._nk_host_ifname = netkit_links[1]['ifname'] 339 - self._nk_guest_ifname = netkit_links[0]['ifname'] 340 - self.nk_host_ifindex = netkit_links[1]['ifindex'] 341 - self.nk_guest_ifindex = netkit_links[0]['ifindex'] 342 - 343 - if lease: 344 - self._lease_queues() 345 - 346 - self._setup_ns() 347 - self._attach_bpf() 348 - 349 - def __del__(self): 350 - if self._tc_attached: 351 - cmd(f"tc filter del dev {self.ifname} ingress pref {self._bpf_prog_pref}") 352 - self._tc_attached = False 353 - 354 - if self._nk_host_ifname: 355 - cmd(f"ip link del dev {self._nk_host_ifname}") 356 - self._nk_host_ifname = None 357 - self._nk_guest_ifname = None 358 - 359 - if self.netns: 360 - del self.netns 361 - self.netns = None 362 - 363 - if self._leased: 364 - self.ethnl.rings_set({'header': {'dev-index': self.ifindex}, 365 - 'tcp-data-split': 'unknown', 366 - 'hds-thresh': self._hds_thresh, 367 - 'rx': self._rx_rings}) 368 - self._leased = False 369 - 370 - super().__del__() 371 - 372 - def _lease_queues(self): 373 - channels = self.ethnl.channels_get({'header': {'dev-index': self.ifindex}}) 374 - channels = channels['combined-count'] 375 - if channels < 2: 376 - raise KsftSkipEx('Test requires NETIF with at least 2 combined channels') 377 - 378 - rings = self.ethnl.rings_get({'header': {'dev-index': self.ifindex}}) 379 - self._rx_rings = rings['rx'] 380 - self._hds_thresh = rings.get('hds-thresh', 0) 381 - self.ethnl.rings_set({'header': {'dev-index': self.ifindex}, 382 - 'tcp-data-split': 'enabled', 383 - 'hds-thresh': 0, 384 - 'rx': 64}) 385 - self.src_queue = channels - 1 386 - bind_result = self.netdevnl.queue_create( 387 - { 388 - "ifindex": self.nk_guest_ifindex, 389 - "type": "rx", 390 - "lease": { 391 - "ifindex": self.ifindex, 392 - "queue": {"id": self.src_queue, "type": "rx"}, 393 - }, 394 - } 395 - ) 396 - self.nk_queue = bind_result['id'] 397 - self._leased = True 398 - 399 - def _setup_ns(self): 400 - self.netns = NetNS() 401 - ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}") 402 - ip(f"link set dev {self._nk_host_ifname} up") 403 - ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad") 404 - ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}") 405 - 406 - ip("link set lo up", ns=self.netns) 407 - ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns) 408 - ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns) 409 - ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns) 410 - ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns) 411 - 412 - def _attach_bpf(self): 413 - bpf_obj = self.test_dir / "nk_forward.bpf.o" 414 - if not bpf_obj.exists(): 415 - raise KsftSkipEx("BPF prog not found") 416 - 417 - cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj} sec tc/ingress direct-action") 418 - self._tc_attached = True 419 - 420 - tc_info = cmd(f"tc filter show dev {self.ifname} ingress").stdout 421 - match = re.search(r'pref (\d+).*nk_forward\.bpf.*id (\d+)', tc_info) 422 - if not match: 423 - raise Exception("Failed to get BPF prog ID") 424 - self._bpf_prog_pref = int(match.group(1)) 425 - self._bpf_prog_id = int(match.group(2)) 426 - 427 - prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True) 428 - map_ids = prog_info.get("map_ids", []) 429 - 430 - bss_map_id = None 431 - for map_id in map_ids: 432 - map_info = bpftool(f"map show id {map_id}", json=True) 433 - if map_info.get("name").endswith("bss"): 434 - bss_map_id = map_id 435 - 436 - if bss_map_id is None: 437 - raise Exception("Failed to find .bss map") 438 - 439 - ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix) 440 - ipv6_bytes = ipv6_addr.packed 441 - ifindex_bytes = self.nk_host_ifindex.to_bytes(4, byteorder='little') 442 - value = ipv6_bytes + ifindex_bytes 443 - value_hex = ' '.join(f'{b:02x}' for b in value) 444 - bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}")