Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp_tunnel: add the ability to share port tables

Unfortunately, recent Intel NIC designs share the UDP port table
across netdevs. So far the UDP tunnel port state was maintained
per netdev; we need to extend that to cater to Intel NICs.

Expect NICs to allocate the info structure dynamically and link
to the state from there. All the shared NICs will record port
offload information in the one instance of the table, so we need
to make sure that the use count can accommodate larger numbers.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jakub Kicinski and committed by
David S. Miller
74cc6d18 8744c0a8

+110 -10
+24
include/net/udp_tunnel.h
··· 200 200 UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), 201 201 }; 202 202 203 + struct udp_tunnel_nic; 204 + 205 + #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) 206 + 207 + struct udp_tunnel_nic_shared { 208 + struct udp_tunnel_nic *udp_tunnel_nic_info; 209 + 210 + struct list_head devices; 211 + }; 212 + 213 + struct udp_tunnel_nic_shared_node { 214 + struct net_device *dev; 215 + struct list_head list; 216 + }; 217 + 203 218 /** 204 219 * struct udp_tunnel_nic_info - driver UDP tunnel offload information 205 220 * @set_port: callback for adding a new port 206 221 * @unset_port: callback for removing a port 207 222 * @sync_table: callback for syncing the entire port table at once 223 + * @shared: reference to device global state (optional) 208 224 * @flags: device flags from enum udp_tunnel_nic_info_flags 209 225 * @tables: UDP port tables this device has 210 226 * @tables.n_entries: number of entries in this table ··· 228 212 * 229 213 * Drivers are expected to provide either @set_port and @unset_port callbacks 230 214 * or the @sync_table callback. Callbacks are invoked with rtnl lock held. 215 + * 216 + * Devices which (misguidedly) share the UDP tunnel port table across multiple 217 + * netdevs should allocate an instance of struct udp_tunnel_nic_shared and 218 + * point @shared at it. 219 + * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices 220 + * sharing a table. 231 221 * 232 222 * Known limitations: 233 223 * - UDP tunnel port notifications are fundamentally best-effort - ··· 255 233 256 234 /* all at once */ 257 235 int (*sync_table)(struct net_device *dev, unsigned int table); 236 + 237 + struct udp_tunnel_nic_shared *shared; 258 238 259 239 unsigned int flags; 260 240
+86 -10
net/ipv4/udp_tunnel_nic.c
··· 19 19 struct udp_tunnel_nic_table_entry { 20 20 __be16 port; 21 21 u8 type; 22 - u8 use_cnt; 23 22 u8 flags; 23 + u16 use_cnt; 24 + #define UDP_TUNNEL_NIC_USE_CNT_MAX U16_MAX 24 25 u8 hw_priv; 25 26 }; 26 27 ··· 371 370 bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 372 371 unsigned int from, to; 373 372 373 + WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX); 374 + 374 375 /* If not going from used to unused or vice versa - all done. 375 376 * For dodgy entries make sure we try to sync again (queue the entry). 376 377 */ ··· 678 675 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) 679 676 { 680 677 const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 678 + struct udp_tunnel_nic_shared_node *node; 681 679 unsigned int i, j; 682 680 683 681 /* Freeze all the ports we are already tracking so that the replay ··· 690 686 utn->missed = 0; 691 687 utn->need_replay = 0; 692 688 693 - udp_tunnel_get_rx_info(dev); 689 + if (!info->shared) { 690 + udp_tunnel_get_rx_info(dev); 691 + } else { 692 + list_for_each_entry(node, &info->shared->devices, list) 693 + udp_tunnel_get_rx_info(node->dev); 694 + } 694 695 695 696 for (i = 0; i < utn->n_tables; i++) 696 697 for (j = 0; j < info->tables[i].n_entries; j++) ··· 751 742 return NULL; 752 743 } 753 744 745 + static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) 746 + { 747 + unsigned int i; 748 + 749 + for (i = 0; i < utn->n_tables; i++) 750 + kfree(utn->entries[i]); 751 + kfree(utn->entries); 752 + kfree(utn); 753 + } 754 + 754 755 static int udp_tunnel_nic_register(struct net_device *dev) 755 756 { 756 757 const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 758 + struct udp_tunnel_nic_shared_node *node = NULL; 757 759 struct udp_tunnel_nic *utn; 758 760 unsigned int n_tables, i; 759 761 760 762 BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < 761 763 UDP_TUNNEL_NIC_MAX_TABLES); 764 + /* Expect use count of at most 2 (IPv4, IPv6) per device */ 765 + 
BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX < 766 + UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2); 762 767 768 + /* Check that the driver info is sane */ 763 769 if (WARN_ON(!info->set_port != !info->unset_port) || 764 770 WARN_ON(!info->set_port == !info->sync_table) || 765 771 WARN_ON(!info->tables[0].n_entries)) 772 + return -EINVAL; 773 + 774 + if (WARN_ON(info->shared && 775 + info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) 766 776 return -EINVAL; 767 777 768 778 n_tables = 1; ··· 794 766 return -EINVAL; 795 767 } 796 768 797 - utn = udp_tunnel_nic_alloc(info, n_tables); 798 - if (!utn) 799 - return -ENOMEM; 769 + /* Create UDP tunnel state structures */ 770 + if (info->shared) { 771 + node = kzalloc(sizeof(*node), GFP_KERNEL); 772 + if (!node) 773 + return -ENOMEM; 774 + 775 + node->dev = dev; 776 + } 777 + 778 + if (info->shared && info->shared->udp_tunnel_nic_info) { 779 + utn = info->shared->udp_tunnel_nic_info; 780 + } else { 781 + utn = udp_tunnel_nic_alloc(info, n_tables); 782 + if (!utn) { 783 + kfree(node); 784 + return -ENOMEM; 785 + } 786 + } 787 + 788 + if (info->shared) { 789 + if (!info->shared->udp_tunnel_nic_info) { 790 + INIT_LIST_HEAD(&info->shared->devices); 791 + info->shared->udp_tunnel_nic_info = utn; 792 + } 793 + 794 + list_add_tail(&node->list, &info->shared->devices); 795 + } 800 796 801 797 utn->dev = dev; 802 798 dev_hold(dev); ··· 835 783 static void 836 784 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) 837 785 { 838 - unsigned int i; 786 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 787 + 788 + /* For a shared table remove this dev from the list of sharing devices 789 + * and if there are other devices just detach. 
790 + */ 791 + if (info->shared) { 792 + struct udp_tunnel_nic_shared_node *node, *first; 793 + 794 + list_for_each_entry(node, &info->shared->devices, list) 795 + if (node->dev == dev) 796 + break; 797 + if (node->dev != dev) 798 + return; 799 + 800 + list_del(&node->list); 801 + kfree(node); 802 + 803 + first = list_first_entry_or_null(&info->shared->devices, 804 + typeof(*first), list); 805 + if (first) { 806 + udp_tunnel_drop_rx_info(dev); 807 + utn->dev = first->dev; 808 + goto release_dev; 809 + } 810 + 811 + info->shared->udp_tunnel_nic_info = NULL; 812 + } 839 813 840 814 /* Flush before we check work, so we don't waste time adding entries 841 815 * from the work which we will boot immediately. ··· 874 796 if (utn->work_pending) 875 797 return; 876 798 877 - for (i = 0; i < utn->n_tables; i++) 878 - kfree(utn->entries[i]); 879 - kfree(utn->entries); 880 - kfree(utn); 799 + udp_tunnel_nic_free(utn); 800 + release_dev: 881 801 dev->udp_tunnel_nic = NULL; 882 802 dev_put(dev); 883 803 }