Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'udp_tunnel-convert-Intel-drivers-with-shared-tables'

Jakub Kicinski says:

====================
udp_tunnel: convert Intel drivers with shared tables

This set converts Intel drivers which have the ability to spawn
multiple netdevs, but have only one UDP tunnel port table.

Appropriate support is added to the core infra in patch 1,
followed by netdevsim support and a selftest.

The table sharing works by core attaching the same table
structure to all devices sharing the table. This means the
reference count has to accommodate potentially large values.

Once core is ready i40e and ice are converted. These are
complex drivers, but we got a tested-by from Aaron, so we
should be good :)

Compared to v1 I've made sure the selftest is executable.

Other than that, patches 8 and 9 are actually from the Mellanox
conversion series; they were kept out of that series to avoid
Mellanox vs Intel conflicts.

Last patch is new, some docs to let users know ethtool
can now display UDP tunnel info.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+533 -453
+28
Documentation/networking/vxlan.rst
··· 58 58 3. Show forwarding table:: 59 59 60 60 # bridge fdb show dev vxlan0 61 + 62 + The following NIC features may indicate support for UDP tunnel-related 63 + offloads (most commonly VXLAN features, but support for a particular 64 + encapsulation protocol is NIC specific): 65 + 66 + - `tx-udp_tnl-segmentation` 67 + - `tx-udp_tnl-csum-segmentation` 68 + ability to perform TCP segmentation offload of UDP encapsulated frames 69 + 70 + - `rx-udp_tunnel-port-offload` 71 + receive side parsing of UDP encapsulated frames which allows NICs to 72 + perform protocol-aware offloads, like checksum validation offload of 73 + inner frames (only needed by NICs without protocol-agnostic offloads) 74 + 75 + For devices supporting `rx-udp_tunnel-port-offload` the list of currently 76 + offloaded ports can be interrogated with `ethtool`:: 77 + 78 + $ ethtool --show-tunnels eth0 79 + Tunnel information for eth0: 80 + UDP port table 0: 81 + Size: 4 82 + Types: vxlan 83 + No entries 84 + UDP port table 1: 85 + Size: 4 86 + Types: geneve, vxlan-gpe 87 + Entries (1): 88 + port 1230, vxlan-gpe
+3 -3
drivers/net/ethernet/intel/i40e/i40e.h
··· 35 35 #include <net/pkt_cls.h> 36 36 #include <net/tc_act/tc_gact.h> 37 37 #include <net/tc_act/tc_mirred.h> 38 + #include <net/udp_tunnel.h> 38 39 #include <net/xdp_sock.h> 39 40 #include "i40e_type.h" 40 41 #include "i40e_prototype.h" ··· 134 133 __I40E_PORT_SUSPENDED, 135 134 __I40E_VF_DISABLE, 136 135 __I40E_MACVLAN_SYNC_PENDING, 137 - __I40E_UDP_FILTER_SYNC_PENDING, 138 136 __I40E_TEMP_LINK_POLLING, 139 137 __I40E_CLIENT_SERVICE_REQUESTED, 140 138 __I40E_CLIENT_L2_CHANGE, ··· 478 478 struct list_head l3_flex_pit_list; 479 479 struct list_head l4_flex_pit_list; 480 480 481 - struct i40e_udp_port_config udp_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS]; 482 - u16 pending_udp_bitmap; 481 + struct udp_tunnel_nic_shared udp_tunnel_shared; 482 + struct udp_tunnel_nic_info udp_tunnel_nic; 483 483 484 484 struct hlist_head cloud_filter_list; 485 485 u16 num_cloud_filters;
+48 -216
drivers/net/ethernet/intel/i40e/i40e_main.c
··· 10386 10386 i40e_flush(hw); 10387 10387 } 10388 10388 10389 - static const char *i40e_tunnel_name(u8 type) 10390 - { 10391 - switch (type) { 10392 - case UDP_TUNNEL_TYPE_VXLAN: 10393 - return "vxlan"; 10394 - case UDP_TUNNEL_TYPE_GENEVE: 10395 - return "geneve"; 10396 - default: 10397 - return "unknown"; 10398 - } 10399 - } 10400 - 10401 - /** 10402 - * i40e_sync_udp_filters - Trigger a sync event for existing UDP filters 10403 - * @pf: board private structure 10404 - **/ 10405 - static void i40e_sync_udp_filters(struct i40e_pf *pf) 10406 - { 10407 - int i; 10408 - 10409 - /* loop through and set pending bit for all active UDP filters */ 10410 - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { 10411 - if (pf->udp_ports[i].port) 10412 - pf->pending_udp_bitmap |= BIT_ULL(i); 10413 - } 10414 - 10415 - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); 10416 - } 10417 - 10418 - /** 10419 - * i40e_sync_udp_filters_subtask - Sync the VSI filter list with HW 10420 - * @pf: board private structure 10421 - **/ 10422 - static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) 10423 - { 10424 - struct i40e_hw *hw = &pf->hw; 10425 - u8 filter_index, type; 10426 - u16 port; 10427 - int i; 10428 - 10429 - if (!test_and_clear_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state)) 10430 - return; 10431 - 10432 - /* acquire RTNL to maintain state of flags and port requests */ 10433 - rtnl_lock(); 10434 - 10435 - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { 10436 - if (pf->pending_udp_bitmap & BIT_ULL(i)) { 10437 - struct i40e_udp_port_config *udp_port; 10438 - i40e_status ret = 0; 10439 - 10440 - udp_port = &pf->udp_ports[i]; 10441 - pf->pending_udp_bitmap &= ~BIT_ULL(i); 10442 - 10443 - port = READ_ONCE(udp_port->port); 10444 - type = READ_ONCE(udp_port->type); 10445 - filter_index = READ_ONCE(udp_port->filter_index); 10446 - 10447 - /* release RTNL while we wait on AQ command */ 10448 - rtnl_unlock(); 10449 - 10450 - if (port) 10451 - ret = 
i40e_aq_add_udp_tunnel(hw, port, 10452 - type, 10453 - &filter_index, 10454 - NULL); 10455 - else if (filter_index != I40E_UDP_PORT_INDEX_UNUSED) 10456 - ret = i40e_aq_del_udp_tunnel(hw, filter_index, 10457 - NULL); 10458 - 10459 - /* reacquire RTNL so we can update filter_index */ 10460 - rtnl_lock(); 10461 - 10462 - if (ret) { 10463 - dev_info(&pf->pdev->dev, 10464 - "%s %s port %d, index %d failed, err %s aq_err %s\n", 10465 - i40e_tunnel_name(type), 10466 - port ? "add" : "delete", 10467 - port, 10468 - filter_index, 10469 - i40e_stat_str(&pf->hw, ret), 10470 - i40e_aq_str(&pf->hw, 10471 - pf->hw.aq.asq_last_status)); 10472 - if (port) { 10473 - /* failed to add, just reset port, 10474 - * drop pending bit for any deletion 10475 - */ 10476 - udp_port->port = 0; 10477 - pf->pending_udp_bitmap &= ~BIT_ULL(i); 10478 - } 10479 - } else if (port) { 10480 - /* record filter index on success */ 10481 - udp_port->filter_index = filter_index; 10482 - } 10483 - } 10484 - } 10485 - 10486 - rtnl_unlock(); 10487 - } 10488 - 10489 10389 /** 10490 10390 * i40e_service_task - Run the driver's async subtasks 10491 10391 * @work: pointer to work_struct containing our data ··· 10425 10525 pf->vsi[pf->lan_vsi]); 10426 10526 } 10427 10527 i40e_sync_filters_subtask(pf); 10428 - i40e_sync_udp_filters_subtask(pf); 10429 10528 } else { 10430 10529 i40e_reset_subtask(pf); 10431 10530 } ··· 12124 12225 return 0; 12125 12226 } 12126 12227 12127 - /** 12128 - * i40e_get_udp_port_idx - Lookup a possibly offloaded for Rx UDP port 12129 - * @pf: board private structure 12130 - * @port: The UDP port to look up 12131 - * 12132 - * Returns the index number or I40E_MAX_PF_UDP_OFFLOAD_PORTS if port not found 12133 - **/ 12134 - static u8 i40e_get_udp_port_idx(struct i40e_pf *pf, u16 port) 12135 - { 12136 - u8 i; 12137 - 12138 - for (i = 0; i < I40E_MAX_PF_UDP_OFFLOAD_PORTS; i++) { 12139 - /* Do not report ports with pending deletions as 12140 - * being available. 
12141 - */ 12142 - if (!port && (pf->pending_udp_bitmap & BIT_ULL(i))) 12143 - continue; 12144 - if (pf->udp_ports[i].port == port) 12145 - return i; 12146 - } 12147 - 12148 - return i; 12149 - } 12150 - 12151 - /** 12152 - * i40e_udp_tunnel_add - Get notifications about UDP tunnel ports that come up 12153 - * @netdev: This physical port's netdev 12154 - * @ti: Tunnel endpoint information 12155 - **/ 12156 - static void i40e_udp_tunnel_add(struct net_device *netdev, 12157 - struct udp_tunnel_info *ti) 12228 + static int i40e_udp_tunnel_set_port(struct net_device *netdev, 12229 + unsigned int table, unsigned int idx, 12230 + struct udp_tunnel_info *ti) 12158 12231 { 12159 12232 struct i40e_netdev_priv *np = netdev_priv(netdev); 12160 - struct i40e_vsi *vsi = np->vsi; 12161 - struct i40e_pf *pf = vsi->back; 12162 - u16 port = ntohs(ti->port); 12163 - u8 next_idx; 12164 - u8 idx; 12233 + struct i40e_hw *hw = &np->vsi->back->hw; 12234 + u8 type, filter_index; 12235 + i40e_status ret; 12165 12236 12166 - idx = i40e_get_udp_port_idx(pf, port); 12237 + type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? 
I40E_AQC_TUNNEL_TYPE_VXLAN : 12238 + I40E_AQC_TUNNEL_TYPE_NGE; 12167 12239 12168 - /* Check if port already exists */ 12169 - if (idx < I40E_MAX_PF_UDP_OFFLOAD_PORTS) { 12170 - netdev_info(netdev, "port %d already offloaded\n", port); 12171 - return; 12240 + ret = i40e_aq_add_udp_tunnel(hw, ntohs(ti->port), type, &filter_index, 12241 + NULL); 12242 + if (ret) { 12243 + netdev_info(netdev, "add UDP port failed, err %s aq_err %s\n", 12244 + i40e_stat_str(hw, ret), 12245 + i40e_aq_str(hw, hw->aq.asq_last_status)); 12246 + return -EIO; 12172 12247 } 12173 12248 12174 - /* Now check if there is space to add the new port */ 12175 - next_idx = i40e_get_udp_port_idx(pf, 0); 12176 - 12177 - if (next_idx == I40E_MAX_PF_UDP_OFFLOAD_PORTS) { 12178 - netdev_info(netdev, "maximum number of offloaded UDP ports reached, not adding port %d\n", 12179 - port); 12180 - return; 12181 - } 12182 - 12183 - switch (ti->type) { 12184 - case UDP_TUNNEL_TYPE_VXLAN: 12185 - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_VXLAN; 12186 - break; 12187 - case UDP_TUNNEL_TYPE_GENEVE: 12188 - if (!(pf->hw_features & I40E_HW_GENEVE_OFFLOAD_CAPABLE)) 12189 - return; 12190 - pf->udp_ports[next_idx].type = I40E_AQC_TUNNEL_TYPE_NGE; 12191 - break; 12192 - default: 12193 - return; 12194 - } 12195 - 12196 - /* New port: add it and mark its index in the bitmap */ 12197 - pf->udp_ports[next_idx].port = port; 12198 - pf->udp_ports[next_idx].filter_index = I40E_UDP_PORT_INDEX_UNUSED; 12199 - pf->pending_udp_bitmap |= BIT_ULL(next_idx); 12200 - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); 12249 + udp_tunnel_nic_set_port_priv(netdev, table, idx, filter_index); 12250 + return 0; 12201 12251 } 12202 12252 12203 - /** 12204 - * i40e_udp_tunnel_del - Get notifications about UDP tunnel ports that go away 12205 - * @netdev: This physical port's netdev 12206 - * @ti: Tunnel endpoint information 12207 - **/ 12208 - static void i40e_udp_tunnel_del(struct net_device *netdev, 12209 - struct udp_tunnel_info *ti) 
12253 + static int i40e_udp_tunnel_unset_port(struct net_device *netdev, 12254 + unsigned int table, unsigned int idx, 12255 + struct udp_tunnel_info *ti) 12210 12256 { 12211 12257 struct i40e_netdev_priv *np = netdev_priv(netdev); 12212 - struct i40e_vsi *vsi = np->vsi; 12213 - struct i40e_pf *pf = vsi->back; 12214 - u16 port = ntohs(ti->port); 12215 - u8 idx; 12258 + struct i40e_hw *hw = &np->vsi->back->hw; 12259 + i40e_status ret; 12216 12260 12217 - idx = i40e_get_udp_port_idx(pf, port); 12218 - 12219 - /* Check if port already exists */ 12220 - if (idx >= I40E_MAX_PF_UDP_OFFLOAD_PORTS) 12221 - goto not_found; 12222 - 12223 - switch (ti->type) { 12224 - case UDP_TUNNEL_TYPE_VXLAN: 12225 - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_VXLAN) 12226 - goto not_found; 12227 - break; 12228 - case UDP_TUNNEL_TYPE_GENEVE: 12229 - if (pf->udp_ports[idx].type != I40E_AQC_TUNNEL_TYPE_NGE) 12230 - goto not_found; 12231 - break; 12232 - default: 12233 - goto not_found; 12261 + ret = i40e_aq_del_udp_tunnel(hw, ti->hw_priv, NULL); 12262 + if (ret) { 12263 + netdev_info(netdev, "delete UDP port failed, err %s aq_err %s\n", 12264 + i40e_stat_str(hw, ret), 12265 + i40e_aq_str(hw, hw->aq.asq_last_status)); 12266 + return -EIO; 12234 12267 } 12235 12268 12236 - /* if port exists, set it to 0 (mark for deletion) 12237 - * and make it pending 12238 - */ 12239 - pf->udp_ports[idx].port = 0; 12240 - 12241 - /* Toggle pending bit instead of setting it. This way if we are 12242 - * deleting a port that has yet to be added we just clear the pending 12243 - * bit and don't have to worry about it. 
12244 - */ 12245 - pf->pending_udp_bitmap ^= BIT_ULL(idx); 12246 - set_bit(__I40E_UDP_FILTER_SYNC_PENDING, pf->state); 12247 - 12248 - return; 12249 - not_found: 12250 - netdev_warn(netdev, "UDP port %d was not found, not deleting\n", 12251 - port); 12269 + return 0; 12252 12270 } 12253 12271 12254 12272 static int i40e_get_phys_port_id(struct net_device *netdev, ··· 12771 12955 .ndo_set_vf_link_state = i40e_ndo_set_vf_link_state, 12772 12956 .ndo_set_vf_spoofchk = i40e_ndo_set_vf_spoofchk, 12773 12957 .ndo_set_vf_trust = i40e_ndo_set_vf_trust, 12774 - .ndo_udp_tunnel_add = i40e_udp_tunnel_add, 12775 - .ndo_udp_tunnel_del = i40e_udp_tunnel_del, 12958 + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, 12959 + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, 12776 12960 .ndo_get_phys_port_id = i40e_get_phys_port_id, 12777 12961 .ndo_fdb_add = i40e_ndo_fdb_add, 12778 12962 .ndo_features_check = i40e_features_check, ··· 12835 13019 12836 13020 if (!(pf->hw_features & I40E_HW_OUTER_UDP_CSUM_CAPABLE)) 12837 13021 netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 13022 + 13023 + netdev->udp_tunnel_nic_info = &pf->udp_tunnel_nic; 12838 13024 12839 13025 netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM; 12840 13026 ··· 14238 14420 i40e_ptp_init(pf); 14239 14421 14240 14422 /* repopulate tunnel port filters */ 14241 - i40e_sync_udp_filters(pf); 14423 + udp_tunnel_nic_reset_ntf(pf->vsi[pf->lan_vsi]->netdev); 14242 14424 14243 14425 return ret; 14244 14426 } ··· 14967 15149 if (err) 14968 15150 goto err_switch_setup; 14969 15151 15152 + pf->udp_tunnel_nic.set_port = i40e_udp_tunnel_set_port; 15153 + pf->udp_tunnel_nic.unset_port = i40e_udp_tunnel_unset_port; 15154 + pf->udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; 15155 + pf->udp_tunnel_nic.shared = &pf->udp_tunnel_shared; 15156 + pf->udp_tunnel_nic.tables[0].n_entries = I40E_MAX_PF_UDP_OFFLOAD_PORTS; 15157 + pf->udp_tunnel_nic.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN | 15158 + UDP_TUNNEL_TYPE_GENEVE; 
15159 + 14970 15160 /* The number of VSIs reported by the FW is the minimum guaranteed 14971 15161 * to us; HW supports far more and we share the remaining pool with 14972 15162 * the other PFs. We allocate space for more than the guarantee with ··· 14984 15158 pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; 14985 15159 else 14986 15160 pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; 15161 + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { 15162 + dev_warn(&pf->pdev->dev, 15163 + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", 15164 + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); 15165 + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; 15166 + } 14987 15167 14988 15168 /* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */ 14989 15169 pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
+2 -4
drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
··· 1268 1268 bool is_tun = tun == ICE_FD_HW_SEG_TUN; 1269 1269 int err; 1270 1270 1271 - if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL, 1272 - &port_num)) 1271 + if (is_tun && !ice_get_open_tunnel_port(&pf->hw, &port_num)) 1273 1272 continue; 1274 1273 err = ice_fdir_write_fltr(pf, input, add, is_tun); 1275 1274 if (err) ··· 1646 1647 } 1647 1648 1648 1649 /* return error if not an update and no available filters */ 1649 - fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ? 1650 - 2 : 1; 1650 + fltrs_needed = ice_get_open_tunnel_port(hw, &tunnel_port) ? 2 : 1; 1651 1651 if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) && 1652 1652 ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) { 1653 1653 dev_err(dev, "Failed to add filter. The maximum number of flow director filters has been reached.\n");
+1 -1
drivers/net/ethernet/intel/ice/ice_fdir.c
··· 556 556 memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len); 557 557 loc = pkt; 558 558 } else { 559 - if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port)) 559 + if (!ice_get_open_tunnel_port(hw, &tnl_port)) 560 560 return ICE_ERR_DOES_NOT_EXIST; 561 561 if (!ice_fdir_pkt[idx].tun_pkt) 562 562 return ICE_ERR_PARAM;
+98 -137
drivers/net/ethernet/intel/ice/ice_flex_pipe.c
··· 489 489 if ((label_name[len] - '0') == hw->pf_id) { 490 490 hw->tnl.tbl[hw->tnl.count].type = tnls[i].type; 491 491 hw->tnl.tbl[hw->tnl.count].valid = false; 492 - hw->tnl.tbl[hw->tnl.count].in_use = false; 493 - hw->tnl.tbl[hw->tnl.count].marked = false; 494 492 hw->tnl.tbl[hw->tnl.count].boost_addr = val; 495 493 hw->tnl.tbl[hw->tnl.count].port = 0; 496 494 hw->tnl.count++; ··· 503 505 for (i = 0; i < hw->tnl.count; i++) { 504 506 ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr, 505 507 &hw->tnl.tbl[i].boost_entry); 506 - if (hw->tnl.tbl[i].boost_entry) 508 + if (hw->tnl.tbl[i].boost_entry) { 507 509 hw->tnl.tbl[i].valid = true; 510 + if (hw->tnl.tbl[i].type < __TNL_TYPE_CNT) 511 + hw->tnl.valid_count[hw->tnl.tbl[i].type]++; 512 + } 508 513 } 509 514 } 510 515 ··· 1627 1626 } 1628 1627 1629 1628 /** 1630 - * ice_tunnel_port_in_use_hlpr - helper function to determine tunnel usage 1631 - * @hw: pointer to the HW structure 1632 - * @port: port to search for 1633 - * @index: optionally returns index 1634 - * 1635 - * Returns whether a port is already in use as a tunnel, and optionally its 1636 - * index 1637 - */ 1638 - static bool ice_tunnel_port_in_use_hlpr(struct ice_hw *hw, u16 port, u16 *index) 1639 - { 1640 - u16 i; 1641 - 1642 - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1643 - if (hw->tnl.tbl[i].in_use && hw->tnl.tbl[i].port == port) { 1644 - if (index) 1645 - *index = i; 1646 - return true; 1647 - } 1648 - 1649 - return false; 1650 - } 1651 - 1652 - /** 1653 - * ice_tunnel_port_in_use 1654 - * @hw: pointer to the HW structure 1655 - * @port: port to search for 1656 - * @index: optionally returns index 1657 - * 1658 - * Returns whether a port is already in use as a tunnel, and optionally its 1659 - * index 1660 - */ 1661 - bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index) 1662 - { 1663 - bool res; 1664 - 1665 - mutex_lock(&hw->tnl_lock); 1666 - res = ice_tunnel_port_in_use_hlpr(hw, port, index); 1667 - 
mutex_unlock(&hw->tnl_lock); 1668 - 1669 - return res; 1670 - } 1671 - 1672 - /** 1673 - * ice_find_free_tunnel_entry 1674 - * @hw: pointer to the HW structure 1675 - * @type: tunnel type 1676 - * @index: optionally returns index 1677 - * 1678 - * Returns whether there is a free tunnel entry, and optionally its index 1679 - */ 1680 - static bool 1681 - ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type, 1682 - u16 *index) 1683 - { 1684 - u16 i; 1685 - 1686 - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1687 - if (hw->tnl.tbl[i].valid && !hw->tnl.tbl[i].in_use && 1688 - hw->tnl.tbl[i].type == type) { 1689 - if (index) 1690 - *index = i; 1691 - return true; 1692 - } 1693 - 1694 - return false; 1695 - } 1696 - 1697 - /** 1698 1629 * ice_get_open_tunnel_port - retrieve an open tunnel port 1699 1630 * @hw: pointer to the HW structure 1700 - * @type: tunnel type (TNL_ALL will return any open port) 1701 1631 * @port: returns open port 1702 1632 */ 1703 1633 bool 1704 - ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, 1705 - u16 *port) 1634 + ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port) 1706 1635 { 1707 1636 bool res = false; 1708 1637 u16 i; ··· 1640 1709 mutex_lock(&hw->tnl_lock); 1641 1710 1642 1711 for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1643 - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && 1644 - (type == TNL_ALL || hw->tnl.tbl[i].type == type)) { 1712 + if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].port) { 1645 1713 *port = hw->tnl.tbl[i].port; 1646 1714 res = true; 1647 1715 break; ··· 1652 1722 } 1653 1723 1654 1724 /** 1725 + * ice_tunnel_idx_to_entry - convert linear index to the sparse one 1726 + * @hw: pointer to the HW structure 1727 + * @type: type of tunnel 1728 + * @idx: linear index 1729 + * 1730 + * Stack assumes we have 2 linear tables with indexes [0, count_valid), 1731 + * but really the port table may be sprase, and types are mixed, so convert 1732 + 
* the stack index into the device index. 1733 + */ 1734 + static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type, 1735 + u16 idx) 1736 + { 1737 + u16 i; 1738 + 1739 + for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1740 + if (hw->tnl.tbl[i].valid && 1741 + hw->tnl.tbl[i].type == type && 1742 + idx--) 1743 + return i; 1744 + 1745 + WARN_ON_ONCE(1); 1746 + return 0; 1747 + } 1748 + 1749 + /** 1655 1750 * ice_create_tunnel 1656 1751 * @hw: pointer to the HW structure 1752 + * @index: device table entry 1657 1753 * @type: type of tunnel 1658 1754 * @port: port of tunnel to create 1659 1755 * ··· 1687 1731 * creating a package buffer with the tunnel info and issuing an update package 1688 1732 * command. 1689 1733 */ 1690 - enum ice_status 1691 - ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port) 1734 + static enum ice_status 1735 + ice_create_tunnel(struct ice_hw *hw, u16 index, 1736 + enum ice_tunnel_type type, u16 port) 1692 1737 { 1693 1738 struct ice_boost_tcam_section *sect_rx, *sect_tx; 1694 1739 enum ice_status status = ICE_ERR_MAX_LIMIT; 1695 1740 struct ice_buf_build *bld; 1696 - u16 index; 1697 1741 1698 1742 mutex_lock(&hw->tnl_lock); 1699 - 1700 - if (ice_tunnel_port_in_use_hlpr(hw, port, &index)) { 1701 - hw->tnl.tbl[index].ref++; 1702 - status = 0; 1703 - goto ice_create_tunnel_end; 1704 - } 1705 - 1706 - if (!ice_find_free_tunnel_entry(hw, type, &index)) { 1707 - status = ICE_ERR_OUT_OF_RANGE; 1708 - goto ice_create_tunnel_end; 1709 - } 1710 1743 1711 1744 bld = ice_pkg_buf_alloc(hw); 1712 1745 if (!bld) { ··· 1735 1790 memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam)); 1736 1791 1737 1792 status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); 1738 - if (!status) { 1793 + if (!status) 1739 1794 hw->tnl.tbl[index].port = port; 1740 - hw->tnl.tbl[index].in_use = true; 1741 - hw->tnl.tbl[index].ref = 1; 1742 - } 1743 1795 1744 1796 ice_create_tunnel_err: 1745 1797 ice_pkg_buf_free(hw, 
bld); ··· 1750 1808 /** 1751 1809 * ice_destroy_tunnel 1752 1810 * @hw: pointer to the HW structure 1811 + * @index: device table entry 1812 + * @type: type of tunnel 1753 1813 * @port: port of tunnel to destroy (ignored if the all parameter is true) 1754 - * @all: flag that states to destroy all tunnels 1755 1814 * 1756 1815 * Destroys a tunnel or all tunnels by creating an update package buffer 1757 1816 * targeting the specific updates requested and then performing an update 1758 1817 * package. 1759 1818 */ 1760 - enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all) 1819 + static enum ice_status 1820 + ice_destroy_tunnel(struct ice_hw *hw, u16 index, enum ice_tunnel_type type, 1821 + u16 port) 1761 1822 { 1762 1823 struct ice_boost_tcam_section *sect_rx, *sect_tx; 1763 1824 enum ice_status status = ICE_ERR_MAX_LIMIT; 1764 1825 struct ice_buf_build *bld; 1765 - u16 count = 0; 1766 - u16 index; 1767 - u16 size; 1768 - u16 i; 1769 1826 1770 1827 mutex_lock(&hw->tnl_lock); 1771 1828 1772 - if (!all && ice_tunnel_port_in_use_hlpr(hw, port, &index)) 1773 - if (hw->tnl.tbl[index].ref > 1) { 1774 - hw->tnl.tbl[index].ref--; 1775 - status = 0; 1776 - goto ice_destroy_tunnel_end; 1777 - } 1778 - 1779 - /* determine count */ 1780 - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1781 - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && 1782 - (all || hw->tnl.tbl[i].port == port)) 1783 - count++; 1784 - 1785 - if (!count) { 1786 - status = ICE_ERR_PARAM; 1829 + if (WARN_ON(!hw->tnl.tbl[index].valid || 1830 + hw->tnl.tbl[index].type != type || 1831 + hw->tnl.tbl[index].port != port)) { 1832 + status = ICE_ERR_OUT_OF_RANGE; 1787 1833 goto ice_destroy_tunnel_end; 1788 1834 } 1789 - 1790 - /* size of section - there is at least one entry */ 1791 - size = struct_size(sect_rx, tcam, count); 1792 1835 1793 1836 bld = ice_pkg_buf_alloc(hw); 1794 1837 if (!bld) { ··· 1786 1859 goto ice_destroy_tunnel_err; 1787 1860 1788 1861 sect_rx = 
ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM, 1789 - size); 1862 + struct_size(sect_rx, tcam, 1)); 1790 1863 if (!sect_rx) 1791 1864 goto ice_destroy_tunnel_err; 1792 1865 sect_rx->count = cpu_to_le16(1); 1793 1866 1794 1867 sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM, 1795 - size); 1868 + struct_size(sect_tx, tcam, 1)); 1796 1869 if (!sect_tx) 1797 1870 goto ice_destroy_tunnel_err; 1798 1871 sect_tx->count = cpu_to_le16(1); ··· 1800 1873 /* copy original boost entry to update package buffer, one copy to Rx 1801 1874 * section, another copy to the Tx section 1802 1875 */ 1803 - for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) 1804 - if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use && 1805 - (all || hw->tnl.tbl[i].port == port)) { 1806 - memcpy(sect_rx->tcam + i, hw->tnl.tbl[i].boost_entry, 1807 - sizeof(*sect_rx->tcam)); 1808 - memcpy(sect_tx->tcam + i, hw->tnl.tbl[i].boost_entry, 1809 - sizeof(*sect_tx->tcam)); 1810 - hw->tnl.tbl[i].marked = true; 1811 - } 1876 + memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry, 1877 + sizeof(*sect_rx->tcam)); 1878 + memcpy(sect_tx->tcam, hw->tnl.tbl[index].boost_entry, 1879 + sizeof(*sect_tx->tcam)); 1812 1880 1813 1881 status = ice_update_pkg(hw, ice_pkg_buf(bld), 1); 1814 1882 if (!status) 1815 - for (i = 0; i < hw->tnl.count && 1816 - i < ICE_TUNNEL_MAX_ENTRIES; i++) 1817 - if (hw->tnl.tbl[i].marked) { 1818 - hw->tnl.tbl[i].ref = 0; 1819 - hw->tnl.tbl[i].port = 0; 1820 - hw->tnl.tbl[i].in_use = false; 1821 - hw->tnl.tbl[i].marked = false; 1822 - } 1883 + hw->tnl.tbl[index].port = 0; 1823 1884 1824 1885 ice_destroy_tunnel_err: 1825 1886 ice_pkg_buf_free(hw, bld); ··· 1816 1901 mutex_unlock(&hw->tnl_lock); 1817 1902 1818 1903 return status; 1904 + } 1905 + 1906 + int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, 1907 + unsigned int idx, struct udp_tunnel_info *ti) 1908 + { 1909 + struct ice_netdev_priv *np = netdev_priv(netdev); 1910 + struct 
ice_vsi *vsi = np->vsi; 1911 + struct ice_pf *pf = vsi->back; 1912 + enum ice_tunnel_type tnl_type; 1913 + enum ice_status status; 1914 + u16 index; 1915 + 1916 + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; 1917 + index = ice_tunnel_idx_to_entry(&pf->hw, idx, tnl_type); 1918 + 1919 + status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port)); 1920 + if (status) { 1921 + netdev_err(netdev, "Error adding UDP tunnel - %s\n", 1922 + ice_stat_str(status)); 1923 + return -EIO; 1924 + } 1925 + 1926 + udp_tunnel_nic_set_port_priv(netdev, table, idx, index); 1927 + return 0; 1928 + } 1929 + 1930 + int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, 1931 + unsigned int idx, struct udp_tunnel_info *ti) 1932 + { 1933 + struct ice_netdev_priv *np = netdev_priv(netdev); 1934 + struct ice_vsi *vsi = np->vsi; 1935 + struct ice_pf *pf = vsi->back; 1936 + enum ice_tunnel_type tnl_type; 1937 + enum ice_status status; 1938 + 1939 + tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; 1940 + 1941 + status = ice_destroy_tunnel(&pf->hw, ti->hw_priv, tnl_type, 1942 + ntohs(ti->port)); 1943 + if (status) { 1944 + netdev_err(netdev, "Error removing UDP tunnel - %s\n", 1945 + ice_stat_str(status)); 1946 + return -EIO; 1947 + } 1948 + 1949 + return 0; 1819 1950 } 1820 1951 1821 1952 /* PTG Management */
+5 -6
drivers/net/ethernet/intel/ice/ice_flex_pipe.h
··· 19 19 #define ICE_PKG_CNT 4 20 20 21 21 bool 22 - ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type, 23 - u16 *port); 24 - enum ice_status 25 - ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port); 26 - enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all); 27 - bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index); 22 + ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); 23 + int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, 24 + unsigned int idx, struct udp_tunnel_info *ti); 25 + int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, 26 + unsigned int idx, struct udp_tunnel_info *ti); 28 27 29 28 enum ice_status 30 29 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
+2 -3
drivers/net/ethernet/intel/ice/ice_flex_type.h
··· 298 298 enum ice_tunnel_type { 299 299 TNL_VXLAN = 0, 300 300 TNL_GENEVE, 301 + __TNL_TYPE_CNT, 301 302 TNL_LAST = 0xFF, 302 303 TNL_ALL = 0xFF, 303 304 }; ··· 312 311 enum ice_tunnel_type type; 313 312 u16 boost_addr; 314 313 u16 port; 315 - u16 ref; 316 314 struct ice_boost_tcam_entry *boost_entry; 317 315 u8 valid; 318 - u8 in_use; 319 - u8 marked; 320 316 }; 321 317 322 318 #define ICE_TUNNEL_MAX_ENTRIES 16 ··· 321 323 struct ice_tunnel_table { 322 324 struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES]; 323 325 u16 count; 326 + u16 valid_count[__TNL_TYPE_CNT]; 324 327 }; 325 328 326 329 struct ice_pkg_es {
+30 -67
drivers/net/ethernet/intel/ice/ice_main.c
··· 2873 2873 } 2874 2874 2875 2875 netdev->netdev_ops = &ice_netdev_ops; 2876 + netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; 2876 2877 ice_set_ethtool_ops(netdev); 2877 2878 } 2878 2879 ··· 3979 3978 struct device *dev = &pdev->dev; 3980 3979 struct ice_pf *pf; 3981 3980 struct ice_hw *hw; 3982 - int err; 3981 + int i, err; 3983 3982 3984 3983 /* this driver uses devres, see 3985 3984 * Documentation/driver-api/driver-model/devres.rst ··· 4074 4073 4075 4074 ice_devlink_init_regions(pf); 4076 4075 4076 + pf->hw.udp_tunnel_nic.set_port = ice_udp_tunnel_set_port; 4077 + pf->hw.udp_tunnel_nic.unset_port = ice_udp_tunnel_unset_port; 4078 + pf->hw.udp_tunnel_nic.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP; 4079 + pf->hw.udp_tunnel_nic.shared = &pf->hw.udp_tunnel_shared; 4080 + i = 0; 4081 + if (pf->hw.tnl.valid_count[TNL_VXLAN]) { 4082 + pf->hw.udp_tunnel_nic.tables[i].n_entries = 4083 + pf->hw.tnl.valid_count[TNL_VXLAN]; 4084 + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = 4085 + UDP_TUNNEL_TYPE_VXLAN; 4086 + i++; 4087 + } 4088 + if (pf->hw.tnl.valid_count[TNL_GENEVE]) { 4089 + pf->hw.udp_tunnel_nic.tables[i].n_entries = 4090 + pf->hw.tnl.valid_count[TNL_GENEVE]; 4091 + pf->hw.udp_tunnel_nic.tables[i].tunnel_types = 4092 + UDP_TUNNEL_TYPE_GENEVE; 4093 + i++; 4094 + } 4095 + 4077 4096 pf->num_alloc_vsi = hw->func_caps.guar_num_vsi; 4078 4097 if (!pf->num_alloc_vsi) { 4079 4098 err = -EIO; 4080 4099 goto err_init_pf_unroll; 4100 + } 4101 + if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) { 4102 + dev_warn(&pf->pdev->dev, 4103 + "limiting the VSI count due to UDP tunnel limitation %d > %d\n", 4104 + pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES); 4105 + pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES; 4081 4106 } 4082 4107 4083 4108 pf->vsi = devm_kcalloc(dev, pf->num_alloc_vsi, sizeof(*pf->vsi), ··· 6602 6575 } 6603 6576 6604 6577 /** 6605 - * ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up 6606 - * @netdev: This 
physical port's netdev 6607 - * @ti: Tunnel endpoint information 6608 - */ 6609 - static void 6610 - ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti) 6611 - { 6612 - struct ice_netdev_priv *np = netdev_priv(netdev); 6613 - struct ice_vsi *vsi = np->vsi; 6614 - struct ice_pf *pf = vsi->back; 6615 - enum ice_tunnel_type tnl_type; 6616 - u16 port = ntohs(ti->port); 6617 - enum ice_status status; 6618 - 6619 - switch (ti->type) { 6620 - case UDP_TUNNEL_TYPE_VXLAN: 6621 - tnl_type = TNL_VXLAN; 6622 - break; 6623 - case UDP_TUNNEL_TYPE_GENEVE: 6624 - tnl_type = TNL_GENEVE; 6625 - break; 6626 - default: 6627 - netdev_err(netdev, "Unknown tunnel type\n"); 6628 - return; 6629 - } 6630 - 6631 - status = ice_create_tunnel(&pf->hw, tnl_type, port); 6632 - if (status == ICE_ERR_OUT_OF_RANGE) 6633 - netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n", 6634 - port); 6635 - else if (status) 6636 - netdev_err(netdev, "Error adding UDP tunnel - %s\n", 6637 - ice_stat_str(status)); 6638 - } 6639 - 6640 - /** 6641 - * ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away 6642 - * @netdev: This physical port's netdev 6643 - * @ti: Tunnel endpoint information 6644 - */ 6645 - static void 6646 - ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti) 6647 - { 6648 - struct ice_netdev_priv *np = netdev_priv(netdev); 6649 - struct ice_vsi *vsi = np->vsi; 6650 - struct ice_pf *pf = vsi->back; 6651 - u16 port = ntohs(ti->port); 6652 - enum ice_status status; 6653 - bool retval; 6654 - 6655 - retval = ice_tunnel_port_in_use(&pf->hw, port, NULL); 6656 - if (!retval) { 6657 - netdev_info(netdev, "port %d not found in UDP tunnels list\n", 6658 - port); 6659 - return; 6660 - } 6661 - 6662 - status = ice_destroy_tunnel(&pf->hw, port, false); 6663 - if (status) 6664 - netdev_err(netdev, "error deleting port %d from UDP tunnels list\n", 6665 - port); 6666 - } 6667 - 6668 - /** 6669 6578 * ice_open - Called when a 
network interface becomes active 6670 6579 * @netdev: network interface device structure 6671 6580 * ··· 6793 6830 .ndo_bpf = ice_xdp, 6794 6831 .ndo_xdp_xmit = ice_xdp_xmit, 6795 6832 .ndo_xsk_wakeup = ice_xsk_wakeup, 6796 - .ndo_udp_tunnel_add = ice_udp_tunnel_add, 6797 - .ndo_udp_tunnel_del = ice_udp_tunnel_del, 6833 + .ndo_udp_tunnel_add = udp_tunnel_nic_add_port, 6834 + .ndo_udp_tunnel_del = udp_tunnel_nic_del_port, 6798 6835 };
+3
drivers/net/ethernet/intel/ice/ice_type.h
··· 676 676 struct mutex tnl_lock; 677 677 struct ice_tunnel_table tnl; 678 678 679 + struct udp_tunnel_nic_shared udp_tunnel_shared; 680 + struct udp_tunnel_nic_info udp_tunnel_nic; 681 + 679 682 /* HW block tables */ 680 683 struct ice_blk_info blk[ICE_BLK_COUNT]; 681 684 struct mutex fl_profs_locks[ICE_BLK_COUNT]; /* lock fltr profiles */
+7 -1
drivers/net/netdevsim/netdevsim.h
··· 20 20 #include <linux/netdevice.h> 21 21 #include <linux/u64_stats_sync.h> 22 22 #include <net/devlink.h> 23 + #include <net/udp_tunnel.h> 23 24 #include <net/xdp.h> 24 25 25 26 #define DRV_NAME "netdevsim" ··· 85 84 struct { 86 85 u32 inject_error; 87 86 u32 sleep; 88 - u32 ports[2][NSIM_UDP_TUNNEL_N_PORTS]; 87 + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; 88 + u32 (*ports)[NSIM_UDP_TUNNEL_N_PORTS]; 89 89 struct debugfs_u32_array dfs_ports[2]; 90 90 } udp_ports; 91 91 ··· 211 209 bool fail_trap_policer_set; 212 210 bool fail_trap_policer_counter_get; 213 211 struct { 212 + struct udp_tunnel_nic_shared utn_shared; 213 + u32 __ports[2][NSIM_UDP_TUNNEL_N_PORTS]; 214 214 bool sync_all; 215 215 bool open_only; 216 216 bool ipv4_only; 217 + bool shared; 218 + bool static_iana_vxlan; 217 219 u32 sleep; 218 220 } udp_ports; 219 221 };
+29 -5
drivers/net/netdevsim/udp_tunnels.c
··· 22 22 msleep(ns->udp_ports.sleep); 23 23 24 24 if (!ret) { 25 - if (ns->udp_ports.ports[table][entry]) 25 + if (ns->udp_ports.ports[table][entry]) { 26 + WARN(1, "entry already in use\n"); 26 27 ret = -EBUSY; 27 - else 28 + } else { 28 29 ns->udp_ports.ports[table][entry] = 29 30 be16_to_cpu(ti->port) << 16 | ti->type; 31 + } 30 32 } 31 33 32 34 netdev_info(dev, "set [%d, %d] type %d family %d port %d - %d\n", ··· 52 50 if (!ret) { 53 51 u32 val = be16_to_cpu(ti->port) << 16 | ti->type; 54 52 55 - if (val == ns->udp_ports.ports[table][entry]) 53 + if (val == ns->udp_ports.ports[table][entry]) { 56 54 ns->udp_ports.ports[table][entry] = 0; 57 - else 55 + } else { 56 + WARN(1, "entry not installed %x vs %x\n", 57 + val, ns->udp_ports.ports[table][entry]); 58 58 ret = -ENOENT; 59 + } 59 60 } 60 61 61 62 netdev_info(dev, "unset [%d, %d] type %d family %d port %d - %d\n", ··· 112 107 struct net_device *dev = file->private_data; 113 108 struct netdevsim *ns = netdev_priv(dev); 114 109 115 - memset(&ns->udp_ports.ports, 0, sizeof(ns->udp_ports.ports)); 110 + memset(ns->udp_ports.ports, 0, sizeof(ns->udp_ports.__ports)); 116 111 rtnl_lock(); 117 112 udp_tunnel_nic_reset_ntf(dev); 118 113 rtnl_unlock(); ··· 131 126 { 132 127 struct netdevsim *ns = netdev_priv(dev); 133 128 struct udp_tunnel_nic_info *info; 129 + 130 + if (nsim_dev->udp_ports.shared && nsim_dev->udp_ports.open_only) { 131 + dev_err(&nsim_dev->nsim_bus_dev->dev, 132 + "shared can't be used in conjunction with open_only\n"); 133 + return -EINVAL; 134 + } 135 + 136 + if (!nsim_dev->udp_ports.shared) 137 + ns->udp_ports.ports = ns->udp_ports.__ports; 138 + else 139 + ns->udp_ports.ports = nsim_dev->udp_ports.__ports; 134 140 135 141 debugfs_create_u32("udp_ports_inject_error", 0600, 136 142 ns->nsim_dev_port->ddir, ··· 184 168 info->flags |= UDP_TUNNEL_NIC_INFO_OPEN_ONLY; 185 169 if (nsim_dev->udp_ports.ipv4_only) 186 170 info->flags |= UDP_TUNNEL_NIC_INFO_IPV4_ONLY; 171 + if (nsim_dev->udp_ports.shared) 172 
+ info->shared = &nsim_dev->udp_ports.utn_shared; 173 + if (nsim_dev->udp_ports.static_iana_vxlan) 174 + info->flags |= UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN; 187 175 188 176 dev->udp_tunnel_nic_info = info; 189 177 return 0; ··· 207 187 &nsim_dev->udp_ports.open_only); 208 188 debugfs_create_bool("udp_ports_ipv4_only", 0600, nsim_dev->ddir, 209 189 &nsim_dev->udp_ports.ipv4_only); 190 + debugfs_create_bool("udp_ports_shared", 0600, nsim_dev->ddir, 191 + &nsim_dev->udp_ports.shared); 192 + debugfs_create_bool("udp_ports_static_iana_vxlan", 0600, nsim_dev->ddir, 193 + &nsim_dev->udp_ports.static_iana_vxlan); 210 194 debugfs_create_u32("udp_ports_sleep", 0600, nsim_dev->ddir, 211 195 &nsim_dev->udp_ports.sleep); 212 196 }
+24
include/net/udp_tunnel.h
··· 200 200 UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN = BIT(3), 201 201 }; 202 202 203 + struct udp_tunnel_nic; 204 + 205 + #define UDP_TUNNEL_NIC_MAX_SHARING_DEVICES (U16_MAX / 2) 206 + 207 + struct udp_tunnel_nic_shared { 208 + struct udp_tunnel_nic *udp_tunnel_nic_info; 209 + 210 + struct list_head devices; 211 + }; 212 + 213 + struct udp_tunnel_nic_shared_node { 214 + struct net_device *dev; 215 + struct list_head list; 216 + }; 217 + 203 218 /** 204 219 * struct udp_tunnel_nic_info - driver UDP tunnel offload information 205 220 * @set_port: callback for adding a new port 206 221 * @unset_port: callback for removing a port 207 222 * @sync_table: callback for syncing the entire port table at once 223 + * @shared: reference to device global state (optional) 208 224 * @flags: device flags from enum udp_tunnel_nic_info_flags 209 225 * @tables: UDP port tables this device has 210 226 * @tables.n_entries: number of entries in this table ··· 228 212 * 229 213 * Drivers are expected to provide either @set_port and @unset_port callbacks 230 214 * or the @sync_table callback. Callbacks are invoked with rtnl lock held. 215 + * 216 + * Devices which (misguidedly) share the UDP tunnel port table across multiple 217 + * netdevs should allocate an instance of struct udp_tunnel_nic_shared and 218 + * point @shared at it. 219 + * There must never be more than %UDP_TUNNEL_NIC_MAX_SHARING_DEVICES devices 220 + * sharing a table. 231 221 * 232 222 * Known limitations: 233 223 * - UDP tunnel port notifications are fundamentally best-effort - ··· 255 233 256 234 /* all at once */ 257 235 int (*sync_table)(struct net_device *dev, unsigned int table); 236 + 237 + struct udp_tunnel_nic_shared *shared; 258 238 259 239 unsigned int flags; 260 240
+86 -10
net/ipv4/udp_tunnel_nic.c
··· 19 19 struct udp_tunnel_nic_table_entry { 20 20 __be16 port; 21 21 u8 type; 22 - u8 use_cnt; 23 22 u8 flags; 23 + u16 use_cnt; 24 + #define UDP_TUNNEL_NIC_USE_CNT_MAX U16_MAX 24 25 u8 hw_priv; 25 26 }; 26 27 ··· 371 370 bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 372 371 unsigned int from, to; 373 372 373 + WARN_ON(entry->use_cnt + (u32)use_cnt_adj > U16_MAX); 374 + 374 375 /* If not going from used to unused or vice versa - all done. 375 376 * For dodgy entries make sure we try to sync again (queue the entry). 376 377 */ ··· 678 675 udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) 679 676 { 680 677 const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 678 + struct udp_tunnel_nic_shared_node *node; 681 679 unsigned int i, j; 682 680 683 681 /* Freeze all the ports we are already tracking so that the replay ··· 690 686 utn->missed = 0; 691 687 utn->need_replay = 0; 692 688 693 - udp_tunnel_get_rx_info(dev); 689 + if (!info->shared) { 690 + udp_tunnel_get_rx_info(dev); 691 + } else { 692 + list_for_each_entry(node, &info->shared->devices, list) 693 + udp_tunnel_get_rx_info(node->dev); 694 + } 694 695 695 696 for (i = 0; i < utn->n_tables; i++) 696 697 for (j = 0; j < info->tables[i].n_entries; j++) ··· 751 742 return NULL; 752 743 } 753 744 745 + static void udp_tunnel_nic_free(struct udp_tunnel_nic *utn) 746 + { 747 + unsigned int i; 748 + 749 + for (i = 0; i < utn->n_tables; i++) 750 + kfree(utn->entries[i]); 751 + kfree(utn->entries); 752 + kfree(utn); 753 + } 754 + 754 755 static int udp_tunnel_nic_register(struct net_device *dev) 755 756 { 756 757 const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 758 + struct udp_tunnel_nic_shared_node *node = NULL; 757 759 struct udp_tunnel_nic *utn; 758 760 unsigned int n_tables, i; 759 761 760 762 BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < 761 763 UDP_TUNNEL_NIC_MAX_TABLES); 764 + /* Expect use count of at most 2 (IPv4, IPv6) per device */ 765 + 
BUILD_BUG_ON(UDP_TUNNEL_NIC_USE_CNT_MAX < 766 + UDP_TUNNEL_NIC_MAX_SHARING_DEVICES * 2); 762 767 768 + /* Check that the driver info is sane */ 763 769 if (WARN_ON(!info->set_port != !info->unset_port) || 764 770 WARN_ON(!info->set_port == !info->sync_table) || 765 771 WARN_ON(!info->tables[0].n_entries)) 772 + return -EINVAL; 773 + 774 + if (WARN_ON(info->shared && 775 + info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) 766 776 return -EINVAL; 767 777 768 778 n_tables = 1; ··· 794 766 return -EINVAL; 795 767 } 796 768 797 - utn = udp_tunnel_nic_alloc(info, n_tables); 798 - if (!utn) 799 - return -ENOMEM; 769 + /* Create UDP tunnel state structures */ 770 + if (info->shared) { 771 + node = kzalloc(sizeof(*node), GFP_KERNEL); 772 + if (!node) 773 + return -ENOMEM; 774 + 775 + node->dev = dev; 776 + } 777 + 778 + if (info->shared && info->shared->udp_tunnel_nic_info) { 779 + utn = info->shared->udp_tunnel_nic_info; 780 + } else { 781 + utn = udp_tunnel_nic_alloc(info, n_tables); 782 + if (!utn) { 783 + kfree(node); 784 + return -ENOMEM; 785 + } 786 + } 787 + 788 + if (info->shared) { 789 + if (!info->shared->udp_tunnel_nic_info) { 790 + INIT_LIST_HEAD(&info->shared->devices); 791 + info->shared->udp_tunnel_nic_info = utn; 792 + } 793 + 794 + list_add_tail(&node->list, &info->shared->devices); 795 + } 800 796 801 797 utn->dev = dev; 802 798 dev_hold(dev); ··· 835 783 static void 836 784 udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) 837 785 { 838 - unsigned int i; 786 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 787 + 788 + /* For a shared table remove this dev from the list of sharing devices 789 + * and if there are other devices just detach. 
790 + */ 791 + if (info->shared) { 792 + struct udp_tunnel_nic_shared_node *node, *first; 793 + 794 + list_for_each_entry(node, &info->shared->devices, list) 795 + if (node->dev == dev) 796 + break; 797 + if (node->dev != dev) 798 + return; 799 + 800 + list_del(&node->list); 801 + kfree(node); 802 + 803 + first = list_first_entry_or_null(&info->shared->devices, 804 + typeof(*first), list); 805 + if (first) { 806 + udp_tunnel_drop_rx_info(dev); 807 + utn->dev = first->dev; 808 + goto release_dev; 809 + } 810 + 811 + info->shared->udp_tunnel_nic_info = NULL; 812 + } 839 813 840 814 /* Flush before we check work, so we don't waste time adding entries 841 815 * from the work which we will boot immediately. ··· 874 796 if (utn->work_pending) 875 797 return; 876 798 877 - for (i = 0; i < utn->n_tables; i++) 878 - kfree(utn->entries[i]); 879 - kfree(utn->entries); 880 - kfree(utn); 799 + udp_tunnel_nic_free(utn); 800 + release_dev: 881 801 dev->udp_tunnel_nic = NULL; 882 802 dev_put(dev); 883 803 }
+167
tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
··· 7 7 NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID 8 8 NSIM_NETDEV= 9 9 HAS_ETHTOOL= 10 + STATIC_ENTRIES= 10 11 EXIT_STATUS=0 11 12 num_cases=0 12 13 num_errors=0 ··· 194 193 sleep 0.02 195 194 ((retries--)) 196 195 done 196 + 197 + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then 198 + fail=0 199 + for i in "${!STATIC_ENTRIES[@]}"; do 200 + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` 201 + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") 202 + if [ $cnt -ne 1 ]; then 203 + err_cnt "ethtool static entry: $pfx - $msg" 204 + echo " check_table: ethtool does not contain '$pp_expected'" 205 + ethtool --show-tunnels $NSIM_NETDEV 206 + fail=1 207 + fi 208 + done 209 + [ $fail == 0 ] && pass_cnt 210 + fi 197 211 } 198 212 199 213 function print_table { ··· 790 774 exp0=( 0 0 0 0 ) 791 775 exp1=( 0 0 0 0 ) 792 776 done 777 + 778 + cleanup_nsim 779 + 780 + # shared port tables 781 + pfx="table sharing" 782 + 783 + echo $NSIM_ID > /sys/bus/netdevsim/new_device 784 + echo 0 > $NSIM_DEV_SYS/del_port 785 + 786 + echo 0 > $NSIM_DEV_DFS/udp_ports_open_only 787 + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep 788 + echo 1 > $NSIM_DEV_DFS/udp_ports_shared 789 + 790 + old_netdevs=$(ls /sys/class/net) 791 + echo 1 > $NSIM_DEV_SYS/new_port 792 + NSIM_NETDEV=`get_netdev_name old_netdevs` 793 + old_netdevs=$(ls /sys/class/net) 794 + echo 2 > $NSIM_DEV_SYS/new_port 795 + NSIM_NETDEV2=`get_netdev_name old_netdevs` 796 + 797 + msg="VxLAN v4 devices" 798 + exp0=( `mke 4789 1` 0 0 0 ) 799 + exp1=( 0 0 0 0 ) 800 + new_vxlan vxlan0 4789 $NSIM_NETDEV 801 + new_vxlan vxlan1 4789 $NSIM_NETDEV2 802 + 803 + msg="VxLAN v4 devices go down" 804 + exp0=( 0 0 0 0 ) 805 + ifconfig vxlan1 down 806 + ifconfig vxlan0 down 807 + check_tables 808 + 809 + for ifc in vxlan0 vxlan1; do 810 + ifconfig $ifc up 811 + done 812 + 813 + msg="VxLAN v6 device" 814 + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) 815 + new_vxlan vxlanC 4790 $NSIM_NETDEV 6 816 + 817 + msg="Geneve device" 
818 + exp1=( `mke 6081 2` 0 0 0 ) 819 + new_geneve gnv0 6081 820 + 821 + msg="NIC device goes down" 822 + ifconfig $NSIM_NETDEV down 823 + check_tables 824 + 825 + msg="NIC device goes up again" 826 + ifconfig $NSIM_NETDEV up 827 + check_tables 828 + 829 + for i in `seq 2`; do 830 + msg="turn feature off - 1, rep $i" 831 + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off 832 + check_tables 833 + 834 + msg="turn feature off - 2, rep $i" 835 + exp0=( 0 0 0 0 ) 836 + exp1=( 0 0 0 0 ) 837 + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off 838 + check_tables 839 + 840 + msg="turn feature on - 1, rep $i" 841 + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) 842 + exp1=( `mke 6081 2` 0 0 0 ) 843 + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on 844 + check_tables 845 + 846 + msg="turn feature on - 2, rep $i" 847 + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on 848 + check_tables 849 + done 850 + 851 + msg="tunnels destroyed 1" 852 + cleanup_tuns 853 + exp0=( 0 0 0 0 ) 854 + exp1=( 0 0 0 0 ) 855 + check_tables 856 + 857 + overflow_table0 "overflow NIC table" 858 + 859 + msg="re-add a port" 860 + 861 + echo 2 > $NSIM_DEV_SYS/del_port 862 + echo 2 > $NSIM_DEV_SYS/new_port 863 + check_tables 864 + 865 + msg="replace VxLAN in overflow table" 866 + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) 867 + del_dev vxlan1 868 + 869 + msg="vacate VxLAN in overflow table" 870 + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) 871 + del_dev vxlan2 872 + 873 + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset 874 + check_tables 875 + 876 + msg="tunnels destroyed 2" 877 + cleanup_tuns 878 + exp0=( 0 0 0 0 ) 879 + exp1=( 0 0 0 0 ) 880 + check_tables 881 + 882 + echo 1 > $NSIM_DEV_SYS/del_port 883 + echo 2 > $NSIM_DEV_SYS/del_port 884 + 885 + cleanup_nsim 886 + 887 + # Static IANA port 888 + pfx="static IANA vxlan" 889 + 890 + echo $NSIM_ID > /sys/bus/netdevsim/new_device 891 + echo 0 > $NSIM_DEV_SYS/del_port 892 + 893 + echo 1 > 
$NSIM_DEV_DFS/udp_ports_static_iana_vxlan 894 + STATIC_ENTRIES=( `mke 4789 1` ) 895 + 896 + port=1 897 + old_netdevs=$(ls /sys/class/net) 898 + echo $port > $NSIM_DEV_SYS/new_port 899 + NSIM_NETDEV=`get_netdev_name old_netdevs` 900 + 901 + msg="check empty" 902 + exp0=( 0 0 0 0 ) 903 + exp1=( 0 0 0 0 ) 904 + check_tables 905 + 906 + msg="add on static port" 907 + new_vxlan vxlan0 4789 $NSIM_NETDEV 908 + new_vxlan vxlan1 4789 $NSIM_NETDEV 909 + 910 + msg="add on different port" 911 + exp0=( `mke 4790 1` 0 0 0 ) 912 + new_vxlan vxlan2 4790 $NSIM_NETDEV 913 + 914 + cleanup_tuns 915 + 916 + msg="tunnels destroyed" 917 + exp0=( 0 0 0 0 ) 918 + exp1=( 0 0 0 0 ) 919 + check_tables 920 + 921 + msg="different type" 922 + new_geneve gnv0 4789 923 + 924 + cleanup_tuns 925 + cleanup_nsim 926 + 927 + # END 793 928 794 929 modprobe -r netdevsim 795 930