Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp_tunnel: add central NIC RX port offload infrastructure

Cater to devices which:
(a) may want to sleep in the callbacks;
(b) only have IPv4 support;
(c) need all the programming to happen while the netdev is up.

Drivers attach UDP tunnel offload info struct to their netdevs,
where they declare how many UDP ports of various tunnel types
they support. Core takes care of tracking which ports to offload.

Use a fixed-size array since this matches what almost all drivers
do, and avoids complexity and uncertainty around memory allocations
in an atomic context.

Make sure that tunnel drivers don't try to replay the ports when
a new NIC netdev is registered. Automatic replays would mess up
reference counting, and will be removed completely once all drivers
are converted.

v4:
- define udp_tunnel_nic_ops as a NULL constant via #define to avoid build issues with CONFIG_INET=n.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jakub Kicinski and committed by
David S. Miller
cc4e3835 84a4160e

+983 -5
+4 -2
drivers/net/geneve.c
··· 1796 1796 event == NETDEV_UDP_TUNNEL_DROP_INFO) { 1797 1797 geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); 1798 1798 } else if (event == NETDEV_UNREGISTER) { 1799 - geneve_offload_rx_ports(dev, false); 1799 + if (!dev->udp_tunnel_nic_info) 1800 + geneve_offload_rx_ports(dev, false); 1800 1801 } else if (event == NETDEV_REGISTER) { 1801 - geneve_offload_rx_ports(dev, true); 1802 + if (!dev->udp_tunnel_nic_info) 1803 + geneve_offload_rx_ports(dev, true); 1802 1804 } 1803 1805 1804 1806 return NOTIFY_DONE;
+4 -2
drivers/net/vxlan.c
··· 4477 4477 struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); 4478 4478 4479 4479 if (event == NETDEV_UNREGISTER) { 4480 - vxlan_offload_rx_ports(dev, false); 4480 + if (!dev->udp_tunnel_nic_info) 4481 + vxlan_offload_rx_ports(dev, false); 4481 4482 vxlan_handle_lowerdev_unregister(vn, dev); 4482 4483 } else if (event == NETDEV_REGISTER) { 4483 - vxlan_offload_rx_ports(dev, true); 4484 + if (!dev->udp_tunnel_nic_info) 4485 + vxlan_offload_rx_ports(dev, true); 4484 4486 } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || 4485 4487 event == NETDEV_UDP_TUNNEL_DROP_INFO) { 4486 4488 vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
+8
include/linux/netdevice.h
··· 65 65 struct mpls_dev; 66 66 /* UDP Tunnel offloads */ 67 67 struct udp_tunnel_info; 68 + struct udp_tunnel_nic_info; 69 + struct udp_tunnel_nic; 68 70 struct bpf_prog; 69 71 struct xdp_buff; 70 72 ··· 1838 1836 * 1839 1837 * @macsec_ops: MACsec offloading ops 1840 1838 * 1839 + * @udp_tunnel_nic_info: static structure describing the UDP tunnel 1840 + * offload capabilities of the device 1841 + * @udp_tunnel_nic: UDP tunnel offload state 1842 + * 1841 1843 * FIXME: cleanup struct net_device such that network protocol info 1842 1844 * moves out. 1843 1845 */ ··· 2140 2134 /* MACsec management functions */ 2141 2135 const struct macsec_ops *macsec_ops; 2142 2136 #endif 2137 + const struct udp_tunnel_nic_info *udp_tunnel_nic_info; 2138 + struct udp_tunnel_nic *udp_tunnel_nic; 2143 2139 }; 2144 2140 #define to_net_dev(d) container_of(d, struct net_device, dev) 2145 2141
+137
include/net/udp_tunnel.h
··· 115 115 unsigned short type; 116 116 sa_family_t sa_family; 117 117 __be16 port; 118 + u8 hw_priv; 118 119 }; 119 120 120 121 /* Notify network devices of offloadable types */ ··· 182 181 udp_encap_enable(); 183 182 } 184 183 184 + #define UDP_TUNNEL_NIC_MAX_TABLES 4 185 + 186 + enum udp_tunnel_nic_info_flags { 187 + /* Device callbacks may sleep */ 188 + UDP_TUNNEL_NIC_INFO_MAY_SLEEP = BIT(0), 189 + /* Device only supports offloads when it's open, all ports 190 + * will be removed before close and re-added after open. 191 + */ 192 + UDP_TUNNEL_NIC_INFO_OPEN_ONLY = BIT(1), 193 + /* Device supports only IPv4 tunnels */ 194 + UDP_TUNNEL_NIC_INFO_IPV4_ONLY = BIT(2), 195 + }; 196 + 197 + /** 198 + * struct udp_tunnel_nic_info - driver UDP tunnel offload information 199 + * @set_port: callback for adding a new port 200 + * @unset_port: callback for removing a port 201 + * @sync_table: callback for syncing the entire port table at once 202 + * @flags: device flags from enum udp_tunnel_nic_info_flags 203 + * @tables: UDP port tables this device has 204 + * @tables.n_entries: number of entries in this table 205 + * @tables.tunnel_types: types of tunnels this table accepts 206 + * 207 + * Drivers are expected to provide either @set_port and @unset_port callbacks 208 + * or the @sync_table callback. Callbacks are invoked with rtnl lock held. 209 + * 210 + * Known limitations: 211 + * - UDP tunnel port notifications are fundamentally best-effort - 212 + * it is likely the driver will both see skbs which use a UDP tunnel port, 213 + * while not being a tunneled skb, and tunnel skbs from other ports - 214 + * drivers should only use these ports for non-critical RX-side offloads, 215 + * e.g. the checksum offload; 216 + * - none of the devices care about the socket family at present, so we don't 217 + * track it. Please extend this code if you care. 
218 + */ 219 + struct udp_tunnel_nic_info { 220 + /* one-by-one */ 221 + int (*set_port)(struct net_device *dev, 222 + unsigned int table, unsigned int entry, 223 + struct udp_tunnel_info *ti); 224 + int (*unset_port)(struct net_device *dev, 225 + unsigned int table, unsigned int entry, 226 + struct udp_tunnel_info *ti); 227 + 228 + /* all at once */ 229 + int (*sync_table)(struct net_device *dev, unsigned int table); 230 + 231 + unsigned int flags; 232 + 233 + struct udp_tunnel_nic_table_info { 234 + unsigned int n_entries; 235 + unsigned int tunnel_types; 236 + } tables[UDP_TUNNEL_NIC_MAX_TABLES]; 237 + }; 238 + 239 + /* UDP tunnel module dependencies 240 + * 241 + * Tunnel drivers are expected to have a hard dependency on the udp_tunnel 242 + * module. NIC drivers are not, they just attach their 243 + * struct udp_tunnel_nic_info to the netdev and wait for callbacks to come. 244 + * Loading a tunnel driver will cause the udp_tunnel module to be loaded 245 + * and only then will all the required state structures be allocated. 246 + * Since we want a weak dependency from the drivers and the core to udp_tunnel 247 + * we call things through the following stubs. 
248 + */ 249 + struct udp_tunnel_nic_ops { 250 + void (*get_port)(struct net_device *dev, unsigned int table, 251 + unsigned int idx, struct udp_tunnel_info *ti); 252 + void (*set_port_priv)(struct net_device *dev, unsigned int table, 253 + unsigned int idx, u8 priv); 254 + void (*add_port)(struct net_device *dev, struct udp_tunnel_info *ti); 255 + void (*del_port)(struct net_device *dev, struct udp_tunnel_info *ti); 256 + void (*reset_ntf)(struct net_device *dev); 257 + }; 258 + 259 + #ifdef CONFIG_INET 260 + extern const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; 261 + #else 262 + #define udp_tunnel_nic_ops ((struct udp_tunnel_nic_ops *)NULL) 263 + #endif 264 + 265 + static inline void 266 + udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, 267 + unsigned int idx, struct udp_tunnel_info *ti) 268 + { 269 + /* This helper is used from .sync_table, we indicate empty entries 270 + * by zero'ed @ti. Drivers which need to know the details of a port 271 + * when it gets deleted should use the .set_port / .unset_port 272 + * callbacks. 273 + * Zero out here, otherwise !CONFIG_INET causes uninitialized warnings. 
274 + */ 275 + memset(ti, 0, sizeof(*ti)); 276 + 277 + if (udp_tunnel_nic_ops) 278 + udp_tunnel_nic_ops->get_port(dev, table, idx, ti); 279 + } 280 + 281 + static inline void 282 + udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, 283 + unsigned int idx, u8 priv) 284 + { 285 + if (udp_tunnel_nic_ops) 286 + udp_tunnel_nic_ops->set_port_priv(dev, table, idx, priv); 287 + } 288 + 289 + static inline void 290 + udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) 291 + { 292 + if (udp_tunnel_nic_ops) 293 + udp_tunnel_nic_ops->add_port(dev, ti); 294 + } 295 + 296 + static inline void 297 + udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) 298 + { 299 + if (udp_tunnel_nic_ops) 300 + udp_tunnel_nic_ops->del_port(dev, ti); 301 + } 302 + 303 + /** 304 + * udp_tunnel_nic_reset_ntf() - device-originating reset notification 305 + * @dev: network interface device structure 306 + * 307 + * Called by the driver to inform the core that the entire UDP tunnel port 308 + * state has been lost, usually due to device reset. Core will assume device 309 + * forgot all the ports and issue .set_port and .sync_table callbacks as 310 + * necessary. 311 + * 312 + * This function must be called with rtnl lock held, and will issue all 313 + * the callbacks before returning. 314 + */ 315 + static inline void udp_tunnel_nic_reset_ntf(struct net_device *dev) 316 + { 317 + if (udp_tunnel_nic_ops) 318 + udp_tunnel_nic_ops->reset_ntf(dev); 319 + } 185 320 #endif
+2 -1
net/ipv4/Makefile
··· 14 14 udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ 15 15 fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ 16 16 inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ 17 - metrics.o netlink.o nexthop.o 17 + metrics.o netlink.o nexthop.o udp_tunnel_stub.o 18 18 19 19 obj-$(CONFIG_BPFILTER) += bpfilter/ 20 20 ··· 29 29 obj-$(CONFIG_NET_FOU) += fou.o 30 30 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o 31 31 obj-$(CONFIG_NET_IPGRE) += ip_gre.o 32 + udp_tunnel-y := udp_tunnel_core.o udp_tunnel_nic.o 32 33 obj-$(CONFIG_NET_UDP_TUNNEL) += udp_tunnel.o 33 34 obj-$(CONFIG_NET_IPVTI) += ip_vti.o 34 35 obj-$(CONFIG_SYN_COOKIES) += syncookies.o
net/ipv4/udp_tunnel.c net/ipv4/udp_tunnel_core.c
+821
net/ipv4/udp_tunnel_nic.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + // Copyright (c) 2020 Facebook Inc. 3 + 4 + #include <linux/netdevice.h> 5 + #include <linux/slab.h> 6 + #include <linux/types.h> 7 + #include <linux/workqueue.h> 8 + #include <net/udp_tunnel.h> 9 + 10 + enum udp_tunnel_nic_table_entry_flags { 11 + UDP_TUNNEL_NIC_ENTRY_ADD = BIT(0), 12 + UDP_TUNNEL_NIC_ENTRY_DEL = BIT(1), 13 + UDP_TUNNEL_NIC_ENTRY_OP_FAIL = BIT(2), 14 + UDP_TUNNEL_NIC_ENTRY_FROZEN = BIT(3), 15 + }; 16 + 17 + struct udp_tunnel_nic_table_entry { 18 + __be16 port; 19 + u8 type; 20 + u8 use_cnt; 21 + u8 flags; 22 + u8 hw_priv; 23 + }; 24 + 25 + /** 26 + * struct udp_tunnel_nic - UDP tunnel port offload state 27 + * @work: async work for talking to hardware from process context 28 + * @dev: netdev pointer 29 + * @need_sync: at least one port start changed 30 + * @need_replay: space was freed, we need a replay of all ports 31 + * @work_pending: @work is currently scheduled 32 + * @n_tables: number of tables under @entries 33 + * @missed: bitmap of tables which overflown 34 + * @entries: table of tables of ports currently offloaded 35 + */ 36 + struct udp_tunnel_nic { 37 + struct work_struct work; 38 + 39 + struct net_device *dev; 40 + 41 + u8 need_sync:1; 42 + u8 need_replay:1; 43 + u8 work_pending:1; 44 + 45 + unsigned int n_tables; 46 + unsigned long missed; 47 + struct udp_tunnel_nic_table_entry **entries; 48 + }; 49 + 50 + /* We ensure all work structs are done using driver state, but not the code. 51 + * We need a workqueue we can flush before module gets removed. 
52 + */ 53 + static struct workqueue_struct *udp_tunnel_nic_workqueue; 54 + 55 + static const char *udp_tunnel_nic_tunnel_type_name(unsigned int type) 56 + { 57 + switch (type) { 58 + case UDP_TUNNEL_TYPE_VXLAN: 59 + return "vxlan"; 60 + case UDP_TUNNEL_TYPE_GENEVE: 61 + return "geneve"; 62 + case UDP_TUNNEL_TYPE_VXLAN_GPE: 63 + return "vxlan-gpe"; 64 + default: 65 + return "unknown"; 66 + } 67 + } 68 + 69 + static bool 70 + udp_tunnel_nic_entry_is_free(struct udp_tunnel_nic_table_entry *entry) 71 + { 72 + return entry->use_cnt == 0 && !entry->flags; 73 + } 74 + 75 + static bool 76 + udp_tunnel_nic_entry_is_frozen(struct udp_tunnel_nic_table_entry *entry) 77 + { 78 + return entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN; 79 + } 80 + 81 + static void 82 + udp_tunnel_nic_entry_freeze_used(struct udp_tunnel_nic_table_entry *entry) 83 + { 84 + if (!udp_tunnel_nic_entry_is_free(entry)) 85 + entry->flags |= UDP_TUNNEL_NIC_ENTRY_FROZEN; 86 + } 87 + 88 + static void 89 + udp_tunnel_nic_entry_unfreeze(struct udp_tunnel_nic_table_entry *entry) 90 + { 91 + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_FROZEN; 92 + } 93 + 94 + static bool 95 + udp_tunnel_nic_entry_is_queued(struct udp_tunnel_nic_table_entry *entry) 96 + { 97 + return entry->flags & (UDP_TUNNEL_NIC_ENTRY_ADD | 98 + UDP_TUNNEL_NIC_ENTRY_DEL); 99 + } 100 + 101 + static void 102 + udp_tunnel_nic_entry_queue(struct udp_tunnel_nic *utn, 103 + struct udp_tunnel_nic_table_entry *entry, 104 + unsigned int flag) 105 + { 106 + entry->flags |= flag; 107 + utn->need_sync = 1; 108 + } 109 + 110 + static void 111 + udp_tunnel_nic_ti_from_entry(struct udp_tunnel_nic_table_entry *entry, 112 + struct udp_tunnel_info *ti) 113 + { 114 + memset(ti, 0, sizeof(*ti)); 115 + ti->port = entry->port; 116 + ti->type = entry->type; 117 + ti->hw_priv = entry->hw_priv; 118 + } 119 + 120 + static bool 121 + udp_tunnel_nic_is_empty(struct net_device *dev, struct udp_tunnel_nic *utn) 122 + { 123 + const struct udp_tunnel_nic_info *info = 
dev->udp_tunnel_nic_info; 124 + unsigned int i, j; 125 + 126 + for (i = 0; i < utn->n_tables; i++) 127 + for (j = 0; j < info->tables[i].n_entries; j++) 128 + if (!udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) 129 + return false; 130 + return true; 131 + } 132 + 133 + static bool 134 + udp_tunnel_nic_should_replay(struct net_device *dev, struct udp_tunnel_nic *utn) 135 + { 136 + const struct udp_tunnel_nic_table_info *table; 137 + unsigned int i, j; 138 + 139 + if (!utn->missed) 140 + return false; 141 + 142 + for (i = 0; i < utn->n_tables; i++) { 143 + table = &dev->udp_tunnel_nic_info->tables[i]; 144 + if (!test_bit(i, &utn->missed)) 145 + continue; 146 + 147 + for (j = 0; j < table->n_entries; j++) 148 + if (udp_tunnel_nic_entry_is_free(&utn->entries[i][j])) 149 + return true; 150 + } 151 + 152 + return false; 153 + } 154 + 155 + static void 156 + __udp_tunnel_nic_get_port(struct net_device *dev, unsigned int table, 157 + unsigned int idx, struct udp_tunnel_info *ti) 158 + { 159 + struct udp_tunnel_nic_table_entry *entry; 160 + struct udp_tunnel_nic *utn; 161 + 162 + utn = dev->udp_tunnel_nic; 163 + entry = &utn->entries[table][idx]; 164 + 165 + if (entry->use_cnt) 166 + udp_tunnel_nic_ti_from_entry(entry, ti); 167 + } 168 + 169 + static void 170 + __udp_tunnel_nic_set_port_priv(struct net_device *dev, unsigned int table, 171 + unsigned int idx, u8 priv) 172 + { 173 + dev->udp_tunnel_nic->entries[table][idx].hw_priv = priv; 174 + } 175 + 176 + static void 177 + udp_tunnel_nic_entry_update_done(struct udp_tunnel_nic_table_entry *entry, 178 + int err) 179 + { 180 + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 181 + 182 + WARN_ON_ONCE(entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && 183 + entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL); 184 + 185 + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD && 186 + (!err || (err == -EEXIST && dodgy))) 187 + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_ADD; 188 + 189 + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_DEL && 190 + (!err || 
(err == -ENOENT && dodgy))) 191 + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_DEL; 192 + 193 + if (!err) 194 + entry->flags &= ~UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 195 + else 196 + entry->flags |= UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 197 + } 198 + 199 + static void 200 + udp_tunnel_nic_device_sync_one(struct net_device *dev, 201 + struct udp_tunnel_nic *utn, 202 + unsigned int table, unsigned int idx) 203 + { 204 + struct udp_tunnel_nic_table_entry *entry; 205 + struct udp_tunnel_info ti; 206 + int err; 207 + 208 + entry = &utn->entries[table][idx]; 209 + if (!udp_tunnel_nic_entry_is_queued(entry)) 210 + return; 211 + 212 + udp_tunnel_nic_ti_from_entry(entry, &ti); 213 + if (entry->flags & UDP_TUNNEL_NIC_ENTRY_ADD) 214 + err = dev->udp_tunnel_nic_info->set_port(dev, table, idx, &ti); 215 + else 216 + err = dev->udp_tunnel_nic_info->unset_port(dev, table, idx, 217 + &ti); 218 + udp_tunnel_nic_entry_update_done(entry, err); 219 + 220 + if (err) 221 + netdev_warn(dev, 222 + "UDP tunnel port sync failed port %d type %s: %d\n", 223 + be16_to_cpu(entry->port), 224 + udp_tunnel_nic_tunnel_type_name(entry->type), 225 + err); 226 + } 227 + 228 + static void 229 + udp_tunnel_nic_device_sync_by_port(struct net_device *dev, 230 + struct udp_tunnel_nic *utn) 231 + { 232 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 233 + unsigned int i, j; 234 + 235 + for (i = 0; i < utn->n_tables; i++) 236 + for (j = 0; j < info->tables[i].n_entries; j++) 237 + udp_tunnel_nic_device_sync_one(dev, utn, i, j); 238 + } 239 + 240 + static void 241 + udp_tunnel_nic_device_sync_by_table(struct net_device *dev, 242 + struct udp_tunnel_nic *utn) 243 + { 244 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 245 + unsigned int i, j; 246 + int err; 247 + 248 + for (i = 0; i < utn->n_tables; i++) { 249 + /* Find something that needs sync in this table */ 250 + for (j = 0; j < info->tables[i].n_entries; j++) 251 + if (udp_tunnel_nic_entry_is_queued(&utn->entries[i][j])) 252 + break; 
253 + if (j == info->tables[i].n_entries) 254 + continue; 255 + 256 + err = info->sync_table(dev, i); 257 + if (err) 258 + netdev_warn(dev, "UDP tunnel port sync failed for table %d: %d\n", 259 + i, err); 260 + 261 + for (j = 0; j < info->tables[i].n_entries; j++) { 262 + struct udp_tunnel_nic_table_entry *entry; 263 + 264 + entry = &utn->entries[i][j]; 265 + if (udp_tunnel_nic_entry_is_queued(entry)) 266 + udp_tunnel_nic_entry_update_done(entry, err); 267 + } 268 + } 269 + } 270 + 271 + static void 272 + __udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) 273 + { 274 + if (!utn->need_sync) 275 + return; 276 + 277 + if (dev->udp_tunnel_nic_info->sync_table) 278 + udp_tunnel_nic_device_sync_by_table(dev, utn); 279 + else 280 + udp_tunnel_nic_device_sync_by_port(dev, utn); 281 + 282 + utn->need_sync = 0; 283 + /* Can't replay directly here, in case we come from the tunnel driver's 284 + * notification - trying to replay may deadlock inside tunnel driver. 285 + */ 286 + utn->need_replay = udp_tunnel_nic_should_replay(dev, utn); 287 + } 288 + 289 + static void 290 + udp_tunnel_nic_device_sync(struct net_device *dev, struct udp_tunnel_nic *utn) 291 + { 292 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 293 + bool may_sleep; 294 + 295 + if (!utn->need_sync) 296 + return; 297 + 298 + /* Drivers which sleep in the callback need to update from 299 + * the workqueue, if we come from the tunnel driver's notification. 
300 + */ 301 + may_sleep = info->flags & UDP_TUNNEL_NIC_INFO_MAY_SLEEP; 302 + if (!may_sleep) 303 + __udp_tunnel_nic_device_sync(dev, utn); 304 + if (may_sleep || utn->need_replay) { 305 + queue_work(udp_tunnel_nic_workqueue, &utn->work); 306 + utn->work_pending = 1; 307 + } 308 + } 309 + 310 + static bool 311 + udp_tunnel_nic_table_is_capable(const struct udp_tunnel_nic_table_info *table, 312 + struct udp_tunnel_info *ti) 313 + { 314 + return table->tunnel_types & ti->type; 315 + } 316 + 317 + static bool 318 + udp_tunnel_nic_is_capable(struct net_device *dev, struct udp_tunnel_nic *utn, 319 + struct udp_tunnel_info *ti) 320 + { 321 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 322 + unsigned int i; 323 + 324 + /* Special case IPv4-only NICs */ 325 + if (info->flags & UDP_TUNNEL_NIC_INFO_IPV4_ONLY && 326 + ti->sa_family != AF_INET) 327 + return false; 328 + 329 + for (i = 0; i < utn->n_tables; i++) 330 + if (udp_tunnel_nic_table_is_capable(&info->tables[i], ti)) 331 + return true; 332 + return false; 333 + } 334 + 335 + static int 336 + udp_tunnel_nic_has_collision(struct net_device *dev, struct udp_tunnel_nic *utn, 337 + struct udp_tunnel_info *ti) 338 + { 339 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 340 + struct udp_tunnel_nic_table_entry *entry; 341 + unsigned int i, j; 342 + 343 + for (i = 0; i < utn->n_tables; i++) 344 + for (j = 0; j < info->tables[i].n_entries; j++) { 345 + entry = &utn->entries[i][j]; 346 + 347 + if (!udp_tunnel_nic_entry_is_free(entry) && 348 + entry->port == ti->port && 349 + entry->type != ti->type) { 350 + __set_bit(i, &utn->missed); 351 + return true; 352 + } 353 + } 354 + return false; 355 + } 356 + 357 + static void 358 + udp_tunnel_nic_entry_adj(struct udp_tunnel_nic *utn, 359 + unsigned int table, unsigned int idx, int use_cnt_adj) 360 + { 361 + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; 362 + bool dodgy = entry->flags & UDP_TUNNEL_NIC_ENTRY_OP_FAIL; 363 
+ unsigned int from, to; 364 + 365 + /* If not going from used to unused or vice versa - all done. 366 + * For dodgy entries make sure we try to sync again (queue the entry). 367 + */ 368 + entry->use_cnt += use_cnt_adj; 369 + if (!dodgy && !entry->use_cnt == !(entry->use_cnt - use_cnt_adj)) 370 + return; 371 + 372 + /* Cancel the op before it was sent to the device, if possible, 373 + * otherwise we'd need to take special care to issue commands 374 + * in the same order the ports arrived. 375 + */ 376 + if (use_cnt_adj < 0) { 377 + from = UDP_TUNNEL_NIC_ENTRY_ADD; 378 + to = UDP_TUNNEL_NIC_ENTRY_DEL; 379 + } else { 380 + from = UDP_TUNNEL_NIC_ENTRY_DEL; 381 + to = UDP_TUNNEL_NIC_ENTRY_ADD; 382 + } 383 + 384 + if (entry->flags & from) { 385 + entry->flags &= ~from; 386 + if (!dodgy) 387 + return; 388 + } 389 + 390 + udp_tunnel_nic_entry_queue(utn, entry, to); 391 + } 392 + 393 + static bool 394 + udp_tunnel_nic_entry_try_adj(struct udp_tunnel_nic *utn, 395 + unsigned int table, unsigned int idx, 396 + struct udp_tunnel_info *ti, int use_cnt_adj) 397 + { 398 + struct udp_tunnel_nic_table_entry *entry = &utn->entries[table][idx]; 399 + 400 + if (udp_tunnel_nic_entry_is_free(entry) || 401 + entry->port != ti->port || 402 + entry->type != ti->type) 403 + return false; 404 + 405 + if (udp_tunnel_nic_entry_is_frozen(entry)) 406 + return true; 407 + 408 + udp_tunnel_nic_entry_adj(utn, table, idx, use_cnt_adj); 409 + return true; 410 + } 411 + 412 + /* Try to find existing matching entry and adjust its use count, instead of 413 + * adding a new one. Returns true if entry was found. In case of delete the 414 + * entry may have gotten removed in the process, in which case it will be 415 + * queued for removal. 
416 + */ 417 + static bool 418 + udp_tunnel_nic_try_existing(struct net_device *dev, struct udp_tunnel_nic *utn, 419 + struct udp_tunnel_info *ti, int use_cnt_adj) 420 + { 421 + const struct udp_tunnel_nic_table_info *table; 422 + unsigned int i, j; 423 + 424 + for (i = 0; i < utn->n_tables; i++) { 425 + table = &dev->udp_tunnel_nic_info->tables[i]; 426 + if (!udp_tunnel_nic_table_is_capable(table, ti)) 427 + continue; 428 + 429 + for (j = 0; j < table->n_entries; j++) 430 + if (udp_tunnel_nic_entry_try_adj(utn, i, j, ti, 431 + use_cnt_adj)) 432 + return true; 433 + } 434 + 435 + return false; 436 + } 437 + 438 + static bool 439 + udp_tunnel_nic_add_existing(struct net_device *dev, struct udp_tunnel_nic *utn, 440 + struct udp_tunnel_info *ti) 441 + { 442 + return udp_tunnel_nic_try_existing(dev, utn, ti, +1); 443 + } 444 + 445 + static bool 446 + udp_tunnel_nic_del_existing(struct net_device *dev, struct udp_tunnel_nic *utn, 447 + struct udp_tunnel_info *ti) 448 + { 449 + return udp_tunnel_nic_try_existing(dev, utn, ti, -1); 450 + } 451 + 452 + static bool 453 + udp_tunnel_nic_add_new(struct net_device *dev, struct udp_tunnel_nic *utn, 454 + struct udp_tunnel_info *ti) 455 + { 456 + const struct udp_tunnel_nic_table_info *table; 457 + unsigned int i, j; 458 + 459 + for (i = 0; i < utn->n_tables; i++) { 460 + table = &dev->udp_tunnel_nic_info->tables[i]; 461 + if (!udp_tunnel_nic_table_is_capable(table, ti)) 462 + continue; 463 + 464 + for (j = 0; j < table->n_entries; j++) { 465 + struct udp_tunnel_nic_table_entry *entry; 466 + 467 + entry = &utn->entries[i][j]; 468 + if (!udp_tunnel_nic_entry_is_free(entry)) 469 + continue; 470 + 471 + entry->port = ti->port; 472 + entry->type = ti->type; 473 + entry->use_cnt = 1; 474 + udp_tunnel_nic_entry_queue(utn, entry, 475 + UDP_TUNNEL_NIC_ENTRY_ADD); 476 + return true; 477 + } 478 + 479 + /* The different table may still fit this port in, but there 480 + * are no devices currently which have multiple tables accepting 481 + 
* the same tunnel type, and false positives are okay. 482 + */ 483 + __set_bit(i, &utn->missed); 484 + } 485 + 486 + return false; 487 + } 488 + 489 + static void 490 + __udp_tunnel_nic_add_port(struct net_device *dev, struct udp_tunnel_info *ti) 491 + { 492 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 493 + struct udp_tunnel_nic *utn; 494 + 495 + utn = dev->udp_tunnel_nic; 496 + if (!utn) 497 + return; 498 + if (!netif_running(dev) && info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY) 499 + return; 500 + 501 + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) 502 + return; 503 + 504 + /* It may happen that a tunnel of one type is removed and different 505 + * tunnel type tries to reuse its port before the device was informed. 506 + * Rely on utn->missed to re-add this port later. 507 + */ 508 + if (udp_tunnel_nic_has_collision(dev, utn, ti)) 509 + return; 510 + 511 + if (!udp_tunnel_nic_add_existing(dev, utn, ti)) 512 + udp_tunnel_nic_add_new(dev, utn, ti); 513 + 514 + udp_tunnel_nic_device_sync(dev, utn); 515 + } 516 + 517 + static void 518 + __udp_tunnel_nic_del_port(struct net_device *dev, struct udp_tunnel_info *ti) 519 + { 520 + struct udp_tunnel_nic *utn; 521 + 522 + utn = dev->udp_tunnel_nic; 523 + if (!utn) 524 + return; 525 + 526 + if (!udp_tunnel_nic_is_capable(dev, utn, ti)) 527 + return; 528 + 529 + udp_tunnel_nic_del_existing(dev, utn, ti); 530 + 531 + udp_tunnel_nic_device_sync(dev, utn); 532 + } 533 + 534 + static void __udp_tunnel_nic_reset_ntf(struct net_device *dev) 535 + { 536 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 537 + struct udp_tunnel_nic *utn; 538 + unsigned int i, j; 539 + 540 + ASSERT_RTNL(); 541 + 542 + utn = dev->udp_tunnel_nic; 543 + if (!utn) 544 + return; 545 + 546 + utn->need_sync = false; 547 + for (i = 0; i < utn->n_tables; i++) 548 + for (j = 0; j < info->tables[i].n_entries; j++) { 549 + struct udp_tunnel_nic_table_entry *entry; 550 + 551 + entry = &utn->entries[i][j]; 552 + 553 + 
entry->flags &= ~(UDP_TUNNEL_NIC_ENTRY_DEL | 554 + UDP_TUNNEL_NIC_ENTRY_OP_FAIL); 555 + /* We don't release rtnl across ops */ 556 + WARN_ON(entry->flags & UDP_TUNNEL_NIC_ENTRY_FROZEN); 557 + if (!entry->use_cnt) 558 + continue; 559 + 560 + udp_tunnel_nic_entry_queue(utn, entry, 561 + UDP_TUNNEL_NIC_ENTRY_ADD); 562 + } 563 + 564 + __udp_tunnel_nic_device_sync(dev, utn); 565 + } 566 + 567 + static const struct udp_tunnel_nic_ops __udp_tunnel_nic_ops = { 568 + .get_port = __udp_tunnel_nic_get_port, 569 + .set_port_priv = __udp_tunnel_nic_set_port_priv, 570 + .add_port = __udp_tunnel_nic_add_port, 571 + .del_port = __udp_tunnel_nic_del_port, 572 + .reset_ntf = __udp_tunnel_nic_reset_ntf, 573 + }; 574 + 575 + static void 576 + udp_tunnel_nic_flush(struct net_device *dev, struct udp_tunnel_nic *utn) 577 + { 578 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 579 + unsigned int i, j; 580 + 581 + for (i = 0; i < utn->n_tables; i++) 582 + for (j = 0; j < info->tables[i].n_entries; j++) { 583 + int adj_cnt = -utn->entries[i][j].use_cnt; 584 + 585 + if (adj_cnt) 586 + udp_tunnel_nic_entry_adj(utn, i, j, adj_cnt); 587 + } 588 + 589 + __udp_tunnel_nic_device_sync(dev, utn); 590 + 591 + for (i = 0; i < utn->n_tables; i++) 592 + memset(utn->entries[i], 0, array_size(info->tables[i].n_entries, 593 + sizeof(**utn->entries))); 594 + WARN_ON(utn->need_sync); 595 + utn->need_replay = 0; 596 + } 597 + 598 + static void 599 + udp_tunnel_nic_replay(struct net_device *dev, struct udp_tunnel_nic *utn) 600 + { 601 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 602 + unsigned int i, j; 603 + 604 + /* Freeze all the ports we are already tracking so that the replay 605 + * does not double up the refcount. 
606 + */ 607 + for (i = 0; i < utn->n_tables; i++) 608 + for (j = 0; j < info->tables[i].n_entries; j++) 609 + udp_tunnel_nic_entry_freeze_used(&utn->entries[i][j]); 610 + utn->missed = 0; 611 + utn->need_replay = 0; 612 + 613 + udp_tunnel_get_rx_info(dev); 614 + 615 + for (i = 0; i < utn->n_tables; i++) 616 + for (j = 0; j < info->tables[i].n_entries; j++) 617 + udp_tunnel_nic_entry_unfreeze(&utn->entries[i][j]); 618 + } 619 + 620 + static void udp_tunnel_nic_device_sync_work(struct work_struct *work) 621 + { 622 + struct udp_tunnel_nic *utn = 623 + container_of(work, struct udp_tunnel_nic, work); 624 + 625 + rtnl_lock(); 626 + utn->work_pending = 0; 627 + __udp_tunnel_nic_device_sync(utn->dev, utn); 628 + 629 + if (utn->need_replay) 630 + udp_tunnel_nic_replay(utn->dev, utn); 631 + rtnl_unlock(); 632 + } 633 + 634 + static struct udp_tunnel_nic * 635 + udp_tunnel_nic_alloc(const struct udp_tunnel_nic_info *info, 636 + unsigned int n_tables) 637 + { 638 + struct udp_tunnel_nic *utn; 639 + unsigned int i; 640 + 641 + utn = kzalloc(sizeof(*utn), GFP_KERNEL); 642 + if (!utn) 643 + return NULL; 644 + utn->n_tables = n_tables; 645 + INIT_WORK(&utn->work, udp_tunnel_nic_device_sync_work); 646 + 647 + utn->entries = kmalloc_array(n_tables, sizeof(void *), GFP_KERNEL); 648 + if (!utn->entries) 649 + goto err_free_utn; 650 + 651 + for (i = 0; i < n_tables; i++) { 652 + utn->entries[i] = kcalloc(info->tables[i].n_entries, 653 + sizeof(*utn->entries[i]), GFP_KERNEL); 654 + if (!utn->entries[i]) 655 + goto err_free_prev_entries; 656 + } 657 + 658 + return utn; 659 + 660 + err_free_prev_entries: 661 + while (i--) 662 + kfree(utn->entries[i]); 663 + kfree(utn->entries); 664 + err_free_utn: 665 + kfree(utn); 666 + return NULL; 667 + } 668 + 669 + static int udp_tunnel_nic_register(struct net_device *dev) 670 + { 671 + const struct udp_tunnel_nic_info *info = dev->udp_tunnel_nic_info; 672 + struct udp_tunnel_nic *utn; 673 + unsigned int n_tables, i; 674 + 675 + 
BUILD_BUG_ON(sizeof(utn->missed) * BITS_PER_BYTE < 676 + UDP_TUNNEL_NIC_MAX_TABLES); 677 + 678 + if (WARN_ON(!info->set_port != !info->unset_port) || 679 + WARN_ON(!info->set_port == !info->sync_table) || 680 + WARN_ON(!info->tables[0].n_entries)) 681 + return -EINVAL; 682 + 683 + n_tables = 1; 684 + for (i = 1; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { 685 + if (!info->tables[i].n_entries) 686 + continue; 687 + 688 + n_tables++; 689 + if (WARN_ON(!info->tables[i - 1].n_entries)) 690 + return -EINVAL; 691 + } 692 + 693 + utn = udp_tunnel_nic_alloc(info, n_tables); 694 + if (!utn) 695 + return -ENOMEM; 696 + 697 + utn->dev = dev; 698 + dev_hold(dev); 699 + dev->udp_tunnel_nic = utn; 700 + 701 + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) 702 + udp_tunnel_get_rx_info(dev); 703 + 704 + return 0; 705 + } 706 + 707 + static void 708 + udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) 709 + { 710 + unsigned int i; 711 + 712 + /* Flush before we check work, so we don't waste time adding entries 713 + * from the work which we will boot immediately. 714 + */ 715 + udp_tunnel_nic_flush(dev, utn); 716 + 717 + /* Wait for the work to be done using the state, netdev core will 718 + * retry unregister until we give up our reference on this device. 
719 + */ 720 + if (utn->work_pending) 721 + return; 722 + 723 + for (i = 0; i < utn->n_tables; i++) 724 + kfree(utn->entries[i]); 725 + kfree(utn->entries); 726 + kfree(utn); 727 + dev->udp_tunnel_nic = NULL; 728 + dev_put(dev); 729 + } 730 + 731 + static int 732 + udp_tunnel_nic_netdevice_event(struct notifier_block *unused, 733 + unsigned long event, void *ptr) 734 + { 735 + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 736 + const struct udp_tunnel_nic_info *info; 737 + struct udp_tunnel_nic *utn; 738 + 739 + info = dev->udp_tunnel_nic_info; 740 + if (!info) 741 + return NOTIFY_DONE; 742 + 743 + if (event == NETDEV_REGISTER) { 744 + int err; 745 + 746 + err = udp_tunnel_nic_register(dev); 747 + if (err) 748 + netdev_WARN(dev, "failed to register for UDP tunnel offloads: %d", err); 749 + return notifier_from_errno(err); 750 + } 751 + /* All other events will need the udp_tunnel_nic state */ 752 + utn = dev->udp_tunnel_nic; 753 + if (!utn) 754 + return NOTIFY_DONE; 755 + 756 + if (event == NETDEV_UNREGISTER) { 757 + udp_tunnel_nic_unregister(dev, utn); 758 + return NOTIFY_OK; 759 + } 760 + 761 + /* All other events only matter if NIC has to be programmed open */ 762 + if (!(info->flags & UDP_TUNNEL_NIC_INFO_OPEN_ONLY)) 763 + return NOTIFY_DONE; 764 + 765 + if (event == NETDEV_UP) { 766 + WARN_ON(!udp_tunnel_nic_is_empty(dev, utn)); 767 + udp_tunnel_get_rx_info(dev); 768 + return NOTIFY_OK; 769 + } 770 + if (event == NETDEV_GOING_DOWN) { 771 + udp_tunnel_nic_flush(dev, utn); 772 + return NOTIFY_OK; 773 + } 774 + 775 + return NOTIFY_DONE; 776 + } 777 + 778 + static struct notifier_block udp_tunnel_nic_notifier_block __read_mostly = { 779 + .notifier_call = udp_tunnel_nic_netdevice_event, 780 + }; 781 + 782 + static int __init udp_tunnel_nic_init_module(void) 783 + { 784 + int err; 785 + 786 + udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); 787 + if (!udp_tunnel_nic_workqueue) 788 + return -ENOMEM; 789 + 790 + rtnl_lock(); 791 + 
udp_tunnel_nic_ops = &__udp_tunnel_nic_ops; 792 + rtnl_unlock(); 793 + 794 + err = register_netdevice_notifier(&udp_tunnel_nic_notifier_block); 795 + if (err) 796 + goto err_unset_ops; 797 + 798 + return 0; 799 + 800 + err_unset_ops: 801 + rtnl_lock(); 802 + udp_tunnel_nic_ops = NULL; 803 + rtnl_unlock(); 804 + destroy_workqueue(udp_tunnel_nic_workqueue); 805 + return err; 806 + } 807 + late_initcall(udp_tunnel_nic_init_module); 808 + 809 + static void __exit udp_tunnel_nic_cleanup_module(void) 810 + { 811 + unregister_netdevice_notifier(&udp_tunnel_nic_notifier_block); 812 + 813 + rtnl_lock(); 814 + udp_tunnel_nic_ops = NULL; 815 + rtnl_unlock(); 816 + 817 + destroy_workqueue(udp_tunnel_nic_workqueue); 818 + } 819 + module_exit(udp_tunnel_nic_cleanup_module); 820 + 821 + MODULE_LICENSE("GPL");
+7
net/ipv4/udp_tunnel_stub.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + // Copyright (c) 2020 Facebook Inc. 3 + 4 + #include <net/udp_tunnel.h> 5 + 6 + const struct udp_tunnel_nic_ops *udp_tunnel_nic_ops; 7 + EXPORT_SYMBOL_GPL(udp_tunnel_nic_ops);