Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.

The Bareudp tunnel module provides a generic L3 encapsulation
tunnelling module for tunnelling different protocols like MPLS,
IP, NSH, etc. inside a UDP tunnel.

Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Martin Varghese and committed by
David S. Miller
571912c6 48851e9e

+952
+34
Documentation/networking/bareudp.rst
··· 1 + .. SPDX-License-Identifier: GPL-2.0 2 + 3 + ======================================== 4 + Bare UDP Tunnelling Module Documentation 5 + ======================================== 6 + 7 + There are various L3 encapsulation standards using UDP being discussed to 8 + leverage the UDP based load balancing capability of different networks. 9 + MPLSoUDP (https://tools.ietf.org/html/rfc7510) is one among them. 10 + 11 + The Bareudp tunnel module provides a generic L3 encapsulation tunnelling 12 + support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside 13 + a UDP tunnel. 14 + 15 + Usage 16 + ------ 17 + 18 + 1) Device creation & deletion 19 + 20 + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847 21 + 22 + This creates a bareudp tunnel device which tunnels L3 traffic with ethertype 23 + 0x8847 (MPLS traffic). The destination port of the UDP header will be set to 24 + 6635. The device will listen on UDP port 6635 to receive traffic. 25 + 26 + b) ip link delete bareudp0 27 + 28 + 2) Device Usage 29 + 30 + The bareudp device could be used along with OVS or flower filter in TC. 31 + The OVS or TC flower layer must set the tunnel information in SKB dst field before 32 + sending packet buffer to the bareudp device for transmission. On reception the 33 + bareudp device extracts and stores the tunnel information in SKB dst field before 34 + passing the packet buffer to the network stack.
+1
Documentation/networking/index.rst
··· 8 8 9 9 netdev-FAQ 10 10 af_xdp 11 + bareudp 11 12 batman-adv 12 13 can 13 14 can_ucan_protocol
+13
drivers/net/Kconfig
··· 258 258 To compile this driver as a module, choose M here: the module 259 259 will be called geneve. 260 260 261 + config BAREUDP 262 + tristate "Bare UDP Encapsulation" 263 + depends on INET 264 + depends on IPV6 || !IPV6 265 + select NET_UDP_TUNNEL 266 + select GRO_CELLS 267 + help 268 + This adds a bare UDP tunnel module for tunnelling different 269 + kinds of traffic like MPLS, IP, etc. inside a UDP tunnel. 270 + 271 + To compile this driver as a module, choose M here: the module 272 + will be called bareudp. 273 + 261 274 config GTP 262 275 tristate "GPRS Tunneling Protocol datapath (GTP-U)" 263 276 depends on INET
+1
drivers/net/Makefile
··· 29 29 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o 30 30 obj-$(CONFIG_VXLAN) += vxlan.o 31 31 obj-$(CONFIG_GENEVE) += geneve.o 32 + obj-$(CONFIG_BAREUDP) += bareudp.o 32 33 obj-$(CONFIG_GTP) += gtp.o 33 34 obj-$(CONFIG_NLMON) += nlmon.o 34 35 obj-$(CONFIG_NET_VRF) += vrf.o
+743
drivers/net/bareudp.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Bareudp: UDP tunnel encasulation for different Payload types like 3 + * MPLS, NSH, IP, etc. 4 + * Copyright (c) 2019 Nokia, Inc. 5 + * Authors: Martin Varghese, <martin.varghese@nokia.com> 6 + */ 7 + 8 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 + 10 + #include <linux/kernel.h> 11 + #include <linux/module.h> 12 + #include <linux/etherdevice.h> 13 + #include <linux/hash.h> 14 + #include <net/dst_metadata.h> 15 + #include <net/gro_cells.h> 16 + #include <net/rtnetlink.h> 17 + #include <net/protocol.h> 18 + #include <net/ip6_tunnel.h> 19 + #include <net/ip_tunnels.h> 20 + #include <net/udp_tunnel.h> 21 + #include <net/bareudp.h> 22 + 23 + #define BAREUDP_BASE_HLEN sizeof(struct udphdr) 24 + #define BAREUDP_IPV4_HLEN (sizeof(struct iphdr) + \ 25 + sizeof(struct udphdr)) 26 + #define BAREUDP_IPV6_HLEN (sizeof(struct ipv6hdr) + \ 27 + sizeof(struct udphdr)) 28 + 29 + static bool log_ecn_error = true; 30 + module_param(log_ecn_error, bool, 0644); 31 + MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); 32 + 33 + /* per-network namespace private data for this module */ 34 + 35 + static unsigned int bareudp_net_id; 36 + 37 + struct bareudp_net { 38 + struct list_head bareudp_list; 39 + }; 40 + 41 + /* Pseudo network device */ 42 + struct bareudp_dev { 43 + struct net *net; /* netns for packet i/o */ 44 + struct net_device *dev; /* netdev for bareudp tunnel */ 45 + __be16 ethertype; 46 + __be16 port; 47 + u16 sport_min; 48 + struct socket __rcu *sock; 49 + struct list_head next; /* bareudp node on namespace list */ 50 + struct gro_cells gro_cells; 51 + }; 52 + 53 + static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) 54 + { 55 + struct metadata_dst *tun_dst = NULL; 56 + struct pcpu_sw_netstats *stats; 57 + struct bareudp_dev *bareudp; 58 + unsigned short family; 59 + unsigned int len; 60 + __be16 proto; 61 + void *oiph; 62 + int err; 63 + 64 + bareudp = 
rcu_dereference_sk_user_data(sk); 65 + if (!bareudp) 66 + goto drop; 67 + 68 + if (skb->protocol == htons(ETH_P_IP)) 69 + family = AF_INET; 70 + else 71 + family = AF_INET6; 72 + 73 + proto = bareudp->ethertype; 74 + 75 + if (iptunnel_pull_header(skb, BAREUDP_BASE_HLEN, 76 + proto, 77 + !net_eq(bareudp->net, 78 + dev_net(bareudp->dev)))) { 79 + bareudp->dev->stats.rx_dropped++; 80 + goto drop; 81 + } 82 + 83 + tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); 84 + if (!tun_dst) { 85 + bareudp->dev->stats.rx_dropped++; 86 + goto drop; 87 + } 88 + skb_dst_set(skb, &tun_dst->dst); 89 + skb->dev = bareudp->dev; 90 + oiph = skb_network_header(skb); 91 + skb_reset_network_header(skb); 92 + 93 + if (family == AF_INET) 94 + err = IP_ECN_decapsulate(oiph, skb); 95 + #if IS_ENABLED(CONFIG_IPV6) 96 + else 97 + err = IP6_ECN_decapsulate(oiph, skb); 98 + #endif 99 + 100 + if (unlikely(err)) { 101 + if (log_ecn_error) { 102 + if (family == AF_INET) 103 + net_info_ratelimited("non-ECT from %pI4 " 104 + "with TOS=%#x\n", 105 + &((struct iphdr *)oiph)->saddr, 106 + ((struct iphdr *)oiph)->tos); 107 + #if IS_ENABLED(CONFIG_IPV6) 108 + else 109 + net_info_ratelimited("non-ECT from %pI6\n", 110 + &((struct ipv6hdr *)oiph)->saddr); 111 + #endif 112 + } 113 + if (err > 1) { 114 + ++bareudp->dev->stats.rx_frame_errors; 115 + ++bareudp->dev->stats.rx_errors; 116 + goto drop; 117 + } 118 + } 119 + 120 + len = skb->len; 121 + err = gro_cells_receive(&bareudp->gro_cells, skb); 122 + if (likely(err == NET_RX_SUCCESS)) { 123 + stats = this_cpu_ptr(bareudp->dev->tstats); 124 + u64_stats_update_begin(&stats->syncp); 125 + stats->rx_packets++; 126 + stats->rx_bytes += len; 127 + u64_stats_update_end(&stats->syncp); 128 + } 129 + return 0; 130 + drop: 131 + /* Consume bad packet */ 132 + kfree_skb(skb); 133 + 134 + return 0; 135 + } 136 + 137 + static int bareudp_err_lookup(struct sock *sk, struct sk_buff *skb) 138 + { 139 + return 0; 140 + } 141 + 142 + static int bareudp_init(struct 
net_device *dev) 143 + { 144 + struct bareudp_dev *bareudp = netdev_priv(dev); 145 + int err; 146 + 147 + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 148 + if (!dev->tstats) 149 + return -ENOMEM; 150 + 151 + err = gro_cells_init(&bareudp->gro_cells, dev); 152 + if (err) { 153 + free_percpu(dev->tstats); 154 + return err; 155 + } 156 + return 0; 157 + } 158 + 159 + static void bareudp_uninit(struct net_device *dev) 160 + { 161 + struct bareudp_dev *bareudp = netdev_priv(dev); 162 + 163 + gro_cells_destroy(&bareudp->gro_cells); 164 + free_percpu(dev->tstats); 165 + } 166 + 167 + static struct socket *bareudp_create_sock(struct net *net, __be16 port) 168 + { 169 + struct udp_port_cfg udp_conf; 170 + struct socket *sock; 171 + int err; 172 + 173 + memset(&udp_conf, 0, sizeof(udp_conf)); 174 + #if IS_ENABLED(CONFIG_IPV6) 175 + udp_conf.family = AF_INET6; 176 + #else 177 + udp_conf.family = AF_INET; 178 + #endif 179 + udp_conf.local_udp_port = port; 180 + /* Open UDP socket */ 181 + err = udp_sock_create(net, &udp_conf, &sock); 182 + if (err < 0) 183 + return ERR_PTR(err); 184 + 185 + return sock; 186 + } 187 + 188 + /* Create new listen socket if needed */ 189 + static int bareudp_socket_create(struct bareudp_dev *bareudp, __be16 port) 190 + { 191 + struct udp_tunnel_sock_cfg tunnel_cfg; 192 + struct socket *sock; 193 + 194 + sock = bareudp_create_sock(bareudp->net, port); 195 + if (IS_ERR(sock)) 196 + return PTR_ERR(sock); 197 + 198 + /* Mark socket as an encapsulation socket */ 199 + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); 200 + tunnel_cfg.sk_user_data = bareudp; 201 + tunnel_cfg.encap_type = 1; 202 + tunnel_cfg.encap_rcv = bareudp_udp_encap_recv; 203 + tunnel_cfg.encap_err_lookup = bareudp_err_lookup; 204 + tunnel_cfg.encap_destroy = NULL; 205 + setup_udp_tunnel_sock(bareudp->net, sock, &tunnel_cfg); 206 + 207 + if (sock->sk->sk_family == AF_INET6) 208 + udp_encap_enable(); 209 + 210 + rcu_assign_pointer(bareudp->sock, sock); 211 + return 0; 
212 + } 213 + 214 + static int bareudp_open(struct net_device *dev) 215 + { 216 + struct bareudp_dev *bareudp = netdev_priv(dev); 217 + int ret = 0; 218 + 219 + ret = bareudp_socket_create(bareudp, bareudp->port); 220 + return ret; 221 + } 222 + 223 + static void bareudp_sock_release(struct bareudp_dev *bareudp) 224 + { 225 + struct socket *sock; 226 + 227 + sock = bareudp->sock; 228 + rcu_assign_pointer(bareudp->sock, NULL); 229 + synchronize_net(); 230 + udp_tunnel_sock_release(sock); 231 + } 232 + 233 + static int bareudp_stop(struct net_device *dev) 234 + { 235 + struct bareudp_dev *bareudp = netdev_priv(dev); 236 + 237 + bareudp_sock_release(bareudp); 238 + return 0; 239 + } 240 + 241 + static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, 242 + struct bareudp_dev *bareudp, 243 + const struct ip_tunnel_info *info) 244 + { 245 + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); 246 + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); 247 + struct socket *sock = rcu_dereference(bareudp->sock); 248 + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); 249 + const struct ip_tunnel_key *key = &info->key; 250 + struct rtable *rt; 251 + __be16 sport, df; 252 + int min_headroom; 253 + __u8 tos, ttl; 254 + __be32 saddr; 255 + int err; 256 + 257 + if (!sock) 258 + return -ESHUTDOWN; 259 + 260 + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info, 261 + IPPROTO_UDP, use_cache); 262 + 263 + if (IS_ERR(rt)) 264 + return PTR_ERR(rt); 265 + 266 + skb_tunnel_check_pmtu(skb, &rt->dst, 267 + BAREUDP_IPV4_HLEN + info->options_len); 268 + 269 + sport = udp_flow_src_port(bareudp->net, skb, 270 + bareudp->sport_min, USHRT_MAX, 271 + true); 272 + tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 273 + ttl = key->ttl; 274 + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? 
htons(IP_DF) : 0; 275 + skb_scrub_packet(skb, xnet); 276 + 277 + if (!skb_pull(skb, skb_network_offset(skb))) 278 + goto free_dst; 279 + 280 + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + 281 + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); 282 + 283 + err = skb_cow_head(skb, min_headroom); 284 + if (unlikely(err)) 285 + goto free_dst; 286 + 287 + err = udp_tunnel_handle_offloads(skb, udp_sum); 288 + if (err) 289 + goto free_dst; 290 + 291 + skb_set_inner_protocol(skb, bareudp->ethertype); 292 + udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, 293 + tos, ttl, df, sport, bareudp->port, 294 + !net_eq(bareudp->net, dev_net(bareudp->dev)), 295 + !(info->key.tun_flags & TUNNEL_CSUM)); 296 + return 0; 297 + 298 + free_dst: 299 + dst_release(&rt->dst); 300 + return err; 301 + } 302 + 303 + #if IS_ENABLED(CONFIG_IPV6) 304 + static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, 305 + struct bareudp_dev *bareudp, 306 + const struct ip_tunnel_info *info) 307 + { 308 + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); 309 + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); 310 + struct socket *sock = rcu_dereference(bareudp->sock); 311 + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); 312 + const struct ip_tunnel_key *key = &info->key; 313 + struct dst_entry *dst = NULL; 314 + struct in6_addr saddr, daddr; 315 + int min_headroom; 316 + __u8 prio, ttl; 317 + __be16 sport; 318 + int err; 319 + 320 + if (!sock) 321 + return -ESHUTDOWN; 322 + 323 + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, &saddr, info, 324 + IPPROTO_UDP, use_cache); 325 + if (IS_ERR(dst)) 326 + return PTR_ERR(dst); 327 + 328 + skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len); 329 + 330 + sport = udp_flow_src_port(bareudp->net, skb, 331 + bareudp->sport_min, USHRT_MAX, 332 + true); 333 + prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); 334 + ttl = key->ttl; 335 + 336 + 
skb_scrub_packet(skb, xnet); 337 + 338 + if (!skb_pull(skb, skb_network_offset(skb))) 339 + goto free_dst; 340 + 341 + min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + 342 + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); 343 + 344 + err = skb_cow_head(skb, min_headroom); 345 + if (unlikely(err)) 346 + goto free_dst; 347 + 348 + err = udp_tunnel_handle_offloads(skb, udp_sum); 349 + if (err) 350 + goto free_dst; 351 + 352 + daddr = info->key.u.ipv6.dst; 353 + udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, 354 + &saddr, &daddr, prio, ttl, 355 + info->key.label, sport, bareudp->port, 356 + !(info->key.tun_flags & TUNNEL_CSUM)); 357 + return 0; 358 + 359 + free_dst: 360 + dst_release(dst); 361 + return err; 362 + } 363 + #endif 364 + 365 + static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) 366 + { 367 + struct bareudp_dev *bareudp = netdev_priv(dev); 368 + struct ip_tunnel_info *info = NULL; 369 + int err; 370 + 371 + if (skb->protocol != bareudp->ethertype) { 372 + err = -EINVAL; 373 + goto tx_error; 374 + } 375 + 376 + info = skb_tunnel_info(skb); 377 + if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { 378 + err = -EINVAL; 379 + goto tx_error; 380 + } 381 + 382 + rcu_read_lock(); 383 + #if IS_ENABLED(CONFIG_IPV6) 384 + if (info->mode & IP_TUNNEL_INFO_IPV6) 385 + err = bareudp6_xmit_skb(skb, dev, bareudp, info); 386 + else 387 + #endif 388 + err = bareudp_xmit_skb(skb, dev, bareudp, info); 389 + 390 + rcu_read_unlock(); 391 + 392 + if (likely(!err)) 393 + return NETDEV_TX_OK; 394 + tx_error: 395 + dev_kfree_skb(skb); 396 + 397 + if (err == -ELOOP) 398 + dev->stats.collisions++; 399 + else if (err == -ENETUNREACH) 400 + dev->stats.tx_carrier_errors++; 401 + 402 + dev->stats.tx_errors++; 403 + return NETDEV_TX_OK; 404 + } 405 + 406 + static int bareudp_fill_metadata_dst(struct net_device *dev, 407 + struct sk_buff *skb) 408 + { 409 + struct ip_tunnel_info *info = skb_tunnel_info(skb); 410 + struct 
bareudp_dev *bareudp = netdev_priv(dev); 411 + bool use_cache; 412 + 413 + use_cache = ip_tunnel_dst_cache_usable(skb, info); 414 + 415 + if (ip_tunnel_info_af(info) == AF_INET) { 416 + struct rtable *rt; 417 + __be32 saddr; 418 + 419 + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, 420 + info, IPPROTO_UDP, use_cache); 421 + if (IS_ERR(rt)) 422 + return PTR_ERR(rt); 423 + 424 + ip_rt_put(rt); 425 + info->key.u.ipv4.src = saddr; 426 + #if IS_ENABLED(CONFIG_IPV6) 427 + } else if (ip_tunnel_info_af(info) == AF_INET6) { 428 + struct dst_entry *dst; 429 + struct in6_addr saddr; 430 + struct socket *sock = rcu_dereference(bareudp->sock); 431 + 432 + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, 433 + &saddr, info, IPPROTO_UDP, 434 + use_cache); 435 + if (IS_ERR(dst)) 436 + return PTR_ERR(dst); 437 + 438 + dst_release(dst); 439 + info->key.u.ipv6.src = saddr; 440 + #endif 441 + } else { 442 + return -EINVAL; 443 + } 444 + 445 + info->key.tp_src = udp_flow_src_port(bareudp->net, skb, 446 + bareudp->sport_min, 447 + USHRT_MAX, true); 448 + info->key.tp_dst = bareudp->port; 449 + return 0; 450 + } 451 + 452 + static const struct net_device_ops bareudp_netdev_ops = { 453 + .ndo_init = bareudp_init, 454 + .ndo_uninit = bareudp_uninit, 455 + .ndo_open = bareudp_open, 456 + .ndo_stop = bareudp_stop, 457 + .ndo_start_xmit = bareudp_xmit, 458 + .ndo_get_stats64 = ip_tunnel_get_stats64, 459 + .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, 460 + }; 461 + 462 + static const struct nla_policy bareudp_policy[IFLA_BAREUDP_MAX + 1] = { 463 + [IFLA_BAREUDP_PORT] = { .type = NLA_U16 }, 464 + [IFLA_BAREUDP_ETHERTYPE] = { .type = NLA_U16 }, 465 + [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NLA_U16 }, 466 + }; 467 + 468 + /* Info for udev, that this is a virtual tunnel endpoint */ 469 + static struct device_type bareudp_type = { 470 + .name = "bareudp", 471 + }; 472 + 473 + /* Initialize the device structure. 
*/ 474 + static void bareudp_setup(struct net_device *dev) 475 + { 476 + dev->netdev_ops = &bareudp_netdev_ops; 477 + dev->needs_free_netdev = true; 478 + SET_NETDEV_DEVTYPE(dev, &bareudp_type); 479 + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; 480 + dev->features |= NETIF_F_RXCSUM; 481 + dev->features |= NETIF_F_GSO_SOFTWARE; 482 + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; 483 + dev->hw_features |= NETIF_F_GSO_SOFTWARE; 484 + dev->hard_header_len = 0; 485 + dev->addr_len = 0; 486 + dev->mtu = ETH_DATA_LEN; 487 + dev->min_mtu = IPV4_MIN_MTU; 488 + dev->max_mtu = IP_MAX_MTU - BAREUDP_BASE_HLEN; 489 + dev->type = ARPHRD_NONE; 490 + netif_keep_dst(dev); 491 + dev->priv_flags |= IFF_NO_QUEUE; 492 + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; 493 + } 494 + 495 + static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], 496 + struct netlink_ext_ack *extack) 497 + { 498 + if (!data) { 499 + NL_SET_ERR_MSG(extack, 500 + "Not enough attributes provided to perform the operation"); 501 + return -EINVAL; 502 + } 503 + return 0; 504 + } 505 + 506 + static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf) 507 + { 508 + if (!data[IFLA_BAREUDP_PORT] || !data[IFLA_BAREUDP_ETHERTYPE]) 509 + return -EINVAL; 510 + 511 + if (data[IFLA_BAREUDP_PORT]) 512 + conf->port = nla_get_u16(data[IFLA_BAREUDP_PORT]); 513 + 514 + if (data[IFLA_BAREUDP_ETHERTYPE]) 515 + conf->ethertype = nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]); 516 + 517 + if (data[IFLA_BAREUDP_SRCPORT_MIN]) 518 + conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); 519 + 520 + return 0; 521 + } 522 + 523 + static struct bareudp_dev *bareudp_find_dev(struct bareudp_net *bn, 524 + const struct bareudp_conf *conf) 525 + { 526 + struct bareudp_dev *bareudp, *t = NULL; 527 + 528 + list_for_each_entry(bareudp, &bn->bareudp_list, next) { 529 + if (conf->port == bareudp->port) 530 + t = bareudp; 531 + } 532 + return t; 533 + } 534 + 535 + static int 
bareudp_configure(struct net *net, struct net_device *dev, 536 + struct bareudp_conf *conf) 537 + { 538 + struct bareudp_net *bn = net_generic(net, bareudp_net_id); 539 + struct bareudp_dev *t, *bareudp = netdev_priv(dev); 540 + int err; 541 + 542 + bareudp->net = net; 543 + bareudp->dev = dev; 544 + t = bareudp_find_dev(bn, conf); 545 + if (t) 546 + return -EBUSY; 547 + 548 + bareudp->port = conf->port; 549 + bareudp->ethertype = conf->ethertype; 550 + bareudp->sport_min = conf->sport_min; 551 + err = register_netdevice(dev); 552 + if (err) 553 + return err; 554 + 555 + list_add(&bareudp->next, &bn->bareudp_list); 556 + return 0; 557 + } 558 + 559 + static int bareudp_link_config(struct net_device *dev, 560 + struct nlattr *tb[]) 561 + { 562 + int err; 563 + 564 + if (tb[IFLA_MTU]) { 565 + err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); 566 + if (err) 567 + return err; 568 + } 569 + return 0; 570 + } 571 + 572 + static int bareudp_newlink(struct net *net, struct net_device *dev, 573 + struct nlattr *tb[], struct nlattr *data[], 574 + struct netlink_ext_ack *extack) 575 + { 576 + struct bareudp_conf conf; 577 + int err; 578 + 579 + err = bareudp2info(data, &conf); 580 + if (err) 581 + return err; 582 + 583 + err = bareudp_configure(net, dev, &conf); 584 + if (err) 585 + return err; 586 + 587 + err = bareudp_link_config(dev, tb); 588 + if (err) 589 + return err; 590 + 591 + return 0; 592 + } 593 + 594 + static void bareudp_dellink(struct net_device *dev, struct list_head *head) 595 + { 596 + struct bareudp_dev *bareudp = netdev_priv(dev); 597 + 598 + list_del(&bareudp->next); 599 + unregister_netdevice_queue(dev, head); 600 + } 601 + 602 + static size_t bareudp_get_size(const struct net_device *dev) 603 + { 604 + return nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_PORT */ 605 + nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */ 606 + nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */ 607 + 0; 608 + } 609 + 610 + static int 
bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev) 611 + { 612 + struct bareudp_dev *bareudp = netdev_priv(dev); 613 + 614 + if (nla_put_be16(skb, IFLA_BAREUDP_PORT, bareudp->port)) 615 + goto nla_put_failure; 616 + if (nla_put_be16(skb, IFLA_BAREUDP_ETHERTYPE, bareudp->ethertype)) 617 + goto nla_put_failure; 618 + if (nla_put_u16(skb, IFLA_BAREUDP_SRCPORT_MIN, bareudp->sport_min)) 619 + goto nla_put_failure; 620 + 621 + return 0; 622 + 623 + nla_put_failure: 624 + return -EMSGSIZE; 625 + } 626 + 627 + static struct rtnl_link_ops bareudp_link_ops __read_mostly = { 628 + .kind = "bareudp", 629 + .maxtype = IFLA_BAREUDP_MAX, 630 + .policy = bareudp_policy, 631 + .priv_size = sizeof(struct bareudp_dev), 632 + .setup = bareudp_setup, 633 + .validate = bareudp_validate, 634 + .newlink = bareudp_newlink, 635 + .dellink = bareudp_dellink, 636 + .get_size = bareudp_get_size, 637 + .fill_info = bareudp_fill_info, 638 + }; 639 + 640 + struct net_device *bareudp_dev_create(struct net *net, const char *name, 641 + u8 name_assign_type, 642 + struct bareudp_conf *conf) 643 + { 644 + struct nlattr *tb[IFLA_MAX + 1]; 645 + struct net_device *dev; 646 + LIST_HEAD(list_kill); 647 + int err; 648 + 649 + memset(tb, 0, sizeof(tb)); 650 + dev = rtnl_create_link(net, name, name_assign_type, 651 + &bareudp_link_ops, tb, NULL); 652 + if (IS_ERR(dev)) 653 + return dev; 654 + 655 + err = bareudp_configure(net, dev, conf); 656 + if (err) { 657 + free_netdev(dev); 658 + return ERR_PTR(err); 659 + } 660 + err = dev_set_mtu(dev, IP_MAX_MTU - BAREUDP_BASE_HLEN); 661 + if (err) 662 + goto err; 663 + 664 + err = rtnl_configure_link(dev, NULL); 665 + if (err < 0) 666 + goto err; 667 + 668 + return dev; 669 + err: 670 + bareudp_dellink(dev, &list_kill); 671 + unregister_netdevice_many(&list_kill); 672 + return ERR_PTR(err); 673 + } 674 + EXPORT_SYMBOL_GPL(bareudp_dev_create); 675 + 676 + static __net_init int bareudp_init_net(struct net *net) 677 + { 678 + struct bareudp_net *bn = 
net_generic(net, bareudp_net_id); 679 + 680 + INIT_LIST_HEAD(&bn->bareudp_list); 681 + return 0; 682 + } 683 + 684 + static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) 685 + { 686 + struct bareudp_net *bn = net_generic(net, bareudp_net_id); 687 + struct bareudp_dev *bareudp, *next; 688 + 689 + list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next) 690 + unregister_netdevice_queue(bareudp->dev, head); 691 + } 692 + 693 + static void __net_exit bareudp_exit_batch_net(struct list_head *net_list) 694 + { 695 + struct net *net; 696 + LIST_HEAD(list); 697 + 698 + rtnl_lock(); 699 + list_for_each_entry(net, net_list, exit_list) 700 + bareudp_destroy_tunnels(net, &list); 701 + 702 + /* unregister the devices gathered above */ 703 + unregister_netdevice_many(&list); 704 + rtnl_unlock(); 705 + } 706 + 707 + static struct pernet_operations bareudp_net_ops = { 708 + .init = bareudp_init_net, 709 + .exit_batch = bareudp_exit_batch_net, 710 + .id = &bareudp_net_id, 711 + .size = sizeof(struct bareudp_net), 712 + }; 713 + 714 + static int __init bareudp_init_module(void) 715 + { 716 + int rc; 717 + 718 + rc = register_pernet_subsys(&bareudp_net_ops); 719 + if (rc) 720 + goto out1; 721 + 722 + rc = rtnl_link_register(&bareudp_link_ops); 723 + if (rc) 724 + goto out2; 725 + 726 + return 0; 727 + out2: 728 + unregister_pernet_subsys(&bareudp_net_ops); 729 + out1: 730 + return rc; 731 + } 732 + late_initcall(bareudp_init_module); 733 + 734 + static void __exit bareudp_cleanup_module(void) 735 + { 736 + rtnl_link_unregister(&bareudp_link_ops); 737 + unregister_pernet_subsys(&bareudp_net_ops); 738 + } 739 + module_exit(bareudp_cleanup_module); 740 + 741 + MODULE_LICENSE("GPL"); 742 + MODULE_AUTHOR("Martin Varghese <martin.varghese@nokia.com>"); 743 + MODULE_DESCRIPTION("Interface driver for UDP encapsulated traffic");
+19
include/net/bareudp.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef __NET_BAREUDP_H 4 + #define __NET_BAREUDP_H 5 + 6 + #include <linux/types.h> 7 + #include <linux/skbuff.h> 8 + 9 + struct bareudp_conf { 10 + __be16 ethertype; 11 + __be16 port; 12 + u16 sport_min; 13 + }; 14 + 15 + struct net_device *bareudp_dev_create(struct net *net, const char *name, 16 + u8 name_assign_type, 17 + struct bareudp_conf *info); 18 + 19 + #endif
+6
include/net/ipv6.h
··· 1027 1027 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, 1028 1028 const struct in6_addr *final_dst, 1029 1029 bool connected); 1030 + struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1031 + struct net_device *dev, 1032 + struct net *net, struct socket *sock, 1033 + struct in6_addr *saddr, 1034 + const struct ip_tunnel_info *info, 1035 + u8 protocol, bool use_cache); 1030 1036 struct dst_entry *ip6_blackhole_route(struct net *net, 1031 1037 struct dst_entry *orig_dst); 1032 1038
+6
include/net/route.h
··· 128 128 129 129 struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, 130 130 const struct sock *sk); 131 + struct rtable *ip_route_output_tunnel(struct sk_buff *skb, 132 + struct net_device *dev, 133 + struct net *net, __be32 *saddr, 134 + const struct ip_tunnel_info *info, 135 + u8 protocol, bool use_cache); 136 + 131 137 struct dst_entry *ipv4_blackhole_route(struct net *net, 132 138 struct dst_entry *dst_orig); 133 139
+11
include/uapi/linux/if_link.h
··· 590 590 GENEVE_DF_MAX = __GENEVE_DF_END - 1, 591 591 }; 592 592 593 + /* Bareudp section */ 594 + enum { 595 + IFLA_BAREUDP_UNSPEC, 596 + IFLA_BAREUDP_PORT, 597 + IFLA_BAREUDP_ETHERTYPE, 598 + IFLA_BAREUDP_SRCPORT_MIN, 599 + __IFLA_BAREUDP_MAX 600 + }; 601 + 602 + #define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) 603 + 593 604 /* PPP section */ 594 605 enum { 595 606 IFLA_PPP_UNSPEC,
+48
net/ipv4/route.c
··· 2774 2774 } 2775 2775 EXPORT_SYMBOL_GPL(ip_route_output_flow); 2776 2776 2777 + struct rtable *ip_route_output_tunnel(struct sk_buff *skb, 2778 + struct net_device *dev, 2779 + struct net *net, __be32 *saddr, 2780 + const struct ip_tunnel_info *info, 2781 + u8 protocol, bool use_cache) 2782 + { 2783 + #ifdef CONFIG_DST_CACHE 2784 + struct dst_cache *dst_cache; 2785 + #endif 2786 + struct rtable *rt = NULL; 2787 + struct flowi4 fl4; 2788 + __u8 tos; 2789 + 2790 + #ifdef CONFIG_DST_CACHE 2791 + dst_cache = (struct dst_cache *)&info->dst_cache; 2792 + if (use_cache) { 2793 + rt = dst_cache_get_ip4(dst_cache, saddr); 2794 + if (rt) 2795 + return rt; 2796 + } 2797 + #endif 2798 + memset(&fl4, 0, sizeof(fl4)); 2799 + fl4.flowi4_mark = skb->mark; 2800 + fl4.flowi4_proto = protocol; 2801 + fl4.daddr = info->key.u.ipv4.dst; 2802 + fl4.saddr = info->key.u.ipv4.src; 2803 + tos = info->key.tos; 2804 + fl4.flowi4_tos = RT_TOS(tos); 2805 + 2806 + rt = ip_route_output_key(net, &fl4); 2807 + if (IS_ERR(rt)) { 2808 + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); 2809 + return ERR_PTR(-ENETUNREACH); 2810 + } 2811 + if (rt->dst.dev == dev) { /* is this necessary? */ 2812 + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); 2813 + ip_rt_put(rt); 2814 + return ERR_PTR(-ELOOP); 2815 + } 2816 + #ifdef CONFIG_DST_CACHE 2817 + if (use_cache) 2818 + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); 2819 + #endif 2820 + *saddr = fl4.saddr; 2821 + return rt; 2822 + } 2823 + EXPORT_SYMBOL_GPL(ip_route_output_tunnel); 2824 + 2777 2825 /* called with rcu_read_lock held */ 2778 2826 static int rt_fill_info(struct net *net, __be32 dst, __be32 src, 2779 2827 struct rtable *rt, u32 table_id, struct flowi4 *fl4,
+70
net/ipv6/ip6_output.c
··· 54 54 #include <linux/mroute6.h> 55 55 #include <net/l3mdev.h> 56 56 #include <net/lwtunnel.h> 57 + #include <net/ip_tunnels.h> 57 58 58 59 static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) 59 60 { ··· 1196 1195 return dst; 1197 1196 } 1198 1197 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); 1198 + 1199 + /** 1200 + * ip6_dst_lookup_tunnel - perform route lookup on tunnel 1201 + * @skb: Packet for which lookup is done 1202 + * @dev: Tunnel device 1203 + * @net: Network namespace of tunnel device 1204 + * @sk: Socket which provides route info 1205 + * @saddr: Memory to store the src ip address 1206 + * @info: Tunnel information 1207 + * @protocol: IP protocol 1208 + * @use_cahce: Flag to enable cache usage 1209 + * This function performs a route lookup on a tunnel 1210 + * 1211 + * It returns a valid dst pointer and stores src address to be used in 1212 + * tunnel in param saddr on success, else a pointer encoded error code. 1213 + */ 1214 + 1215 + struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, 1216 + struct net_device *dev, 1217 + struct net *net, 1218 + struct socket *sock, 1219 + struct in6_addr *saddr, 1220 + const struct ip_tunnel_info *info, 1221 + u8 protocol, 1222 + bool use_cache) 1223 + { 1224 + struct dst_entry *dst = NULL; 1225 + #ifdef CONFIG_DST_CACHE 1226 + struct dst_cache *dst_cache; 1227 + #endif 1228 + struct flowi6 fl6; 1229 + __u8 prio; 1230 + 1231 + #ifdef CONFIG_DST_CACHE 1232 + dst_cache = (struct dst_cache *)&info->dst_cache; 1233 + if (use_cache) { 1234 + dst = dst_cache_get_ip6(dst_cache, saddr); 1235 + if (dst) 1236 + return dst; 1237 + } 1238 + #endif 1239 + memset(&fl6, 0, sizeof(fl6)); 1240 + fl6.flowi6_mark = skb->mark; 1241 + fl6.flowi6_proto = protocol; 1242 + fl6.daddr = info->key.u.ipv6.dst; 1243 + fl6.saddr = info->key.u.ipv6.src; 1244 + prio = info->key.tos; 1245 + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), 1246 + info->key.label); 1247 + 1248 + dst = 
ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, 1249 + NULL); 1250 + if (IS_ERR(dst)) { 1251 + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); 1252 + return ERR_PTR(-ENETUNREACH); 1253 + } 1254 + if (dst->dev == dev) { /* is this necessary? */ 1255 + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); 1256 + dst_release(dst); 1257 + return ERR_PTR(-ELOOP); 1258 + } 1259 + #ifdef CONFIG_DST_CACHE 1260 + if (use_cache) 1261 + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); 1262 + #endif 1263 + *saddr = fl6.saddr; 1264 + return dst; 1265 + } 1266 + EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); 1199 1267 1200 1268 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, 1201 1269 gfp_t gfp)