Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tunneling: Add generic Tunnel segmentation.

Adds generic tunneling offload support for IPv4 UDP-based
tunnels.
A new GSO type, SKB_GSO_UDP_TUNNEL, is added to request this offload for an skb.
The netdev feature NETIF_F_UDP_TUNNEL is added for hardware-offloaded
UDP tunnel support. Currently no device supports this feature, so
software offload is used.

This can be used by tunneling protocols like VXLAN.

CC: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Pravin B Shelar and committed by
David S. Miller
73136267 aefbd2b3

+111 -30
+4 -3
include/linux/netdev_features.h
··· 42 42 NETIF_F_TSO6_BIT, /* ... TCPv6 segmentation */ 43 43 NETIF_F_FSO_BIT, /* ... FCoE segmentation */ 44 44 NETIF_F_GSO_GRE_BIT, /* ... GRE with TSO */ 45 - /**/NETIF_F_GSO_LAST, /* [can't be last bit, see GSO_MASK] */ 46 - NETIF_F_GSO_RESERVED2 /* ... free (fill GSO_MASK to 8 bits) */ 47 - = NETIF_F_GSO_LAST, 45 + NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ 46 + /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ 47 + NETIF_F_GSO_UDP_TUNNEL_BIT, 48 48 49 49 NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ 50 50 NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */ ··· 103 103 #define NETIF_F_RXFCS __NETIF_F(RXFCS) 104 104 #define NETIF_F_RXALL __NETIF_F(RXALL) 105 105 #define NETIF_F_GRE_GSO __NETIF_F(GSO_GRE) 106 + #define NETIF_F_UDP_TUNNEL __NETIF_F(UDP_TUNNEL) 106 107 107 108 /* Features valid for ethtool to change */ 108 109 /* = all defined minus driver/device-class-related */
+2
include/linux/skbuff.h
··· 316 316 SKB_GSO_FCOE = 1 << 5, 317 317 318 318 SKB_GSO_GRE = 1 << 6, 319 + 320 + SKB_GSO_UDP_TUNNEL = 1 << 7, 319 321 }; 320 322 321 323 #if BITS_PER_LONG > 32
+1
net/core/ethtool.c
··· 78 78 [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", 79 79 [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", 80 80 [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", 81 + [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", 81 82 82 83 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 83 84 [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp",
+5 -1
net/ipv4/af_inet.c
··· 1283 1283 int ihl; 1284 1284 int id; 1285 1285 unsigned int offset = 0; 1286 + bool tunnel; 1286 1287 1287 1288 if (unlikely(skb_shinfo(skb)->gso_type & 1288 1289 ~(SKB_GSO_TCPV4 | ··· 1291 1290 SKB_GSO_DODGY | 1292 1291 SKB_GSO_TCP_ECN | 1293 1292 SKB_GSO_GRE | 1293 + SKB_GSO_UDP_TUNNEL | 1294 1294 0))) 1295 1295 goto out; 1296 1296 ··· 1305 1303 1306 1304 if (unlikely(!pskb_may_pull(skb, ihl))) 1307 1305 goto out; 1306 + 1307 + tunnel = !!skb->encapsulation; 1308 1308 1309 1309 __skb_pull(skb, ihl); 1310 1310 skb_reset_transport_header(skb); ··· 1327 1323 skb = segs; 1328 1324 do { 1329 1325 iph = ip_hdr(skb); 1330 - if (proto == IPPROTO_UDP) { 1326 + if (!tunnel && proto == IPPROTO_UDP) { 1331 1327 iph->id = htons(id); 1332 1328 iph->frag_off = htons(offset >> 3); 1333 1329 if (skb->next != NULL)
+1
net/ipv4/tcp.c
··· 3044 3044 SKB_GSO_TCP_ECN | 3045 3045 SKB_GSO_TCPV6 | 3046 3046 SKB_GSO_GRE | 3047 + SKB_GSO_UDP_TUNNEL | 3047 3048 0) || 3048 3049 !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) 3049 3050 goto out;
+90 -25
net/ipv4/udp.c
··· 2272 2272 2273 2273 int udp4_ufo_send_check(struct sk_buff *skb) 2274 2274 { 2275 - const struct iphdr *iph; 2276 - struct udphdr *uh; 2277 - 2278 - if (!pskb_may_pull(skb, sizeof(*uh))) 2275 + if (!pskb_may_pull(skb, sizeof(struct udphdr))) 2279 2276 return -EINVAL; 2280 2277 2281 - iph = ip_hdr(skb); 2282 - uh = udp_hdr(skb); 2278 + if (likely(!skb->encapsulation)) { 2279 + const struct iphdr *iph; 2280 + struct udphdr *uh; 2283 2281 2284 - uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 2285 - IPPROTO_UDP, 0); 2286 - skb->csum_start = skb_transport_header(skb) - skb->head; 2287 - skb->csum_offset = offsetof(struct udphdr, check); 2288 - skb->ip_summed = CHECKSUM_PARTIAL; 2282 + iph = ip_hdr(skb); 2283 + uh = udp_hdr(skb); 2284 + 2285 + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, 2286 + IPPROTO_UDP, 0); 2287 + skb->csum_start = skb_transport_header(skb) - skb->head; 2288 + skb->csum_offset = offsetof(struct udphdr, check); 2289 + skb->ip_summed = CHECKSUM_PARTIAL; 2290 + } 2289 2291 return 0; 2292 + } 2293 + 2294 + static struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, 2295 + netdev_features_t features) 2296 + { 2297 + struct sk_buff *segs = ERR_PTR(-EINVAL); 2298 + int mac_len = skb->mac_len; 2299 + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); 2300 + int outer_hlen; 2301 + netdev_features_t enc_features; 2302 + 2303 + if (unlikely(!pskb_may_pull(skb, tnl_hlen))) 2304 + goto out; 2305 + 2306 + skb->encapsulation = 0; 2307 + __skb_pull(skb, tnl_hlen); 2308 + skb_reset_mac_header(skb); 2309 + skb_set_network_header(skb, skb_inner_network_offset(skb)); 2310 + skb->mac_len = skb_inner_network_offset(skb); 2311 + 2312 + /* segment inner packet. 
*/ 2313 + enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); 2314 + segs = skb_mac_gso_segment(skb, enc_features); 2315 + if (!segs || IS_ERR(segs)) 2316 + goto out; 2317 + 2318 + outer_hlen = skb_tnl_header_len(skb); 2319 + skb = segs; 2320 + do { 2321 + struct udphdr *uh; 2322 + int udp_offset = outer_hlen - tnl_hlen; 2323 + 2324 + skb->mac_len = mac_len; 2325 + 2326 + skb_push(skb, outer_hlen); 2327 + skb_reset_mac_header(skb); 2328 + skb_set_network_header(skb, mac_len); 2329 + skb_set_transport_header(skb, udp_offset); 2330 + uh = udp_hdr(skb); 2331 + uh->len = htons(skb->len - udp_offset); 2332 + 2333 + /* csum segment if tunnel sets skb with csum. */ 2334 + if (unlikely(uh->check)) { 2335 + struct iphdr *iph = ip_hdr(skb); 2336 + 2337 + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 2338 + skb->len - udp_offset, 2339 + IPPROTO_UDP, 0); 2340 + uh->check = csum_fold(skb_checksum(skb, udp_offset, 2341 + skb->len - udp_offset, 0)); 2342 + if (uh->check == 0) 2343 + uh->check = CSUM_MANGLED_0; 2344 + 2345 + } 2346 + skb->ip_summed = CHECKSUM_NONE; 2347 + } while ((skb = skb->next)); 2348 + out: 2349 + return segs; 2290 2350 } 2291 2351 2292 2352 struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, ··· 2354 2294 { 2355 2295 struct sk_buff *segs = ERR_PTR(-EINVAL); 2356 2296 unsigned int mss; 2357 - int offset; 2358 - __wsum csum; 2359 - 2360 2297 mss = skb_shinfo(skb)->gso_size; 2361 2298 if (unlikely(skb->len <= mss)) 2362 2299 goto out; ··· 2363 2306 int type = skb_shinfo(skb)->gso_type; 2364 2307 2365 2308 if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | 2309 + SKB_GSO_UDP_TUNNEL | 2366 2310 SKB_GSO_GRE) || 2367 2311 !(type & (SKB_GSO_UDP)))) 2368 2312 goto out; ··· 2374 2316 goto out; 2375 2317 } 2376 2318 2377 - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 2378 - * do checksum of UDP packets sent as multiple IP fragments. 
2379 - */ 2380 - offset = skb_checksum_start_offset(skb); 2381 - csum = skb_checksum(skb, offset, skb->len - offset, 0); 2382 - offset += skb->csum_offset; 2383 - *(__sum16 *)(skb->data + offset) = csum_fold(csum); 2384 - skb->ip_summed = CHECKSUM_NONE; 2385 - 2386 2319 /* Fragment the skb. IP headers of the fragments are updated in 2387 2320 * inet_gso_segment() 2388 2321 */ 2389 - segs = skb_segment(skb, features); 2322 + if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) 2323 + segs = skb_udp_tunnel_segment(skb, features); 2324 + else { 2325 + int offset; 2326 + __wsum csum; 2327 + 2328 + /* Do software UFO. Complete and fill in the UDP checksum as 2329 + * HW cannot do checksum of UDP packets sent as multiple 2330 + * IP fragments. 2331 + */ 2332 + offset = skb_checksum_start_offset(skb); 2333 + csum = skb_checksum(skb, offset, skb->len - offset, 0); 2334 + offset += skb->csum_offset; 2335 + *(__sum16 *)(skb->data + offset) = csum_fold(csum); 2336 + skb->ip_summed = CHECKSUM_NONE; 2337 + 2338 + segs = skb_segment(skb, features); 2339 + } 2390 2340 out: 2391 2341 return segs; 2392 2342 } 2393 -
+1
net/ipv6/ip6_offload.c
··· 97 97 SKB_GSO_DODGY | 98 98 SKB_GSO_TCP_ECN | 99 99 SKB_GSO_GRE | 100 + SKB_GSO_UDP_TUNNEL | 100 101 SKB_GSO_TCPV6 | 101 102 0))) 102 103 goto out;
+7 -1
net/ipv6/udp_offload.c
··· 21 21 const struct ipv6hdr *ipv6h; 22 22 struct udphdr *uh; 23 23 24 + /* UDP Tunnel offload on ipv6 is not yet supported. */ 25 + if (skb->encapsulation) 26 + return -EINVAL; 27 + 24 28 if (!pskb_may_pull(skb, sizeof(*uh))) 25 29 return -EINVAL; 26 30 ··· 60 56 /* Packet is from an untrusted source, reset gso_segs. */ 61 57 int type = skb_shinfo(skb)->gso_type; 62 58 63 - if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | 59 + if (unlikely(type & ~(SKB_GSO_UDP | 60 + SKB_GSO_DODGY | 61 + SKB_GSO_UDP_TUNNEL | 64 62 SKB_GSO_GRE) || 65 63 !(type & (SKB_GSO_UDP)))) 66 64 goto out;