Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: accept UFO datagrams from tuntap and packet

Tuntap and similar devices can inject GSO packets. Accept type
VIRTIO_NET_HDR_GSO_UDP, even though not generating UFO natively.

Processes are expected to use feature negotiation such as TUNSETOFFLOAD
to detect supported offload types and refrain from injecting other
packets. This process breaks down with live migration: guest kernels
do not renegotiate flags, so destination hosts need to expose all
features that the source host does.

Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677.
This patch introduces nearly(*) no new code to simplify verification.
It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP
insertion and software UFO segmentation.

It does not reinstate protocol stack support, hardware offload
(NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception
of VIRTIO_NET_HDR_GSO_UDP packets in tuntap.

To support SKB_GSO_UDP reappearing in the stack, also reinstate
logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD
by squashing in commit 939912216fa8 ("net: skb_needs_check() removes
CHECKSUM_UNNECESSARY check for tx.") and reverting commit 8d63bee643f1
("net: avoid skb_warn_bad_offload false positives on UFO").

(*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id,
ipv6_proxy_select_ident is changed to return a __be32 and this is
assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted
at the end of the enum to minimize code churn.

Tested
Booted a v4.13 guest kernel with QEMU. On a host kernel before this
patch `ethtool -k eth0` shows UFO disabled. After the patch, it is
enabled, same as on a v4.13 host kernel.

A UFO packet sent from the guest appears on the tap device:
host:
nc -l -u -p 8000 &
tcpdump -n -i tap0

guest:
dd if=/dev/zero of=payload.txt bs=1 count=2000
nc -u 192.16.1.1 8000 < payload.txt

Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds,
packets arriving fragmented:

./with_tap_pair.sh ./tap_send_ufo tap0 tap1
(from https://github.com/wdebruij/kerneltools/tree/master/tests)

Changes
v1 -> v2
- simplified set_offload change (review comment)
- documented test procedure

Link: http://lkml.kernel.org/r/<CAF=yD-LuUeDuL9YWPJD9ykOZ0QCjNeznPDr6whqZ9NGMNF12Mw@mail.gmail.com>
Fixes: fb652fdfe837 ("macvlan/macvtap: Remove NETIF_F_UFO advertisement.")
Reported-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Willem de Bruijn; committed by David S. Miller.
Commit: 0c19f846 (parent: 9e77d7a5)

+209 -14
+1 -1
drivers/net/tap.c
··· 1077 1077 case TUNSETOFFLOAD: 1078 1078 /* let the user check for future flags */ 1079 1079 if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | 1080 - TUN_F_TSO_ECN)) 1080 + TUN_F_TSO_ECN | TUN_F_UFO)) 1081 1081 return -EINVAL; 1082 1082 1083 1083 rtnl_lock();
+2
drivers/net/tun.c
··· 2370 2370 features |= NETIF_F_TSO6; 2371 2371 arg &= ~(TUN_F_TSO4|TUN_F_TSO6); 2372 2372 } 2373 + 2374 + arg &= ~TUN_F_UFO; 2373 2375 } 2374 2376 2375 2377 /* This gives the user a way to test for new features in future by
+3 -1
include/linux/netdev_features.h
··· 54 54 NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ 55 55 NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ 56 56 NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ 57 + NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ 57 58 /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ 58 - NETIF_F_GSO_ESP_BIT, 59 + NETIF_F_GSO_UDP_BIT, 59 60 60 61 NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ 61 62 NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ ··· 133 132 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) 134 133 #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) 135 134 #define NETIF_F_GSO_ESP __NETIF_F(GSO_ESP) 135 + #define NETIF_F_GSO_UDP __NETIF_F(GSO_UDP) 136 136 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) 137 137 #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) 138 138 #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
+1
include/linux/netdevice.h
··· 4140 4140 BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); 4141 4141 BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); 4142 4142 BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); 4143 + BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); 4143 4144 4144 4145 return (features & feature) == feature; 4145 4146 }
+2
include/linux/skbuff.h
··· 568 568 SKB_GSO_SCTP = 1 << 14, 569 569 570 570 SKB_GSO_ESP = 1 << 15, 571 + 572 + SKB_GSO_UDP = 1 << 16, 571 573 }; 572 574 573 575 #if BITS_PER_LONG > 32
+4 -1
include/linux/virtio_net.h
··· 9 9 const struct virtio_net_hdr *hdr, 10 10 bool little_endian) 11 11 { 12 - unsigned short gso_type = 0; 12 + unsigned int gso_type = 0; 13 13 14 14 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { 15 15 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { ··· 18 18 break; 19 19 case VIRTIO_NET_HDR_GSO_TCPV6: 20 20 gso_type = SKB_GSO_TCPV6; 21 + break; 22 + case VIRTIO_NET_HDR_GSO_UDP: 23 + gso_type = SKB_GSO_UDP; 21 24 break; 22 25 default: 23 26 return -EINVAL;
+1
include/net/ipv6.h
··· 767 767 __be32 ipv6_select_ident(struct net *net, 768 768 const struct in6_addr *daddr, 769 769 const struct in6_addr *saddr); 770 + __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb); 770 771 771 772 int ip6_dst_hoplimit(struct dst_entry *dst); 772 773
+2 -1
net/core/dev.c
··· 2746 2746 static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) 2747 2747 { 2748 2748 if (tx_path) 2749 - return skb->ip_summed != CHECKSUM_PARTIAL; 2749 + return skb->ip_summed != CHECKSUM_PARTIAL && 2750 + skb->ip_summed != CHECKSUM_UNNECESSARY; 2750 2751 2751 2752 return skb->ip_summed == CHECKSUM_NONE; 2752 2753 }
+10 -2
net/ipv4/af_inet.c
··· 1223 1223 struct sk_buff *inet_gso_segment(struct sk_buff *skb, 1224 1224 netdev_features_t features) 1225 1225 { 1226 - bool fixedid = false, gso_partial, encap; 1226 + bool udpfrag = false, fixedid = false, gso_partial, encap; 1227 1227 struct sk_buff *segs = ERR_PTR(-EINVAL); 1228 1228 const struct net_offload *ops; 1229 + unsigned int offset = 0; 1229 1230 struct iphdr *iph; 1230 1231 int proto, tot_len; 1231 1232 int nhoff; ··· 1261 1260 segs = ERR_PTR(-EPROTONOSUPPORT); 1262 1261 1263 1262 if (!skb->encapsulation || encap) { 1263 + udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); 1264 1264 fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); 1265 1265 1266 1266 /* fixed ID is invalid if DF bit is not set */ ··· 1281 1279 skb = segs; 1282 1280 do { 1283 1281 iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); 1284 - if (skb_is_gso(skb)) { 1282 + if (udpfrag) { 1283 + iph->frag_off = htons(offset >> 3); 1284 + if (skb->next) 1285 + iph->frag_off |= htons(IP_MF); 1286 + offset += skb->len - nhoff - ihl; 1287 + tot_len = skb->len - nhoff; 1288 + } else if (skb_is_gso(skb)) { 1285 1289 if (!fixedid) { 1286 1290 iph->id = htons(id); 1287 1291 id += skb_shinfo(skb)->gso_segs;
+45 -4
net/ipv4/udp_offload.c
··· 187 187 } 188 188 EXPORT_SYMBOL(skb_udp_tunnel_segment); 189 189 190 - static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, 191 - netdev_features_t features) 190 + static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, 191 + netdev_features_t features) 192 192 { 193 193 struct sk_buff *segs = ERR_PTR(-EINVAL); 194 + unsigned int mss; 195 + __wsum csum; 196 + struct udphdr *uh; 197 + struct iphdr *iph; 194 198 195 199 if (skb->encapsulation && 196 200 (skb_shinfo(skb)->gso_type & 197 - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) 201 + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { 198 202 segs = skb_udp_tunnel_segment(skb, features, false); 203 + goto out; 204 + } 199 205 206 + if (!pskb_may_pull(skb, sizeof(struct udphdr))) 207 + goto out; 208 + 209 + mss = skb_shinfo(skb)->gso_size; 210 + if (unlikely(skb->len <= mss)) 211 + goto out; 212 + 213 + /* Do software UFO. Complete and fill in the UDP checksum as 214 + * HW cannot do checksum of UDP packets sent as multiple 215 + * IP fragments. 216 + */ 217 + 218 + uh = udp_hdr(skb); 219 + iph = ip_hdr(skb); 220 + 221 + uh->check = 0; 222 + csum = skb_checksum(skb, 0, skb->len, 0); 223 + uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); 224 + if (uh->check == 0) 225 + uh->check = CSUM_MANGLED_0; 226 + 227 + skb->ip_summed = CHECKSUM_UNNECESSARY; 228 + 229 + /* If there is no outer header we can fake a checksum offload 230 + * due to the fact that we have already done the checksum in 231 + * software prior to segmenting the frame. 232 + */ 233 + if (!skb->encap_hdr_csum) 234 + features |= NETIF_F_HW_CSUM; 235 + 236 + /* Fragment the skb. 
IP headers of the fragments are updated in 237 + * inet_gso_segment() 238 + */ 239 + segs = skb_segment(skb, features); 240 + out: 200 241 return segs; 201 242 } 202 243 ··· 371 330 372 331 static const struct net_offload udpv4_offload = { 373 332 .callbacks = { 374 - .gso_segment = udp4_tunnel_segment, 333 + .gso_segment = udp4_ufo_fragment, 375 334 .gro_receive = udp4_gro_receive, 376 335 .gro_complete = udp4_gro_complete, 377 336 },
+31
net/ipv6/output_core.c
··· 31 31 return id; 32 32 } 33 33 34 + /* This function exists only for tap drivers that must support broken 35 + * clients requesting UFO without specifying an IPv6 fragment ID. 36 + * 37 + * This is similar to ipv6_select_ident() but we use an independent hash 38 + * seed to limit information leakage. 39 + * 40 + * The network header must be set before calling this. 41 + */ 42 + __be32 ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) 43 + { 44 + static u32 ip6_proxy_idents_hashrnd __read_mostly; 45 + struct in6_addr buf[2]; 46 + struct in6_addr *addrs; 47 + u32 id; 48 + 49 + addrs = skb_header_pointer(skb, 50 + skb_network_offset(skb) + 51 + offsetof(struct ipv6hdr, saddr), 52 + sizeof(buf), buf); 53 + if (!addrs) 54 + return 0; 55 + 56 + net_get_random_once(&ip6_proxy_idents_hashrnd, 57 + sizeof(ip6_proxy_idents_hashrnd)); 58 + 59 + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, 60 + &addrs[1], &addrs[0]); 61 + return htonl(id); 62 + } 63 + EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); 64 + 34 65 __be32 ipv6_select_ident(struct net *net, 35 66 const struct in6_addr *daddr, 36 67 const struct in6_addr *saddr)
+82 -3
net/ipv6/udp_offload.c
··· 17 17 #include <net/ip6_checksum.h> 18 18 #include "ip6_offload.h" 19 19 20 - static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, 21 - netdev_features_t features) 20 + static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, 21 + netdev_features_t features) 22 22 { 23 23 struct sk_buff *segs = ERR_PTR(-EINVAL); 24 + unsigned int mss; 25 + unsigned int unfrag_ip6hlen, unfrag_len; 26 + struct frag_hdr *fptr; 27 + u8 *packet_start, *prevhdr; 28 + u8 nexthdr; 29 + u8 frag_hdr_sz = sizeof(struct frag_hdr); 30 + __wsum csum; 31 + int tnl_hlen; 32 + int err; 33 + 34 + mss = skb_shinfo(skb)->gso_size; 35 + if (unlikely(skb->len <= mss)) 36 + goto out; 24 37 25 38 if (skb->encapsulation && skb_shinfo(skb)->gso_type & 26 39 (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) 27 40 segs = skb_udp_tunnel_segment(skb, features, true); 41 + else { 42 + const struct ipv6hdr *ipv6h; 43 + struct udphdr *uh; 28 44 45 + if (!pskb_may_pull(skb, sizeof(struct udphdr))) 46 + goto out; 47 + 48 + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot 49 + * do checksum of UDP packets sent as multiple IP fragments. 50 + */ 51 + 52 + uh = udp_hdr(skb); 53 + ipv6h = ipv6_hdr(skb); 54 + 55 + uh->check = 0; 56 + csum = skb_checksum(skb, 0, skb->len, 0); 57 + uh->check = udp_v6_check(skb->len, &ipv6h->saddr, 58 + &ipv6h->daddr, csum); 59 + if (uh->check == 0) 60 + uh->check = CSUM_MANGLED_0; 61 + 62 + skb->ip_summed = CHECKSUM_UNNECESSARY; 63 + 64 + /* If there is no outer header we can fake a checksum offload 65 + * due to the fact that we have already done the checksum in 66 + * software prior to segmenting the frame. 67 + */ 68 + if (!skb->encap_hdr_csum) 69 + features |= NETIF_F_HW_CSUM; 70 + 71 + /* Check if there is enough headroom to insert fragment header. 
*/ 72 + tnl_hlen = skb_tnl_header_len(skb); 73 + if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { 74 + if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) 75 + goto out; 76 + } 77 + 78 + /* Find the unfragmentable header and shift it left by frag_hdr_sz 79 + * bytes to insert fragment header. 80 + */ 81 + err = ip6_find_1stfragopt(skb, &prevhdr); 82 + if (err < 0) 83 + return ERR_PTR(err); 84 + unfrag_ip6hlen = err; 85 + nexthdr = *prevhdr; 86 + *prevhdr = NEXTHDR_FRAGMENT; 87 + unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + 88 + unfrag_ip6hlen + tnl_hlen; 89 + packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; 90 + memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); 91 + 92 + SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; 93 + skb->mac_header -= frag_hdr_sz; 94 + skb->network_header -= frag_hdr_sz; 95 + 96 + fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); 97 + fptr->nexthdr = nexthdr; 98 + fptr->reserved = 0; 99 + fptr->identification = ipv6_proxy_select_ident(dev_net(skb->dev), skb); 100 + 101 + /* Fragment the skb. ipv6 header and the remaining fields of the 102 + * fragment header are updated in ipv6_gso_segment() 103 + */ 104 + segs = skb_segment(skb, features); 105 + } 106 + 107 + out: 29 108 return segs; 30 109 } 31 110 ··· 154 75 155 76 static const struct net_offload udpv6_offload = { 156 77 .callbacks = { 157 - .gso_segment = udp6_tunnel_segment, 78 + .gso_segment = udp6_ufo_fragment, 158 79 .gro_receive = udp6_gro_receive, 159 80 .gro_complete = udp6_gro_complete, 160 81 },
+14
net/openvswitch/datapath.c
··· 308 308 const struct dp_upcall_info *upcall_info, 309 309 uint32_t cutlen) 310 310 { 311 + unsigned short gso_type = skb_shinfo(skb)->gso_type; 312 + struct sw_flow_key later_key; 311 313 struct sk_buff *segs, *nskb; 312 314 int err; 313 315 ··· 320 318 if (segs == NULL) 321 319 return -EINVAL; 322 320 321 + if (gso_type & SKB_GSO_UDP) { 322 + /* The initial flow key extracted by ovs_flow_key_extract() 323 + * in this case is for a first fragment, so we need to 324 + * properly mark later fragments. 325 + */ 326 + later_key = *key; 327 + later_key.ip.frag = OVS_FRAG_TYPE_LATER; 328 + } 329 + 323 330 /* Queue all of the segments. */ 324 331 skb = segs; 325 332 do { 333 + if (gso_type & SKB_GSO_UDP && skb != segs) 334 + key = &later_key; 335 + 326 336 err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); 327 337 if (err) 328 338 break;
+5 -1
net/openvswitch/flow.c
··· 631 631 key->ip.frag = OVS_FRAG_TYPE_LATER; 632 632 return 0; 633 633 } 634 - if (nh->frag_off & htons(IP_MF)) 634 + if (nh->frag_off & htons(IP_MF) || 635 + skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 635 636 key->ip.frag = OVS_FRAG_TYPE_FIRST; 636 637 else 637 638 key->ip.frag = OVS_FRAG_TYPE_NONE; ··· 748 747 749 748 if (key->ip.frag == OVS_FRAG_TYPE_LATER) 750 749 return 0; 750 + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 751 + key->ip.frag = OVS_FRAG_TYPE_FIRST; 752 + 751 753 /* Transport layer. */ 752 754 if (key->ip.proto == NEXTHDR_TCP) { 753 755 if (tcphdr_ok(skb)) {
+6
net/sched/act_csum.c
··· 229 229 const struct iphdr *iph; 230 230 u16 ul; 231 231 232 + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 233 + return 1; 234 + 232 235 /* 233 236 * Support both UDP and UDPLITE checksum algorithms, Don't use 234 237 * udph->len to get the real length without any protocol check, ··· 284 281 struct udphdr *udph; 285 282 const struct ipv6hdr *ip6h; 286 283 u16 ul; 284 + 285 + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) 286 + return 1; 287 287 288 288 /* 289 289 * Support both UDP and UDPLITE checksum algorithms, Don't use