Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tun: enable gso over UDP tunnel support.

Add new tun features to represent the newly introduced virtio
GSO over UDP tunnel offload. Allows detection and selection of
such features via the existing TUNSETOFFLOAD ioctl and computes
the expected virtio header size and tunnel header offset using
the current netdev features, so that we can plug almost seamlessly
the newly introduced virtio helpers to serialize the extended
virtio header.

Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
v6 -> v7:
- rebased

v4 -> v5:
- encapsulate the guest feature guessing in a tun helper
- dropped irrelevant check on xdp buff headroom
- do not remove unrelated blank line
- avoid line len > 80 char

v3 -> v4:
- virtio tnl-related fields are at fixed offset, cleanup
the code accordingly.
- use netdev features instead of flags bit to check for
the configured offload
- drop packet in case of enabled features/configured hdr
size mismatch

v2 -> v3:
- cleaned-up uAPI comments
- use explicit struct layout instead of raw buf.

+156 -24
+49 -9
drivers/net/tun.c
··· 186 186 struct net_device *dev; 187 187 netdev_features_t set_features; 188 188 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \ 189 - NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4) 189 + NETIF_F_TSO6 | NETIF_F_GSO_UDP_L4 | \ 190 + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM) 190 191 191 192 int align; 192 193 int vnet_hdr_sz; ··· 926 925 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | 927 926 TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX | 928 927 NETIF_F_HW_VLAN_STAG_TX; 928 + dev->hw_enc_features = dev->hw_features; 929 929 dev->features = dev->hw_features; 930 930 dev->vlan_features = dev->features & 931 931 ~(NETIF_F_HW_VLAN_CTAG_TX | ··· 1700 1698 struct sk_buff *skb; 1701 1699 size_t total_len = iov_iter_count(from); 1702 1700 size_t len = total_len, align = tun->align, linear; 1703 - struct virtio_net_hdr gso = { 0 }; 1701 + struct virtio_net_hdr_v1_hash_tunnel hdr; 1702 + struct virtio_net_hdr *gso; 1704 1703 int good_linear; 1705 1704 int copylen; 1706 1705 int hdr_len = 0; ··· 1711 1708 int skb_xdp = 1; 1712 1709 bool frags = tun_napi_frags_enabled(tfile); 1713 1710 enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1711 + netdev_features_t features = 0; 1712 + 1713 + /* 1714 + * Keep it easy and always zero the whole buffer, even if the 1715 + * tunnel-related field will be touched only when the feature 1716 + * is enabled and the hdr size id compatible. 
1717 + */ 1718 + memset(&hdr, 0, sizeof(hdr)); 1719 + gso = (struct virtio_net_hdr *)&hdr; 1714 1720 1715 1721 if (!(tun->flags & IFF_NO_PI)) { 1716 1722 if (len < sizeof(pi)) ··· 1733 1721 if (tun->flags & IFF_VNET_HDR) { 1734 1722 int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); 1735 1723 1736 - hdr_len = tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, from, &gso); 1724 + features = tun_vnet_hdr_guest_features(vnet_hdr_sz); 1725 + hdr_len = __tun_vnet_hdr_get(vnet_hdr_sz, tun->flags, 1726 + features, from, gso); 1737 1727 if (hdr_len < 0) 1738 1728 return hdr_len; 1739 1729 ··· 1769 1755 * (e.g gso or jumbo packet), we will do it at after 1770 1756 * skb was created with generic XDP routine. 1771 1757 */ 1772 - skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp); 1758 + skb = tun_build_skb(tun, tfile, from, gso, len, &skb_xdp); 1773 1759 err = PTR_ERR_OR_ZERO(skb); 1774 1760 if (err) 1775 1761 goto drop; ··· 1813 1799 } 1814 1800 } 1815 1801 1816 - if (tun_vnet_hdr_to_skb(tun->flags, skb, &gso)) { 1802 + if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, &hdr)) { 1817 1803 atomic_long_inc(&tun->rx_frame_errors); 1818 1804 err = -EINVAL; 1819 1805 goto free_skb; ··· 2064 2050 } 2065 2051 2066 2052 if (vnet_hdr_sz) { 2067 - struct virtio_net_hdr gso; 2053 + struct virtio_net_hdr_v1_hash_tunnel hdr; 2054 + struct virtio_net_hdr *gso; 2068 2055 2069 - ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso); 2056 + ret = tun_vnet_hdr_tnl_from_skb(tun->flags, tun->dev, skb, 2057 + &hdr); 2070 2058 if (ret) 2071 2059 return ret; 2072 2060 2073 - ret = tun_vnet_hdr_put(vnet_hdr_sz, iter, &gso); 2061 + /* 2062 + * Drop the packet if the configured header size is too small 2063 + * WRT the enabled offloads. 
2064 + */ 2065 + gso = (struct virtio_net_hdr *)&hdr; 2066 + ret = __tun_vnet_hdr_put(vnet_hdr_sz, tun->dev->features, 2067 + iter, gso); 2074 2068 if (ret) 2075 2069 return ret; 2076 2070 } ··· 2379 2357 { 2380 2358 unsigned int datasize = xdp->data_end - xdp->data; 2381 2359 struct tun_xdp_hdr *hdr = xdp->data_hard_start; 2360 + struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr; 2382 2361 struct virtio_net_hdr *gso = &hdr->gso; 2383 2362 struct bpf_prog *xdp_prog; 2384 2363 struct sk_buff *skb = NULL; 2385 2364 struct sk_buff_head *queue; 2365 + netdev_features_t features; 2386 2366 u32 rxhash = 0, act; 2387 2367 int buflen = hdr->buflen; 2388 2368 int metasize = 0; ··· 2450 2426 if (metasize > 0) 2451 2427 skb_metadata_set(skb, metasize); 2452 2428 2453 - if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) { 2429 + features = tun_vnet_hdr_guest_features(READ_ONCE(tun->vnet_hdr_sz)); 2430 + tnl_hdr = (struct virtio_net_hdr_v1_hash_tunnel *)gso; 2431 + if (tun_vnet_hdr_tnl_to_skb(tun->flags, features, skb, tnl_hdr)) { 2454 2432 atomic_long_inc(&tun->rx_frame_errors); 2455 2433 kfree_skb(skb); 2456 2434 ret = -EINVAL; ··· 2838 2812 2839 2813 } 2840 2814 2815 + #define PLAIN_GSO (NETIF_F_GSO_UDP_L4 | NETIF_F_TSO | NETIF_F_TSO6) 2816 + 2841 2817 /* This is like a cut-down ethtool ops, except done via tun fd so no 2842 2818 * privs required. */ 2843 2819 static int set_offload(struct tun_struct *tun, unsigned long arg) ··· 2868 2840 if (arg & TUN_F_USO4 && arg & TUN_F_USO6) { 2869 2841 features |= NETIF_F_GSO_UDP_L4; 2870 2842 arg &= ~(TUN_F_USO4 | TUN_F_USO6); 2843 + } 2844 + 2845 + /* 2846 + * Tunnel offload is allowed only if some plain offload is 2847 + * available, too. 
2848 + */ 2849 + if (features & PLAIN_GSO && arg & TUN_F_UDP_TUNNEL_GSO) { 2850 + features |= NETIF_F_GSO_UDP_TUNNEL; 2851 + if (arg & TUN_F_UDP_TUNNEL_GSO_CSUM) 2852 + features |= NETIF_F_GSO_UDP_TUNNEL_CSUM; 2853 + arg &= ~(TUN_F_UDP_TUNNEL_GSO | 2854 + TUN_F_UDP_TUNNEL_GSO_CSUM); 2871 2855 } 2872 2856 } 2873 2857
+98 -15
drivers/net/tun_vnet.h
··· 6 6 #define TUN_VNET_LE 0x80000000 7 7 #define TUN_VNET_BE 0x40000000 8 8 9 + #define TUN_VNET_TNL_SIZE sizeof(struct virtio_net_hdr_v1_hash_tunnel) 10 + 9 11 static inline bool tun_vnet_legacy_is_little_endian(unsigned int flags) 10 12 { 11 13 bool be = IS_ENABLED(CONFIG_TUN_VNET_CROSS_LE) && ··· 109 107 } 110 108 } 111 109 112 - static inline int tun_vnet_hdr_get(int sz, unsigned int flags, 113 - struct iov_iter *from, 114 - struct virtio_net_hdr *hdr) 110 + static inline unsigned int tun_vnet_parse_size(netdev_features_t features) 115 111 { 112 + if (!(features & NETIF_F_GSO_UDP_TUNNEL)) 113 + return sizeof(struct virtio_net_hdr); 114 + 115 + return TUN_VNET_TNL_SIZE; 116 + } 117 + 118 + static inline int __tun_vnet_hdr_get(int sz, unsigned int flags, 119 + netdev_features_t features, 120 + struct iov_iter *from, 121 + struct virtio_net_hdr *hdr) 122 + { 123 + unsigned int parsed_size = tun_vnet_parse_size(features); 116 124 u16 hdr_len; 117 125 118 126 if (iov_iter_count(from) < sz) 119 127 return -EINVAL; 120 128 121 - if (!copy_from_iter_full(hdr, sizeof(*hdr), from)) 129 + if (!copy_from_iter_full(hdr, parsed_size, from)) 122 130 return -EFAULT; 123 131 124 132 hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); ··· 141 129 if (hdr_len > iov_iter_count(from)) 142 130 return -EINVAL; 143 131 144 - iov_iter_advance(from, sz - sizeof(*hdr)); 132 + iov_iter_advance(from, sz - parsed_size); 145 133 146 134 return hdr_len; 135 + } 136 + 137 + static inline int tun_vnet_hdr_get(int sz, unsigned int flags, 138 + struct iov_iter *from, 139 + struct virtio_net_hdr *hdr) 140 + { 141 + return __tun_vnet_hdr_get(sz, flags, 0, from, hdr); 142 + } 143 + 144 + static inline int __tun_vnet_hdr_put(int sz, netdev_features_t features, 145 + struct iov_iter *iter, 146 + const struct virtio_net_hdr *hdr) 147 + { 148 + unsigned int parsed_size = tun_vnet_parse_size(features); 149 + 150 + if (unlikely(iov_iter_count(iter) < sz)) 151 + return -EINVAL; 152 + 153 + if 
(unlikely(copy_to_iter(hdr, parsed_size, iter) != parsed_size)) 154 + return -EFAULT; 155 + 156 + if (iov_iter_zero(sz - parsed_size, iter) != sz - parsed_size) 157 + return -EFAULT; 158 + 159 + return 0; 147 160 } 148 161 149 162 static inline int tun_vnet_hdr_put(int sz, struct iov_iter *iter, 150 163 const struct virtio_net_hdr *hdr) 151 164 { 152 - if (unlikely(iov_iter_count(iter) < sz)) 153 - return -EINVAL; 154 - 155 - if (unlikely(copy_to_iter(hdr, sizeof(*hdr), iter) != sizeof(*hdr))) 156 - return -EFAULT; 157 - 158 - if (iov_iter_zero(sz - sizeof(*hdr), iter) != sz - sizeof(*hdr)) 159 - return -EFAULT; 160 - 161 - return 0; 165 + return __tun_vnet_hdr_put(sz, 0, iter, hdr); 162 166 } 163 167 164 168 static inline int tun_vnet_hdr_to_skb(unsigned int flags, struct sk_buff *skb, 165 169 const struct virtio_net_hdr *hdr) 166 170 { 167 171 return virtio_net_hdr_to_skb(skb, hdr, tun_vnet_is_little_endian(flags)); 172 + } 173 + 174 + /* 175 + * Tun is not aware of the negotiated guest features, guess them from the 176 + * virtio net hdr size 177 + */ 178 + static inline netdev_features_t tun_vnet_hdr_guest_features(int vnet_hdr_sz) 179 + { 180 + if (vnet_hdr_sz >= TUN_VNET_TNL_SIZE) 181 + return NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM; 182 + return 0; 183 + } 184 + 185 + static inline int 186 + tun_vnet_hdr_tnl_to_skb(unsigned int flags, netdev_features_t features, 187 + struct sk_buff *skb, 188 + const struct virtio_net_hdr_v1_hash_tunnel *hdr) 189 + { 190 + return virtio_net_hdr_tnl_to_skb(skb, hdr, 191 + features & NETIF_F_GSO_UDP_TUNNEL, 192 + features & NETIF_F_GSO_UDP_TUNNEL_CSUM, 193 + tun_vnet_is_little_endian(flags)); 168 194 } 169 195 170 196 static inline int tun_vnet_hdr_from_skb(unsigned int flags, ··· 225 175 DUMP_PREFIX_NONE, 226 176 16, 1, skb->head, 227 177 min(tun_vnet16_to_cpu(flags, hdr->hdr_len), 64), true); 178 + } 179 + WARN_ON_ONCE(1); 180 + return -EINVAL; 181 + } 182 + 183 + return 0; 184 + } 185 + 186 + static inline int 
187 + tun_vnet_hdr_tnl_from_skb(unsigned int flags, 188 + const struct net_device *dev, 189 + const struct sk_buff *skb, 190 + struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr) 191 + { 192 + bool has_tnl_offload = !!(dev->features & NETIF_F_GSO_UDP_TUNNEL); 193 + int vlan_hlen = skb_vlan_tag_present(skb) ? VLAN_HLEN : 0; 194 + 195 + if (virtio_net_hdr_tnl_from_skb(skb, tnl_hdr, has_tnl_offload, 196 + tun_vnet_is_little_endian(flags), 197 + vlan_hlen)) { 198 + struct virtio_net_hdr_v1 *hdr = &tnl_hdr->hash_hdr.hdr; 199 + struct skb_shared_info *sinfo = skb_shinfo(skb); 200 + 201 + if (net_ratelimit()) { 202 + int hdr_len = tun_vnet16_to_cpu(flags, hdr->hdr_len); 203 + 204 + netdev_err(dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n", 205 + sinfo->gso_type, 206 + tun_vnet16_to_cpu(flags, hdr->gso_size), 207 + tun_vnet16_to_cpu(flags, hdr->hdr_len)); 208 + print_hex_dump(KERN_ERR, "tun: ", DUMP_PREFIX_NONE, 209 + 16, 1, skb->head, min(hdr_len, 64), 210 + true); 228 211 } 229 212 WARN_ON_ONCE(1); 230 213 return -EINVAL;
+9
include/uapi/linux/if_tun.h
··· 93 93 #define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ 94 94 #define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ 95 95 96 + /* I can handle TSO/USO for UDP tunneled packets */ 97 + #define TUN_F_UDP_TUNNEL_GSO 0x080 98 + 99 + /* 100 + * I can handle TSO/USO for UDP tunneled packets requiring csum offload for 101 + * the outer header 102 + */ 103 + #define TUN_F_UDP_TUNNEL_GSO_CSUM 0x100 104 + 96 105 /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ 97 106 #define TUN_PKT_STRIP 0x0001 98 107 struct tun_pi {