Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'vxlan_group_policy_extension'

Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements support for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
# iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+491 -140
+89 -24
drivers/net/vxlan.c
··· 263 263 return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); 264 264 } 265 265 266 - /* Find VXLAN socket based on network namespace, address family and UDP port */ 267 - static struct vxlan_sock *vxlan_find_sock(struct net *net, 268 - sa_family_t family, __be16 port) 266 + /* Find VXLAN socket based on network namespace, address family and UDP port 267 + * and enabled unshareable flags. 268 + */ 269 + static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, 270 + __be16 port, u32 flags) 269 271 { 270 272 struct vxlan_sock *vs; 273 + u32 match_flags = flags & VXLAN_F_UNSHAREABLE; 271 274 272 275 hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { 273 276 if (inet_sk(vs->sock->sk)->inet_sport == port && 274 - inet_sk(vs->sock->sk)->sk.sk_family == family) 277 + inet_sk(vs->sock->sk)->sk.sk_family == family && 278 + (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) 275 279 return vs; 276 280 } 277 281 return NULL; ··· 295 291 296 292 /* Look up VNI in a per net namespace table */ 297 293 static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, 298 - sa_family_t family, __be16 port) 294 + sa_family_t family, __be16 port, 295 + u32 flags) 299 296 { 300 297 struct vxlan_sock *vs; 301 298 302 - vs = vxlan_find_sock(net, family, port); 299 + vs = vxlan_find_sock(net, family, port, flags); 303 300 if (!vs) 304 301 return NULL; 305 302 ··· 625 620 continue; 626 621 627 622 vh2 = (struct vxlanhdr *)(p->data + off_vx); 628 - if (vh->vx_vni != vh2->vx_vni) { 623 + if (vh->vx_flags != vh2->vx_flags || 624 + vh->vx_vni != vh2->vx_vni) { 629 625 NAPI_GRO_CB(p)->same_flow = 0; 630 626 continue; 631 627 } ··· 1189 1183 struct vxlan_sock *vs; 1190 1184 struct vxlanhdr *vxh; 1191 1185 u32 flags, vni; 1186 + struct vxlan_metadata md = {0}; 1192 1187 1193 1188 /* Need Vxlan and inner Ethernet header to be present */ 1194 1189 if (!pskb_may_pull(skb, VXLAN_HLEN)) ··· 1223 1216 vni &= VXLAN_VID_MASK; 1224 1217 } 1225 1218 1219 + /* For 
backwards compatibility, only allow reserved fields to be 1220 + * used by VXLAN extensions if explicitly requested. 1221 + */ 1222 + if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { 1223 + struct vxlanhdr_gbp *gbp; 1224 + 1225 + gbp = (struct vxlanhdr_gbp *)vxh; 1226 + md.gbp = ntohs(gbp->policy_id); 1227 + 1228 + if (gbp->dont_learn) 1229 + md.gbp |= VXLAN_GBP_DONT_LEARN; 1230 + 1231 + if (gbp->policy_applied) 1232 + md.gbp |= VXLAN_GBP_POLICY_APPLIED; 1233 + 1234 + flags &= ~VXLAN_GBP_USED_BITS; 1235 + } 1236 + 1226 1237 if (flags || (vni & ~VXLAN_VID_MASK)) { 1227 1238 /* If there are any unprocessed flags remaining treat 1228 1239 * this as a malformed packet. This behavior diverges from ··· 1254 1229 goto bad_flags; 1255 1230 } 1256 1231 1257 - vs->rcv(vs, skb, vxh->vx_vni); 1232 + md.vni = vxh->vx_vni; 1233 + vs->rcv(vs, skb, &md); 1258 1234 return 0; 1259 1235 1260 1236 drop: ··· 1272 1246 return 1; 1273 1247 } 1274 1248 1275 - static void vxlan_rcv(struct vxlan_sock *vs, 1276 - struct sk_buff *skb, __be32 vx_vni) 1249 + static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, 1250 + struct vxlan_metadata *md) 1277 1251 { 1278 1252 struct iphdr *oip = NULL; 1279 1253 struct ipv6hdr *oip6 = NULL; ··· 1284 1258 int err = 0; 1285 1259 union vxlan_addr *remote_ip; 1286 1260 1287 - vni = ntohl(vx_vni) >> 8; 1261 + vni = ntohl(md->vni) >> 8; 1288 1262 /* Is this VNI defined? 
*/ 1289 1263 vxlan = vxlan_vs_find_vni(vs, vni); 1290 1264 if (!vxlan) ··· 1318 1292 goto drop; 1319 1293 1320 1294 skb_reset_network_header(skb); 1295 + skb->mark = md->gbp; 1321 1296 1322 1297 if (oip6) 1323 1298 err = IP6_ECN_decapsulate(oip6, skb); ··· 1668 1641 return false; 1669 1642 } 1670 1643 1644 + static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, 1645 + struct vxlan_metadata *md) 1646 + { 1647 + struct vxlanhdr_gbp *gbp; 1648 + 1649 + gbp = (struct vxlanhdr_gbp *)vxh; 1650 + vxh->vx_flags |= htonl(VXLAN_HF_GBP); 1651 + 1652 + if (md->gbp & VXLAN_GBP_DONT_LEARN) 1653 + gbp->dont_learn = 1; 1654 + 1655 + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) 1656 + gbp->policy_applied = 1; 1657 + 1658 + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); 1659 + } 1660 + 1671 1661 #if IS_ENABLED(CONFIG_IPV6) 1672 1662 static int vxlan6_xmit_skb(struct vxlan_sock *vs, 1673 1663 struct dst_entry *dst, struct sk_buff *skb, 1674 1664 struct net_device *dev, struct in6_addr *saddr, 1675 1665 struct in6_addr *daddr, __u8 prio, __u8 ttl, 1676 - __be16 src_port, __be16 dst_port, __be32 vni, 1677 - bool xnet) 1666 + __be16 src_port, __be16 dst_port, 1667 + struct vxlan_metadata *md, bool xnet) 1678 1668 { 1679 1669 struct vxlanhdr *vxh; 1680 1670 int min_headroom; ··· 1740 1696 1741 1697 vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); 1742 1698 vxh->vx_flags = htonl(VXLAN_HF_VNI); 1743 - vxh->vx_vni = vni; 1699 + vxh->vx_vni = md->vni; 1744 1700 1745 1701 if (type & SKB_GSO_TUNNEL_REMCSUM) { 1746 1702 u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> ··· 1758 1714 } 1759 1715 } 1760 1716 1717 + if (vs->flags & VXLAN_F_GBP) 1718 + vxlan_build_gbp_hdr(vxh, vs, md); 1719 + 1761 1720 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 1762 1721 1763 1722 udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, ··· 1775 1728 int vxlan_xmit_skb(struct vxlan_sock *vs, 1776 1729 struct rtable *rt, struct sk_buff *skb, 1777 1730 __be32 src, 
__be32 dst, __u8 tos, __u8 ttl, __be16 df, 1778 - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) 1731 + __be16 src_port, __be16 dst_port, 1732 + struct vxlan_metadata *md, bool xnet) 1779 1733 { 1780 1734 struct vxlanhdr *vxh; 1781 1735 int min_headroom; ··· 1819 1771 1820 1772 vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); 1821 1773 vxh->vx_flags = htonl(VXLAN_HF_VNI); 1822 - vxh->vx_vni = vni; 1774 + vxh->vx_vni = md->vni; 1823 1775 1824 1776 if (type & SKB_GSO_TUNNEL_REMCSUM) { 1825 1777 u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> ··· 1836 1788 skb->encapsulation = 0; 1837 1789 } 1838 1790 } 1791 + 1792 + if (vs->flags & VXLAN_F_GBP) 1793 + vxlan_build_gbp_hdr(vxh, vs, md); 1839 1794 1840 1795 skb_set_inner_protocol(skb, htons(ETH_P_TEB)); 1841 1796 ··· 1900 1849 const struct iphdr *old_iph; 1901 1850 struct flowi4 fl4; 1902 1851 union vxlan_addr *dst; 1852 + struct vxlan_metadata md; 1903 1853 __be16 src_port = 0, dst_port; 1904 1854 u32 vni; 1905 1855 __be16 df = 0; ··· 1962 1910 1963 1911 ip_rt_put(rt); 1964 1912 dst_vxlan = vxlan_find_vni(vxlan->net, vni, 1965 - dst->sa.sa_family, dst_port); 1913 + dst->sa.sa_family, dst_port, 1914 + vxlan->flags); 1966 1915 if (!dst_vxlan) 1967 1916 goto tx_error; 1968 1917 vxlan_encap_bypass(skb, vxlan, dst_vxlan); ··· 1972 1919 1973 1920 tos = ip_tunnel_ecn_encap(tos, old_iph, skb); 1974 1921 ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); 1922 + md.vni = htonl(vni << 8); 1923 + md.gbp = skb->mark; 1975 1924 1976 1925 err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, 1977 1926 fl4.saddr, dst->sin.sin_addr.s_addr, 1978 - tos, ttl, df, src_port, dst_port, 1979 - htonl(vni << 8), 1927 + tos, ttl, df, src_port, dst_port, &md, 1980 1928 !net_eq(vxlan->net, dev_net(vxlan->dev))); 1981 1929 if (err < 0) { 1982 1930 /* skb is already freed. 
*/ ··· 2022 1968 2023 1969 dst_release(ndst); 2024 1970 dst_vxlan = vxlan_find_vni(vxlan->net, vni, 2025 - dst->sa.sa_family, dst_port); 1971 + dst->sa.sa_family, dst_port, 1972 + vxlan->flags); 2026 1973 if (!dst_vxlan) 2027 1974 goto tx_error; 2028 1975 vxlan_encap_bypass(skb, vxlan, dst_vxlan); ··· 2031 1976 } 2032 1977 2033 1978 ttl = ttl ? : ip6_dst_hoplimit(ndst); 1979 + md.vni = htonl(vni << 8); 1980 + md.gbp = skb->mark; 2034 1981 2035 1982 err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, 2036 1983 dev, &fl6.saddr, &fl6.daddr, 0, ttl, 2037 - src_port, dst_port, htonl(vni << 8), 1984 + src_port, dst_port, &md, 2038 1985 !net_eq(vxlan->net, dev_net(vxlan->dev))); 2039 1986 #endif 2040 1987 } ··· 2193 2136 2194 2137 spin_lock(&vn->sock_lock); 2195 2138 vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, 2196 - vxlan->dst_port); 2139 + vxlan->dst_port, vxlan->flags); 2197 2140 if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { 2198 2141 /* If we have a socket with same port already, reuse it */ 2199 2142 vxlan_vs_add_dev(vs, vxlan); ··· 2439 2382 [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, 2440 2383 [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, 2441 2384 [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, 2385 + [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, 2442 2386 }; 2443 2387 2444 2388 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) ··· 2600 2542 return vs; 2601 2543 2602 2544 spin_lock(&vn->sock_lock); 2603 - vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); 2545 + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags); 2604 2546 if (vs && ((vs->rcv != rcv) || 2605 2547 !atomic_add_unless(&vs->refcnt, 1, 0))) 2606 2548 vs = ERR_PTR(-EBUSY); ··· 2764 2706 nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) 2765 2707 vxlan->flags |= VXLAN_F_REMCSUM_RX; 2766 2708 2709 + if (data[IFLA_VXLAN_GBP]) 2710 + vxlan->flags |= VXLAN_F_GBP; 2711 + 2767 2712 if (vxlan_find_vni(net, vni, use_ipv6 ? 
AF_INET6 : AF_INET, 2768 - vxlan->dst_port)) { 2713 + vxlan->dst_port, vxlan->flags)) { 2769 2714 pr_info("duplicate VNI %u\n", vni); 2770 2715 return -EEXIST; 2771 2716 } ··· 2910 2849 goto nla_put_failure; 2911 2850 2912 2851 if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) 2852 + goto nla_put_failure; 2853 + 2854 + if (vxlan->flags & VXLAN_F_GBP && 2855 + nla_put_flag(skb, IFLA_VXLAN_GBP)) 2913 2856 goto nla_put_failure; 2914 2857 2915 2858 return 0;
+4 -1
include/net/ip_tunnels.h
··· 97 97 #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) 98 98 #define TUNNEL_OAM __cpu_to_be16(0x0200) 99 99 #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) 100 - #define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) 100 + #define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) 101 + #define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) 102 + 103 + #define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) 101 104 102 105 struct tnl_ptk_info { 103 106 __be16 flags;
+77 -5
include/net/vxlan.h
··· 11 11 #define VNI_HASH_BITS 10 12 12 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) 13 13 14 - /* VXLAN protocol header */ 14 + /* 15 + * VXLAN Group Based Policy Extension: 16 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 17 + * |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID | 18 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 19 + * | VXLAN Network Identifier (VNI) | Reserved | 20 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 21 + * 22 + * D = Don't Learn bit. When set, this bit indicates that the egress 23 + * VTEP MUST NOT learn the source address of the encapsulated frame. 24 + * 25 + * A = Indicates that the group policy has already been applied to 26 + * this packet. Policies MUST NOT be applied by devices when the 27 + * A bit is set. 28 + * 29 + * [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy 30 + */ 31 + struct vxlanhdr_gbp { 32 + __u8 vx_flags; 33 + #ifdef __LITTLE_ENDIAN_BITFIELD 34 + __u8 reserved_flags1:3, 35 + policy_applied:1, 36 + reserved_flags2:2, 37 + dont_learn:1, 38 + reserved_flags3:1; 39 + #elif defined(__BIG_ENDIAN_BITFIELD) 40 + __u8 reserved_flags1:1, 41 + dont_learn:1, 42 + reserved_flags2:2, 43 + policy_applied:1, 44 + reserved_flags3:3; 45 + #else 46 + #error "Please fix <asm/byteorder.h>" 47 + #endif 48 + __be16 policy_id; 49 + __be32 vx_vni; 50 + }; 51 + 52 + #define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) 53 + 54 + /* skb->mark mapping 55 + * 56 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 57 + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | 58 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 59 + */ 60 + #define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) 61 + #define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) 62 + #define VXLAN_GBP_ID_MASK (0xFFFF) 63 + 64 + /* VXLAN protocol header: 65 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 66 + * |G|R|R|R|I|R|R|C| Reserved | 67 + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 68 + * | VXLAN Network Identifier (VNI) | Reserved | 69 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 70 + * 71 + * G = 1 Group Policy (VXLAN-GBP) 72 + * I = 1 VXLAN Network Identifier (VNI) present 73 + * C = 1 Remote checksum offload (RCO) 74 + */ 15 75 struct vxlanhdr { 16 76 __be32 vx_flags; 17 77 __be32 vx_vni; 18 78 }; 19 79 20 80 /* VXLAN header flags. */ 21 - #define VXLAN_HF_VNI 0x08000000 22 - #define VXLAN_HF_RCO 0x00200000 81 + #define VXLAN_HF_RCO BIT(24) 82 + #define VXLAN_HF_VNI BIT(27) 83 + #define VXLAN_HF_GBP BIT(31) 23 84 24 85 /* Remote checksum offload header option */ 25 86 #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ ··· 93 32 #define VXLAN_VID_MASK (VXLAN_N_VID - 1) 94 33 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) 95 34 35 + struct vxlan_metadata { 36 + __be32 vni; 37 + u32 gbp; 38 + }; 39 + 96 40 struct vxlan_sock; 97 - typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); 41 + typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, 42 + struct vxlan_metadata *md); 98 43 99 44 /* per UDP socket information */ 100 45 struct vxlan_sock { ··· 127 60 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 128 61 #define VXLAN_F_REMCSUM_TX 0x200 129 62 #define VXLAN_F_REMCSUM_RX 0x400 63 + #define VXLAN_F_GBP 0x800 64 + 65 + /* These flags must match in order for a socket to be shareable */ 66 + #define VXLAN_F_UNSHAREABLE VXLAN_F_GBP 130 67 131 68 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, 132 69 vxlan_rcv_t *rcv, void *data, ··· 141 70 int vxlan_xmit_skb(struct vxlan_sock *vs, 142 71 struct rtable *rt, struct sk_buff *skb, 143 72 __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, 144 - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); 73 + __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, 74 + bool xnet); 145 75 146 76 static inline 
netdev_features_t vxlan_features_check(struct sk_buff *skb, 147 77 netdev_features_t features)
+1
include/uapi/linux/if_link.h
··· 372 372 IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 373 373 IFLA_VXLAN_REMCSUM_TX, 374 374 IFLA_VXLAN_REMCSUM_RX, 375 + IFLA_VXLAN_GBP, 375 376 __IFLA_VXLAN_MAX 376 377 }; 377 378 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+11
include/uapi/linux/openvswitch.h
··· 252 252 253 253 #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) 254 254 255 + enum { 256 + OVS_VXLAN_EXT_UNSPEC, 257 + OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ 258 + __OVS_VXLAN_EXT_MAX, 259 + }; 260 + 261 + #define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) 262 + 263 + 255 264 /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. 256 265 */ 257 266 enum { 258 267 OVS_TUNNEL_ATTR_UNSPEC, 259 268 OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ 269 + OVS_TUNNEL_ATTR_EXTENSION, 260 270 __OVS_TUNNEL_ATTR_MAX 261 271 }; 262 272 ··· 338 328 OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ 339 329 OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ 340 330 OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ 331 + OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ 341 332 __OVS_TUNNEL_KEY_ATTR_MAX 342 333 }; 343 334
+1 -1
net/openvswitch/flow.c
··· 691 691 BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) * 692 692 8)) - 1 693 693 > sizeof(key->tun_opts)); 694 - memcpy(GENEVE_OPTS(key, tun_info->options_len), 694 + memcpy(TUN_METADATA_OPTS(key, tun_info->options_len), 695 695 tun_info->options, tun_info->options_len); 696 696 key->tun_opts_len = tun_info->options_len; 697 697 } else {
+7 -7
net/openvswitch/flow.h
··· 53 53 54 54 struct ovs_tunnel_info { 55 55 struct ovs_key_ipv4_tunnel tunnel; 56 - const struct geneve_opt *options; 56 + const void *options; 57 57 u8 options_len; 58 58 }; 59 59 ··· 61 61 * maximum size. This allows us to get the benefits of variable length 62 62 * matching for small options. 63 63 */ 64 - #define GENEVE_OPTS(flow_key, opt_len) \ 65 - ((struct geneve_opt *)((flow_key)->tun_opts + \ 66 - FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \ 67 - opt_len)) 64 + #define TUN_METADATA_OFFSET(opt_len) \ 65 + (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) 66 + #define TUN_METADATA_OPTS(flow_key, opt_len) \ 67 + ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) 68 68 69 69 static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, 70 70 __be32 saddr, __be32 daddr, ··· 73 73 __be16 tp_dst, 74 74 __be64 tun_id, 75 75 __be16 tun_flags, 76 - const struct geneve_opt *opts, 76 + const void *opts, 77 77 u8 opts_len) 78 78 { 79 79 tun_info->tunnel.tun_id = tun_id; ··· 105 105 __be16 tp_dst, 106 106 __be64 tun_id, 107 107 __be16 tun_flags, 108 - const struct geneve_opt *opts, 108 + const void *opts, 109 109 u8 opts_len) 110 110 { 111 111 __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
+193 -93
net/openvswitch/flow_netlink.c
··· 49 49 #include <net/mpls.h> 50 50 51 51 #include "flow_netlink.h" 52 + #include "vport-vxlan.h" 53 + 54 + struct ovs_len_tbl { 55 + int len; 56 + const struct ovs_len_tbl *next; 57 + }; 58 + 59 + #define OVS_ATTR_NESTED -1 52 60 53 61 static void update_range(struct sw_flow_match *match, 54 62 size_t offset, size_t size, bool is_mask) ··· 269 261 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ 270 262 + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ 271 263 + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ 264 + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with 265 + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 266 + */ 272 267 + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ 273 268 + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ 274 269 } ··· 300 289 + nla_total_size(28); /* OVS_KEY_ATTR_ND */ 301 290 } 302 291 292 + static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 293 + [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, 294 + [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, 295 + [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, 296 + [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, 297 + [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, 298 + [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, 299 + [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 }, 300 + [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, 301 + [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, 302 + [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, 303 + [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, 304 + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, 305 + }; 306 + 303 307 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ 304 - static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 305 - [OVS_KEY_ATTR_ENCAP] = -1, 306 - [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), 307 - [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), 308 - [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), 309 - [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), 310 - [OVS_KEY_ATTR_VLAN] = sizeof(__be16), 311 - [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), 312 - [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4), 313 - [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6), 314 - [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp), 315 - [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16), 316 - [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp), 317 - [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp), 318 - [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp), 319 - [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6), 320 - [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp), 321 - [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd), 322 - [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), 323 - [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), 324 - [OVS_KEY_ATTR_TUNNEL] = -1, 325 - [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), 308 + static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { 309 + [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, 310 + [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, 311 + [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, 312 + [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, 313 + [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, 314 + [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, 315 + [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, 316 + [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, 317 + [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, 318 + [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, 319 + [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, 320 + [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, 321 + [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct 
ovs_key_sctp) }, 322 + [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, 323 + [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, 324 + [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, 325 + [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 326 + [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, 327 + [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 328 + [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 329 + .next = ovs_tunnel_key_lens, }, 330 + [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 326 331 }; 327 332 328 333 static bool is_all_zero(const u8 *fp, size_t size) ··· 379 352 return -EINVAL; 380 353 } 381 354 382 - expected_len = ovs_key_lens[type]; 383 - if (nla_len(nla) != expected_len && expected_len != -1) { 355 + expected_len = ovs_key_lens[type].len; 356 + if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) { 384 357 OVS_NLERR(log, "Key %d has unexpected len %d expected %d", 385 358 type, nla_len(nla), expected_len); 386 359 return -EINVAL; ··· 459 432 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 460 433 } 461 434 462 - opt_key_offset = (unsigned long)GENEVE_OPTS((struct sw_flow_key *)0, 463 - nla_len(a)); 435 + opt_key_offset = TUN_METADATA_OFFSET(nla_len(a)); 464 436 SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a), 465 437 nla_len(a), is_mask); 438 + return 0; 439 + } 440 + 441 + static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { 442 + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, 443 + }; 444 + 445 + static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, 446 + struct sw_flow_match *match, bool is_mask, 447 + bool log) 448 + { 449 + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; 450 + unsigned long opt_key_offset; 451 + struct ovs_vxlan_opts opts; 452 + int err; 453 + 454 + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); 455 + 456 + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); 457 + if (err < 0) 458 + 
return err; 459 + 460 + memset(&opts, 0, sizeof(opts)); 461 + 462 + if (tb[OVS_VXLAN_EXT_GBP]) 463 + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); 464 + 465 + if (!is_mask) 466 + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); 467 + else 468 + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); 469 + 470 + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); 471 + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), 472 + is_mask); 466 473 return 0; 467 474 } 468 475 ··· 508 447 int rem; 509 448 bool ttl = false; 510 449 __be16 tun_flags = 0; 450 + int opts_type = 0; 511 451 512 452 nla_for_each_nested(a, attr, rem) { 513 453 int type = nla_type(a); 514 454 int err; 515 - 516 - static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { 517 - [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64), 518 - [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32), 519 - [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32), 520 - [OVS_TUNNEL_KEY_ATTR_TOS] = 1, 521 - [OVS_TUNNEL_KEY_ATTR_TTL] = 1, 522 - [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0, 523 - [OVS_TUNNEL_KEY_ATTR_CSUM] = 0, 524 - [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16), 525 - [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16), 526 - [OVS_TUNNEL_KEY_ATTR_OAM] = 0, 527 - [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1, 528 - }; 529 455 530 456 if (type > OVS_TUNNEL_KEY_ATTR_MAX) { 531 457 OVS_NLERR(log, "Tunnel attr %d out of range max %d", ··· 520 472 return -EINVAL; 521 473 } 522 474 523 - if (ovs_tunnel_key_lens[type] != nla_len(a) && 524 - ovs_tunnel_key_lens[type] != -1) { 475 + if (ovs_tunnel_key_lens[type].len != nla_len(a) && 476 + ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) { 525 477 OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d", 526 - type, nla_len(a), ovs_tunnel_key_lens[type]); 478 + type, nla_len(a), ovs_tunnel_key_lens[type].len); 527 479 return -EINVAL; 528 480 } 529 481 ··· 568 520 tun_flags |= TUNNEL_OAM; 569 521 break; 570 522 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 523 + if (opts_type) { 
524 + OVS_NLERR(log, "Multiple metadata blocks provided"); 525 + return -EINVAL; 526 + } 527 + 571 528 err = genev_tun_opt_from_nlattr(a, match, is_mask, log); 572 529 if (err) 573 530 return err; 574 531 575 - tun_flags |= TUNNEL_OPTIONS_PRESENT; 532 + tun_flags |= TUNNEL_GENEVE_OPT; 533 + opts_type = type; 534 + break; 535 + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 536 + if (opts_type) { 537 + OVS_NLERR(log, "Multiple metadata blocks provided"); 538 + return -EINVAL; 539 + } 540 + 541 + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); 542 + if (err) 543 + return err; 544 + 545 + tun_flags |= TUNNEL_VXLAN_OPT; 546 + opts_type = type; 576 547 break; 577 548 default: 578 549 OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", ··· 620 553 } 621 554 } 622 555 556 + return opts_type; 557 + } 558 + 559 + static int vxlan_opt_to_nlattr(struct sk_buff *skb, 560 + const void *tun_opts, int swkey_tun_opts_len) 561 + { 562 + const struct ovs_vxlan_opts *opts = tun_opts; 563 + struct nlattr *nla; 564 + 565 + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); 566 + if (!nla) 567 + return -EMSGSIZE; 568 + 569 + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) 570 + return -EMSGSIZE; 571 + 572 + nla_nest_end(skb, nla); 623 573 return 0; 624 574 } 625 575 626 576 static int __ipv4_tun_to_nlattr(struct sk_buff *skb, 627 577 const struct ovs_key_ipv4_tunnel *output, 628 - const struct geneve_opt *tun_opts, 629 - int swkey_tun_opts_len) 578 + const void *tun_opts, int swkey_tun_opts_len) 630 579 { 631 580 if (output->tun_flags & TUNNEL_KEY && 632 581 nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) ··· 673 590 if ((output->tun_flags & TUNNEL_OAM) && 674 591 nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) 675 592 return -EMSGSIZE; 676 - if (tun_opts && 677 - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 678 - swkey_tun_opts_len, tun_opts)) 679 - return -EMSGSIZE; 593 + if (tun_opts) { 594 + if (output->tun_flags & TUNNEL_GENEVE_OPT && 595 + nla_put(skb, 
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, 596 + swkey_tun_opts_len, tun_opts)) 597 + return -EMSGSIZE; 598 + else if (output->tun_flags & TUNNEL_VXLAN_OPT && 599 + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) 600 + return -EMSGSIZE; 601 + } 680 602 681 603 return 0; 682 604 } 683 605 684 606 static int ipv4_tun_to_nlattr(struct sk_buff *skb, 685 607 const struct ovs_key_ipv4_tunnel *output, 686 - const struct geneve_opt *tun_opts, 687 - int swkey_tun_opts_len) 608 + const void *tun_opts, int swkey_tun_opts_len) 688 609 { 689 610 struct nlattr *nla; 690 611 int err; ··· 762 675 } 763 676 if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { 764 677 if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, 765 - is_mask, log)) 678 + is_mask, log) < 0) 766 679 return -EINVAL; 767 680 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); 768 681 } ··· 1002 915 return 0; 1003 916 } 1004 917 1005 - static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key) 918 + static void nlattr_set(struct nlattr *attr, u8 val, 919 + const struct ovs_len_tbl *tbl) 1006 920 { 1007 921 struct nlattr *nla; 1008 922 int rem; 1009 923 1010 924 /* The nlattr stream should already have been validated */ 1011 925 nla_for_each_nested(nla, attr, rem) { 1012 - /* We assume that ovs_key_lens[type] == -1 means that type is a 1013 - * nested attribute 1014 - */ 1015 - if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1) 1016 - nlattr_set(nla, val, false); 926 + if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED) 927 + nlattr_set(nla, val, tbl[nla_type(nla)].next); 1017 928 else 1018 929 memset(nla_data(nla), val, nla_len(nla)); 1019 930 } ··· 1019 934 1020 935 static void mask_set_nlattr(struct nlattr *attr, u8 val) 1021 936 { 1022 - nlattr_set(attr, val, true); 937 + nlattr_set(attr, val, ovs_key_lens); 1023 938 } 1024 939 1025 940 /** ··· 1233 1148 goto nla_put_failure; 1234 1149 1235 1150 if ((swkey->tun_key.ipv4_dst || is_mask)) { 1236 - const struct geneve_opt *opts = NULL; 1151 + const void 
*opts = NULL; 1237 1152 1238 1153 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT) 1239 - opts = GENEVE_OPTS(output, swkey->tun_opts_len); 1154 + opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); 1240 1155 1241 1156 if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts, 1242 1157 swkey->tun_opts_len)) ··· 1625 1540 } 1626 1541 } 1627 1542 1543 + static int validate_geneve_opts(struct sw_flow_key *key) 1544 + { 1545 + struct geneve_opt *option; 1546 + int opts_len = key->tun_opts_len; 1547 + bool crit_opt = false; 1548 + 1549 + option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len); 1550 + while (opts_len > 0) { 1551 + int len; 1552 + 1553 + if (opts_len < sizeof(*option)) 1554 + return -EINVAL; 1555 + 1556 + len = sizeof(*option) + option->length * 4; 1557 + if (len > opts_len) 1558 + return -EINVAL; 1559 + 1560 + crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 1561 + 1562 + option = (struct geneve_opt *)((u8 *)option + len); 1563 + opts_len -= len; 1564 + }; 1565 + 1566 + key->tun_key.tun_flags |= crit_opt ? 
TUNNEL_CRIT_OPT : 0; 1567 + 1568 + return 0; 1569 + } 1570 + 1628 1571 static int validate_and_copy_set_tun(const struct nlattr *attr, 1629 1572 struct sw_flow_actions **sfa, bool log) 1630 1573 { ··· 1660 1547 struct sw_flow_key key; 1661 1548 struct ovs_tunnel_info *tun_info; 1662 1549 struct nlattr *a; 1663 - int err, start; 1550 + int err, start, opts_type; 1664 1551 1665 1552 ovs_match_init(&match, &key, NULL); 1666 - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1667 - if (err) 1668 - return err; 1553 + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); 1554 + if (opts_type < 0) 1555 + return opts_type; 1669 1556 1670 1557 if (key.tun_opts_len) { 1671 - struct geneve_opt *option = GENEVE_OPTS(&key, 1672 - key.tun_opts_len); 1673 - int opts_len = key.tun_opts_len; 1674 - bool crit_opt = false; 1675 - 1676 - while (opts_len > 0) { 1677 - int len; 1678 - 1679 - if (opts_len < sizeof(*option)) 1680 - return -EINVAL; 1681 - 1682 - len = sizeof(*option) + option->length * 4; 1683 - if (len > opts_len) 1684 - return -EINVAL; 1685 - 1686 - crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE); 1687 - 1688 - option = (struct geneve_opt *)((u8 *)option + len); 1689 - opts_len -= len; 1690 - }; 1691 - 1692 - key.tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0; 1558 + switch (opts_type) { 1559 + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: 1560 + err = validate_geneve_opts(&key); 1561 + if (err < 0) 1562 + return err; 1563 + break; 1564 + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: 1565 + break; 1566 + } 1693 1567 }; 1694 1568 1695 1569 start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); ··· 1697 1597 * everything else will go away after flow setup. We can append 1698 1598 * it to tun_info and then point there. 
1699 1599 */ 1700 - memcpy((tun_info + 1), GENEVE_OPTS(&key, key.tun_opts_len), 1701 - key.tun_opts_len); 1702 - tun_info->options = (struct geneve_opt *)(tun_info + 1); 1600 + memcpy((tun_info + 1), 1601 + TUN_METADATA_OPTS(&key, key.tun_opts_len), key.tun_opts_len); 1602 + tun_info->options = (tun_info + 1); 1703 1603 } else { 1704 1604 tun_info->options = NULL; 1705 1605 } ··· 1722 1622 return -EINVAL; 1723 1623 1724 1624 if (key_type > OVS_KEY_ATTR_MAX || 1725 - (ovs_key_lens[key_type] != nla_len(ovs_key) && 1726 - ovs_key_lens[key_type] != -1)) 1625 + (ovs_key_lens[key_type].len != nla_len(ovs_key) && 1626 + ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) 1727 1627 return -EINVAL; 1728 1628 1729 1629 switch (key_type) {
+11 -4
net/openvswitch/vport-geneve.c
··· 88 88 89 89 opts_len = geneveh->opt_len * 4; 90 90 91 - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | 91 + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | 92 92 (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | 93 93 (geneveh->oam ? TUNNEL_OAM : 0) | 94 94 (geneveh->critical ? TUNNEL_CRIT_OPT : 0); ··· 178 178 __be16 sport; 179 179 struct rtable *rt; 180 180 struct flowi4 fl; 181 - u8 vni[3]; 181 + u8 vni[3], opts_len, *opts; 182 182 __be16 df; 183 183 int err; 184 184 ··· 200 200 tunnel_id_to_vni(tun_key->tun_id, vni); 201 201 skb->ignore_df = 1; 202 202 203 + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { 204 + opts = (u8 *)tun_info->options; 205 + opts_len = tun_info->options_len; 206 + } else { 207 + opts = NULL; 208 + opts_len = 0; 209 + } 210 + 203 211 err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, 204 212 tun_key->ipv4_dst, tun_key->ipv4_tos, 205 213 tun_key->ipv4_ttl, df, sport, dport, 206 - tun_key->tun_flags, vni, 207 - tun_info->options_len, (u8 *)tun_info->options, 214 + tun_key->tun_flags, vni, opts_len, opts, 208 215 false); 209 216 if (err < 0) 210 217 ip_rt_put(rt);
+86 -5
net/openvswitch/vport-vxlan.c
··· 40 40 41 41 #include "datapath.h" 42 42 #include "vport.h" 43 + #include "vport-vxlan.h" 43 44 44 45 /** 45 46 * struct vxlan_port - Keeps track of open UDP ports ··· 50 49 struct vxlan_port { 51 50 struct vxlan_sock *vs; 52 51 char name[IFNAMSIZ]; 52 + u32 exts; /* VXLAN_F_* in <net/vxlan.h> */ 53 53 }; 54 54 55 55 static struct vport_ops ovs_vxlan_vport_ops; ··· 61 59 } 62 60 63 61 /* Called with rcu_read_lock and BH disabled. */ 64 - static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) 62 + static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, 63 + struct vxlan_metadata *md) 65 64 { 66 65 struct ovs_tunnel_info tun_info; 66 + struct vxlan_port *vxlan_port; 67 67 struct vport *vport = vs->data; 68 68 struct iphdr *iph; 69 + struct ovs_vxlan_opts opts = { 70 + .gbp = md->gbp, 71 + }; 69 72 __be64 key; 73 + __be16 flags; 74 + 75 + flags = TUNNEL_KEY; 76 + vxlan_port = vxlan_vport(vport); 77 + if (vxlan_port->exts & VXLAN_F_GBP) 78 + flags |= TUNNEL_VXLAN_OPT; 70 79 71 80 /* Save outer tunnel values */ 72 81 iph = ip_hdr(skb); 73 - key = cpu_to_be64(ntohl(vx_vni) >> 8); 82 + key = cpu_to_be64(ntohl(md->vni) >> 8); 74 83 ovs_flow_tun_info_init(&tun_info, iph, 75 84 udp_hdr(skb)->source, udp_hdr(skb)->dest, 76 - key, TUNNEL_KEY, NULL, 0); 85 + key, flags, &opts, sizeof(opts)); 77 86 78 87 ovs_vport_receive(vport, skb, &tun_info); 79 88 } ··· 96 83 97 84 if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) 98 85 return -EMSGSIZE; 86 + 87 + if (vxlan_port->exts) { 88 + struct nlattr *exts; 89 + 90 + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); 91 + if (!exts) 92 + return -EMSGSIZE; 93 + 94 + if (vxlan_port->exts & VXLAN_F_GBP && 95 + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) 96 + return -EMSGSIZE; 97 + 98 + nla_nest_end(skb, exts); 99 + } 100 + 99 101 return 0; 100 102 } 101 103 ··· 121 93 vxlan_sock_release(vxlan_port->vs); 122 94 123 95 ovs_vport_deferred_free(vport); 96 + } 97 + 98 + static const struct 
nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { 99 + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, 100 + }; 101 + 102 + static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) 103 + { 104 + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; 105 + struct vxlan_port *vxlan_port; 106 + int err; 107 + 108 + if (nla_len(attr) < sizeof(struct nlattr)) 109 + return -EINVAL; 110 + 111 + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); 112 + if (err < 0) 113 + return err; 114 + 115 + vxlan_port = vxlan_vport(vport); 116 + 117 + if (exts[OVS_VXLAN_EXT_GBP]) 118 + vxlan_port->exts |= VXLAN_F_GBP; 119 + 120 + return 0; 124 121 } 125 122 126 123 static struct vport *vxlan_tnl_create(const struct vport_parms *parms) ··· 180 127 vxlan_port = vxlan_vport(vport); 181 128 strncpy(vxlan_port->name, parms->name, IFNAMSIZ); 182 129 183 - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0); 130 + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); 131 + if (a) { 132 + err = vxlan_configure_exts(vport, a); 133 + if (err) { 134 + ovs_vport_free(vport); 135 + goto error; 136 + } 137 + } 138 + 139 + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 140 + vxlan_port->exts); 184 141 if (IS_ERR(vs)) { 185 142 ovs_vport_free(vport); 186 143 return (void *)vs; ··· 203 140 return ERR_PTR(err); 204 141 } 205 142 143 + static int vxlan_ext_gbp(struct sk_buff *skb) 144 + { 145 + const struct ovs_tunnel_info *tun_info; 146 + const struct ovs_vxlan_opts *opts; 147 + 148 + tun_info = OVS_CB(skb)->egress_tun_info; 149 + opts = tun_info->options; 150 + 151 + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && 152 + tun_info->options_len >= sizeof(*opts)) 153 + return opts->gbp; 154 + else 155 + return 0; 156 + } 157 + 206 158 static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) 207 159 { 208 160 struct net *net = ovs_dp_get_net(vport->dp); 209 161 struct vxlan_port *vxlan_port = vxlan_vport(vport); 210 162 __be16 dst_port = 
inet_sk(vxlan_port->vs->sock->sk)->inet_sport; 211 163 const struct ovs_key_ipv4_tunnel *tun_key; 164 + struct vxlan_metadata md = {0}; 212 165 struct rtable *rt; 213 166 struct flowi4 fl; 214 167 __be16 src_port; ··· 249 170 skb->ignore_df = 1; 250 171 251 172 src_port = udp_flow_src_port(net, skb, 0, 0, true); 173 + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); 174 + md.gbp = vxlan_ext_gbp(skb); 252 175 253 176 err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, 254 177 fl.saddr, tun_key->ipv4_dst, 255 178 tun_key->ipv4_tos, tun_key->ipv4_ttl, df, 256 179 src_port, dst_port, 257 - htonl(be64_to_cpu(tun_key->tun_id) << 8), 180 + &md, 258 181 false); 259 182 if (err < 0) 260 183 ip_rt_put(rt);
+11
net/openvswitch/vport-vxlan.h
··· 1 + #ifndef VPORT_VXLAN_H 2 + #define VPORT_VXLAN_H 1 3 + 4 + #include <linux/kernel.h> 5 + #include <linux/types.h> 6 + 7 + struct ovs_vxlan_opts { 8 + __u32 gbp; 9 + }; 10 + 11 + #endif