Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Change in Openvswitch to support MPLS label depth of 3 in ingress direction

The openvswitch module supported an MPLS label depth of only 1 in the
ingress direction, although userspace OVS supports a maximum depth of
3 labels. This change enables the openvswitch module to support a
maximum depth of 3 labels in the ingress direction.

Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Martin Varghese and committed by
David S. Miller
fbdcdd78 a5ec6516

+85 -33
+1 -1
net/openvswitch/actions.c
··· 200 200 if (err) 201 201 return err; 202 202 203 - flow_key->mpls.top_lse = lse; 203 + flow_key->mpls.lse[0] = lse; 204 204 return 0; 205 205 } 206 206
+14 -6
net/openvswitch/flow.c
··· 637 637 memset(&key->ipv4, 0, sizeof(key->ipv4)); 638 638 } 639 639 } else if (eth_p_mpls(key->eth.type)) { 640 - size_t stack_len = MPLS_HLEN; 640 + u8 label_count = 1; 641 641 642 + memset(&key->mpls, 0, sizeof(key->mpls)); 642 643 skb_set_inner_network_header(skb, skb->mac_len); 643 644 while (1) { 644 645 __be32 lse; 645 646 646 - error = check_header(skb, skb->mac_len + stack_len); 647 + error = check_header(skb, skb->mac_len + 648 + label_count * MPLS_HLEN); 647 649 if (unlikely(error)) 648 650 return 0; 649 651 650 652 memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); 651 653 652 - if (stack_len == MPLS_HLEN) 653 - memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); 654 + if (label_count <= MPLS_LABEL_DEPTH) 655 + memcpy(&key->mpls.lse[label_count - 1], &lse, 656 + MPLS_HLEN); 654 657 655 - skb_set_inner_network_header(skb, skb->mac_len + stack_len); 658 + skb_set_inner_network_header(skb, skb->mac_len + 659 + label_count * MPLS_HLEN); 656 660 if (lse & htonl(MPLS_LS_S_MASK)) 657 661 break; 658 662 659 - stack_len += MPLS_HLEN; 663 + label_count++; 660 664 } 665 + if (label_count > MPLS_LABEL_DEPTH) 666 + label_count = MPLS_LABEL_DEPTH; 667 + 668 + key->mpls.num_labels_mask = GENMASK(label_count - 1, 0); 661 669 } else if (key->eth.type == htons(ETH_P_IPV6)) { 662 670 int nh_len; /* IPv6 Header + Extensions */ 663 671
+6 -3
net/openvswitch/flow.h
··· 30 30 MAC_PROTO_ETHERNET, 31 31 }; 32 32 #define SW_FLOW_KEY_INVALID 0x80 33 + #define MPLS_LABEL_DEPTH 3 33 34 34 35 /* Store options at the end of the array if they are less than the 35 36 * maximum size. This allows us to get the benefits of variable length ··· 86 85 */ 87 86 union { 88 87 struct { 89 - __be32 top_lse; /* top label stack entry */ 90 - } mpls; 91 - struct { 92 88 u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ 93 89 u8 tos; /* IP ToS. */ 94 90 u8 ttl; /* IP TTL/hop limit. */ ··· 133 135 } nd; 134 136 }; 135 137 } ipv6; 138 + struct { 139 + u32 num_labels_mask; /* labels present bitmap of effective length MPLS_LABEL_DEPTH */ 140 + __be32 lse[MPLS_LABEL_DEPTH]; /* label stack entry */ 141 + } mpls; 142 + 136 143 struct ovs_key_nsh nsh; /* network service header */ 137 144 }; 138 145 struct {
+64 -23
net/openvswitch/flow_netlink.c
··· 424 424 [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, 425 425 [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, 426 426 .next = ovs_tunnel_key_lens, }, 427 - [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) }, 427 + [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, 428 428 [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, 429 429 [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, 430 430 [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, ··· 1628 1628 1629 1629 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { 1630 1630 const struct ovs_key_mpls *mpls_key; 1631 + u32 hdr_len; 1632 + u32 label_count, label_count_mask, i; 1631 1633 1632 1634 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); 1633 - SW_FLOW_KEY_PUT(match, mpls.top_lse, 1634 - mpls_key->mpls_lse, is_mask); 1635 + hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); 1636 + label_count = hdr_len / sizeof(struct ovs_key_mpls); 1637 + 1638 + if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || 1639 + hdr_len % sizeof(struct ovs_key_mpls)) 1640 + return -EINVAL; 1641 + 1642 + label_count_mask = GENMASK(label_count - 1, 0); 1643 + 1644 + for (i = 0 ; i < label_count; i++) 1645 + SW_FLOW_KEY_PUT(match, mpls.lse[i], 1646 + mpls_key[i].mpls_lse, is_mask); 1647 + 1648 + SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, 1649 + label_count_mask, is_mask); 1635 1650 1636 1651 attrs &= ~(1 << OVS_KEY_ATTR_MPLS); 1637 1652 } ··· 2129 2114 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); 2130 2115 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); 2131 2116 } else if (eth_p_mpls(swkey->eth.type)) { 2117 + u8 i, num_labels; 2132 2118 struct ovs_key_mpls *mpls_key; 2133 2119 2134 - nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); 2120 + num_labels = hweight_long(output->mpls.num_labels_mask); 2121 + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, 2122 + num_labels * sizeof(*mpls_key)); 2135 2123 if (!nla) 2136 2124 goto nla_put_failure; 2125 + 2137 2126 mpls_key = nla_data(nla); 2138 - mpls_key->mpls_lse = 
output->mpls.top_lse; 2127 + for (i = 0; i < num_labels; i++) 2128 + mpls_key[i].mpls_lse = output->mpls.lse[i]; 2139 2129 } 2140 2130 2141 2131 if ((swkey->eth.type == htons(ETH_P_IP) || ··· 2426 2406 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2427 2407 const struct sw_flow_key *key, 2428 2408 struct sw_flow_actions **sfa, 2429 - __be16 eth_type, __be16 vlan_tci, bool log); 2409 + __be16 eth_type, __be16 vlan_tci, 2410 + u32 mpls_label_count, bool log); 2430 2411 2431 2412 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, 2432 2413 const struct sw_flow_key *key, 2433 2414 struct sw_flow_actions **sfa, 2434 2415 __be16 eth_type, __be16 vlan_tci, 2435 - bool log, bool last) 2416 + u32 mpls_label_count, bool log, bool last) 2436 2417 { 2437 2418 const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; 2438 2419 const struct nlattr *probability, *actions; ··· 2484 2463 return err; 2485 2464 2486 2465 err = __ovs_nla_copy_actions(net, actions, key, sfa, 2487 - eth_type, vlan_tci, log); 2466 + eth_type, vlan_tci, mpls_label_count, log); 2488 2467 2489 2468 if (err) 2490 2469 return err; ··· 2499 2478 const struct sw_flow_key *key, 2500 2479 struct sw_flow_actions **sfa, 2501 2480 __be16 eth_type, __be16 vlan_tci, 2502 - bool log, bool last) 2481 + u32 mpls_label_count, bool log, bool last) 2503 2482 { 2504 2483 int start, err; 2505 2484 u32 exec; ··· 2519 2498 return err; 2520 2499 2521 2500 err = __ovs_nla_copy_actions(net, attr, key, sfa, 2522 - eth_type, vlan_tci, log); 2501 + eth_type, vlan_tci, mpls_label_count, log); 2523 2502 if (err) 2524 2503 return err; 2525 2504 ··· 2885 2864 const struct sw_flow_key *key, 2886 2865 struct sw_flow_actions **sfa, 2887 2866 __be16 eth_type, __be16 vlan_tci, 2867 + u32 mpls_label_count, 2888 2868 bool log, bool last) 2889 2869 { 2890 2870 const struct nlattr *acts_if_greater, *acts_if_lesser_eq; ··· 2934 2912 return nested_acts_start; 2935 2913 2936 2914 err = 
__ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa, 2937 - eth_type, vlan_tci, log); 2915 + eth_type, vlan_tci, mpls_label_count, log); 2938 2916 2939 2917 if (err) 2940 2918 return err; ··· 2947 2925 return nested_acts_start; 2948 2926 2949 2927 err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa, 2950 - eth_type, vlan_tci, log); 2928 + eth_type, vlan_tci, mpls_label_count, log); 2951 2929 2952 2930 if (err) 2953 2931 return err; ··· 2974 2952 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, 2975 2953 const struct sw_flow_key *key, 2976 2954 struct sw_flow_actions **sfa, 2977 - __be16 eth_type, __be16 vlan_tci, bool log) 2955 + __be16 eth_type, __be16 vlan_tci, 2956 + u32 mpls_label_count, bool log) 2978 2957 { 2979 2958 u8 mac_proto = ovs_key_mac_proto(key); 2980 2959 const struct nlattr *a; ··· 3088 3065 !eth_p_mpls(eth_type))) 3089 3066 return -EINVAL; 3090 3067 eth_type = mpls->mpls_ethertype; 3068 + mpls_label_count++; 3091 3069 break; 3092 3070 } 3093 3071 3094 - case OVS_ACTION_ATTR_POP_MPLS: 3072 + case OVS_ACTION_ATTR_POP_MPLS: { 3073 + __be16 proto; 3095 3074 if (vlan_tci & htons(VLAN_CFI_MASK) || 3096 3075 !eth_p_mpls(eth_type)) 3097 3076 return -EINVAL; 3098 3077 3099 - /* Disallow subsequent L2.5+ set and mpls_pop actions 3100 - * as there is no check here to ensure that the new 3101 - * eth_type is valid and thus set actions could 3102 - * write off the end of the packet or otherwise 3103 - * corrupt it. 3078 + /* Disallow subsequent L2.5+ set actions and mpls_pop 3079 + * actions once the last MPLS label in the packet is 3080 + * is popped as there is no check here to ensure that 3081 + * the new eth type is valid and thus set actions could 3082 + * write off the end of the packet or otherwise corrupt 3083 + * it. 3104 3084 * 3105 3085 * Support for these actions is planned using packet 3106 3086 * recirculation. 
3107 3087 */ 3108 - eth_type = htons(0); 3088 + proto = nla_get_be16(a); 3089 + mpls_label_count--; 3090 + 3091 + if (!eth_p_mpls(proto) || !mpls_label_count) 3092 + eth_type = htons(0); 3093 + else 3094 + eth_type = proto; 3095 + 3109 3096 break; 3097 + } 3110 3098 3111 3099 case OVS_ACTION_ATTR_SET: 3112 3100 err = validate_set(a, key, sfa, ··· 3140 3106 3141 3107 err = validate_and_copy_sample(net, a, key, sfa, 3142 3108 eth_type, vlan_tci, 3109 + mpls_label_count, 3143 3110 log, last); 3144 3111 if (err) 3145 3112 return err; ··· 3211 3176 3212 3177 err = validate_and_copy_clone(net, a, key, sfa, 3213 3178 eth_type, vlan_tci, 3179 + mpls_label_count, 3214 3180 log, last); 3215 3181 if (err) 3216 3182 return err; ··· 3224 3188 3225 3189 err = validate_and_copy_check_pkt_len(net, a, key, sfa, 3226 3190 eth_type, 3227 - vlan_tci, log, 3228 - last); 3191 + vlan_tci, 3192 + mpls_label_count, 3193 + log, last); 3229 3194 if (err) 3230 3195 return err; 3231 3196 skip_copy = true; ··· 3256 3219 struct sw_flow_actions **sfa, bool log) 3257 3220 { 3258 3221 int err; 3222 + u32 mpls_label_count = 0; 3259 3223 3260 3224 *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); 3261 3225 if (IS_ERR(*sfa)) 3262 3226 return PTR_ERR(*sfa); 3263 3227 3228 + if (eth_p_mpls(key->eth.type)) 3229 + mpls_label_count = hweight_long(key->mpls.num_labels_mask); 3230 + 3264 3231 (*sfa)->orig_len = nla_len(attr); 3265 3232 err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, 3266 - key->eth.vlan.tci, log); 3233 + key->eth.vlan.tci, mpls_label_count, log); 3267 3234 if (err) 3268 3235 ovs_nla_free_flow_actions(*sfa); 3269 3236