Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

openvswitch: enable NSH support

v16->v17
- Fix the disputed check code: keep the checks in nsh_push and nsh_pop
but also add them in __ovs_nla_copy_actions

v15->v16
- Add csum recalculation for nsh_push, nsh_pop and set_nsh
pointed out by Pravin
- Move nsh key into the union with ipv4 and ipv6 and add
check for nsh key in match_validate pointed out by Pravin
- Add nsh check in validate_set and __ovs_nla_copy_actions

v14->v15
- Check size in nsh_hdr_from_nlattr
- Fixed four small issues pointed out by Jiri and Eric

v13->v14
- Rename skb_push_nsh to nsh_push per Dave's comment
- Rename skb_pop_nsh to nsh_pop per Dave's comment

v12->v13
- Fix NSH header length check in set_nsh

v11->v12
- Fix changes that were missed from earlier review comments
- Fix new comments for v11

v10->v11
- Fix the three remaining disputed comments on v9
that were not fixed in v10.

v9->v10
- Change struct ovs_key_nsh to
struct ovs_nsh_key_base base;
__be32 context[NSH_MD1_CONTEXT_SIZE];
- Fix new comments for v9

v8->v9
- Fix build error reported by daily intel build
because nsh module isn't selected by openvswitch

v7->v8
- Rework nested value and mask for OVS_KEY_ATTR_NSH
- Change pop_nsh to adapt to nsh kernel module
- Fix many issues per comments from Jiri Benc

v6->v7
- Remove NSH GSO patches in v6 because Jiri Benc
reworked it as another patch series and they have
been merged.
- Change it to adapt to nsh kernel module added by NSH
GSO patch series

v5->v6
- Fix the remaining comments on v4.
- Add NSH GSO support for VxLAN-gpe + NSH and
Eth + NSH.

v4->v5
- Fix many comments by Jiri Benc and Eric Garver
for v4.

v3->v4
- Add new NSH match field ttl
- Update NSH header to the latest format
which will be final format and won't change
per its author's confirmation.
- Fix comments for v3.

v2->v3
- Change OVS_KEY_ATTR_NSH to a nested key to handle
fixed-length attributes and variable-length
attributes more flexibly.
- Remove struct ovs_action_push_nsh completely
- Add code to handle nested attribute for SET_MASKED
- Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
to transfer NSH header data.
- Fix comments and coding style issues by Jiri and Eric

v1->v2
- Change encap_nsh and decap_nsh to push_nsh and pop_nsh
- Dynamically allocate struct ovs_action_push_nsh for
length-variable metadata.

The OVS master and 2.8 branches have merged the NSH userspace
patch series; this patch enables NSH support
in the kernel datapath so that OVS can support
NSH in compat mode by porting this.

Signed-off-by: Yi Yang <yi.y.yang@intel.com>
Acked-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Eric Garver <e@erig.me>
Acked-by: Pravin Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yi Yang and committed by
David S. Miller
b2d0f5d5 7f5d3f27

+613 -2
+3
include/net/nsh.h
··· 304 304 NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK); 305 305 } 306 306 307 + int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh); 308 + int nsh_pop(struct sk_buff *skb); 309 + 307 310 #endif /* __NET_NSH_H */
+29
include/uapi/linux/openvswitch.h
··· 336 336 OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */ 337 337 OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, /* struct ovs_key_ct_tuple_ipv4 */ 338 338 OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, /* struct ovs_key_ct_tuple_ipv6 */ 339 + OVS_KEY_ATTR_NSH, /* Nested set of ovs_nsh_key_* */ 339 340 340 341 #ifdef __KERNEL__ 341 342 OVS_KEY_ATTR_TUNNEL_INFO, /* struct ip_tunnel_info */ ··· 494 493 __be16 src_port; 495 494 __be16 dst_port; 496 495 __u8 ipv6_proto; 496 + }; 497 + 498 + enum ovs_nsh_key_attr { 499 + OVS_NSH_KEY_ATTR_UNSPEC, 500 + OVS_NSH_KEY_ATTR_BASE, /* struct ovs_nsh_key_base. */ 501 + OVS_NSH_KEY_ATTR_MD1, /* struct ovs_nsh_key_md1. */ 502 + OVS_NSH_KEY_ATTR_MD2, /* variable-length octets for MD type 2. */ 503 + __OVS_NSH_KEY_ATTR_MAX 504 + }; 505 + 506 + #define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1) 507 + 508 + struct ovs_nsh_key_base { 509 + __u8 flags; 510 + __u8 ttl; 511 + __u8 mdtype; 512 + __u8 np; 513 + __be32 path_hdr; 514 + }; 515 + 516 + #define NSH_MD1_CONTEXT_SIZE 4 517 + 518 + struct ovs_nsh_key_md1 { 519 + __be32 context[NSH_MD1_CONTEXT_SIZE]; 497 520 }; 498 521 499 522 /** ··· 836 811 * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the 837 812 * packet. 838 813 * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet. 814 + * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet. 815 + * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet. 839 816 * 840 817 * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all 841 818 * fields within a header are modifiable, e.g. the IPv4 protocol and fragment ··· 868 841 OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */ 869 842 OVS_ACTION_ATTR_POP_ETH, /* No argument. */ 870 843 OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */ 844 + OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */ 845 + OVS_ACTION_ATTR_POP_NSH, /* No argument. 
*/ 871 846 872 847 __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted 873 848 * from userspace. */
+60
net/nsh/nsh.c
··· 14 14 #include <net/nsh.h> 15 15 #include <net/tun_proto.h> 16 16 17 + int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) 18 + { 19 + struct nshhdr *nh; 20 + size_t length = nsh_hdr_len(pushed_nh); 21 + u8 next_proto; 22 + 23 + if (skb->mac_len) { 24 + next_proto = TUN_P_ETHERNET; 25 + } else { 26 + next_proto = tun_p_from_eth_p(skb->protocol); 27 + if (!next_proto) 28 + return -EAFNOSUPPORT; 29 + } 30 + 31 + /* Add the NSH header */ 32 + if (skb_cow_head(skb, length) < 0) 33 + return -ENOMEM; 34 + 35 + skb_push(skb, length); 36 + nh = (struct nshhdr *)(skb->data); 37 + memcpy(nh, pushed_nh, length); 38 + nh->np = next_proto; 39 + skb_postpush_rcsum(skb, nh, length); 40 + 41 + skb->protocol = htons(ETH_P_NSH); 42 + skb_reset_mac_header(skb); 43 + skb_reset_network_header(skb); 44 + skb_reset_mac_len(skb); 45 + 46 + return 0; 47 + } 48 + EXPORT_SYMBOL_GPL(nsh_push); 49 + 50 + int nsh_pop(struct sk_buff *skb) 51 + { 52 + struct nshhdr *nh; 53 + size_t length; 54 + __be16 inner_proto; 55 + 56 + if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) 57 + return -ENOMEM; 58 + nh = (struct nshhdr *)(skb->data); 59 + length = nsh_hdr_len(nh); 60 + inner_proto = tun_p_to_eth_p(nh->np); 61 + if (!pskb_may_pull(skb, length)) 62 + return -ENOMEM; 63 + 64 + if (!inner_proto) 65 + return -EAFNOSUPPORT; 66 + 67 + skb_pull_rcsum(skb, length); 68 + skb_reset_mac_header(skb); 69 + skb_reset_network_header(skb); 70 + skb_reset_mac_len(skb); 71 + skb->protocol = inner_proto; 72 + 73 + return 0; 74 + } 75 + EXPORT_SYMBOL_GPL(nsh_pop); 76 + 17 77 static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, 18 78 netdev_features_t features) 19 79 {
+1
net/openvswitch/Kconfig
··· 14 14 select MPLS 15 15 select NET_MPLS_GSO 16 16 select DST_CACHE 17 + select NET_NSH 17 18 ---help--- 18 19 Open vSwitch is a multilayer Ethernet switch targeted at virtualized 19 20 environments. In addition to supporting a variety of features
+116
net/openvswitch/actions.c
··· 43 43 #include "flow.h" 44 44 #include "conntrack.h" 45 45 #include "vport.h" 46 + #include "flow_netlink.h" 46 47 47 48 struct deferred_action { 48 49 struct sk_buff *skb; ··· 381 380 return 0; 382 381 } 383 382 383 + static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, 384 + const struct nshhdr *nh) 385 + { 386 + int err; 387 + 388 + err = nsh_push(skb, nh); 389 + if (err) 390 + return err; 391 + 392 + /* safe right before invalidate_flow_key */ 393 + key->mac_proto = MAC_PROTO_NONE; 394 + invalidate_flow_key(key); 395 + return 0; 396 + } 397 + 398 + static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key) 399 + { 400 + int err; 401 + 402 + err = nsh_pop(skb); 403 + if (err) 404 + return err; 405 + 406 + /* safe right before invalidate_flow_key */ 407 + if (skb->protocol == htons(ETH_P_TEB)) 408 + key->mac_proto = MAC_PROTO_ETHERNET; 409 + else 410 + key->mac_proto = MAC_PROTO_NONE; 411 + invalidate_flow_key(key); 412 + return 0; 413 + } 414 + 384 415 static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, 385 416 __be32 addr, __be32 new_addr) 386 417 { ··· 632 599 mask->ipv6_hlimit); 633 600 flow_key->ip.ttl = nh->hop_limit; 634 601 } 602 + return 0; 603 + } 604 + 605 + static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, 606 + const struct nlattr *a) 607 + { 608 + struct nshhdr *nh; 609 + size_t length; 610 + int err; 611 + u8 flags; 612 + u8 ttl; 613 + int i; 614 + 615 + struct ovs_key_nsh key; 616 + struct ovs_key_nsh mask; 617 + 618 + err = nsh_key_from_nlattr(a, &key, &mask); 619 + if (err) 620 + return err; 621 + 622 + /* Make sure the NSH base header is there */ 623 + if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) 624 + return -ENOMEM; 625 + 626 + nh = nsh_hdr(skb); 627 + length = nsh_hdr_len(nh); 628 + 629 + /* Make sure the whole NSH header is there */ 630 + err = skb_ensure_writable(skb, skb_network_offset(skb) + 631 + length); 632 + if (unlikely(err)) 633 + return err; 
634 + 635 + nh = nsh_hdr(skb); 636 + skb_postpull_rcsum(skb, nh, length); 637 + flags = nsh_get_flags(nh); 638 + flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); 639 + flow_key->nsh.base.flags = flags; 640 + ttl = nsh_get_ttl(nh); 641 + ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); 642 + flow_key->nsh.base.ttl = ttl; 643 + nsh_set_flags_and_ttl(nh, flags, ttl); 644 + nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, 645 + mask.base.path_hdr); 646 + flow_key->nsh.base.path_hdr = nh->path_hdr; 647 + switch (nh->mdtype) { 648 + case NSH_M_TYPE1: 649 + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { 650 + nh->md1.context[i] = 651 + OVS_MASKED(nh->md1.context[i], key.context[i], 652 + mask.context[i]); 653 + } 654 + memcpy(flow_key->nsh.context, nh->md1.context, 655 + sizeof(nh->md1.context)); 656 + break; 657 + case NSH_M_TYPE2: 658 + memset(flow_key->nsh.context, 0, 659 + sizeof(flow_key->nsh.context)); 660 + break; 661 + default: 662 + return -EINVAL; 663 + } 664 + skb_postpush_rcsum(skb, nh, length); 635 665 return 0; 636 666 } 637 667 ··· 1120 1024 get_mask(a, struct ovs_key_ethernet *)); 1121 1025 break; 1122 1026 1027 + case OVS_KEY_ATTR_NSH: 1028 + err = set_nsh(skb, flow_key, a); 1029 + break; 1030 + 1123 1031 case OVS_KEY_ATTR_IPV4: 1124 1032 err = set_ipv4(skb, flow_key, nla_data(a), 1125 1033 get_mask(a, struct ovs_key_ipv4 *)); ··· 1313 1213 1314 1214 case OVS_ACTION_ATTR_POP_ETH: 1315 1215 err = pop_eth(skb, key); 1216 + break; 1217 + 1218 + case OVS_ACTION_ATTR_PUSH_NSH: { 1219 + u8 buffer[NSH_HDR_MAX_LEN]; 1220 + struct nshhdr *nh = (struct nshhdr *)buffer; 1221 + 1222 + err = nsh_hdr_from_nlattr(nla_data(a), nh, 1223 + NSH_HDR_MAX_LEN); 1224 + if (unlikely(err)) 1225 + break; 1226 + err = push_nsh(skb, key, nh); 1227 + break; 1228 + } 1229 + 1230 + case OVS_ACTION_ATTR_POP_NSH: 1231 + err = pop_nsh(skb, key); 1316 1232 break; 1317 1233 } 1318 1234
+51
net/openvswitch/flow.c
··· 46 46 #include <net/ipv6.h> 47 47 #include <net/mpls.h> 48 48 #include <net/ndisc.h> 49 + #include <net/nsh.h> 49 50 50 51 #include "conntrack.h" 51 52 #include "datapath.h" ··· 491 490 return 0; 492 491 } 493 492 493 + static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) 494 + { 495 + struct nshhdr *nh; 496 + unsigned int nh_ofs = skb_network_offset(skb); 497 + u8 version, length; 498 + int err; 499 + 500 + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); 501 + if (unlikely(err)) 502 + return err; 503 + 504 + nh = nsh_hdr(skb); 505 + version = nsh_get_ver(nh); 506 + length = nsh_hdr_len(nh); 507 + 508 + if (version != 0) 509 + return -EINVAL; 510 + 511 + err = check_header(skb, nh_ofs + length); 512 + if (unlikely(err)) 513 + return err; 514 + 515 + nh = nsh_hdr(skb); 516 + key->nsh.base.flags = nsh_get_flags(nh); 517 + key->nsh.base.ttl = nsh_get_ttl(nh); 518 + key->nsh.base.mdtype = nh->mdtype; 519 + key->nsh.base.np = nh->np; 520 + key->nsh.base.path_hdr = nh->path_hdr; 521 + switch (key->nsh.base.mdtype) { 522 + case NSH_M_TYPE1: 523 + if (length != NSH_M_TYPE1_LEN) 524 + return -EINVAL; 525 + memcpy(key->nsh.context, nh->md1.context, 526 + sizeof(nh->md1)); 527 + break; 528 + case NSH_M_TYPE2: 529 + memset(key->nsh.context, 0, 530 + sizeof(nh->md1)); 531 + break; 532 + default: 533 + return -EINVAL; 534 + } 535 + 536 + return 0; 537 + } 538 + 494 539 /** 495 540 * key_extract - extracts a flow key from an Ethernet frame. 496 541 * @skb: sk_buff that contains the frame, with skb->data pointing to the ··· 782 735 memset(&key->tp, 0, sizeof(key->tp)); 783 736 } 784 737 } 738 + } else if (key->eth.type == htons(ETH_P_NSH)) { 739 + error = parse_nsh(skb, key); 740 + if (error) 741 + return error; 785 742 } 786 743 return 0; 787 744 }
+7
net/openvswitch/flow.h
··· 35 35 #include <net/inet_ecn.h> 36 36 #include <net/ip_tunnels.h> 37 37 #include <net/dst_metadata.h> 38 + #include <net/nsh.h> 38 39 39 40 struct sk_buff; 40 41 ··· 66 65 #define OVS_SW_FLOW_KEY_METADATA_SIZE \ 67 66 (offsetof(struct sw_flow_key, recirc_id) + \ 68 67 FIELD_SIZEOF(struct sw_flow_key, recirc_id)) 68 + 69 + struct ovs_key_nsh { 70 + struct ovs_nsh_key_base base; 71 + __be32 context[NSH_MD1_CONTEXT_SIZE]; 72 + }; 69 73 70 74 struct sw_flow_key { 71 75 u8 tun_opts[IP_TUNNEL_OPTS_MAX]; ··· 149 143 } nd; 150 144 }; 151 145 } ipv6; 146 + struct ovs_key_nsh nsh; /* network service header */ 152 147 }; 153 148 struct { 154 149 /* Connection tracking fields not packed above. */
+341 -2
net/openvswitch/flow_netlink.c
··· 48 48 #include <net/ndisc.h> 49 49 #include <net/mpls.h> 50 50 #include <net/vxlan.h> 51 + #include <net/tun_proto.h> 51 52 #include <net/erspan.h> 52 53 53 54 #include "flow_netlink.h" ··· 81 80 case OVS_ACTION_ATTR_HASH: 82 81 case OVS_ACTION_ATTR_POP_ETH: 83 82 case OVS_ACTION_ATTR_POP_MPLS: 83 + case OVS_ACTION_ATTR_POP_NSH: 84 84 case OVS_ACTION_ATTR_POP_VLAN: 85 85 case OVS_ACTION_ATTR_PUSH_ETH: 86 86 case OVS_ACTION_ATTR_PUSH_MPLS: 87 + case OVS_ACTION_ATTR_PUSH_NSH: 87 88 case OVS_ACTION_ATTR_PUSH_VLAN: 88 89 case OVS_ACTION_ATTR_SAMPLE: 89 90 case OVS_ACTION_ATTR_SET: ··· 178 175 | (1 << OVS_KEY_ATTR_ICMPV6) 179 176 | (1 << OVS_KEY_ATTR_ARP) 180 177 | (1 << OVS_KEY_ATTR_ND) 181 - | (1 << OVS_KEY_ATTR_MPLS)); 178 + | (1 << OVS_KEY_ATTR_MPLS) 179 + | (1 << OVS_KEY_ATTR_NSH)); 182 180 183 181 /* Always allowed mask fields. */ 184 182 mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) ··· 288 284 } 289 285 } 290 286 287 + if (match->key->eth.type == htons(ETH_P_NSH)) { 288 + key_expected |= 1 << OVS_KEY_ATTR_NSH; 289 + if (match->mask && 290 + match->mask->key.eth.type == htons(0xffff)) { 291 + mask_allowed |= 1 << OVS_KEY_ATTR_NSH; 292 + } 293 + } 294 + 291 295 if ((key_attrs & key_expected) != key_expected) { 292 296 /* Key attributes check failed. */ 293 297 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", ··· 337 325 + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ 338 326 } 339 327 328 + size_t ovs_nsh_key_attr_size(void) 329 + { 330 + /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider 331 + * updating this function. 332 + */ 333 + return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ 334 + /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are 335 + * mutually exclusive, so the bigger one can cover 336 + * the small one. 
337 + */ 338 + + nla_total_size(NSH_CTX_HDRS_MAX_LEN); 339 + } 340 + 340 341 size_t ovs_key_attr_size(void) 341 342 { 342 343 /* Whenever adding new OVS_KEY_ FIELDS, we should consider 343 344 * updating this function. 344 345 */ 345 - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); 346 + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); 346 347 347 348 return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ 348 349 + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ ··· 369 344 + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ 370 345 + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ 371 346 + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ 347 + + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ 348 + + ovs_nsh_key_attr_size() 372 349 + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ 373 350 + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ 374 351 + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ ··· 402 375 [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, 403 376 [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, 404 377 [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, 378 + }; 379 + 380 + static const struct ovs_len_tbl 381 + ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { 382 + [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, 383 + [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, 384 + [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, 405 385 }; 406 386 407 387 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. 
*/ ··· 443 409 .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, 444 410 [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { 445 411 .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, 412 + [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, 413 + .next = ovs_nsh_key_attr_lens, }, 446 414 }; 447 415 448 416 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) ··· 1263 1227 return 0; 1264 1228 } 1265 1229 1230 + int nsh_hdr_from_nlattr(const struct nlattr *attr, 1231 + struct nshhdr *nh, size_t size) 1232 + { 1233 + struct nlattr *a; 1234 + int rem; 1235 + u8 flags = 0; 1236 + u8 ttl = 0; 1237 + int mdlen = 0; 1238 + 1239 + /* validate_nsh has check this, so we needn't do duplicate check here 1240 + */ 1241 + if (size < NSH_BASE_HDR_LEN) 1242 + return -ENOBUFS; 1243 + 1244 + nla_for_each_nested(a, attr, rem) { 1245 + int type = nla_type(a); 1246 + 1247 + switch (type) { 1248 + case OVS_NSH_KEY_ATTR_BASE: { 1249 + const struct ovs_nsh_key_base *base = nla_data(a); 1250 + 1251 + flags = base->flags; 1252 + ttl = base->ttl; 1253 + nh->np = base->np; 1254 + nh->mdtype = base->mdtype; 1255 + nh->path_hdr = base->path_hdr; 1256 + break; 1257 + } 1258 + case OVS_NSH_KEY_ATTR_MD1: 1259 + mdlen = nla_len(a); 1260 + if (mdlen > size - NSH_BASE_HDR_LEN) 1261 + return -ENOBUFS; 1262 + memcpy(&nh->md1, nla_data(a), mdlen); 1263 + break; 1264 + 1265 + case OVS_NSH_KEY_ATTR_MD2: 1266 + mdlen = nla_len(a); 1267 + if (mdlen > size - NSH_BASE_HDR_LEN) 1268 + return -ENOBUFS; 1269 + memcpy(&nh->md2, nla_data(a), mdlen); 1270 + break; 1271 + 1272 + default: 1273 + return -EINVAL; 1274 + } 1275 + } 1276 + 1277 + /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ 1278 + nh->ver_flags_ttl_len = 0; 1279 + nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); 1280 + 1281 + return 0; 1282 + } 1283 + 1284 + int nsh_key_from_nlattr(const struct nlattr *attr, 1285 + struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) 1286 + { 1287 + struct nlattr *a; 1288 + int rem; 1289 + 1290 + /* 
validate_nsh has check this, so we needn't do duplicate check here 1291 + */ 1292 + nla_for_each_nested(a, attr, rem) { 1293 + int type = nla_type(a); 1294 + 1295 + switch (type) { 1296 + case OVS_NSH_KEY_ATTR_BASE: { 1297 + const struct ovs_nsh_key_base *base = nla_data(a); 1298 + const struct ovs_nsh_key_base *base_mask = base + 1; 1299 + 1300 + nsh->base = *base; 1301 + nsh_mask->base = *base_mask; 1302 + break; 1303 + } 1304 + case OVS_NSH_KEY_ATTR_MD1: { 1305 + const struct ovs_nsh_key_md1 *md1 = nla_data(a); 1306 + const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; 1307 + 1308 + memcpy(nsh->context, md1->context, sizeof(*md1)); 1309 + memcpy(nsh_mask->context, md1_mask->context, 1310 + sizeof(*md1_mask)); 1311 + break; 1312 + } 1313 + case OVS_NSH_KEY_ATTR_MD2: 1314 + /* Not supported yet */ 1315 + return -ENOTSUPP; 1316 + default: 1317 + return -EINVAL; 1318 + } 1319 + } 1320 + 1321 + return 0; 1322 + } 1323 + 1324 + static int nsh_key_put_from_nlattr(const struct nlattr *attr, 1325 + struct sw_flow_match *match, bool is_mask, 1326 + bool is_push_nsh, bool log) 1327 + { 1328 + struct nlattr *a; 1329 + int rem; 1330 + bool has_base = false; 1331 + bool has_md1 = false; 1332 + bool has_md2 = false; 1333 + u8 mdtype = 0; 1334 + int mdlen = 0; 1335 + 1336 + if (WARN_ON(is_push_nsh && is_mask)) 1337 + return -EINVAL; 1338 + 1339 + nla_for_each_nested(a, attr, rem) { 1340 + int type = nla_type(a); 1341 + int i; 1342 + 1343 + if (type > OVS_NSH_KEY_ATTR_MAX) { 1344 + OVS_NLERR(log, "nsh attr %d is out of range max %d", 1345 + type, OVS_NSH_KEY_ATTR_MAX); 1346 + return -EINVAL; 1347 + } 1348 + 1349 + if (!check_attr_len(nla_len(a), 1350 + ovs_nsh_key_attr_lens[type].len)) { 1351 + OVS_NLERR( 1352 + log, 1353 + "nsh attr %d has unexpected len %d expected %d", 1354 + type, 1355 + nla_len(a), 1356 + ovs_nsh_key_attr_lens[type].len 1357 + ); 1358 + return -EINVAL; 1359 + } 1360 + 1361 + switch (type) { 1362 + case OVS_NSH_KEY_ATTR_BASE: { 1363 + const struct 
ovs_nsh_key_base *base = nla_data(a); 1364 + 1365 + has_base = true; 1366 + mdtype = base->mdtype; 1367 + SW_FLOW_KEY_PUT(match, nsh.base.flags, 1368 + base->flags, is_mask); 1369 + SW_FLOW_KEY_PUT(match, nsh.base.ttl, 1370 + base->ttl, is_mask); 1371 + SW_FLOW_KEY_PUT(match, nsh.base.mdtype, 1372 + base->mdtype, is_mask); 1373 + SW_FLOW_KEY_PUT(match, nsh.base.np, 1374 + base->np, is_mask); 1375 + SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, 1376 + base->path_hdr, is_mask); 1377 + break; 1378 + } 1379 + case OVS_NSH_KEY_ATTR_MD1: { 1380 + const struct ovs_nsh_key_md1 *md1 = nla_data(a); 1381 + 1382 + has_md1 = true; 1383 + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) 1384 + SW_FLOW_KEY_PUT(match, nsh.context[i], 1385 + md1->context[i], is_mask); 1386 + break; 1387 + } 1388 + case OVS_NSH_KEY_ATTR_MD2: 1389 + if (!is_push_nsh) /* Not supported MD type 2 yet */ 1390 + return -ENOTSUPP; 1391 + 1392 + has_md2 = true; 1393 + mdlen = nla_len(a); 1394 + if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { 1395 + OVS_NLERR( 1396 + log, 1397 + "Invalid MD length %d for MD type %d", 1398 + mdlen, 1399 + mdtype 1400 + ); 1401 + return -EINVAL; 1402 + } 1403 + break; 1404 + default: 1405 + OVS_NLERR(log, "Unknown nsh attribute %d", 1406 + type); 1407 + return -EINVAL; 1408 + } 1409 + } 1410 + 1411 + if (rem > 0) { 1412 + OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); 1413 + return -EINVAL; 1414 + } 1415 + 1416 + if (has_md1 && has_md2) { 1417 + OVS_NLERR( 1418 + 1, 1419 + "invalid nsh attribute: md1 and md2 are exclusive." 
1420 + ); 1421 + return -EINVAL; 1422 + } 1423 + 1424 + if (!is_mask) { 1425 + if ((has_md1 && mdtype != NSH_M_TYPE1) || 1426 + (has_md2 && mdtype != NSH_M_TYPE2)) { 1427 + OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", 1428 + mdtype); 1429 + return -EINVAL; 1430 + } 1431 + 1432 + if (is_push_nsh && 1433 + (!has_base || (!has_md1 && !has_md2))) { 1434 + OVS_NLERR( 1435 + 1, 1436 + "push_nsh: missing base or metadata attributes" 1437 + ); 1438 + return -EINVAL; 1439 + } 1440 + } 1441 + 1442 + return 0; 1443 + } 1444 + 1266 1445 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, 1267 1446 u64 attrs, const struct nlattr **a, 1268 1447 bool is_mask, bool log) ··· 1603 1352 arp_key->arp_tha, ETH_ALEN, is_mask); 1604 1353 1605 1354 attrs &= ~(1 << OVS_KEY_ATTR_ARP); 1355 + } 1356 + 1357 + if (attrs & (1 << OVS_KEY_ATTR_NSH)) { 1358 + if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, 1359 + is_mask, false, log) < 0) 1360 + return -EINVAL; 1361 + attrs &= ~(1 << OVS_KEY_ATTR_NSH); 1606 1362 } 1607 1363 1608 1364 if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { ··· 1928 1670 return 0; 1929 1671 } 1930 1672 1673 + static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, 1674 + struct sk_buff *skb) 1675 + { 1676 + struct nlattr *start; 1677 + 1678 + start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); 1679 + if (!start) 1680 + return -EMSGSIZE; 1681 + 1682 + if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) 1683 + goto nla_put_failure; 1684 + 1685 + if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { 1686 + if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, 1687 + sizeof(nsh->context), nsh->context)) 1688 + goto nla_put_failure; 1689 + } 1690 + 1691 + /* Don't support MD type 2 yet */ 1692 + 1693 + nla_nest_end(skb, start); 1694 + 1695 + return 0; 1696 + 1697 + nla_put_failure: 1698 + return -EMSGSIZE; 1699 + } 1700 + 1931 1701 static int __ovs_nla_put_key(const struct sw_flow_key *swkey, 1932 1702 const struct sw_flow_key 
*output, bool is_mask, 1933 1703 struct sk_buff *skb) ··· 2084 1798 ipv6_key->ipv6_tclass = output->ip.tos; 2085 1799 ipv6_key->ipv6_hlimit = output->ip.ttl; 2086 1800 ipv6_key->ipv6_frag = output->ip.frag; 1801 + } else if (swkey->eth.type == htons(ETH_P_NSH)) { 1802 + if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) 1803 + goto nla_put_failure; 2087 1804 } else if (swkey->eth.type == htons(ETH_P_ARP) || 2088 1805 swkey->eth.type == htons(ETH_P_RARP)) { 2089 1806 struct ovs_key_arp *arp_key; ··· 2581 2292 return err; 2582 2293 } 2583 2294 2295 + static bool validate_nsh(const struct nlattr *attr, bool is_mask, 2296 + bool is_push_nsh, bool log) 2297 + { 2298 + struct sw_flow_match match; 2299 + struct sw_flow_key key; 2300 + int ret = 0; 2301 + 2302 + ovs_match_init(&match, &key, true, NULL); 2303 + ret = nsh_key_put_from_nlattr(attr, &match, is_mask, 2304 + is_push_nsh, log); 2305 + return !ret; 2306 + } 2307 + 2584 2308 /* Return false if there are any non-masked bits set. 2585 2309 * Mask follows data immediately, before any netlink padding. 
2586 2310 */ ··· 2736 2434 2737 2435 break; 2738 2436 2437 + case OVS_KEY_ATTR_NSH: 2438 + if (eth_type != htons(ETH_P_NSH)) 2439 + return -EINVAL; 2440 + if (!validate_nsh(nla_data(a), masked, false, log)) 2441 + return -EINVAL; 2442 + break; 2443 + 2739 2444 default: 2740 2445 return -EINVAL; 2741 2446 } ··· 2842 2533 [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), 2843 2534 [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), 2844 2535 [OVS_ACTION_ATTR_POP_ETH] = 0, 2536 + [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, 2537 + [OVS_ACTION_ATTR_POP_NSH] = 0, 2845 2538 }; 2846 2539 const struct ovs_action_push_vlan *vlan; 2847 2540 int type = nla_type(a); ··· 3000 2689 return -EINVAL; 3001 2690 mac_proto = MAC_PROTO_ETHERNET; 3002 2691 break; 2692 + 2693 + case OVS_ACTION_ATTR_PUSH_NSH: 2694 + if (mac_proto != MAC_PROTO_ETHERNET) { 2695 + u8 next_proto; 2696 + 2697 + next_proto = tun_p_from_eth_p(eth_type); 2698 + if (!next_proto) 2699 + return -EINVAL; 2700 + } 2701 + mac_proto = MAC_PROTO_NONE; 2702 + if (!validate_nsh(nla_data(a), false, true, true)) 2703 + return -EINVAL; 2704 + break; 2705 + 2706 + case OVS_ACTION_ATTR_POP_NSH: { 2707 + __be16 inner_proto; 2708 + 2709 + if (eth_type != htons(ETH_P_NSH)) 2710 + return -EINVAL; 2711 + inner_proto = tun_p_to_eth_p(key->nsh.base.np); 2712 + if (!inner_proto) 2713 + return -EINVAL; 2714 + if (key->nsh.base.np == TUN_P_ETHERNET) 2715 + mac_proto = MAC_PROTO_ETHERNET; 2716 + else 2717 + mac_proto = MAC_PROTO_NONE; 2718 + break; 2719 + } 3003 2720 3004 2721 default: 3005 2722 OVS_NLERR(log, "Unknown Action type %d", type);
+5
net/openvswitch/flow_netlink.h
··· 79 79 void ovs_nla_free_flow_actions(struct sw_flow_actions *); 80 80 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); 81 81 82 + int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, 83 + struct ovs_key_nsh *nsh_mask); 84 + int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, 85 + size_t size); 86 + 82 87 #endif /* flow_netlink.h */