Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: openvswitch: remove never-working support for setting nsh fields

The validation of the set(nsh(...)) action is completely wrong.
It runs through the nsh_key_put_from_nlattr() function that is the
same function that validates NSH keys for the flow match and the
push_nsh() action. However, the set(nsh(...)) has a very different
memory layout. Nested attributes in there are doubled in size in
case of the masked set(). That makes proper validation impossible.

There is also confusion in the code between the 'masked' flag, that
says that the nested attributes are doubled in size containing both
the value and the mask, and the 'is_mask' that says that the value
we're parsing is the mask. This is causing kernel crash on trying to
write into mask part of the match with SW_FLOW_KEY_PUT() during
validation, while validate_nsh() doesn't allocate any memory for it:

BUG: kernel NULL pointer dereference, address: 0000000000000018
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 1c2383067 P4D 1c2383067 PUD 20b703067 PMD 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 8 UID: 0 Kdump: loaded Not tainted 6.17.0-rc4+ #107 PREEMPT(voluntary)
RIP: 0010:nsh_key_put_from_nlattr+0x19d/0x610 [openvswitch]
Call Trace:
<TASK>
validate_nsh+0x60/0x90 [openvswitch]
validate_set.constprop.0+0x270/0x3c0 [openvswitch]
__ovs_nla_copy_actions+0x477/0x860 [openvswitch]
ovs_nla_copy_actions+0x8d/0x100 [openvswitch]
ovs_packet_cmd_execute+0x1cc/0x310 [openvswitch]
genl_family_rcv_msg_doit+0xdb/0x130
genl_family_rcv_msg+0x14b/0x220
genl_rcv_msg+0x47/0xa0
netlink_rcv_skb+0x53/0x100
genl_rcv+0x24/0x40
netlink_unicast+0x280/0x3b0
netlink_sendmsg+0x1f7/0x430
____sys_sendmsg+0x36b/0x3a0
___sys_sendmsg+0x87/0xd0
__sys_sendmsg+0x6d/0xd0
do_syscall_64+0x7b/0x2c0
entry_SYSCALL_64_after_hwframe+0x76/0x7e

The third issue with this process is that while trying to convert
the non-masked set into masked one, validate_set() copies and doubles
the size of the OVS_KEY_ATTR_NSH as if it didn't have any nested
attributes. It should be copying each nested attribute and doubling
them in size independently. And the process must be properly reversed
during the conversion back from masked to a non-masked variant during
the flow dump.

In the end, the only two outcomes of trying to use this action are
either validation failure or a kernel crash. And if somehow someone
manages to install a flow with such an action, it will most definitely
not do what it is supposed to, since all the keys and the masks are
mixed up.

Fixing all the issues is a complex task as it requires re-writing
most of the validation code.

Given that and the fact that this functionality never worked since
introduction, let's just remove it altogether. It's better to
re-introduce it later with a proper implementation instead of trying
to fix it in stable releases.

Fixes: b2d0f5d5dc53 ("openvswitch: enable NSH support")
Reported-by: Junvy Yang <zhuque@tencent.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Reviewed-by: Aaron Conole <aconole@redhat.com>
Link: https://patch.msgid.link/20251112112246.95064-1-i.maximets@ovn.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Ilya Maximets and committed by
Jakub Kicinski
dfe28c41 035bca3f

+9 -125
+1 -67
net/openvswitch/actions.c
··· 572 572 return 0; 573 573 } 574 574 575 - static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, 576 - const struct nlattr *a) 577 - { 578 - struct nshhdr *nh; 579 - size_t length; 580 - int err; 581 - u8 flags; 582 - u8 ttl; 583 - int i; 584 - 585 - struct ovs_key_nsh key; 586 - struct ovs_key_nsh mask; 587 - 588 - err = nsh_key_from_nlattr(a, &key, &mask); 589 - if (err) 590 - return err; 591 - 592 - /* Make sure the NSH base header is there */ 593 - if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) 594 - return -ENOMEM; 595 - 596 - nh = nsh_hdr(skb); 597 - length = nsh_hdr_len(nh); 598 - 599 - /* Make sure the whole NSH header is there */ 600 - err = skb_ensure_writable(skb, skb_network_offset(skb) + 601 - length); 602 - if (unlikely(err)) 603 - return err; 604 - 605 - nh = nsh_hdr(skb); 606 - skb_postpull_rcsum(skb, nh, length); 607 - flags = nsh_get_flags(nh); 608 - flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); 609 - flow_key->nsh.base.flags = flags; 610 - ttl = nsh_get_ttl(nh); 611 - ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); 612 - flow_key->nsh.base.ttl = ttl; 613 - nsh_set_flags_and_ttl(nh, flags, ttl); 614 - nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, 615 - mask.base.path_hdr); 616 - flow_key->nsh.base.path_hdr = nh->path_hdr; 617 - switch (nh->mdtype) { 618 - case NSH_M_TYPE1: 619 - for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { 620 - nh->md1.context[i] = 621 - OVS_MASKED(nh->md1.context[i], key.context[i], 622 - mask.context[i]); 623 - } 624 - memcpy(flow_key->nsh.context, nh->md1.context, 625 - sizeof(nh->md1.context)); 626 - break; 627 - case NSH_M_TYPE2: 628 - memset(flow_key->nsh.context, 0, 629 - sizeof(flow_key->nsh.context)); 630 - break; 631 - default: 632 - return -EINVAL; 633 - } 634 - skb_postpush_rcsum(skb, nh, length); 635 - return 0; 636 - } 637 - 638 575 /* Must follow skb_ensure_writable() since that can move the skb data. */ 639 576 static void set_tp_port(struct sk_buff *skb, __be16 *port, 640 577 __be16 new_port, __sum16 *check) ··· 1067 1130 get_mask(a, struct ovs_key_ethernet *)); 1068 1131 break; 1069 1132 1070 - case OVS_KEY_ATTR_NSH: 1071 - err = set_nsh(skb, flow_key, a); 1072 - break; 1073 - 1074 1133 case OVS_KEY_ATTR_IPV4: 1075 1134 err = set_ipv4(skb, flow_key, nla_data(a), 1076 1135 get_mask(a, struct ovs_key_ipv4 *)); ··· 1103 1170 case OVS_KEY_ATTR_CT_LABELS: 1104 1171 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: 1105 1172 case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: 1173 + case OVS_KEY_ATTR_NSH: 1106 1174 err = -EINVAL; 1107 1175 break; 1108 1176 }
+8 -56
net/openvswitch/flow_netlink.c
··· 1305 1305 return 0; 1306 1306 } 1307 1307 1308 + /* 1309 + * Constructs NSH header 'nh' from attributes of OVS_ACTION_ATTR_PUSH_NSH, 1310 + * where 'nh' points to a memory block of 'size' bytes. It's assumed that 1311 + * attributes were previously validated with validate_push_nsh(). 1312 + */ 1308 1313 int nsh_hdr_from_nlattr(const struct nlattr *attr, 1309 1314 struct nshhdr *nh, size_t size) 1310 1315 { ··· 1319 1314 u8 ttl = 0; 1320 1315 int mdlen = 0; 1321 1316 1322 - /* validate_nsh has check this, so we needn't do duplicate check here 1323 - */ 1324 1317 if (size < NSH_BASE_HDR_LEN) 1325 1318 return -ENOBUFS; 1326 1319 ··· 1358 1355 /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ 1359 1356 nh->ver_flags_ttl_len = 0; 1360 1357 nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); 1361 - 1362 - return 0; 1363 - } 1364 - 1365 - int nsh_key_from_nlattr(const struct nlattr *attr, 1366 - struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) 1367 - { 1368 - struct nlattr *a; 1369 - int rem; 1370 - 1371 - /* validate_nsh has check this, so we needn't do duplicate check here 1372 - */ 1373 - nla_for_each_nested(a, attr, rem) { 1374 - int type = nla_type(a); 1375 - 1376 - switch (type) { 1377 - case OVS_NSH_KEY_ATTR_BASE: { 1378 - const struct ovs_nsh_key_base *base = nla_data(a); 1379 - const struct ovs_nsh_key_base *base_mask = base + 1; 1380 - 1381 - nsh->base = *base; 1382 - nsh_mask->base = *base_mask; 1383 - break; 1384 - } 1385 - case OVS_NSH_KEY_ATTR_MD1: { 1386 - const struct ovs_nsh_key_md1 *md1 = nla_data(a); 1387 - const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; 1388 - 1389 - memcpy(nsh->context, md1->context, sizeof(*md1)); 1390 - memcpy(nsh_mask->context, md1_mask->context, 1391 - sizeof(*md1_mask)); 1392 - break; 1393 - } 1394 - case OVS_NSH_KEY_ATTR_MD2: 1395 - /* Not supported yet */ 1396 - return -ENOTSUPP; 1397 - default: 1398 - return -EINVAL; 1399 - } 1400 - } 1401 1358 1402 1359 return 0; 1403 1360 } ··· 2802 2839 return err; 2803 2840 } 2804 2841 2805 - static bool validate_nsh(const struct nlattr *attr, bool is_mask, 2806 - bool is_push_nsh, bool log) 2842 + static bool validate_push_nsh(const struct nlattr *attr, bool log) 2807 2843 { 2808 2844 struct sw_flow_match match; 2809 2845 struct sw_flow_key key; 2810 - int ret = 0; 2811 2846 2812 2847 ovs_match_init(&match, &key, true, NULL); 2813 - ret = nsh_key_put_from_nlattr(attr, &match, is_mask, 2814 - is_push_nsh, log); 2815 - return !ret; 2848 + return !nsh_key_put_from_nlattr(attr, &match, false, true, log); 2816 2849 } 2817 2850 2818 2851 /* Return false if there are any non-masked bits set. ··· 2954 2995 flow_key->ip.proto != IPPROTO_SCTP) 2955 2996 return -EINVAL; 2956 2997 2957 - break; 2958 - 2959 - case OVS_KEY_ATTR_NSH: 2960 - if (eth_type != htons(ETH_P_NSH)) 2961 - return -EINVAL; 2962 - if (!validate_nsh(nla_data(a), masked, false, log)) 2963 - return -EINVAL; 2964 2998 break; 2965 2999 2966 3000 default: ··· 3389 3437 return -EINVAL; 3390 3438 } 3391 3439 mac_proto = MAC_PROTO_NONE; 3392 - if (!validate_nsh(nla_data(a), false, true, true)) 3440 + if (!validate_push_nsh(nla_data(a), log)) 3393 3441 return -EINVAL; 3394 3442 break; 3395 3443
-2
net/openvswitch/flow_netlink.h
··· 65 65 void ovs_nla_free_flow_actions(struct sw_flow_actions *); 66 66 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); 67 67 68 - int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, 69 - struct ovs_key_nsh *nsh_mask); 70 68 int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, 71 69 size_t size); 72 70