Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: openvswitch: add hash info to upcall

When using the kernel datapath, the upcall doesn't
include the related skb hash info. That introduces
some problems, because the skb hash is important
in the kernel stack. For example, the VXLAN module uses
it to select the UDP src port. The tx queue selection
may also use the hash in the stack.

The hash is computed in different ways. It is random
for a TCP socket, and it may be computed in hardware
or in the software stack. Recalculating the hash is not easy.

Hash of TCP socket is computed:
tcp_v4_connect
-> sk_set_txhash (is random)

__tcp_transmit_skb
-> skb_set_hash_from_sk

There will be one upcall, without skb hash information,
to ovs-vswitchd for the first packet of a TCP
session. The rest of the packets will be processed in the
Open vSwitch modules, with the hash kept. If this TCP session
is forwarded to the VXLAN module, then the UDP src port of the
first TCP packet is different from that of the remaining packets.

TCP packets may come from the host or from dockers to Open vSwitch.
To fix it, we store the hash info in the upcall, and restore the hash
when packets are sent back.

+---------------+          +-------------------------+
|   Docker/VMs  |          |     ovs-vswitchd        |
+----+----------+          +-+--------------------+--+
     |                       ^                    |
     |                       |                    |
     |                       |  upcall            v restore packet hash (not recalculate)
     |                     +-+--------------------+--+
     |  tap netdev         |                         |   vxlan module
     +--------------->     +-->  Open vSwitch ko  +-->
       or internal type    |                         |
                           +-------------------------+

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Tonghao Zhang and committed by
David S. Miller
bd1903b7 839554b7

+40 -2
+3 -1
include/uapi/linux/openvswitch.h
··· 173 173 * @OVS_PACKET_ATTR_LEN: Packet size before truncation. 174 174 * %OVS_PACKET_ATTR_USERSPACE action specify the Maximum received fragment 175 175 * size. 176 + * @OVS_PACKET_ATTR_HASH: Packet hash info (e.g. hash, sw_hash and l4_hash in skb). 176 177 * 177 178 * These attributes follow the &struct ovs_header within the Generic Netlink 178 179 * payload for %OVS_PACKET_* commands. ··· 191 190 OVS_PACKET_ATTR_PROBE, /* Packet operation is a feature probe, 192 191 error logging should be suppressed. */ 193 192 OVS_PACKET_ATTR_MRU, /* Maximum received IP fragment size. */ 194 - OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ 193 + OVS_PACKET_ATTR_LEN, /* Packet size before truncation. */ 194 + OVS_PACKET_ATTR_HASH, /* Packet hash. */ 195 195 __OVS_PACKET_ATTR_MAX 196 196 }; 197 197
+25 -1
net/openvswitch/datapath.c
··· 350 350 size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) 351 351 + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ 352 352 + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ 353 - + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */ 353 + + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ 354 + + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */ 354 355 355 356 /* OVS_PACKET_ATTR_USERDATA */ 356 357 if (upcall_info->userdata) ··· 394 393 size_t len; 395 394 unsigned int hlen; 396 395 int err, dp_ifindex; 396 + u64 hash; 397 397 398 398 dp_ifindex = get_dpifindex(dp); 399 399 if (!dp_ifindex) ··· 506 504 pad_packet(dp, user_skb); 507 505 } 508 506 507 + /* Add OVS_PACKET_ATTR_HASH */ 508 + hash = skb_get_hash_raw(skb); 509 + if (skb->sw_hash) 510 + hash |= OVS_PACKET_HASH_SW_BIT; 511 + 512 + if (skb->l4_hash) 513 + hash |= OVS_PACKET_HASH_L4_BIT; 514 + 515 + if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { 516 + err = -ENOBUFS; 517 + goto out; 518 + } 519 + 509 520 /* Only reserve room for attribute header, packet data is added 510 521 * in skb_zerocopy() */ 511 522 if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) { ··· 558 543 struct datapath *dp; 559 544 struct vport *input_vport; 560 545 u16 mru = 0; 546 + u64 hash; 561 547 int len; 562 548 int err; 563 549 bool log = !a[OVS_PACKET_ATTR_PROBE]; ··· 583 567 packet->ignore_df = 1; 584 568 } 585 569 OVS_CB(packet)->mru = mru; 570 + 571 + if (a[OVS_PACKET_ATTR_HASH]) { 572 + hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); 573 + 574 + __skb_set_hash(packet, hash & 0xFFFFFFFFULL, 575 + !!(hash & OVS_PACKET_HASH_SW_BIT), 576 + !!(hash & OVS_PACKET_HASH_L4_BIT)); 577 + } 586 578 587 579 /* Build an sw_flow for sending this packet. */ 588 580 flow = ovs_flow_alloc();
+12
net/openvswitch/datapath.h
··· 139 139 bool xt_label; 140 140 }; 141 141 142 + /** 143 + * enum ovs_pkt_hash_types - hash info to include with a packet 144 + * to send to userspace. 145 + * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack. 146 + * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash 147 + * over transport ports. 148 + */ 149 + enum ovs_pkt_hash_types { 150 + OVS_PACKET_HASH_SW_BIT = (1ULL << 32), 151 + OVS_PACKET_HASH_L4_BIT = (1ULL << 33), 152 + }; 153 + 142 154 extern unsigned int ovs_net_id; 143 155 void ovs_lock(void); 144 156 void ovs_unlock(void);