Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vxlan: Support MC routing in the underlay

Locally-generated MC packets have so far not been subject to MC routing.
Instead, an MC-enabled installation would maintain the MC routing tables
and, separately from that, the list of interfaces to send packets to as
part of the VXLAN FDB and MDB.

In a previous patch, the ip_mr_output() and ip6_mr_output() routines were
added for IPv4 and IPv6. All locally generated MC traffic is now passed
through these functions. For reasons of backward compatibility, an SKB
(IPCB / IP6CB) flag guards the actual MC routing.

This patch adds logic to set the flag, and the UAPI to enable the behavior.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/d899655bb7e9b2521ee8c793e67056b9fd02ba12.1750113335.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Petr Machata and committed by
Jakub Kicinski
f8337efa 96e8f5a9

+25 -3
+20 -2
drivers/net/vxlan/vxlan_core.c
··· 2451 2451 rcu_read_lock(); 2452 2452 if (addr_family == AF_INET) { 2453 2453 struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); 2454 + u16 ipcb_flags = 0; 2454 2455 struct rtable *rt; 2455 2456 __be16 df = 0; 2456 2457 __be32 saddr; ··· 2467 2466 reason = SKB_DROP_REASON_IP_OUTNOROUTES; 2468 2467 goto tx_error; 2469 2468 } 2469 + 2470 + if (flags & VXLAN_F_MC_ROUTE) 2471 + ipcb_flags |= IPSKB_MCROUTE; 2470 2472 2471 2473 if (!info) { 2472 2474 /* Bypass encapsulation if the destination is local */ ··· 2526 2522 2527 2523 udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, saddr, 2528 2524 pkey->u.ipv4.dst, tos, ttl, df, 2529 - src_port, dst_port, xnet, !udp_sum, 0); 2525 + src_port, dst_port, xnet, !udp_sum, 2526 + ipcb_flags); 2530 2527 #if IS_ENABLED(CONFIG_IPV6) 2531 2528 } else { 2532 2529 struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); 2533 2530 struct in6_addr saddr; 2531 + u16 ip6cb_flags = 0; 2534 2532 2535 2533 if (!ifindex) 2536 2534 ifindex = sock6->sock->sk->sk_bound_dev_if; ··· 2547 2541 reason = SKB_DROP_REASON_IP_OUTNOROUTES; 2548 2542 goto tx_error; 2549 2543 } 2544 + 2545 + if (flags & VXLAN_F_MC_ROUTE) 2546 + ip6cb_flags |= IP6SKB_MCROUTE; 2550 2547 2551 2548 if (!info) { 2552 2549 u32 rt6i_flags = dst_rt6_info(ndst)->rt6i_flags; ··· 2596 2587 udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev, 2597 2588 &saddr, &pkey->u.ipv6.dst, tos, ttl, 2598 2589 pkey->label, src_port, dst_port, !udp_sum, 2599 - 0); 2590 + ip6cb_flags); 2600 2591 #endif 2601 2592 } 2602 2593 vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX, pkt_len); ··· 3411 3402 [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), 3412 3403 [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), 3413 3404 [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)), 3405 + [IFLA_VXLAN_MC_ROUTE] = NLA_POLICY_MAX(NLA_U8, 1), 3414 3406 }; 3415 3407 3416 3408 static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], ··· 
4321 4311 err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, 4322 4312 VXLAN_F_REMCSUM_NOPARTIAL, changelink, 4323 4313 false, extack); 4314 + if (err) 4315 + return err; 4316 + } 4317 + 4318 + if (data[IFLA_VXLAN_MC_ROUTE]) { 4319 + err = vxlan_nl2flag(conf, data, IFLA_VXLAN_MC_ROUTE, 4320 + VXLAN_F_MC_ROUTE, changelink, 4321 + true, extack); 4324 4322 if (err) 4325 4323 return err; 4326 4324 }
+4 -1
include/net/vxlan.h
··· 332 332 #define VXLAN_F_VNIFILTER 0x20000 333 333 #define VXLAN_F_MDB 0x40000 334 334 #define VXLAN_F_LOCALBYPASS 0x80000 335 + #define VXLAN_F_MC_ROUTE 0x100000 335 336 336 337 /* Flags that are used in the receive path. These flags must match in 337 338 * order for a socket to be shareable ··· 354 353 VXLAN_F_UDP_ZERO_CSUM6_RX | \ 355 354 VXLAN_F_COLLECT_METADATA | \ 356 355 VXLAN_F_VNIFILTER | \ 357 - VXLAN_F_LOCALBYPASS) 356 + VXLAN_F_LOCALBYPASS | \ 357 + VXLAN_F_MC_ROUTE | \ 358 + 0) 358 359 359 360 struct net_device *vxlan_dev_create(struct net *net, const char *name, 360 361 u8 name_assign_type, struct vxlan_config *conf);
+1
include/uapi/linux/if_link.h
··· 1398 1398 IFLA_VXLAN_LOCALBYPASS, 1399 1399 IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ 1400 1400 IFLA_VXLAN_RESERVED_BITS, 1401 + IFLA_VXLAN_MC_ROUTE, 1401 1402 __IFLA_VXLAN_MAX 1402 1403 }; 1403 1404 #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)