Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: ipv6: Add ip6_mr_output()

Multicast (MC) routing is currently handled only in the input path, so locally
generated MC packets never reach the IPMR code. As a result, if a VXLAN remote
address is multicast, the driver needs to set an OIF during route lookup, and
the MC routing configuration has to be kept in sync with the VXLAN FDB and MDB.
Ideally, the VXLAN packets would be routed by the MC routing code instead.

To that end, this patch adds support to route locally generated multicast
packets. The newly-added routines do largely what ip6_mr_input() and
ip6_mr_forward() do: make an MR cache lookup to find where to send the
packets, and use ip6_output() to send each of them. When no cache entry is
found, the packet is punted to the daemon for resolution.

Similarly to the IPv4 case in a previous patch, the new logic is contingent
on a newly-added IP6CB flag, IP6SKB_MCROUTE, being set.

Signed-off-by: Petr Machata <petrm@nvidia.com>
Link: https://patch.msgid.link/3bcc034a3ab4d3c291072fff38f78d7fbbeef4e6.1750113335.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Petr Machata and committed by
Jakub Kicinski
96e8f5a9 1b02f447

+127
+1
include/linux/ipv6.h
··· 156 156 #define IP6SKB_SEG6 256 157 157 #define IP6SKB_FAKEJUMBO 512 158 158 #define IP6SKB_MULTIPATH 1024 159 + #define IP6SKB_MCROUTE 2048 159 160 }; 160 161 161 162 #if defined(CONFIG_NET_L3_MASTER_DEV)
+7
include/linux/mroute6.h
··· 31 31 extern int ip6_mr_input(struct sk_buff *skb); 32 32 extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); 33 33 extern int ip6_mr_init(void); 34 + extern int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb); 34 35 extern void ip6_mr_cleanup(void); 35 36 int ip6mr_ioctl(struct sock *sk, int cmd, void *arg); 36 37 #else ··· 57 56 static inline int ip6_mr_init(void) 58 57 { 59 58 return 0; 59 + } 60 + 61 + static inline int 62 + ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 63 + { 64 + return ip6_output(net, sk, skb); 60 65 } 61 66 62 67 static inline void ip6_mr_cleanup(void)
+118
net/ipv6/ip6mr.c
··· 2119 2119 kfree_skb(skb); 2120 2120 } 2121 2121 2122 + static void ip6mr_output2(struct net *net, struct mr_table *mrt, 2123 + struct sk_buff *skb, int vifi) 2124 + { 2125 + if (ip6mr_prepare_xmit(net, mrt, skb, vifi)) 2126 + goto out_free; 2127 + 2128 + ip6_output(net, NULL, skb); 2129 + return; 2130 + 2131 + out_free: 2132 + kfree_skb(skb); 2133 + } 2134 + 2122 2135 /* Called with rcu_read_lock() */ 2123 2136 static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) 2124 2137 { ··· 2244 2231 kfree_skb(skb); 2245 2232 } 2246 2233 2234 + /* Called under rcu_read_lock() */ 2235 + static void ip6_mr_output_finish(struct net *net, struct mr_table *mrt, 2236 + struct net_device *dev, struct sk_buff *skb, 2237 + struct mfc6_cache *c) 2238 + { 2239 + int psend = -1; 2240 + int ct; 2241 + 2242 + WARN_ON_ONCE(!rcu_read_lock_held()); 2243 + 2244 + atomic_long_inc(&c->_c.mfc_un.res.pkt); 2245 + atomic_long_add(skb->len, &c->_c.mfc_un.res.bytes); 2246 + WRITE_ONCE(c->_c.mfc_un.res.lastuse, jiffies); 2247 + 2248 + /* Forward the frame */ 2249 + if (ipv6_addr_any(&c->mf6c_origin) && 2250 + ipv6_addr_any(&c->mf6c_mcastgrp)) { 2251 + if (ipv6_hdr(skb)->hop_limit > 2252 + c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { 2253 + /* It's an (*,*) entry and the packet is not coming from 2254 + * the upstream: forward the packet to the upstream 2255 + * only. 
2256 + */ 2257 + psend = c->_c.mfc_parent; 2258 + goto last_forward; 2259 + } 2260 + goto dont_forward; 2261 + } 2262 + for (ct = c->_c.mfc_un.res.maxvif - 1; 2263 + ct >= c->_c.mfc_un.res.minvif; ct--) { 2264 + if (ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) { 2265 + if (psend != -1) { 2266 + struct sk_buff *skb2; 2267 + 2268 + skb2 = skb_clone(skb, GFP_ATOMIC); 2269 + if (skb2) 2270 + ip6mr_output2(net, mrt, skb2, psend); 2271 + } 2272 + psend = ct; 2273 + } 2274 + } 2275 + last_forward: 2276 + if (psend != -1) { 2277 + ip6mr_output2(net, mrt, skb, psend); 2278 + return; 2279 + } 2280 + 2281 + dont_forward: 2282 + kfree_skb(skb); 2283 + } 2247 2284 2248 2285 /* 2249 2286 * Multicast packets for forwarding arrive here ··· 2359 2296 ip6_mr_forward(net, mrt, dev, skb, cache); 2360 2297 2361 2298 return 0; 2299 + } 2300 + 2301 + int ip6_mr_output(struct net *net, struct sock *sk, struct sk_buff *skb) 2302 + { 2303 + struct net_device *dev = skb_dst(skb)->dev; 2304 + struct flowi6 fl6 = (struct flowi6) { 2305 + .flowi6_iif = LOOPBACK_IFINDEX, 2306 + .flowi6_mark = skb->mark, 2307 + }; 2308 + struct mfc6_cache *cache; 2309 + struct mr_table *mrt; 2310 + int err; 2311 + int vif; 2312 + 2313 + WARN_ON_ONCE(!rcu_read_lock_held()); 2314 + 2315 + if (IP6CB(skb)->flags & IP6SKB_FORWARDED) 2316 + goto ip6_output; 2317 + if (!(IP6CB(skb)->flags & IP6SKB_MCROUTE)) 2318 + goto ip6_output; 2319 + 2320 + err = ip6mr_fib_lookup(net, &fl6, &mrt); 2321 + if (err < 0) { 2322 + kfree_skb(skb); 2323 + return err; 2324 + } 2325 + 2326 + cache = ip6mr_cache_find(mrt, 2327 + &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); 2328 + if (!cache) { 2329 + vif = ip6mr_find_vif(mrt, dev); 2330 + if (vif >= 0) 2331 + cache = ip6mr_cache_find_any(mrt, 2332 + &ipv6_hdr(skb)->daddr, 2333 + vif); 2334 + } 2335 + 2336 + /* No usable cache entry */ 2337 + if (!cache) { 2338 + vif = ip6mr_find_vif(mrt, dev); 2339 + if (vif >= 0) 2340 + return ip6mr_cache_unresolved(mrt, vif, skb, dev); 2341 + 
goto ip6_output; 2342 + } 2343 + 2344 + /* Wrong interface */ 2345 + vif = cache->_c.mfc_parent; 2346 + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) 2347 + goto ip6_output; 2348 + 2349 + ip6_mr_output_finish(net, mrt, dev, skb, cache); 2350 + return 0; 2351 + 2352 + ip6_output: 2353 + return ip6_output(net, sk, skb); 2362 2354 } 2363 2355 2364 2356 int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
+1
net/ipv6/route.c
··· 1145 1145 rt->dst.input = ip6_input; 1146 1146 } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { 1147 1147 rt->dst.input = ip6_mc_input; 1148 + rt->dst.output = ip6_mr_output; 1148 1149 } else { 1149 1150 rt->dst.input = ip6_forward; 1150 1151 }