Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: fib_rules: Fix iif / oif matching on L3 master device

Before commit 40867d74c374 ("net: Add l3mdev index to flow struct and
avoid oif reset for port devices") it was possible to use FIB rules to
match on a L3 domain. This was done by having a FIB rule match on iif /
oif being a L3 master device. It worked because prior to the FIB rule
lookup the iif / oif fields in the flow structure were reset to the
index of the L3 master device to which the input / output device was
enslaved.

The above scheme made it impossible to match on the original input /
output device. Therefore, the cited commit stopped overwriting the iif / oif
fields in the flow structure and instead stored the index of the
enslaving L3 master device in a new field ('flowi_l3mdev') in the flow
structure.

While the change enabled new use cases, it broke the original use case
of matching on a L3 domain. Fix this by interpreting the iif / oif
matching on a L3 master device as a match against the L3 domain. In
other words, if the iif / oif in the FIB rule points to a L3 master
device, compare the provided index against 'flowi_l3mdev' rather than
'flowi_{i,o}if'.

Before the cited commit, a FIB rule that matched on 'iif vrf1' would only
match incoming traffic from devices enslaved to 'vrf1'. With the
proposed change (i.e., comparing against 'flowi_l3mdev'), the rule would
also match traffic originating from a socket bound to 'vrf1'. Avoid that
by adding a new flow flag ('FLOWI_FLAG_L3MDEV_OIF') that, when set,
indicates that the L3 domain was derived from the output interface (when
not set, it was derived from the input interface), and take this flag
into account when evaluating the FIB rule against the flow structure.

Avoid unnecessary checks in the data path by detecting that a rule
matches on a L3 master device when the rule is installed and marking it
as such.

Tested using the following script [1].

Output before 40867d74c374 (v5.4.291):

default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link

Output after 40867d74c374:

default dev dummy1 table 300 scope link
default dev dummy1 table 300 scope link

Output with this patch:

default dev dummy1 table 100 scope link
default dev dummy1 table 200 scope link

[1]
#!/bin/bash

ip link add name vrf1 up type vrf table 10
ip link add name dummy1 up master vrf1 type dummy

sysctl -wq net.ipv4.conf.all.forwarding=1
sysctl -wq net.ipv4.conf.all.rp_filter=0

ip route add table 100 default dev dummy1
ip route add table 200 default dev dummy1
ip route add table 300 default dev dummy1

ip rule add prio 0 oif vrf1 table 100
ip rule add prio 1 iif vrf1 table 200
ip rule add prio 2 table 300

ip route get 192.0.2.1 oif dummy1 fibmatch
ip route get 192.0.2.1 iif dummy1 from 198.51.100.1 fibmatch

Fixes: 40867d74c374 ("net: Add l3mdev index to flow struct and avoid oif reset for port devices")
Reported-by: hanhuihui <hanhuihui5@huawei.com>
Closes: https://lore.kernel.org/netdev/ec671c4f821a4d63904d0da15d604b75@huawei.com/
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20250414172022.242991-2-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Ido Schimmel and committed by
Jakub Kicinski
2d300ce0 12f2d033

+73 -9
+2
include/net/fib_rules.h
··· 45 45 struct fib_rule_port_range dport_range; 46 46 u16 sport_mask; 47 47 u16 dport_mask; 48 + u8 iif_is_l3_master; 49 + u8 oif_is_l3_master; 48 50 struct rcu_head rcu; 49 51 }; 50 52
+1
include/net/flow.h
··· 38 38 __u8 flowic_flags; 39 39 #define FLOWI_FLAG_ANYSRC 0x01 40 40 #define FLOWI_FLAG_KNOWN_NH 0x02 41 + #define FLOWI_FLAG_L3MDEV_OIF 0x04 41 42 __u32 flowic_secid; 42 43 kuid_t flowic_uid; 43 44 __u32 flowic_multipath_hash;
+27
include/net/l3mdev.h
··· 59 59 int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, 60 60 struct fib_lookup_arg *arg); 61 61 62 + static inline 63 + bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) 64 + { 65 + return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && 66 + fl->flowi_l3mdev == iifindex; 67 + } 68 + 69 + static inline 70 + bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) 71 + { 72 + return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && 73 + fl->flowi_l3mdev == oifindex; 74 + } 75 + 62 76 void l3mdev_update_flow(struct net *net, struct flowi *fl); 63 77 64 78 int l3mdev_master_ifindex_rcu(const struct net_device *dev); ··· 341 327 { 342 328 return 1; 343 329 } 330 + 331 + static inline 332 + bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) 333 + { 334 + return false; 335 + } 336 + 337 + static inline 338 + bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) 339 + { 340 + return false; 341 + } 342 + 344 343 static inline 345 344 void l3mdev_update_flow(struct net *net, struct flowi *fl) 346 345 {
+40 -8
net/core/fib_rules.c
··· 257 257 return nla_put(skb, attrtype, sizeof(*range), range); 258 258 } 259 259 260 + static bool fib_rule_iif_match(const struct fib_rule *rule, int iifindex, 261 + const struct flowi *fl) 262 + { 263 + u8 iif_is_l3_master = READ_ONCE(rule->iif_is_l3_master); 264 + 265 + return iif_is_l3_master ? l3mdev_fib_rule_iif_match(fl, iifindex) : 266 + fl->flowi_iif == iifindex; 267 + } 268 + 269 + static bool fib_rule_oif_match(const struct fib_rule *rule, int oifindex, 270 + const struct flowi *fl) 271 + { 272 + u8 oif_is_l3_master = READ_ONCE(rule->oif_is_l3_master); 273 + 274 + return oif_is_l3_master ? l3mdev_fib_rule_oif_match(fl, oifindex) : 275 + fl->flowi_oif == oifindex; 276 + } 277 + 260 278 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, 261 279 struct flowi *fl, int flags, 262 280 struct fib_lookup_arg *arg) ··· 282 264 int iifindex, oifindex, ret = 0; 283 265 284 266 iifindex = READ_ONCE(rule->iifindex); 285 - if (iifindex && (iifindex != fl->flowi_iif)) 267 + if (iifindex && !fib_rule_iif_match(rule, iifindex, fl)) 286 268 goto out; 287 269 288 270 oifindex = READ_ONCE(rule->oifindex); 289 - if (oifindex && (oifindex != fl->flowi_oif)) 271 + if (oifindex && !fib_rule_oif_match(rule, oifindex, fl)) 290 272 goto out; 291 273 292 274 if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask) ··· 754 736 struct net_device *dev; 755 737 756 738 dev = __dev_get_by_name(nlrule->fr_net, nlrule->iifname); 757 - if (dev) 739 + if (dev) { 758 740 nlrule->iifindex = dev->ifindex; 741 + nlrule->iif_is_l3_master = netif_is_l3_master(dev); 742 + } 759 743 } 760 744 761 745 if (tb[FRA_OIFNAME]) { 762 746 struct net_device *dev; 763 747 764 748 dev = __dev_get_by_name(nlrule->fr_net, nlrule->oifname); 765 - if (dev) 749 + if (dev) { 766 750 nlrule->oifindex = dev->ifindex; 751 + nlrule->oif_is_l3_master = netif_is_l3_master(dev); 752 + } 767 753 } 768 754 769 755 return 0; ··· 1358 1336 1359 1337 list_for_each_entry(rule, rules, list) { 1360 1338 if 
(rule->iifindex == -1 && 1361 - strcmp(dev->name, rule->iifname) == 0) 1339 + strcmp(dev->name, rule->iifname) == 0) { 1362 1340 WRITE_ONCE(rule->iifindex, dev->ifindex); 1341 + WRITE_ONCE(rule->iif_is_l3_master, 1342 + netif_is_l3_master(dev)); 1343 + } 1363 1344 if (rule->oifindex == -1 && 1364 - strcmp(dev->name, rule->oifname) == 0) 1345 + strcmp(dev->name, rule->oifname) == 0) { 1365 1346 WRITE_ONCE(rule->oifindex, dev->ifindex); 1347 + WRITE_ONCE(rule->oif_is_l3_master, 1348 + netif_is_l3_master(dev)); 1349 + } 1366 1350 } 1367 1351 } 1368 1352 ··· 1377 1349 struct fib_rule *rule; 1378 1350 1379 1351 list_for_each_entry(rule, rules, list) { 1380 - if (rule->iifindex == dev->ifindex) 1352 + if (rule->iifindex == dev->ifindex) { 1381 1353 WRITE_ONCE(rule->iifindex, -1); 1382 - if (rule->oifindex == dev->ifindex) 1354 + WRITE_ONCE(rule->iif_is_l3_master, false); 1355 + } 1356 + if (rule->oifindex == dev->ifindex) { 1383 1357 WRITE_ONCE(rule->oifindex, -1); 1358 + WRITE_ONCE(rule->oif_is_l3_master, false); 1359 + } 1384 1360 } 1385 1361 } 1386 1362
+3 -1
net/l3mdev/l3mdev.c
··· 277 277 if (fl->flowi_oif) { 278 278 dev = dev_get_by_index_rcu(net, fl->flowi_oif); 279 279 if (dev) { 280 - if (!fl->flowi_l3mdev) 280 + if (!fl->flowi_l3mdev) { 281 281 fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); 282 + fl->flowi_flags |= FLOWI_FLAG_L3MDEV_OIF; 283 + } 282 284 283 285 /* oif set to L3mdev directs lookup to its table; 284 286 * reset to avoid oif match in fib_lookup