Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-fib_rules-add-flow-label-selector-support'

Ido Schimmel says:

====================
net: fib_rules: Add flow label selector support

In some deployments, users would like to encode path information into
certain bits of the IPv6 flow label, the UDP source port, and the DSCP,
and use this information to route packets accordingly.

Redirecting traffic to a routing table based on the flow label is not
currently possible with Linux as FIB rules cannot match on it despite
the flow label being available in the IPv6 flow key.

This patchset extends FIB rules to match on the flow label with a mask.
Future patches will add mask attributes to L4 ports and DSCP matches.

Patches #1-#5 gradually extend FIB rules to match on the flow label.

Patches #6-#7 allow user space to specify a flow label in route get
requests. This is useful for both debugging and testing.

Patch #8 adjusts the fib6_table_lookup tracepoint to print the flow
label to the trace buffer for better observability.

Patch #9 extends the FIB rule selftest with flow label test cases while
utilizing the route get functionality from patch #6.
====================

Link: https://patch.msgid.link/20241216171201.274644-1-idosch@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+140 -6
+7
Documentation/netlink/specs/rt_route.yaml
··· 177 177 - 178 178 name: rta-nh-id 179 179 type: u32 180 + - 181 + name: rta-flowlabel 182 + type: u32 183 + byte-order: big-endian 184 + display-hint: hex 180 185 - 181 186 name: rta-metrics 182 187 attributes: ··· 265 260 - rta-dport 266 261 - rta-mark 267 262 - rta-uid 263 + - rta-flowlabel 268 264 reply: 269 265 value: 24 270 266 attributes: &all-route-attrs ··· 305 299 - rta-sport 306 300 - rta-dport 307 301 - rta-nh-id 302 + - rta-flowlabel 308 303 dump: 309 304 request: 310 305 value: 26
+12
Documentation/netlink/specs/rt_rule.yaml
··· 172 172 - 173 173 name: dscp 174 174 type: u8 175 + - 176 + name: flowlabel 177 + type: u32 178 + byte-order: big-endian 179 + display-hint: hex 180 + - 181 + name: flowlabel-mask 182 + type: u32 183 + byte-order: big-endian 184 + display-hint: hex 175 185 176 186 operations: 177 187 enum-model: directional ··· 213 203 - sport-range 214 204 - dport-range 215 205 - dscp 206 + - flowlabel 207 + - flowlabel-mask 216 208 - 217 209 name: newrule-ntf 218 210 doc: Notify a rule creation
+5 -3
include/trace/events/fib6.h
··· 22 22 __field( int, err ) 23 23 __field( int, oif ) 24 24 __field( int, iif ) 25 + __field( u32, flowlabel ) 25 26 __field( __u8, tos ) 26 27 __field( __u8, scope ) 27 28 __field( __u8, flags ) ··· 43 42 __entry->err = ip6_rt_type_to_error(res->fib6_type); 44 43 __entry->oif = flp->flowi6_oif; 45 44 __entry->iif = flp->flowi6_iif; 45 + __entry->flowlabel = ntohl(flowi6_get_flowlabel(flp)); 46 46 __entry->tos = ip6_tclass(flp->flowlabel); 47 47 __entry->scope = flp->flowi6_scope; 48 48 __entry->flags = flp->flowi6_flags; ··· 78 76 } 79 77 ), 80 78 81 - TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", 79 + TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u flowlabel %#x tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", 82 80 __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, 83 81 __entry->src, __entry->sport, __entry->dst, __entry->dport, 84 - __entry->tos, __entry->scope, __entry->flags, 85 - __entry->name, __entry->gw, __entry->err) 82 + __entry->flowlabel, __entry->tos, __entry->scope, 83 + __entry->flags, __entry->name, __entry->gw, __entry->err) 86 84 ); 87 85 88 86 #endif /* _TRACE_FIB6_H */
+2
include/uapi/linux/fib_rules.h
··· 68 68 FRA_SPORT_RANGE, /* sport */ 69 69 FRA_DPORT_RANGE, /* dport */ 70 70 FRA_DSCP, /* dscp */ 71 + FRA_FLOWLABEL, /* flowlabel */ 72 + FRA_FLOWLABEL_MASK, /* flowlabel mask */ 71 73 __FRA_MAX 72 74 }; 73 75
+1
include/uapi/linux/rtnetlink.h
··· 393 393 RTA_SPORT, 394 394 RTA_DPORT, 395 395 RTA_NH_ID, 396 + RTA_FLOWLABEL, 396 397 __RTA_MAX 397 398 }; 398 399
+2
net/core/fib_rules.c
··· 770 770 [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, 771 771 [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, 772 772 [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), 773 + [FRA_FLOWLABEL] = { .type = NLA_BE32 }, 774 + [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, 773 775 }; 774 776 775 777 int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
+6
net/ipv4/fib_rules.c
··· 249 249 int err = -EINVAL; 250 250 struct fib4_rule *rule4 = (struct fib4_rule *) rule; 251 251 252 + if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { 253 + NL_SET_ERR_MSG(extack, 254 + "Flow label cannot be specified for IPv4 FIB rules"); 255 + goto errout; 256 + } 257 + 252 258 if (!inet_validate_dscp(frh->tos)) { 253 259 NL_SET_ERR_MSG(extack, 254 260 "Invalid dsfield (tos): ECN bits must be 0");
+55 -2
net/ipv6/fib6_rules.c
··· 26 26 struct fib_rule common; 27 27 struct rt6key src; 28 28 struct rt6key dst; 29 + __be32 flowlabel; 30 + __be32 flowlabel_mask; 29 31 dscp_t dscp; 30 32 u8 dscp_full:1; /* DSCP or TOS selector */ 31 33 }; ··· 36 34 { 37 35 struct fib6_rule *r = container_of(rule, struct fib6_rule, common); 38 36 39 - if (r->dst.plen || r->src.plen || r->dscp) 37 + if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask) 40 38 return false; 41 39 return fib_rule_matchall(rule); 42 40 } ··· 334 332 if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) 335 333 return 0; 336 334 335 + if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask) 336 + return 0; 337 + 337 338 if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) 338 339 return 0; 339 340 ··· 365 360 return 0; 366 361 } 367 362 363 + static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6, 364 + struct netlink_ext_ack *extack) 365 + { 366 + __be32 flowlabel, flowlabel_mask; 367 + 368 + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL) || 369 + NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK)) 370 + return -EINVAL; 371 + 372 + flowlabel = nla_get_be32(tb[FRA_FLOWLABEL]); 373 + flowlabel_mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]); 374 + 375 + if (flowlabel_mask & ~IPV6_FLOWLABEL_MASK) { 376 + NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK], 377 + "Invalid flow label mask"); 378 + return -EINVAL; 379 + } 380 + 381 + if (flowlabel & ~flowlabel_mask) { 382 + NL_SET_ERR_MSG(extack, "Flow label and mask do not match"); 383 + return -EINVAL; 384 + } 385 + 386 + rule6->flowlabel = flowlabel; 387 + rule6->flowlabel_mask = flowlabel_mask; 388 + 389 + return 0; 390 + } 391 + 368 392 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, 369 393 struct fib_rule_hdr *frh, 370 394 struct nlattr **tb, ··· 411 377 rule6->dscp = inet_dsfield_to_dscp(frh->tos); 412 378 413 379 if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) 380 + goto 
errout; 381 + 382 + if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) && 383 + fib6_nl2rule_flowlabel(tb, rule6, extack) < 0) 414 384 goto errout; 415 385 416 386 if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { ··· 482 444 return 0; 483 445 } 484 446 447 + if (tb[FRA_FLOWLABEL] && 448 + nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel) 449 + return 0; 450 + 451 + if (tb[FRA_FLOWLABEL_MASK] && 452 + nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask) 453 + return 0; 454 + 485 455 if (frh->src_len && 486 456 nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) 487 457 return 0; ··· 518 472 frh->tos = inet_dscp_to_dsfield(rule6->dscp); 519 473 } 520 474 475 + if (rule6->flowlabel_mask && 476 + (nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) || 477 + nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask))) 478 + goto nla_put_failure; 479 + 521 480 if ((rule6->dst.plen && 522 481 nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || 523 482 (rule6->src.plen && ··· 538 487 { 539 488 return nla_total_size(16) /* dst */ 540 489 + nla_total_size(16) /* src */ 541 - + nla_total_size(1); /* dscp */ 490 + + nla_total_size(1) /* dscp */ 491 + + nla_total_size(4) /* flowlabel */ 492 + + nla_total_size(4); /* flowlabel mask */ 542 493 } 543 494 544 495 static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
+19 -1
net/ipv6/route.c
··· 5005 5005 [RTA_SPORT] = { .type = NLA_U16 }, 5006 5006 [RTA_DPORT] = { .type = NLA_U16 }, 5007 5007 [RTA_NH_ID] = { .type = NLA_U32 }, 5008 + [RTA_FLOWLABEL] = { .type = NLA_BE32 }, 5008 5009 }; 5009 5010 5010 5011 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, ··· 5028 5027 if (rtm->rtm_tos) { 5029 5028 NL_SET_ERR_MSG(extack, 5030 5029 "Invalid dsfield (tos): option not available for IPv6"); 5030 + goto errout; 5031 + } 5032 + 5033 + if (tb[RTA_FLOWLABEL]) { 5034 + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], 5035 + "Flow label cannot be specified for this operation"); 5031 5036 goto errout; 5032 5037 } 5033 5038 ··· 6020 6013 return -EINVAL; 6021 6014 } 6022 6015 6016 + if (tb[RTA_FLOWLABEL] && 6017 + (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) { 6018 + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], 6019 + "Invalid flow label"); 6020 + return -EINVAL; 6021 + } 6022 + 6023 6023 for (i = 0; i <= RTA_MAX; i++) { 6024 6024 if (!tb[i]) 6025 6025 continue; ··· 6041 6027 case RTA_SPORT: 6042 6028 case RTA_DPORT: 6043 6029 case RTA_IP_PROTO: 6030 + case RTA_FLOWLABEL: 6044 6031 break; 6045 6032 default: 6046 6033 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); ··· 6064 6049 struct sk_buff *skb; 6065 6050 struct rtmsg *rtm; 6066 6051 struct flowi6 fl6 = {}; 6052 + __be32 flowlabel; 6067 6053 bool fibmatch; 6068 6054 6069 6055 err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); ··· 6073 6057 6074 6058 err = -EINVAL; 6075 6059 rtm = nlmsg_data(nlh); 6076 - fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); 6077 6060 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); 6078 6061 6079 6062 if (tb[RTA_SRC]) { ··· 6117 6102 if (err) 6118 6103 goto errout; 6119 6104 } 6105 + 6106 + flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0); 6107 + fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel); 6120 6108 6121 6109 if (iif) { 6122 6110 struct net_device *dev;
+31
tools/testing/selftests/net/fib_rule_tests.sh
··· 291 291 "$getnomatch" "iif dscp redirect to table" \ 292 292 "iif dscp no redirect to table" 293 293 fi 294 + 295 + fib_check_iproute_support "flowlabel" "flowlabel" 296 + if [ $? -eq 0 ]; then 297 + match="flowlabel 0xfffff" 298 + getmatch="flowlabel 0xfffff" 299 + getnomatch="flowlabel 0xf" 300 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 301 + "$getnomatch" "flowlabel redirect to table" \ 302 + "flowlabel no redirect to table" 303 + 304 + match="flowlabel 0xfffff" 305 + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" 306 + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" 307 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 308 + "$getnomatch" "iif flowlabel redirect to table" \ 309 + "iif flowlabel no redirect to table" 310 + 311 + match="flowlabel 0x08000/0x08000" 312 + getmatch="flowlabel 0xfffff" 313 + getnomatch="flowlabel 0xf7fff" 314 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 315 + "$getnomatch" "flowlabel masked redirect to table" \ 316 + "flowlabel masked no redirect to table" 317 + 318 + match="flowlabel 0x08000/0x08000" 319 + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" 320 + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" 321 + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ 322 + "$getnomatch" "iif flowlabel masked redirect to table" \ 323 + "iif flowlabel masked no redirect to table" 324 + fi 294 325 } 295 326 296 327 fib_rule6_vrf_test()