Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'unmask-upper-dscp-bits-part-1'

Ido Schimmel says:

====================
Unmask upper DSCP bits - part 1

tl;dr - This patchset starts to unmask the upper DSCP bits in the IPv4
flow key in preparation for allowing IPv4 FIB rules to match on DSCP.
No functional changes are expected.

The TOS field in the IPv4 flow key ('flowi4_tos') is used during FIB
lookup to match against the TOS selector in FIB rules and routes.

It is currently impossible for user space to configure FIB rules that
match on the DSCP value, because the upper DSCP bits are masked either
at the various call sites that initialize the IPv4 flow key or along
the path to the FIB core.

In preparation for adding a DSCP selector to IPv4 and IPv6 FIB rules, we
need to make sure the entire DSCP value is present in the IPv4 flow key.
This patchset starts to unmask the upper DSCP bits in the various places
that invoke the core FIB lookup functions directly (patches #1-#7) and
in the input route path (patches #8-#12). Future patchsets will do the
same in the output route path.

No functional changes are expected, as commit 1fa3314c14c6 ("ipv4:
Centralize TOS matching") moved the masking of the upper DSCP bits to
the core, where 'flowi4_tos' is matched against the TOS selector.
====================

Link: https://patch.msgid.link/20240821125251.1571445-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+17 -12
+2 -1
net/core/filter.c
··· 84 84 #include <net/netkit.h> 85 85 #include <linux/un.h> 86 86 #include <net/xdp_sock_drv.h> 87 + #include <net/inet_dscp.h> 87 88 88 89 #include "dev.h" 89 90 ··· 5900 5899 fl4.flowi4_iif = params->ifindex; 5901 5900 fl4.flowi4_oif = 0; 5902 5901 } 5903 - fl4.flowi4_tos = params->tos & IPTOS_RT_MASK; 5902 + fl4.flowi4_tos = params->tos & INET_DSCP_MASK; 5904 5903 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 5905 5904 fl4.flowi4_flags = 0; 5906 5905
+2 -2
net/ipv4/fib_frontend.c
··· 293 293 .flowi4_iif = LOOPBACK_IFINDEX, 294 294 .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), 295 295 .daddr = ip_hdr(skb)->saddr, 296 - .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, 296 + .flowi4_tos = ip_hdr(skb)->tos & INET_DSCP_MASK, 297 297 .flowi4_scope = scope, 298 298 .flowi4_mark = vmark ? skb->mark : 0, 299 299 }; ··· 1343 1343 struct flowi4 fl4 = { 1344 1344 .flowi4_mark = frn->fl_mark, 1345 1345 .daddr = frn->fl_addr, 1346 - .flowi4_tos = frn->fl_tos & IPTOS_RT_MASK, 1346 + .flowi4_tos = frn->fl_tos & INET_DSCP_MASK, 1347 1347 .flowi4_scope = frn->fl_scope, 1348 1348 }; 1349 1349 struct fib_table *tb;
+1 -1
net/ipv4/icmp.c
··· 545 545 orefdst = skb_in->_skb_refdst; /* save old refdst */ 546 546 skb_dst_set(skb_in, NULL); 547 547 err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr, 548 - RT_TOS(tos), rt2->dst.dev); 548 + tos, rt2->dst.dev); 549 549 550 550 dst_release(&rt2->dst); 551 551 rt2 = skb_rtable(skb_in);
+2 -1
net/ipv4/ipmr.c
··· 62 62 #include <net/fib_rules.h> 63 63 #include <linux/netconf.h> 64 64 #include <net/rtnh.h> 65 + #include <net/inet_dscp.h> 65 66 66 67 #include <linux/nospec.h> 67 68 ··· 2081 2080 struct flowi4 fl4 = { 2082 2081 .daddr = iph->daddr, 2083 2082 .saddr = iph->saddr, 2084 - .flowi4_tos = RT_TOS(iph->tos), 2083 + .flowi4_tos = iph->tos & INET_DSCP_MASK, 2085 2084 .flowi4_oif = (rt_is_output_route(rt) ? 2086 2085 skb->dev->ifindex : 0), 2087 2086 .flowi4_iif = (rt_is_output_route(rt) ?
+2 -1
net/ipv4/netfilter/ipt_rpfilter.c
··· 8 8 #include <linux/module.h> 9 9 #include <linux/skbuff.h> 10 10 #include <linux/netdevice.h> 11 + #include <net/inet_dscp.h> 11 12 #include <linux/ip.h> 12 13 #include <net/ip.h> 13 14 #include <net/ip_fib.h> ··· 76 75 flow.daddr = iph->saddr; 77 76 flow.saddr = rpfilter_get_saddr(iph->daddr); 78 77 flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; 79 - flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; 78 + flow.flowi4_tos = iph->tos & INET_DSCP_MASK; 80 79 flow.flowi4_scope = RT_SCOPE_UNIVERSE; 81 80 flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); 82 81 flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
+2 -1
net/ipv4/netfilter/nft_fib_ipv4.c
··· 10 10 #include <net/netfilter/nf_tables.h> 11 11 #include <net/netfilter/nft_fib.h> 12 12 13 + #include <net/inet_dscp.h> 13 14 #include <net/ip_fib.h> 14 15 #include <net/route.h> 15 16 ··· 109 108 if (priv->flags & NFTA_FIB_F_MARK) 110 109 fl4.flowi4_mark = pkt->skb->mark; 111 110 112 - fl4.flowi4_tos = iph->tos & IPTOS_RT_MASK; 111 + fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; 113 112 114 113 if (priv->flags & NFTA_FIB_F_DADDR) { 115 114 fl4.daddr = iph->daddr;
+4 -4
net/ipv4/route.c
··· 1263 1263 struct flowi4 fl4 = { 1264 1264 .daddr = iph->daddr, 1265 1265 .saddr = iph->saddr, 1266 - .flowi4_tos = iph->tos & IPTOS_RT_MASK, 1266 + .flowi4_tos = iph->tos & INET_DSCP_MASK, 1267 1267 .flowi4_oif = rt->dst.dev->ifindex, 1268 1268 .flowi4_iif = skb->dev->ifindex, 1269 1269 .flowi4_mark = skb->mark, ··· 2160 2160 if (rt->rt_type != RTN_LOCAL) 2161 2161 goto skip_validate_source; 2162 2162 2163 - tos &= IPTOS_RT_MASK; 2163 + tos &= INET_DSCP_MASK; 2164 2164 err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag); 2165 2165 if (err < 0) 2166 2166 goto martian_source; ··· 2470 2470 struct fib_result res; 2471 2471 int err; 2472 2472 2473 - tos &= IPTOS_RT_MASK; 2473 + tos &= INET_DSCP_MASK; 2474 2474 rcu_read_lock(); 2475 2475 err = ip_route_input_rcu(skb, daddr, saddr, tos, dev, &res); 2476 2476 rcu_read_unlock(); ··· 3286 3286 skb->dev = dev; 3287 3287 skb->mark = mark; 3288 3288 err = ip_route_input_rcu(skb, dst, src, 3289 - rtm->rtm_tos & IPTOS_RT_MASK, dev, 3289 + rtm->rtm_tos & INET_DSCP_MASK, dev, 3290 3290 &res); 3291 3291 3292 3292 rt = skb_rtable(skb);
+2 -1
net/ipv4/udp.c
··· 115 115 #include <net/addrconf.h> 116 116 #include <net/udp_tunnel.h> 117 117 #include <net/gro.h> 118 + #include <net/inet_dscp.h> 118 119 #if IS_ENABLED(CONFIG_IPV6) 119 120 #include <net/ipv6_stubs.h> 120 121 #endif ··· 2619 2618 if (!inet_sk(sk)->inet_daddr && in_dev) 2620 2619 return ip_mc_validate_source(skb, iph->daddr, 2621 2620 iph->saddr, 2622 - iph->tos & IPTOS_RT_MASK, 2621 + iph->tos & INET_DSCP_MASK, 2623 2622 skb->dev, in_dev, &itag); 2624 2623 } 2625 2624 return 0;