Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: dst: add four helpers to annotate data-races around dst->dev

dst->dev is read locklessly in many contexts,
and written in dst_dev_put().

Fixing all the races is going to need many changes.

We probably will have to add full RCU protection.

Add four helpers to ease this painful process.

/* Lockless read of dst->dev; pairs with the WRITE_ONCE() writers
 * (e.g. dst_ifdown() switching to blackhole_netdev) to annotate the
 * data race on this field.
 */
static inline struct net_device *dst_dev(const struct dst_entry *dst)
{
return READ_ONCE(dst->dev);
}

/* Device of the skb's dst entry, read via the race-annotated dst_dev().
 * NOTE(review): assumes skb_dst(skb) is non-NULL — callers must hold a
 * valid dst; confirm at each call site.
 */
static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
{
return dst_dev(skb_dst(skb));
}

/* Network namespace of the skb's dst device (non-RCU dev_net() variant). */
static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
{
return dev_net(skb_dst_dev(skb));
}

/* As skb_dst_dev_net(), but uses dev_net_rcu() — presumably for callers
 * inside an RCU read-side section; verify against dev_net_rcu() contract.
 */
static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb)
{
return dev_net_rcu(skb_dst_dev(skb));
}

Fixes: 4a6ce2b6f2ec ("net: introduce a new function dst_dev_put()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250630121934.3399505-7-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Eric Dumazet and committed by
Jakub Kicinski
88fe1425 2dce8c52

+26 -6
+20
include/net/dst.h
··· 563 563 dst->ops->update_pmtu(dst, NULL, skb, mtu, false); 564 564 } 565 565 566 + static inline struct net_device *dst_dev(const struct dst_entry *dst) 567 + { 568 + return READ_ONCE(dst->dev); 569 + } 570 + 571 + static inline struct net_device *skb_dst_dev(const struct sk_buff *skb) 572 + { 573 + return dst_dev(skb_dst(skb)); 574 + } 575 + 576 + static inline struct net *skb_dst_dev_net(const struct sk_buff *skb) 577 + { 578 + return dev_net(skb_dst_dev(skb)); 579 + } 580 + 581 + static inline struct net *skb_dst_dev_net_rcu(const struct sk_buff *skb) 582 + { 583 + return dev_net_rcu(skb_dst_dev(skb)); 584 + } 585 + 566 586 struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); 567 587 void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 568 588 struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+2 -2
net/core/dst.c
··· 150 150 dst->ops->ifdown(dst, dev); 151 151 WRITE_ONCE(dst->input, dst_discard); 152 152 WRITE_ONCE(dst->output, dst_discard_out); 153 - dst->dev = blackhole_netdev; 153 + WRITE_ONCE(dst->dev, blackhole_netdev); 154 154 netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker, 155 155 GFP_ATOMIC); 156 156 } ··· 263 263 { 264 264 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 265 265 266 - return mtu ? : dst->dev->mtu; 266 + return mtu ? : dst_dev(dst)->mtu; 267 267 } 268 268 EXPORT_SYMBOL_GPL(dst_blackhole_mtu); 269 269
+4 -4
net/core/sock.c
··· 2588 2588 !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)); 2589 2589 #endif 2590 2590 /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ 2591 - max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) : 2592 - READ_ONCE(dst->dev->gso_ipv4_max_size); 2591 + max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) : 2592 + READ_ONCE(dst_dev(dst)->gso_ipv4_max_size); 2593 2593 if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk)) 2594 2594 max_size = GSO_LEGACY_MAX_SIZE; 2595 2595 ··· 2600 2600 { 2601 2601 u32 max_segs = 1; 2602 2602 2603 - sk->sk_route_caps = dst->dev->features; 2603 + sk->sk_route_caps = dst_dev(dst)->features; 2604 2604 if (sk_is_tcp(sk)) { 2605 2605 struct inet_connection_sock *icsk = inet_csk(sk); 2606 2606 ··· 2618 2618 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; 2619 2619 sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst); 2620 2620 /* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */ 2621 - max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1); 2621 + max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1); 2622 2622 } 2623 2623 } 2624 2624 sk->sk_gso_max_segs = max_segs;