Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: avoid extra access to sk->sk_wmem_alloc in sock_wfree()

The destructor for UDP TX packets is sock_wfree().

It suffers from cache line bouncing in sock_def_write_space_wfree().

Instead of reading sk->sk_wmem_alloc after we just did an atomic RMW
on it, use __refcount_sub_and_test() to get the old value for free,
and pass the new value to sock_def_write_space_wfree().

Add __sock_writeable() helper.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20251017133712.2842665-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

Authored by Eric Dumazet and committed by Paolo Abeni (commits 3ff9bcec, d1d7998d).

+13 -7
+5 -1
include/net/sock.h
··· 2607 2607 2608 2608 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); 2609 2609 2610 + static inline bool __sock_writeable(const struct sock *sk, int wmem_alloc) 2611 + { 2612 + return wmem_alloc < (READ_ONCE(sk->sk_sndbuf) >> 1); 2613 + } 2610 2614 /* 2611 2615 * Default write policy as shown to user space via poll/select/SIGIO 2612 2616 */ 2613 2617 static inline bool sock_writeable(const struct sock *sk) 2614 2618 { 2615 - return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1); 2619 + return __sock_writeable(sk, refcount_read(&sk->sk_wmem_alloc)); 2616 2620 } 2617 2621 2618 2622 static inline gfp_t gfp_any(void)
+8 -6
net/core/sock.c
··· 155 155 static DEFINE_MUTEX(proto_list_mutex); 156 156 static LIST_HEAD(proto_list); 157 157 158 - static void sock_def_write_space_wfree(struct sock *sk); 158 + static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc); 159 159 static void sock_def_write_space(struct sock *sk); 160 160 161 161 /** ··· 2659 2659 */ 2660 2660 void sock_wfree(struct sk_buff *skb) 2661 2661 { 2662 - struct sock *sk = skb->sk; 2663 2662 unsigned int len = skb->truesize; 2663 + struct sock *sk = skb->sk; 2664 2664 bool free; 2665 + int old; 2665 2666 2666 2667 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) { 2667 2668 if (sock_flag(sk, SOCK_RCU_FREE) && 2668 2669 sk->sk_write_space == sock_def_write_space) { 2669 2670 rcu_read_lock(); 2670 - free = refcount_sub_and_test(len, &sk->sk_wmem_alloc); 2671 - sock_def_write_space_wfree(sk); 2671 + free = __refcount_sub_and_test(len, &sk->sk_wmem_alloc, 2672 + &old); 2673 + sock_def_write_space_wfree(sk, old - len); 2672 2674 rcu_read_unlock(); 2673 2675 if (unlikely(free)) 2674 2676 __sk_free(sk); ··· 3614 3612 * for SOCK_RCU_FREE sockets under RCU read section and after putting 3615 3613 * ->sk_wmem_alloc. 3616 3614 */ 3617 - static void sock_def_write_space_wfree(struct sock *sk) 3615 + static void sock_def_write_space_wfree(struct sock *sk, int wmem_alloc) 3618 3616 { 3619 3617 /* Do not wake up a writer until he can make "significant" 3620 3618 * progress. --DaveM 3621 3619 */ 3622 - if (sock_writeable(sk)) { 3620 + if (__sock_writeable(sk, wmem_alloc)) { 3623 3621 struct socket_wq *wq = rcu_dereference(sk->sk_wq); 3624 3622 3625 3623 /* rely on refcount_sub from sock_wfree() */