Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: lockless sock_i_ino()

Followup of commit c51da3f7a161 ("net: remove sock_i_uid()")

A recent syzbot report was the trigger for this change.

Over the years, we had many problems caused by the
read_lock[_bh](&sk->sk_callback_lock) in sock_i_ino().

We could fix smc_diag_dump_proto() or make a more radical move:

Instead of waiting for new syzbot reports, cache the socket
inode number in sk->sk_ino, so that we no longer
need to acquire sk->sk_callback_lock in sock_i_ino().

This makes socket dumps faster (one less cache line miss,
and two atomic ops avoided).

Prior art:

commit 25a9c8a4431c ("netlink: Add __sock_i_ino() for __netlink_diag_dump().")
commit 4f9bf2a2f5aa ("tcp: Don't acquire inet_listen_hashbucket::lock with disabled BH.")
commit efc3dbc37412 ("rds: Make rds_sock_lock BH rather than IRQ safe.")

Fixes: d2d6422f8bd1 ("x86: Allow to enable PREEMPT_RT.")
Reported-by: syzbot+50603c05bbdf4dfdaffa@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/68b73804.050a0220.3db4df.01d8.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Reviewed-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Link: https://patch.msgid.link/20250902183603.740428-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet; committed by Jakub Kicinski
commit 5d6b58c9 (parent b4ada061)

+14 -28
+13 -4
include/net/sock.h
··· 285 285 * @sk_ack_backlog: current listen backlog 286 286 * @sk_max_ack_backlog: listen backlog set in listen() 287 287 * @sk_uid: user id of owner 288 + * @sk_ino: inode number (zero if orphaned) 288 289 * @sk_prefer_busy_poll: prefer busypolling over softirq processing 289 290 * @sk_busy_poll_budget: napi processing budget when busypolling 290 291 * @sk_priority: %SO_PRIORITY setting ··· 519 518 u32 sk_ack_backlog; 520 519 u32 sk_max_ack_backlog; 521 520 kuid_t sk_uid; 521 + unsigned long sk_ino; 522 522 spinlock_t sk_peer_lock; 523 523 int sk_bind_phc; 524 524 struct pid *sk_peer_pid; ··· 2058 2056 static inline void sk_set_socket(struct sock *sk, struct socket *sock) 2059 2057 { 2060 2058 sk->sk_socket = sock; 2059 + if (sock) { 2060 + WRITE_ONCE(sk->sk_uid, SOCK_INODE(sock)->i_uid); 2061 + WRITE_ONCE(sk->sk_ino, SOCK_INODE(sock)->i_ino); 2062 + } 2061 2063 } 2062 2064 2063 2065 static inline wait_queue_head_t *sk_sleep(struct sock *sk) ··· 2083 2077 sk_set_socket(sk, NULL); 2084 2078 sk->sk_wq = NULL; 2085 2079 /* Note: sk_uid is unchanged. 
*/ 2080 + WRITE_ONCE(sk->sk_ino, 0); 2086 2081 write_unlock_bh(&sk->sk_callback_lock); 2087 2082 } 2088 2083 ··· 2094 2087 rcu_assign_pointer(sk->sk_wq, &parent->wq); 2095 2088 parent->sk = sk; 2096 2089 sk_set_socket(sk, parent); 2097 - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); 2098 2090 security_sock_graft(sk, parent); 2099 2091 write_unlock_bh(&sk->sk_callback_lock); 2092 + } 2093 + 2094 + static inline unsigned long sock_i_ino(const struct sock *sk) 2095 + { 2096 + /* Paired with WRITE_ONCE() in sock_graft() and sock_orphan() */ 2097 + return READ_ONCE(sk->sk_ino); 2100 2098 } 2101 2099 2102 2100 static inline kuid_t sk_uid(const struct sock *sk) ··· 2109 2097 /* Paired with WRITE_ONCE() in sockfs_setattr() */ 2110 2098 return READ_ONCE(sk->sk_uid); 2111 2099 } 2112 - 2113 - unsigned long __sock_i_ino(struct sock *sk); 2114 - unsigned long sock_i_ino(struct sock *sk); 2115 2100 2116 2101 static inline kuid_t sock_net_uid(const struct net *net, const struct sock *sk) 2117 2102 {
-22
net/core/sock.c
··· 2780 2780 EXPORT_SYMBOL(sock_pfree); 2781 2781 #endif /* CONFIG_INET */ 2782 2782 2783 - unsigned long __sock_i_ino(struct sock *sk) 2784 - { 2785 - unsigned long ino; 2786 - 2787 - read_lock(&sk->sk_callback_lock); 2788 - ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; 2789 - read_unlock(&sk->sk_callback_lock); 2790 - return ino; 2791 - } 2792 - EXPORT_SYMBOL(__sock_i_ino); 2793 - 2794 - unsigned long sock_i_ino(struct sock *sk) 2795 - { 2796 - unsigned long ino; 2797 - 2798 - local_bh_disable(); 2799 - ino = __sock_i_ino(sk); 2800 - local_bh_enable(); 2801 - return ino; 2802 - } 2803 - EXPORT_SYMBOL(sock_i_ino); 2804 - 2805 2783 /* 2806 2784 * Allocate a skb from the socket's send buffer. 2807 2785 */
-1
net/mptcp/protocol.c
··· 3554 3554 write_lock_bh(&sk->sk_callback_lock); 3555 3555 rcu_assign_pointer(sk->sk_wq, &parent->wq); 3556 3556 sk_set_socket(sk, parent); 3557 - WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); 3558 3557 write_unlock_bh(&sk->sk_callback_lock); 3559 3558 } 3560 3559
+1 -1
net/netlink/diag.c
··· 168 168 NETLINK_CB(cb->skb).portid, 169 169 cb->nlh->nlmsg_seq, 170 170 NLM_F_MULTI, 171 - __sock_i_ino(sk)) < 0) { 171 + sock_i_ino(sk)) < 0) { 172 172 ret = 1; 173 173 break; 174 174 }