Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

af_unix: Put pathname sockets in the global hash table.

Commit cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
accidentally broke the user API for pathname sockets. Before that commit, a
socket could connect() to a pathname socket whose file was visible to it,
even if the two sockets were in different network namespaces.

The commit puts all sockets into a per-netns hash table. As a result,
connect() to a pathname socket in a different netns fails to find it in the
caller's per-netns hash table and returns -ECONNREFUSED even when the task
can view the peer socket file.

We can reproduce this issue by:

Console A:

# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.bind('test')
>>> s.listen(32)

Console B:

# ip netns add test
# ip netns exec test sh
# python3
>>> from socket import *
>>> s = socket(AF_UNIX, SOCK_STREAM, 0)
>>> s.connect('test')

Note that when sockets are dumped via sock_diag, procfs, or bpf_iter, they
are filtered only by netns. In other words, sockets in a different netns are
skipped while iterating, even if they are visible and connect()able. Thus,
we need a fix only for finding a peer pathname socket.

This patch adds a global hash table for pathname sockets, links them with
sk_bind_node, and uses it in unix_find_socket_byinode(). By doing so, we
can keep sockets in per-netns hash tables and dump them easily.

Thanks to Sachin Sant and Leonard Crestez for reports, logs and a reproducer.

Fixes: cf2f225e2653 ("af_unix: Put a socket into a per-netns hash table.")
Reported-by: Sachin Sant <sachinp@linux.ibm.com>
Reported-by: Leonard Crestez <cdleonard@gmail.com>
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Tested-by: Leonard Crestez <cdleonard@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

authored by

Kuniyuki Iwashima and committed by
Paolo Abeni
51bae889 874bdbfe

+37 -10
+37 -10
net/unix/af_unix.c
··· 119 119 #include "scm.h" 120 120 121 121 static atomic_long_t unix_nr_socks; 122 + static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; 123 + static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; 122 124 123 125 /* SMP locking strategy: 124 126 * hash table is protected with spinlock. ··· 330 328 spin_unlock(&net->unx.table.locks[sk->sk_hash]); 331 329 } 332 330 331 + static void unix_insert_bsd_socket(struct sock *sk) 332 + { 333 + spin_lock(&bsd_socket_locks[sk->sk_hash]); 334 + sk_add_bind_node(sk, &bsd_socket_buckets[sk->sk_hash]); 335 + spin_unlock(&bsd_socket_locks[sk->sk_hash]); 336 + } 337 + 338 + static void unix_remove_bsd_socket(struct sock *sk) 339 + { 340 + if (!hlist_unhashed(&sk->sk_bind_node)) { 341 + spin_lock(&bsd_socket_locks[sk->sk_hash]); 342 + __sk_del_bind_node(sk); 343 + spin_unlock(&bsd_socket_locks[sk->sk_hash]); 344 + 345 + sk_node_init(&sk->sk_bind_node); 346 + } 347 + } 348 + 333 349 static struct sock *__unix_find_socket_byname(struct net *net, 334 350 struct sockaddr_un *sunname, 335 351 int len, unsigned int hash) ··· 378 358 return s; 379 359 } 380 360 381 - static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) 361 + static struct sock *unix_find_socket_byinode(struct inode *i) 382 362 { 383 363 unsigned int hash = unix_bsd_hash(i); 384 364 struct sock *s; 385 365 386 - spin_lock(&net->unx.table.locks[hash]); 387 - sk_for_each(s, &net->unx.table.buckets[hash]) { 366 + spin_lock(&bsd_socket_locks[hash]); 367 + sk_for_each_bound(s, &bsd_socket_buckets[hash]) { 388 368 struct dentry *dentry = unix_sk(s)->path.dentry; 389 369 390 370 if (dentry && d_backing_inode(dentry) == i) { 391 371 sock_hold(s); 392 - spin_unlock(&net->unx.table.locks[hash]); 372 + spin_unlock(&bsd_socket_locks[hash]); 393 373 return s; 394 374 } 395 375 } 396 - spin_unlock(&net->unx.table.locks[hash]); 376 + spin_unlock(&bsd_socket_locks[hash]); 397 377 return NULL; 398 378 } 399 379 ··· 597 577 int state; 598 578 599 579 
unix_remove_socket(sock_net(sk), sk); 580 + unix_remove_bsd_socket(sk); 600 581 601 582 /* Clear state */ 602 583 unix_state_lock(sk); ··· 1009 988 return 0; 1010 989 } 1011 990 1012 - static struct sock *unix_find_bsd(struct net *net, struct sockaddr_un *sunaddr, 1013 - int addr_len, int type) 991 + static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, 992 + int type) 1014 993 { 1015 994 struct inode *inode; 1016 995 struct path path; ··· 1031 1010 if (!S_ISSOCK(inode->i_mode)) 1032 1011 goto path_put; 1033 1012 1034 - sk = unix_find_socket_byinode(net, inode); 1013 + sk = unix_find_socket_byinode(inode); 1035 1014 if (!sk) 1036 1015 goto path_put; 1037 1016 ··· 1079 1058 struct sock *sk; 1080 1059 1081 1060 if (sunaddr->sun_path[0]) 1082 - sk = unix_find_bsd(net, sunaddr, addr_len, type); 1061 + sk = unix_find_bsd(sunaddr, addr_len, type); 1083 1062 else 1084 1063 sk = unix_find_abstract(net, sunaddr, addr_len, type); 1085 1064 ··· 1200 1179 u->path.dentry = dget(dentry); 1201 1180 __unix_set_addr_hash(net, sk, addr, new_hash); 1202 1181 unix_table_double_unlock(net, old_hash, new_hash); 1182 + unix_insert_bsd_socket(sk); 1203 1183 mutex_unlock(&u->bindlock); 1204 1184 done_path_create(&parent, dentry); 1205 1185 return 0; ··· 3704 3682 3705 3683 static int __init af_unix_init(void) 3706 3684 { 3707 - int rc = -1; 3685 + int i, rc = -1; 3708 3686 3709 3687 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb)); 3688 + 3689 + for (i = 0; i < UNIX_HASH_SIZE / 2; i++) { 3690 + spin_lock_init(&bsd_socket_locks[i]); 3691 + INIT_HLIST_HEAD(&bsd_socket_buckets[i]); 3692 + } 3710 3693 3711 3694 rc = proto_register(&unix_dgram_proto, 1); 3712 3695 if (rc != 0) {