Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sock: Move the socket inuse to namespace.

In some cases, we want to know how many sockets are in use in
different net namespaces. It's a key resource metric.

This patch adds a member to struct netns_core. It is a counter
of the sockets in use in the net namespace. The patch increments and
decrements the counter in sk_alloc, sk_clone_lock and __sk_free.

This patch does not count sockets created in the kernel.
It's not very useful for userspace to know how many kernel
sockets we have created.

The main reasons for doing this are that:

1. When Linux calls 'do_exit' for a process to exit, the functions
'exit_task_namespaces' and 'exit_task_work' are called sequentially.
'exit_task_namespaces' may have already destroyed the net namespace, but
'sock_release', called in 'exit_task_work', would still use the net
namespace if we counted the sockets in use in sock_release.

2. socket and sock come in pairs. More importantly, sock holds the net
namespace. We count the sockets in use in sock to avoid holding the
net namespace again in socket. It's an easy way to maintain the code.

Signed-off-by: Martin Zhang <zhangjunweimartin@didichuxing.com>
Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Tonghao Zhang and committed by
David S. Miller
648845ab 08fc7f81

+51 -21
+3
include/net/netns/core.h
··· 11 11 12 12 int sysctl_somaxconn; 13 13 14 + #ifdef CONFIG_PROC_FS 15 + int __percpu *sock_inuse; 16 + #endif 14 17 struct prot_inuse __percpu *prot_inuse; 15 18 }; 16 19
+1
include/net/sock.h
··· 1262 1262 /* Called with local bh disabled */ 1263 1263 void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); 1264 1264 int sock_prot_inuse_get(struct net *net, struct proto *proto); 1265 + int sock_inuse_get(struct net *net); 1265 1266 #else 1266 1267 static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, 1267 1268 int inc)
+45 -2
net/core/sock.c
··· 145 145 static DEFINE_MUTEX(proto_list_mutex); 146 146 static LIST_HEAD(proto_list); 147 147 148 + static void sock_inuse_add(struct net *net, int val); 149 + 148 150 /** 149 151 * sk_ns_capable - General socket capability test 150 152 * @sk: Socket to use a capability on or through ··· 1533 1531 sk->sk_kern_sock = kern; 1534 1532 sock_lock_init(sk); 1535 1533 sk->sk_net_refcnt = kern ? 0 : 1; 1536 - if (likely(sk->sk_net_refcnt)) 1534 + if (likely(sk->sk_net_refcnt)) { 1537 1535 get_net(net); 1536 + sock_inuse_add(net, 1); 1537 + } 1538 + 1538 1539 sock_net_set(sk, net); 1539 1540 refcount_set(&sk->sk_wmem_alloc, 1); 1540 1541 ··· 1600 1595 1601 1596 static void __sk_free(struct sock *sk) 1602 1597 { 1598 + if (likely(sk->sk_net_refcnt)) 1599 + sock_inuse_add(sock_net(sk), -1); 1600 + 1603 1601 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) 1604 1602 sock_diag_broadcast_destroy(sk); 1605 1603 else ··· 1724 1716 newsk->sk_priority = 0; 1725 1717 newsk->sk_incoming_cpu = raw_smp_processor_id(); 1726 1718 atomic64_set(&newsk->sk_cookie, 0); 1719 + if (likely(newsk->sk_net_refcnt)) 1720 + sock_inuse_add(sock_net(newsk), 1); 1727 1721 1728 1722 /* 1729 1723 * Before updating sk_refcnt, we must commit prior changes to memory ··· 3071 3061 } 3072 3062 EXPORT_SYMBOL_GPL(sock_prot_inuse_get); 3073 3063 3064 + static void sock_inuse_add(struct net *net, int val) 3065 + { 3066 + this_cpu_add(*net->core.sock_inuse, val); 3067 + } 3068 + 3069 + int sock_inuse_get(struct net *net) 3070 + { 3071 + int cpu, res = 0; 3072 + 3073 + for_each_possible_cpu(cpu) 3074 + res += *per_cpu_ptr(net->core.sock_inuse, cpu); 3075 + 3076 + return res; 3077 + } 3078 + 3079 + EXPORT_SYMBOL_GPL(sock_inuse_get); 3080 + 3074 3081 static int __net_init sock_inuse_init_net(struct net *net) 3075 3082 { 3076 3083 net->core.prot_inuse = alloc_percpu(struct prot_inuse); 3077 - return net->core.prot_inuse ? 
0 : -ENOMEM; 3084 + if (net->core.prot_inuse == NULL) 3085 + return -ENOMEM; 3086 + 3087 + net->core.sock_inuse = alloc_percpu(int); 3088 + if (net->core.sock_inuse == NULL) 3089 + goto out; 3090 + 3091 + return 0; 3092 + 3093 + out: 3094 + free_percpu(net->core.prot_inuse); 3095 + return -ENOMEM; 3078 3096 } 3079 3097 3080 3098 static void __net_exit sock_inuse_exit_net(struct net *net) 3081 3099 { 3082 3100 free_percpu(net->core.prot_inuse); 3101 + free_percpu(net->core.sock_inuse); 3083 3102 } 3084 3103 3085 3104 static struct pernet_operations net_inuse_ops = { ··· 3149 3110 } 3150 3111 3151 3112 static inline void release_proto_idx(struct proto *prot) 3113 + { 3114 + } 3115 + 3116 + static void sock_inuse_add(struct net *net, int val) 3152 3117 { 3153 3118 } 3154 3119 #endif
+2 -19
net/socket.c
··· 163 163 static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; 164 164 165 165 /* 166 - * Statistics counters of the socket lists 167 - */ 168 - 169 - static DEFINE_PER_CPU(int, sockets_in_use); 170 - 171 - /* 172 166 * Support routines. 173 167 * Move socket addresses back and forth across the kernel/user 174 168 * divide and look after the messy bits. ··· 572 578 inode->i_gid = current_fsgid(); 573 579 inode->i_op = &sockfs_inode_ops; 574 580 575 - this_cpu_add(sockets_in_use, 1); 576 581 return sock; 577 582 } 578 583 EXPORT_SYMBOL(sock_alloc); ··· 598 605 if (rcu_dereference_protected(sock->wq, 1)->fasync_list) 599 606 pr_err("%s: fasync list not empty!\n", __func__); 600 607 601 - this_cpu_sub(sockets_in_use, 1); 602 608 if (!sock->file) { 603 609 iput(SOCK_INODE(sock)); 604 610 return; ··· 2614 2622 #ifdef CONFIG_PROC_FS 2615 2623 void socket_seq_show(struct seq_file *seq) 2616 2624 { 2617 - int cpu; 2618 - int counter = 0; 2619 - 2620 - for_each_possible_cpu(cpu) 2621 - counter += per_cpu(sockets_in_use, cpu); 2622 - 2623 - /* It can be negative, by the way. 8) */ 2624 - if (counter < 0) 2625 - counter = 0; 2626 - 2627 - seq_printf(seq, "sockets: used %d\n", counter); 2625 + seq_printf(seq, "sockets: used %d\n", 2626 + sock_inuse_get(seq->private)); 2628 2627 } 2629 2628 #endif /* CONFIG_PROC_FS */ 2630 2629