Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * tcp_diag.c	Module for monitoring TCP transport protocols sockets.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
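/*
 * Registered with the inet_diag core for IPPROTO_TCP: answers
 * NETLINK_SOCK_DIAG queries about TCP sockets.  This is what ss(8)
 * from iproute2 uses instead of parsing /proc/net/tcp.
 */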

#include <linux/module.h>
#include <linux/net.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>

#include <linux/tcp.h>

#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/inet_timewait_sock.h>
#include <net/netlink.h>
#include <net/tcp.h>

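/* Fill the base inet_diag_msg queue counters and, if requested, a full
 * struct tcp_info.  Listening sockets report the accept queue length
 * and its limit; established flows report unread and not-yet-acked
 * byte counts.
 */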
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
			      void *_info)
{
	struct tcp_info *info = _info;

	if (inet_sk_state_load(sk) == TCP_LISTEN) {
		r->idiag_rqueue = READ_ONCE(sk->sk_ack_backlog);
		r->idiag_wqueue = READ_ONCE(sk->sk_max_ack_backlog);
	} else if (sk->sk_type == SOCK_STREAM) {
		const struct tcp_sock *tp = tcp_sk(sk);

		r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
					     READ_ONCE(tp->copied_seq), 0);
		r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
	}
	if (info)
		tcp_get_info(sk, info);
}

#ifdef CONFIG_TCP_MD5SIG
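/* Copy one configured MD5 key into the netlink-visible representation. */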
static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info,
				 const struct tcp_md5sig_key *key)
{
	info->tcpm_family = key->family;
	info->tcpm_prefixlen = key->prefixlen;
	info->tcpm_keylen = key->keylen;
	memcpy(info->tcpm_key, key->key, key->keylen);

	if (key->family == AF_INET)
		info->tcpm_addr[0] = key->addr.a4.s_addr;
#if IS_ENABLED(CONFIG_IPV6)
	else if (key->family == AF_INET6)
		memcpy(&info->tcpm_addr, &key->addr.a6,
		       sizeof(info->tcpm_addr));
#endif
}

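/* Dump all MD5 keys of a socket as one INET_DIAG_MD5SIG attribute.
 * The list is walked twice under the caller's RCU read lock: once to
 * size the attribute, once to fill it, so the second walk stops after
 * the counted entries in case a key was added in between.
 */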
static int tcp_diag_put_md5sig(struct sk_buff *skb,
			       const struct tcp_md5sig_info *md5sig)
{
	const struct tcp_md5sig_key *key;
	struct tcp_diag_md5sig *info;
	struct nlattr *attr;
	int md5sig_count = 0;

	hlist_for_each_entry_rcu(key, &md5sig->head, node)
		md5sig_count++;
	if (md5sig_count == 0)
		return 0;

	attr = nla_reserve(skb, INET_DIAG_MD5SIG,
			   md5sig_count * sizeof(struct tcp_diag_md5sig));
	if (!attr)
		return -EMSGSIZE;

	info = nla_data(attr);
	memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig));
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		tcp_diag_md5sig_fill(info++, key);
		if (--md5sig_count == 0)
			break;
	}

	return 0;
}
#endif

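/* Emit a nested INET_DIAG_ULP_INFO attribute: the ULP name plus any
 * extra info the ULP itself (e.g. tls) wants to expose.
 */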
static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk,
			    const struct tcp_ulp_ops *ulp_ops, bool net_admin)
{
	struct nlattr *nest;
	int err;

	nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO);
	if (!nest)
		return -EMSGSIZE;

	err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name);
	if (err)
		goto nla_failure;

	if (ulp_ops->get_info)
		err = ulp_ops->get_info(sk, skb, net_admin);
	if (err)
		goto nla_failure;

	nla_nest_end(skb, nest);
	return 0;

nla_failure:
	nla_nest_cancel(skb, nest);
	return err;
}

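/* Add TCP-specific auxiliary attributes to an inet_diag reply.  MD5
 * key material is only disclosed to CAP_NET_ADMIN requesters.
 */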
static int tcp_diag_get_aux(struct sock *sk, bool net_admin,
			    struct sk_buff *skb)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	const struct tcp_ulp_ops *ulp_ops;
	int err = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin) {
		struct tcp_md5sig_info *md5sig;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig)
			err = tcp_diag_put_md5sig(skb, md5sig);
		rcu_read_unlock();
		if (err < 0)
			return err;
	}
#endif

	ulp_ops = icsk->icsk_ulp_ops;
	if (ulp_ops) {
		err = tcp_diag_put_ulp(skb, sk, ulp_ops, net_admin);
		if (err < 0)
			return err;
	}

	return 0;
}

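/* Upper bound on the reply size for one socket: the variable-size MD5
 * and ULP attributes computed here, plus a fixed budget for the common
 * inet_diag attributes.  The MD5 key count may be stale by the time
 * the message is filled, hence the slack and the -EMSGSIZE handling in
 * the fillers.
 */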
static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	size_t size = 0;

#ifdef CONFIG_TCP_MD5SIG
	if (net_admin && sk_fullsock(sk)) {
		const struct tcp_md5sig_info *md5sig;
		const struct tcp_md5sig_key *key;
		size_t md5sig_count = 0;

		rcu_read_lock();
		md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info);
		if (md5sig) {
			hlist_for_each_entry_rcu(key, &md5sig->head, node)
				md5sig_count++;
		}
		rcu_read_unlock();
		size += nla_total_size(md5sig_count *
				       sizeof(struct tcp_diag_md5sig));
	}
#endif

	if (sk_fullsock(sk)) {
		const struct tcp_ulp_ops *ulp_ops;

		ulp_ops = icsk->icsk_ulp_ops;
		if (ulp_ops) {
			size += nla_total_size(0) +
				nla_total_size(TCP_ULP_NAME_MAX);
			if (ulp_ops->get_info_size)
				size += ulp_ops->get_info_size(sk, net_admin);
		}
	}

	return size
		+ nla_total_size(sizeof(struct tcp_info))
		+ nla_total_size(sizeof(struct inet_diag_msg))
		+ inet_diag_msg_attrs_size()
		+ nla_total_size(sizeof(struct inet_diag_meminfo))
		+ nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
		+ nla_total_size(TCP_CA_NAME_MAX)
		+ nla_total_size(sizeof(struct tcpvegas_info))
		+ 64;
}

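/* TIME_WAIT sockets are not full sockets, so they get a hand-rolled
 * inet_diag_msg: most fields are zero and the only timer reported is
 * the time-wait death timer (idiag_timer == 3).
 */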
static int tcp_twsk_diag_fill(struct sock *sk,
			      struct sk_buff *skb,
			      struct netlink_callback *cb,
			      u16 nlmsg_flags, bool net_admin)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
			cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type,
			sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	DEBUG_NET_WARN_ON_ONCE(tw->tw_state != TCP_TIME_WAIT);

	inet_diag_msg_common_fill(r, sk);
	r->idiag_retrans = 0;

	r->idiag_state = READ_ONCE(tw->tw_substate);
	r->idiag_timer = 3;
	tmo = tw->tw_timer.expires - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     tw->tw_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

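/* Same idea for request sockets (NEW_SYN_RECV): report them to
 * userspace as SYN_RECV with the retransmit timer (idiag_timer == 1).
 */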
static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     u16 nlmsg_flags, bool net_admin)
{
	struct request_sock *reqsk = inet_reqsk(sk);
	struct inet_diag_msg *r;
	struct nlmsghdr *nlh;
	long tmo;

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*r), nlmsg_flags);
	if (!nlh)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	inet_diag_msg_common_fill(r, sk);
	r->idiag_state = TCP_SYN_RECV;
	r->idiag_timer = 1;
	r->idiag_retrans = READ_ONCE(reqsk->num_retrans);

	BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
		     offsetof(struct sock, sk_cookie));

	tmo = READ_ONCE(inet_reqsk(sk)->rsk_timer.expires) - jiffies;
	r->idiag_expires = jiffies_delta_to_msecs(tmo);
	r->idiag_rqueue = 0;
	r->idiag_wqueue = 0;
	r->idiag_uid = 0;
	r->idiag_inode = 0;

	if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
				     inet_rsk(reqsk)->ir_mark)) {
		nlmsg_cancel(skb, nlh);
		return -EMSGSIZE;
	}

	nlmsg_end(skb, nlh);
	return 0;
}

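/* Dispatch on socket flavour: timewait and request sockets need the
 * minimal fillers above, everything else gets the full treatment.
 */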
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			u16 nlmsg_flags, bool net_admin)
{
	if (sk->sk_state == TCP_TIME_WAIT)
		return tcp_twsk_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	if (sk->sk_state == TCP_NEW_SYN_RECV)
		return tcp_req_diag_fill(sk, skb, cb, nlmsg_flags, net_admin);

	return inet_sk_diag_fill(sk, inet_csk(sk), skb, cb, r, nlmsg_flags,
				 net_admin);
}

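/* The dump path reads address/port fields of timewait sockets through
 * struct sock / struct inet_sock casts; these compile-time asserts pin
 * the field-layout overlap that the casts rely on.
 */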
static void twsk_build_assert(void)
{
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_family) !=
		     offsetof(struct sock, sk_family));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_num) !=
		     offsetof(struct inet_sock, inet_num));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_dport) !=
		     offsetof(struct inet_sock, inet_dport));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_rcv_saddr) !=
		     offsetof(struct inet_sock, inet_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_daddr) !=
		     offsetof(struct inet_sock, inet_daddr));

#if IS_ENABLED(CONFIG_IPV6)
	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_rcv_saddr) !=
		     offsetof(struct sock, sk_v6_rcv_saddr));

	BUILD_BUG_ON(offsetof(struct inet_timewait_sock, tw_v6_daddr) !=
		     offsetof(struct sock, sk_v6_daddr));
#endif
}

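/* Full-table dump.  The walk is resumable across netlink messages:
 * cb->args[0] tracks the phase (0 = listeners, 1 = bound-but-closed,
 * 2 = established hash), cb->args[1] the bucket and cb->args[2] the
 * position within the bucket.
 */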
static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
			  const struct inet_diag_req_v2 *r)
{
	bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
	struct inet_diag_dump_data *cb_data = cb->data;
	struct net *net = sock_net(skb->sk);
	u32 idiag_states = r->idiag_states;
	struct inet_hashinfo *hashinfo;
	int i, num, s_i, s_num;
	struct sock *sk;

	hashinfo = net->ipv4.tcp_death_row.hashinfo;
	if (idiag_states & TCPF_SYN_RECV)
		idiag_states |= TCPF_NEW_SYN_RECV;
	s_i = cb->args[1];
	s_num = num = cb->args[2];

	if (cb->args[0] == 0) {
		if (!(idiag_states & TCPF_LISTEN) || r->id.idiag_dport)
			goto skip_listen_ht;

		for (i = s_i; i <= hashinfo->lhash2_mask; i++) {
			struct inet_listen_hashbucket *ilb;
			struct hlist_nulls_node *node;

			num = 0;
			ilb = &hashinfo->lhash2[i];

			if (hlist_nulls_empty(&ilb->nulls_head)) {
				s_num = 0;
				continue;
			}
			spin_lock(&ilb->lock);
			sk_nulls_for_each(sk, node, &ilb->nulls_head) {
				struct inet_sock *inet = inet_sk(sk);

				if (!net_eq(sock_net(sk), net))
					continue;

				if (num < s_num) {
					num++;
					continue;
				}

				if (r->sdiag_family != AF_UNSPEC &&
				    sk->sk_family != r->sdiag_family)
					goto next_listen;

				if (r->id.idiag_sport != inet->inet_sport &&
				    r->id.idiag_sport)
					goto next_listen;

				if (!inet_diag_bc_sk(cb_data, sk))
					goto next_listen;

				if (inet_sk_diag_fill(sk, inet_csk(sk), skb,
						      cb, r, NLM_F_MULTI,
						      net_admin) < 0) {
					spin_unlock(&ilb->lock);
					goto done;
				}

next_listen:
				++num;
			}
			spin_unlock(&ilb->lock);

			s_num = 0;
		}
skip_listen_ht:
		cb->args[0] = 1;
		s_i = num = s_num = 0;
	}

/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
 * with bh disabled.
 */
#define SKARR_SZ 16

	/* Dump bound but inactive (not listening, connecting, etc.) sockets */
	if (cb->args[0] == 1) {
		if (!(idiag_states & TCPF_BOUND_INACTIVE))
			goto skip_bind_ht;

		for (i = s_i; i < hashinfo->bhash_size; i++) {
			struct inet_bind_hashbucket *ibb;
			struct inet_bind2_bucket *tb2;
			struct sock *sk_arr[SKARR_SZ];
			int num_arr[SKARR_SZ];
			int idx, accum, res;

resume_bind_walk:
			num = 0;
			accum = 0;
			ibb = &hashinfo->bhash2[i];

			if (hlist_empty(&ibb->chain)) {
				s_num = 0;
				continue;
			}
			spin_lock_bh(&ibb->lock);
			inet_bind_bucket_for_each(tb2, &ibb->chain) {
				if (!net_eq(ib2_net(tb2), net))
					continue;

				sk_for_each_bound(sk, &tb2->owners) {
					struct inet_sock *inet = inet_sk(sk);

					if (num < s_num)
						goto next_bind;

					if (sk->sk_state != TCP_CLOSE ||
					    !inet->inet_num)
						goto next_bind;

					if (r->sdiag_family != AF_UNSPEC &&
					    r->sdiag_family != sk->sk_family)
						goto next_bind;

					if (!inet_diag_bc_sk(cb_data, sk))
						goto next_bind;

					sock_hold(sk);
					num_arr[accum] = num;
					sk_arr[accum] = sk;
					if (++accum == SKARR_SZ)
						goto pause_bind_walk;
next_bind:
					num++;
				}
			}
pause_bind_walk:
			spin_unlock_bh(&ibb->lock);

			res = 0;
			for (idx = 0; idx < accum; idx++) {
				if (res >= 0) {
					res = inet_sk_diag_fill(sk_arr[idx],
								NULL, skb, cb,
								r, NLM_F_MULTI,
								net_admin);
					if (res < 0)
						num = num_arr[idx];
				}
				sock_put(sk_arr[idx]);
			}
			if (res < 0)
				goto done;

			cond_resched();

			if (accum == SKARR_SZ) {
				s_num = num + 1;
				goto resume_bind_walk;
			}

			s_num = 0;
		}
skip_bind_ht:
		cb->args[0] = 2;
		s_i = num = s_num = 0;
	}

	if (!(idiag_states & ~TCPF_LISTEN))
		goto out;

	for (i = s_i; i <= hashinfo->ehash_mask; i++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[i];
		spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
		struct hlist_nulls_node *node;
		struct sock *sk_arr[SKARR_SZ];
		int num_arr[SKARR_SZ];
		int idx, accum, res;

		if (hlist_nulls_empty(&head->chain))
			continue;

		if (i > s_i)
			s_num = 0;

next_chunk:
		num = 0;
		accum = 0;
		spin_lock_bh(lock);
		sk_nulls_for_each(sk, node, &head->chain) {
			int state;

			if (!net_eq(sock_net(sk), net))
				continue;
			if (num < s_num)
				goto next_normal;
			state = (sk->sk_state == TCP_TIME_WAIT) ?
				READ_ONCE(inet_twsk(sk)->tw_substate) : sk->sk_state;
			if (!(idiag_states & (1 << state)))
				goto next_normal;
			if (r->sdiag_family != AF_UNSPEC &&
			    sk->sk_family != r->sdiag_family)
				goto next_normal;
			if (r->id.idiag_sport != htons(sk->sk_num) &&
			    r->id.idiag_sport)
				goto next_normal;
			if (r->id.idiag_dport != sk->sk_dport &&
			    r->id.idiag_dport)
				goto next_normal;
			twsk_build_assert();

			if (!inet_diag_bc_sk(cb_data, sk))
				goto next_normal;

			if (!refcount_inc_not_zero(&sk->sk_refcnt))
				goto next_normal;

			num_arr[accum] = num;
			sk_arr[accum] = sk;
			if (++accum == SKARR_SZ)
				break;
next_normal:
			++num;
		}
		spin_unlock_bh(lock);

		res = 0;
		for (idx = 0; idx < accum; idx++) {
			if (res >= 0) {
				res = sk_diag_fill(sk_arr[idx], skb, cb, r,
						   NLM_F_MULTI, net_admin);
				if (res < 0)
					num = num_arr[idx];
			}
			sock_gen_put(sk_arr[idx]);
		}
		if (res < 0)
			break;

		cond_resched();

		if (accum == SKARR_SZ) {
			s_num = num + 1;
			goto next_chunk;
		}
	}

done:
	cb->args[1] = i;
	cb->args[2] = num;
out:
	;
}

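/* Exact-match lookup for a single socket identified by the tuple in
 * the request.  On success the caller owns the reference (drop it with
 * sock_gen_put()); the cookie check guards against tuple reuse by an
 * unrelated socket.
 */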
static struct sock *tcp_diag_find_one_icsk(struct net *net,
					   const struct inet_diag_req_v2 *req)
{
	struct sock *sk;

	rcu_read_lock();
	if (req->sdiag_family == AF_INET) {
		sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[0],
				 req->id.idiag_dport, req->id.idiag_src[0],
				 req->id.idiag_sport, req->id.idiag_if);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (req->sdiag_family == AF_INET6) {
		if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) &&
		    ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src))
			sk = inet_lookup(net, NULL, 0, req->id.idiag_dst[3],
					 req->id.idiag_dport, req->id.idiag_src[3],
					 req->id.idiag_sport, req->id.idiag_if);
		else
			sk = inet6_lookup(net, NULL, 0,
					  (struct in6_addr *)req->id.idiag_dst,
					  req->id.idiag_dport,
					  (struct in6_addr *)req->id.idiag_src,
					  req->id.idiag_sport,
					  req->id.idiag_if);
#endif
	} else {
		rcu_read_unlock();
		return ERR_PTR(-EINVAL);
	}
	rcu_read_unlock();
	if (!sk)
		return ERR_PTR(-ENOENT);

	if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) {
		sock_gen_put(sk);
		return ERR_PTR(-ENOENT);
	}

	return sk;
}

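/* SOCK_DIAG_BY_FAMILY request for exactly one socket: look it up, size
 * the reply worst-case, fill it and unicast it back to the requester.
 */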
static int tcp_diag_dump_one(struct netlink_callback *cb,
			     const struct inet_diag_req_v2 *req)
{
	struct sk_buff *in_skb = cb->skb;
	struct sk_buff *rep;
	struct sock *sk;
	struct net *net;
	bool net_admin;
	int err;

	net = sock_net(in_skb->sk);
	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN);
	rep = nlmsg_new(tcp_diag_get_aux_size(sk, net_admin), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto out;
	}

	err = sk_diag_fill(sk, rep, cb, req, 0, net_admin);
	if (err < 0) {
		WARN_ON(err == -EMSGSIZE);
		nlmsg_free(rep);
		goto out;
	}
	err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);

out:
	if (sk)
		sock_gen_put(sk);

	return err;
}

#ifdef CONFIG_INET_DIAG_DESTROY
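/* SOCK_DESTROY: forcibly abort a live connection; the owning
 * application sees ECONNABORTED.  CAP_NET_ADMIN is required and is
 * checked by the sock_diag core before this handler is called.
 */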
static int tcp_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *req)
{
	struct net *net = sock_net(in_skb->sk);
	struct sock *sk;
	int err;

	sk = tcp_diag_find_one_icsk(net, req);
	if (IS_ERR(sk))
		return PTR_ERR(sk);

	err = sock_diag_destroy(sk, ECONNABORTED);

	sock_gen_put(sk);

	return err;
}
#endif

static const struct inet_diag_handler tcp_diag_handler = {
	.owner			= THIS_MODULE,
	.dump			= tcp_diag_dump,
	.dump_one		= tcp_diag_dump_one,
	.idiag_get_info		= tcp_diag_get_info,
	.idiag_get_aux		= tcp_diag_get_aux,
	.idiag_type		= IPPROTO_TCP,
	.idiag_info_size	= sizeof(struct tcp_info),
#ifdef CONFIG_INET_DIAG_DESTROY
	.destroy		= tcp_diag_destroy,
#endif
};

static int __init tcp_diag_init(void)
{
	return inet_diag_register(&tcp_diag_handler);
}

static void __exit tcp_diag_exit(void)
{
	inet_diag_unregister(&tcp_diag_handler);
}

module_init(tcp_diag_init);
module_exit(tcp_diag_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP socket monitoring via SOCK_DIAG");
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-6 /* AF_INET - IPPROTO_TCP */);
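
/*
 * Userspace view (illustrative sketch, kept inside a comment so it is
 * not part of the module): a minimal NETLINK_SOCK_DIAG query that this
 * handler answers.  It asks for all established IPv4 TCP sockets;
 * each reply carries an inet_diag_msg header followed by the
 * attributes filled in above.
 *
 *	#include <linux/inet_diag.h>
 *	#include <linux/netlink.h>
 *	#include <linux/sock_diag.h>
 *	#include <netinet/tcp.h>
 *	#include <sys/socket.h>
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
 *	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct inet_diag_req_v2 req;
 *	} msg = {
 *		.nlh = {
 *			.nlmsg_len   = sizeof(msg),
 *			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
 *			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
 *		},
 *		.req = {
 *			.sdiag_family   = AF_INET,
 *			.sdiag_protocol = IPPROTO_TCP,
 *			.idiag_states   = 1 << TCP_ESTABLISHED,
 *		},
 *	};
 *	sendto(fd, &msg, sizeof(msg), 0,
 *	       (struct sockaddr *)&kernel, sizeof(kernel));
 *	// then recv() and walk the NLMSG_* macros over the replies
 */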