Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/*
2 * net/dccp/proto.c
3 *
4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11
12#include <linux/dccp.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/sched.h>
16#include <linux/kernel.h>
17#include <linux/skbuff.h>
18#include <linux/netdevice.h>
19#include <linux/in.h>
20#include <linux/if_arp.h>
21#include <linux/init.h>
22#include <linux/random.h>
23#include <net/checksum.h>
24
25#include <net/inet_sock.h>
26#include <net/sock.h>
27#include <net/xfrm.h>
28
29#include <asm/ioctls.h>
30#include <asm/semaphore.h>
31#include <linux/spinlock.h>
32#include <linux/timer.h>
33#include <linux/delay.h>
34#include <linux/poll.h>
35
36#include "ccid.h"
37#include "dccp.h"
38#include "feat.h"
39
40DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41
42EXPORT_SYMBOL_GPL(dccp_statistics);
43
44atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
48struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52};
53
54EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56/* the maximum queue length for tx in packets. 0 is no limit */
57int sysctl_dccp_tx_qlen __read_mostly = 5;
58
59void dccp_set_state(struct sock *sk, const int state)
60{
61 const int oldstate = sk->sk_state;
62
63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
66
67 switch (state) {
68 case DCCP_OPEN:
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71 break;
72
73 case DCCP_CLOSED:
74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78 sk->sk_prot->unhash(sk);
79 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81 inet_put_port(&dccp_hashinfo, sk);
82 /* fall through */
83 default:
84 if (oldstate == DCCP_OPEN)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86 }
87
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
90 */
91 sk->sk_state = state;
92}
93
94EXPORT_SYMBOL_GPL(dccp_set_state);
95
96static void dccp_finish_passive_close(struct sock *sk)
97{
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
103 break;
104 case DCCP_PASSIVE_CLOSEREQ:
105 /*
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 */
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
111 }
112}
113
114void dccp_done(struct sock *sk)
115{
116 dccp_set_state(sk, DCCP_CLOSED);
117 dccp_clear_xmit_timers(sk);
118
119 sk->sk_shutdown = SHUTDOWN_MASK;
120
121 if (!sock_flag(sk, SOCK_DEAD))
122 sk->sk_state_change(sk);
123 else
124 inet_csk_destroy_sock(sk);
125}
126
127EXPORT_SYMBOL_GPL(dccp_done);
128
129const char *dccp_packet_name(const int type)
130{
131 static const char *dccp_packet_names[] = {
132 [DCCP_PKT_REQUEST] = "REQUEST",
133 [DCCP_PKT_RESPONSE] = "RESPONSE",
134 [DCCP_PKT_DATA] = "DATA",
135 [DCCP_PKT_ACK] = "ACK",
136 [DCCP_PKT_DATAACK] = "DATAACK",
137 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 [DCCP_PKT_CLOSE] = "CLOSE",
139 [DCCP_PKT_RESET] = "RESET",
140 [DCCP_PKT_SYNC] = "SYNC",
141 [DCCP_PKT_SYNCACK] = "SYNCACK",
142 };
143
144 if (type >= DCCP_NR_PKT_TYPES)
145 return "INVALID";
146 else
147 return dccp_packet_names[type];
148}
149
150EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152const char *dccp_state_name(const int state)
153{
154 static char *dccp_state_names[] = {
155 [DCCP_OPEN] = "OPEN",
156 [DCCP_REQUESTING] = "REQUESTING",
157 [DCCP_PARTOPEN] = "PARTOPEN",
158 [DCCP_LISTEN] = "LISTEN",
159 [DCCP_RESPOND] = "RESPOND",
160 [DCCP_CLOSING] = "CLOSING",
161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
166 };
167
168 if (state >= DCCP_MAX_STATES)
169 return "INVALID STATE!";
170 else
171 return dccp_state_names[state];
172}
173
174EXPORT_SYMBOL_GPL(dccp_state_name);
175
176void dccp_hash(struct sock *sk)
177{
178 inet_hash(&dccp_hashinfo, sk);
179}
180
181EXPORT_SYMBOL_GPL(dccp_hash);
182
183void dccp_unhash(struct sock *sk)
184{
185 inet_unhash(&dccp_hashinfo, sk);
186}
187
188EXPORT_SYMBOL_GPL(dccp_unhash);
189
190int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
191{
192 struct dccp_sock *dp = dccp_sk(sk);
193 struct dccp_minisock *dmsk = dccp_msk(sk);
194 struct inet_connection_sock *icsk = inet_csk(sk);
195
196 dccp_minisock_init(&dp->dccps_minisock);
197
198 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
199 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
200 sk->sk_state = DCCP_CLOSED;
201 sk->sk_write_space = dccp_write_space;
202 icsk->icsk_sync_mss = dccp_sync_mss;
203 dp->dccps_mss_cache = 536;
204 dp->dccps_rate_last = jiffies;
205 dp->dccps_role = DCCP_ROLE_UNDEFINED;
206 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
207 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
208
209 dccp_init_xmit_timers(sk);
210
211 /*
212 * FIXME: We're hardcoding the CCID, and doing this at this point makes
213 * the listening (master) sock get CCID control blocks, which is not
214 * necessary, but for now, to not mess with the test userspace apps,
215 * lets leave it here, later the real solution is to do this in a
216 * setsockopt(CCIDs-I-want/accept). -acme
217 */
218 if (likely(ctl_sock_initialized)) {
219 int rc = dccp_feat_init(dmsk);
220
221 if (rc)
222 return rc;
223
224 if (dmsk->dccpms_send_ack_vector) {
225 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
226 if (dp->dccps_hc_rx_ackvec == NULL)
227 return -ENOMEM;
228 }
229 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
230 sk, GFP_KERNEL);
231 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
232 sk, GFP_KERNEL);
233 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
234 dp->dccps_hc_tx_ccid == NULL)) {
235 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
236 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
237 if (dmsk->dccpms_send_ack_vector) {
238 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239 dp->dccps_hc_rx_ackvec = NULL;
240 }
241 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
242 return -ENOMEM;
243 }
244 } else {
245 /* control socket doesn't need feat nego */
246 INIT_LIST_HEAD(&dmsk->dccpms_pending);
247 INIT_LIST_HEAD(&dmsk->dccpms_conf);
248 }
249
250 return 0;
251}
252
253EXPORT_SYMBOL_GPL(dccp_init_sock);
254
255int dccp_destroy_sock(struct sock *sk)
256{
257 struct dccp_sock *dp = dccp_sk(sk);
258 struct dccp_minisock *dmsk = dccp_msk(sk);
259
260 /*
261 * DCCP doesn't use sk_write_queue, just sk_send_head
262 * for retransmissions
263 */
264 if (sk->sk_send_head != NULL) {
265 kfree_skb(sk->sk_send_head);
266 sk->sk_send_head = NULL;
267 }
268
269 /* Clean up a referenced DCCP bind bucket. */
270 if (inet_csk(sk)->icsk_bind_hash != NULL)
271 inet_put_port(&dccp_hashinfo, sk);
272
273 kfree(dp->dccps_service_list);
274 dp->dccps_service_list = NULL;
275
276 if (dmsk->dccpms_send_ack_vector) {
277 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
278 dp->dccps_hc_rx_ackvec = NULL;
279 }
280 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
281 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
282 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
283
284 /* clean up feature negotiation state */
285 dccp_feat_clean(dmsk);
286
287 return 0;
288}
289
290EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291
292static inline int dccp_listen_start(struct sock *sk, int backlog)
293{
294 struct dccp_sock *dp = dccp_sk(sk);
295
296 dp->dccps_role = DCCP_ROLE_LISTEN;
297 return inet_csk_listen_start(sk, backlog);
298}
299
300static inline int dccp_need_reset(int state)
301{
302 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303 state != DCCP_REQUESTING;
304}
305
306int dccp_disconnect(struct sock *sk, int flags)
307{
308 struct inet_connection_sock *icsk = inet_csk(sk);
309 struct inet_sock *inet = inet_sk(sk);
310 int err = 0;
311 const int old_state = sk->sk_state;
312
313 if (old_state != DCCP_CLOSED)
314 dccp_set_state(sk, DCCP_CLOSED);
315
316 /*
317 * This corresponds to the ABORT function of RFC793, sec. 3.8
318 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
319 */
320 if (old_state == DCCP_LISTEN) {
321 inet_csk_listen_stop(sk);
322 } else if (dccp_need_reset(old_state)) {
323 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324 sk->sk_err = ECONNRESET;
325 } else if (old_state == DCCP_REQUESTING)
326 sk->sk_err = ECONNRESET;
327
328 dccp_clear_xmit_timers(sk);
329 __skb_queue_purge(&sk->sk_receive_queue);
330 if (sk->sk_send_head != NULL) {
331 __kfree_skb(sk->sk_send_head);
332 sk->sk_send_head = NULL;
333 }
334
335 inet->dport = 0;
336
337 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
338 inet_reset_saddr(sk);
339
340 sk->sk_shutdown = 0;
341 sock_reset_flag(sk, SOCK_DONE);
342
343 icsk->icsk_backoff = 0;
344 inet_csk_delack_init(sk);
345 __sk_dst_reset(sk);
346
347 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
348
349 sk->sk_error_report(sk);
350 return err;
351}
352
353EXPORT_SYMBOL_GPL(dccp_disconnect);
354
355/*
356 * Wait for a DCCP event.
357 *
358 * Note that we don't need to lock the socket, as the upper poll layers
359 * take care of normal races (between the test and the event) and we don't
360 * go look at any of the socket buffers directly.
361 */
362unsigned int dccp_poll(struct file *file, struct socket *sock,
363 poll_table *wait)
364{
365 unsigned int mask;
366 struct sock *sk = sock->sk;
367
368 poll_wait(file, sk->sk_sleep, wait);
369 if (sk->sk_state == DCCP_LISTEN)
370 return inet_csk_listen_poll(sk);
371
372 /* Socket is not locked. We are protected from async events
373 by poll logic and correct handling of state changes
374 made by another threads is impossible in any case.
375 */
376
377 mask = 0;
378 if (sk->sk_err)
379 mask = POLLERR;
380
381 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
382 mask |= POLLHUP;
383 if (sk->sk_shutdown & RCV_SHUTDOWN)
384 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
385
386 /* Connected? */
387 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
388 if (atomic_read(&sk->sk_rmem_alloc) > 0)
389 mask |= POLLIN | POLLRDNORM;
390
391 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
392 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
393 mask |= POLLOUT | POLLWRNORM;
394 } else { /* send SIGIO later */
395 set_bit(SOCK_ASYNC_NOSPACE,
396 &sk->sk_socket->flags);
397 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
398
399 /* Race breaker. If space is freed after
400 * wspace test but before the flags are set,
401 * IO signal will be lost.
402 */
403 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
404 mask |= POLLOUT | POLLWRNORM;
405 }
406 }
407 }
408 return mask;
409}
410
411EXPORT_SYMBOL_GPL(dccp_poll);
412
413int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
414{
415 int rc = -ENOTCONN;
416
417 lock_sock(sk);
418
419 if (sk->sk_state == DCCP_LISTEN)
420 goto out;
421
422 switch (cmd) {
423 case SIOCINQ: {
424 struct sk_buff *skb;
425 unsigned long amount = 0;
426
427 skb = skb_peek(&sk->sk_receive_queue);
428 if (skb != NULL) {
429 /*
430 * We will only return the amount of this packet since
431 * that is all that will be read.
432 */
433 amount = skb->len;
434 }
435 rc = put_user(amount, (int __user *)arg);
436 }
437 break;
438 default:
439 rc = -ENOIOCTLCMD;
440 break;
441 }
442out:
443 release_sock(sk);
444 return rc;
445}
446
447EXPORT_SYMBOL_GPL(dccp_ioctl);
448
449static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
450 char __user *optval, int optlen)
451{
452 struct dccp_sock *dp = dccp_sk(sk);
453 struct dccp_service_list *sl = NULL;
454
455 if (service == DCCP_SERVICE_INVALID_VALUE ||
456 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
457 return -EINVAL;
458
459 if (optlen > sizeof(service)) {
460 sl = kmalloc(optlen, GFP_KERNEL);
461 if (sl == NULL)
462 return -ENOMEM;
463
464 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
465 if (copy_from_user(sl->dccpsl_list,
466 optval + sizeof(service),
467 optlen - sizeof(service)) ||
468 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
469 kfree(sl);
470 return -EFAULT;
471 }
472 }
473
474 lock_sock(sk);
475 dp->dccps_service = service;
476
477 kfree(dp->dccps_service_list);
478
479 dp->dccps_service_list = sl;
480 release_sock(sk);
481 return 0;
482}
483
484/* byte 1 is feature. the rest is the preference list */
485static int dccp_setsockopt_change(struct sock *sk, int type,
486 struct dccp_so_feat __user *optval)
487{
488 struct dccp_so_feat opt;
489 u8 *val;
490 int rc;
491
492 if (copy_from_user(&opt, optval, sizeof(opt)))
493 return -EFAULT;
494
495 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496 if (!val)
497 return -ENOMEM;
498
499 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500 rc = -EFAULT;
501 goto out_free_val;
502 }
503
504 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505 val, opt.dccpsf_len, GFP_KERNEL);
506 if (rc)
507 goto out_free_val;
508
509out:
510 return rc;
511
512out_free_val:
513 kfree(val);
514 goto out;
515}
516
517static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518 char __user *optval, int optlen)
519{
520 struct dccp_sock *dp = dccp_sk(sk);
521 int val, err = 0;
522
523 if (optlen < sizeof(int))
524 return -EINVAL;
525
526 if (get_user(val, (int __user *)optval))
527 return -EFAULT;
528
529 if (optname == DCCP_SOCKOPT_SERVICE)
530 return dccp_setsockopt_service(sk, val, optval, optlen);
531
532 lock_sock(sk);
533 switch (optname) {
534 case DCCP_SOCKOPT_PACKET_SIZE:
535 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
536 err = 0;
537 break;
538 case DCCP_SOCKOPT_CHANGE_L:
539 if (optlen != sizeof(struct dccp_so_feat))
540 err = -EINVAL;
541 else
542 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
543 (struct dccp_so_feat __user *)
544 optval);
545 break;
546 case DCCP_SOCKOPT_CHANGE_R:
547 if (optlen != sizeof(struct dccp_so_feat))
548 err = -EINVAL;
549 else
550 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
551 (struct dccp_so_feat __user *)
552 optval);
553 break;
554 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
555 if (dp->dccps_role != DCCP_ROLE_SERVER)
556 err = -EOPNOTSUPP;
557 else
558 dp->dccps_server_timewait = (val != 0);
559 break;
560 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
561 if (val < 0 || val > 15)
562 err = -EINVAL;
563 else
564 dp->dccps_pcslen = val;
565 break;
566 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
567 if (val < 0 || val > 15)
568 err = -EINVAL;
569 else {
570 dp->dccps_pcrlen = val;
571 /* FIXME: add feature negotiation,
572 * ChangeL(MinimumChecksumCoverage, val) */
573 }
574 break;
575 default:
576 err = -ENOPROTOOPT;
577 break;
578 }
579
580 release_sock(sk);
581 return err;
582}
583
584int dccp_setsockopt(struct sock *sk, int level, int optname,
585 char __user *optval, int optlen)
586{
587 if (level != SOL_DCCP)
588 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
589 optname, optval,
590 optlen);
591 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592}
593
594EXPORT_SYMBOL_GPL(dccp_setsockopt);
595
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt  -  compat variant of dccp_setsockopt()
 *
 * SOL_DCCP options are handled by do_dccp_setsockopt(); every other level
 * goes to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
608
609static int dccp_getsockopt_service(struct sock *sk, int len,
610 __be32 __user *optval,
611 int __user *optlen)
612{
613 const struct dccp_sock *dp = dccp_sk(sk);
614 const struct dccp_service_list *sl;
615 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
616
617 lock_sock(sk);
618 if ((sl = dp->dccps_service_list) != NULL) {
619 slen = sl->dccpsl_nr * sizeof(u32);
620 total_len += slen;
621 }
622
623 err = -EINVAL;
624 if (total_len > len)
625 goto out;
626
627 err = 0;
628 if (put_user(total_len, optlen) ||
629 put_user(dp->dccps_service, optval) ||
630 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
631 err = -EFAULT;
632out:
633 release_sock(sk);
634 return err;
635}
636
637static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
638 char __user *optval, int __user *optlen)
639{
640 struct dccp_sock *dp;
641 int val, len;
642
643 if (get_user(len, optlen))
644 return -EFAULT;
645
646 if (len < (int)sizeof(int))
647 return -EINVAL;
648
649 dp = dccp_sk(sk);
650
651 switch (optname) {
652 case DCCP_SOCKOPT_PACKET_SIZE:
653 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
654 return 0;
655 case DCCP_SOCKOPT_SERVICE:
656 return dccp_getsockopt_service(sk, len,
657 (__be32 __user *)optval, optlen);
658 case DCCP_SOCKOPT_GET_CUR_MPS:
659 val = dp->dccps_mss_cache;
660 break;
661 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
662 val = dp->dccps_server_timewait;
663 break;
664 case DCCP_SOCKOPT_SEND_CSCOV:
665 val = dp->dccps_pcslen;
666 break;
667 case DCCP_SOCKOPT_RECV_CSCOV:
668 val = dp->dccps_pcrlen;
669 break;
670 case 128 ... 191:
671 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
672 len, (u32 __user *)optval, optlen);
673 case 192 ... 255:
674 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
675 len, (u32 __user *)optval, optlen);
676 default:
677 return -ENOPROTOOPT;
678 }
679
680 len = sizeof(val);
681 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
682 return -EFAULT;
683
684 return 0;
685}
686
687int dccp_getsockopt(struct sock *sk, int level, int optname,
688 char __user *optval, int __user *optlen)
689{
690 if (level != SOL_DCCP)
691 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
692 optname, optval,
693 optlen);
694 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
695}
696
697EXPORT_SYMBOL_GPL(dccp_getsockopt);
698
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt  -  compat variant of dccp_getsockopt()
 *
 * SOL_DCCP options are handled by do_dccp_getsockopt(); every other level
 * goes to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname,
					  optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
711
712int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
713 size_t len)
714{
715 const struct dccp_sock *dp = dccp_sk(sk);
716 const int flags = msg->msg_flags;
717 const int noblock = flags & MSG_DONTWAIT;
718 struct sk_buff *skb;
719 int rc, size;
720 long timeo;
721
722 if (len > dp->dccps_mss_cache)
723 return -EMSGSIZE;
724
725 lock_sock(sk);
726
727 if (sysctl_dccp_tx_qlen &&
728 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
729 rc = -EAGAIN;
730 goto out_release;
731 }
732
733 timeo = sock_sndtimeo(sk, noblock);
734
735 /*
736 * We have to use sk_stream_wait_connect here to set sk_write_pending,
737 * so that the trick in dccp_rcv_request_sent_state_process.
738 */
739 /* Wait for a connection to finish. */
740 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
741 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
742 goto out_release;
743
744 size = sk->sk_prot->max_header + len;
745 release_sock(sk);
746 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
747 lock_sock(sk);
748 if (skb == NULL)
749 goto out_release;
750
751 skb_reserve(skb, sk->sk_prot->max_header);
752 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
753 if (rc != 0)
754 goto out_discard;
755
756 skb_queue_tail(&sk->sk_write_queue, skb);
757 dccp_write_xmit(sk,0);
758out_release:
759 release_sock(sk);
760 return rc ? : len;
761out_discard:
762 kfree_skb(skb);
763 goto out_release;
764}
765
766EXPORT_SYMBOL_GPL(dccp_sendmsg);
767
768int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
769 size_t len, int nonblock, int flags, int *addr_len)
770{
771 const struct dccp_hdr *dh;
772 long timeo;
773
774 lock_sock(sk);
775
776 if (sk->sk_state == DCCP_LISTEN) {
777 len = -ENOTCONN;
778 goto out;
779 }
780
781 timeo = sock_rcvtimeo(sk, nonblock);
782
783 do {
784 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
785
786 if (skb == NULL)
787 goto verify_sock_status;
788
789 dh = dccp_hdr(skb);
790
791 switch (dh->dccph_type) {
792 case DCCP_PKT_DATA:
793 case DCCP_PKT_DATAACK:
794 goto found_ok_skb;
795
796 case DCCP_PKT_CLOSE:
797 case DCCP_PKT_CLOSEREQ:
798 if (!(flags & MSG_PEEK))
799 dccp_finish_passive_close(sk);
800 /* fall through */
801 case DCCP_PKT_RESET:
802 dccp_pr_debug("found fin (%s) ok!\n",
803 dccp_packet_name(dh->dccph_type));
804 len = 0;
805 goto found_fin_ok;
806 default:
807 dccp_pr_debug("packet_type=%s\n",
808 dccp_packet_name(dh->dccph_type));
809 sk_eat_skb(sk, skb, 0);
810 }
811verify_sock_status:
812 if (sock_flag(sk, SOCK_DONE)) {
813 len = 0;
814 break;
815 }
816
817 if (sk->sk_err) {
818 len = sock_error(sk);
819 break;
820 }
821
822 if (sk->sk_shutdown & RCV_SHUTDOWN) {
823 len = 0;
824 break;
825 }
826
827 if (sk->sk_state == DCCP_CLOSED) {
828 if (!sock_flag(sk, SOCK_DONE)) {
829 /* This occurs when user tries to read
830 * from never connected socket.
831 */
832 len = -ENOTCONN;
833 break;
834 }
835 len = 0;
836 break;
837 }
838
839 if (!timeo) {
840 len = -EAGAIN;
841 break;
842 }
843
844 if (signal_pending(current)) {
845 len = sock_intr_errno(timeo);
846 break;
847 }
848
849 sk_wait_data(sk, &timeo);
850 continue;
851 found_ok_skb:
852 if (len > skb->len)
853 len = skb->len;
854 else if (len < skb->len)
855 msg->msg_flags |= MSG_TRUNC;
856
857 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
858 /* Exception. Bailout! */
859 len = -EFAULT;
860 break;
861 }
862 found_fin_ok:
863 if (!(flags & MSG_PEEK))
864 sk_eat_skb(sk, skb, 0);
865 break;
866 } while (1);
867out:
868 release_sock(sk);
869 return len;
870}
871
872EXPORT_SYMBOL_GPL(dccp_recvmsg);
873
874int inet_dccp_listen(struct socket *sock, int backlog)
875{
876 struct sock *sk = sock->sk;
877 unsigned char old_state;
878 int err;
879
880 lock_sock(sk);
881
882 err = -EINVAL;
883 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
884 goto out;
885
886 old_state = sk->sk_state;
887 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
888 goto out;
889
890 /* Really, if the socket is already in listen state
891 * we can only allow the backlog to be adjusted.
892 */
893 if (old_state != DCCP_LISTEN) {
894 /*
895 * FIXME: here it probably should be sk->sk_prot->listen_start
896 * see tcp_listen_start
897 */
898 err = dccp_listen_start(sk, backlog);
899 if (err)
900 goto out;
901 }
902 sk->sk_max_ack_backlog = backlog;
903 err = 0;
904
905out:
906 release_sock(sk);
907 return err;
908}
909
910EXPORT_SYMBOL_GPL(inet_dccp_listen);
911
912static void dccp_terminate_connection(struct sock *sk)
913{
914 u8 next_state = DCCP_CLOSED;
915
916 switch (sk->sk_state) {
917 case DCCP_PASSIVE_CLOSE:
918 case DCCP_PASSIVE_CLOSEREQ:
919 dccp_finish_passive_close(sk);
920 break;
921 case DCCP_PARTOPEN:
922 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
923 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
924 /* fall through */
925 case DCCP_OPEN:
926 dccp_send_close(sk, 1);
927
928 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
929 !dccp_sk(sk)->dccps_server_timewait)
930 next_state = DCCP_ACTIVE_CLOSEREQ;
931 else
932 next_state = DCCP_CLOSING;
933 /* fall through */
934 default:
935 dccp_set_state(sk, next_state);
936 }
937}
938
939void dccp_close(struct sock *sk, long timeout)
940{
941 struct dccp_sock *dp = dccp_sk(sk);
942 struct sk_buff *skb;
943 u32 data_was_unread = 0;
944 int state;
945
946 lock_sock(sk);
947
948 sk->sk_shutdown = SHUTDOWN_MASK;
949
950 if (sk->sk_state == DCCP_LISTEN) {
951 dccp_set_state(sk, DCCP_CLOSED);
952
953 /* Special case. */
954 inet_csk_listen_stop(sk);
955
956 goto adjudge_to_death;
957 }
958
959 sk_stop_timer(sk, &dp->dccps_xmit_timer);
960
961 /*
962 * We need to flush the recv. buffs. We do this only on the
963 * descriptor close, not protocol-sourced closes, because the
964 *reader process may not have drained the data yet!
965 */
966 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
967 data_was_unread += skb->len;
968 __kfree_skb(skb);
969 }
970
971 if (data_was_unread) {
972 /* Unread data was tossed, send an appropriate Reset Code */
973 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
974 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
975 dccp_set_state(sk, DCCP_CLOSED);
976 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
977 /* Check zero linger _after_ checking for unread data. */
978 sk->sk_prot->disconnect(sk, 0);
979 } else if (sk->sk_state != DCCP_CLOSED) {
980 dccp_terminate_connection(sk);
981 }
982
983 sk_stream_wait_close(sk, timeout);
984
985adjudge_to_death:
986 state = sk->sk_state;
987 sock_hold(sk);
988 sock_orphan(sk);
989 atomic_inc(sk->sk_prot->orphan_count);
990
991 /*
992 * It is the last release_sock in its life. It will remove backlog.
993 */
994 release_sock(sk);
995 /*
996 * Now socket is owned by kernel and we acquire BH lock
997 * to finish close. No need to check for user refs.
998 */
999 local_bh_disable();
1000 bh_lock_sock(sk);
1001 BUG_TRAP(!sock_owned_by_user(sk));
1002
1003 /* Have we already been destroyed by a softirq or backlog? */
1004 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1005 goto out;
1006
1007 if (sk->sk_state == DCCP_CLOSED)
1008 inet_csk_destroy_sock(sk);
1009
1010 /* Otherwise, socket is reprieved until protocol close. */
1011
1012out:
1013 bh_unlock_sock(sk);
1014 local_bh_enable();
1015 sock_put(sk);
1016}
1017
1018EXPORT_SYMBOL_GPL(dccp_close);
1019
/*
 * dccp_shutdown  -  shutdown() hook for DCCP sockets
 * @sk:  socket (not touched)
 * @how: shutdown mode, only printed
 *
 * Currently a stub: it emits a debug message and does nothing else.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1026
1027static int __init dccp_mib_init(void)
1028{
1029 int rc = -ENOMEM;
1030
1031 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1032 if (dccp_statistics[0] == NULL)
1033 goto out;
1034
1035 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1036 if (dccp_statistics[1] == NULL)
1037 goto out_free_one;
1038
1039 rc = 0;
1040out:
1041 return rc;
1042out_free_one:
1043 free_percpu(dccp_statistics[0]);
1044 dccp_statistics[0] = NULL;
1045 goto out;
1046
1047}
1048
1049static void dccp_mib_exit(void)
1050{
1051 free_percpu(dccp_statistics[0]);
1052 free_percpu(dccp_statistics[1]);
1053 dccp_statistics[0] = dccp_statistics[1] = NULL;
1054}
1055
1056static int thash_entries;
1057module_param(thash_entries, int, 0444);
1058MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1059
1060#ifdef CONFIG_IP_DCCP_DEBUG
1061int dccp_debug;
1062module_param(dccp_debug, bool, 0444);
1063MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1064
1065EXPORT_SYMBOL_GPL(dccp_debug);
1066#endif
1067
1068static int __init dccp_init(void)
1069{
1070 unsigned long goal;
1071 int ehash_order, bhash_order, i;
1072 int rc = -ENOBUFS;
1073
1074 dccp_hashinfo.bind_bucket_cachep =
1075 kmem_cache_create("dccp_bind_bucket",
1076 sizeof(struct inet_bind_bucket), 0,
1077 SLAB_HWCACHE_ALIGN, NULL);
1078 if (!dccp_hashinfo.bind_bucket_cachep)
1079 goto out;
1080
1081 /*
1082 * Size and allocate the main established and bind bucket
1083 * hash tables.
1084 *
1085 * The methodology is similar to that of the buffer cache.
1086 */
1087 if (num_physpages >= (128 * 1024))
1088 goal = num_physpages >> (21 - PAGE_SHIFT);
1089 else
1090 goal = num_physpages >> (23 - PAGE_SHIFT);
1091
1092 if (thash_entries)
1093 goal = (thash_entries *
1094 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1095 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1096 ;
1097 do {
1098 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1099 sizeof(struct inet_ehash_bucket);
1100 while (dccp_hashinfo.ehash_size &
1101 (dccp_hashinfo.ehash_size - 1))
1102 dccp_hashinfo.ehash_size--;
1103 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1104 __get_free_pages(GFP_ATOMIC, ehash_order);
1105 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1106
1107 if (!dccp_hashinfo.ehash) {
1108 DCCP_CRIT("Failed to allocate DCCP established hash table");
1109 goto out_free_bind_bucket_cachep;
1110 }
1111
1112 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1113 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1114 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1115 }
1116
1117 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1118 goto out_free_dccp_ehash;
1119
1120 bhash_order = ehash_order;
1121
1122 do {
1123 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1124 sizeof(struct inet_bind_hashbucket);
1125 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1126 bhash_order > 0)
1127 continue;
1128 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1129 __get_free_pages(GFP_ATOMIC, bhash_order);
1130 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1131
1132 if (!dccp_hashinfo.bhash) {
1133 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1134 goto out_free_dccp_locks;
1135 }
1136
1137 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1138 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1139 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1140 }
1141
1142 rc = dccp_mib_init();
1143 if (rc)
1144 goto out_free_dccp_bhash;
1145
1146 rc = dccp_ackvec_init();
1147 if (rc)
1148 goto out_free_dccp_mib;
1149
1150 rc = dccp_sysctl_init();
1151 if (rc)
1152 goto out_ackvec_exit;
1153
1154 dccp_timestamping_init();
1155out:
1156 return rc;
1157out_ackvec_exit:
1158 dccp_ackvec_exit();
1159out_free_dccp_mib:
1160 dccp_mib_exit();
1161out_free_dccp_bhash:
1162 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1163 dccp_hashinfo.bhash = NULL;
1164out_free_dccp_locks:
1165 inet_ehash_locks_free(&dccp_hashinfo);
1166out_free_dccp_ehash:
1167 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1168 dccp_hashinfo.ehash = NULL;
1169out_free_bind_bucket_cachep:
1170 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1171 dccp_hashinfo.bind_bucket_cachep = NULL;
1172 goto out;
1173}
1174
1175static void __exit dccp_fini(void)
1176{
1177 dccp_mib_exit();
1178 free_pages((unsigned long)dccp_hashinfo.bhash,
1179 get_order(dccp_hashinfo.bhash_size *
1180 sizeof(struct inet_bind_hashbucket)));
1181 free_pages((unsigned long)dccp_hashinfo.ehash,
1182 get_order(dccp_hashinfo.ehash_size *
1183 sizeof(struct inet_ehash_bucket)));
1184 inet_ehash_locks_free(&dccp_hashinfo);
1185 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1186 dccp_ackvec_exit();
1187 dccp_sysctl_exit();
1188}
1189
1190module_init(dccp_init);
1191module_exit(dccp_fini);
1192
1193MODULE_LICENSE("GPL");
1194MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1195MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");