Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[DCCP]: Initial implementation

Development to this point was done on a subversion repository at:

http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

This repository will be kept at this site for the foreseeable future,
so that interested parties can see the history of this code,
attributions, etc.

If I ever decide to take this offline I'll provide the full history at
some other suitable place.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Arnaldo Carvalho de Melo and committed by
David S. Miller
7c657876 c4365c92

+7746
+432
include/linux/dccp.h
#ifndef _LINUX_DCCP_H
#define _LINUX_DCCP_H

#include <linux/in.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/uio.h>
#include <linux/workqueue.h>

#include <net/inet_connection_sock.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/tcp.h>

/* FIXME: this is utterly wrong */
struct sockaddr_dccp {
	struct sockaddr_in in;
	unsigned int service;
};

/*
 * DCCP connection states, mapped onto the TCP state numbers so the generic
 * inet connection sock infrastructure can be reused unmodified.
 */
enum dccp_state {
	DCCP_OPEN	= TCP_ESTABLISHED,
	DCCP_REQUESTING	= TCP_SYN_SENT,
	DCCP_PARTOPEN	= TCP_FIN_WAIT1, /* FIXME: this mapping is horrible,
					    but TCP has no matching state for
					    DCCP_PARTOPEN, as TCP_SYN_RECV is
					    already used by DCCP_RESPOND; now
					    that sk_stream_sendmsg is no longer
					    used there seems to be no reason
					    left to keep the TCP mapping of
					    states at all */
	DCCP_LISTEN	= TCP_LISTEN,
	DCCP_RESPOND	= TCP_SYN_RECV,
	DCCP_CLOSING	= TCP_CLOSING,
	DCCP_TIME_WAIT	= TCP_TIME_WAIT,
	DCCP_CLOSED	= TCP_CLOSE,
	DCCP_MAX_STATES	= TCP_MAX_STATES,
};

#define DCCP_STATE_MASK 0xf
#define DCCP_ACTION_FIN (1<<7)

/* Bit-mask forms of the states above, mirroring the TCPF_* flags */
enum {
	DCCPF_OPEN	= TCPF_ESTABLISHED,
	DCCPF_REQUESTING = TCPF_SYN_SENT,
	DCCPF_PARTOPEN	= TCPF_FIN_WAIT1,
	DCCPF_LISTEN	= TCPF_LISTEN,
	DCCPF_RESPOND	= TCPF_SYN_RECV,
	DCCPF_CLOSING	= TCPF_CLOSING,
	DCCPF_TIME_WAIT	= TCPF_TIME_WAIT,
	DCCPF_CLOSED	= TCPF_CLOSE,
};

/**
 * struct dccp_hdr - generic part of DCCP packet header
 *
 * @dccph_sport - Relevant port on the endpoint that sent this packet
 * @dccph_dport - Relevant port on the other endpoint
 * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
 * @dccph_ccval - Used by the HC-Sender CCID
 * @dccph_cscov - Parts of the packet that are covered by the Checksum field
 * @dccph_checksum - Internet checksum, depends on dccph_cscov
 * @dccph_x - 0 = 24 bit sequence number, 1 = 48
 * @dccph_type - packet type, see DCCP_PKT_ prefixed macros
 * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
 */
struct dccp_hdr {
	__u16	dccph_sport,
		dccph_dport;
	__u8	dccph_doff;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8	dccph_cscov:4,
		dccph_ccval:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8	dccph_ccval:4,
		dccph_cscov:4;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
	__u16	dccph_checksum;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u32	dccph_x:1,
		dccph_type:4,
		dccph_reserved:3,
		dccph_seq:24;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u32	dccph_reserved:3,
		dccph_type:4,
		dccph_x:1,
		dccph_seq:24;
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
};

/* Returns the DCCP header; assumes skb->h.raw has been set to point at it */
static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
{
	return (struct dccp_hdr *)skb->h.raw;
}

/**
 * struct dccp_hdr_ext - the low bits of a 48 bit seq packet
 *
 * @dccph_seq_low - low 24 bits of a 48 bit seq packet
 */
struct dccp_hdr_ext {
	__u32	dccph_seq_low;
};

/* Extension header immediately after the generic one; only valid when
 * dccph_x == 1 (48-bit sequence numbers) */
static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
{
	return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
}

/* Length of the generic header: fixed part, plus the extension header when
 * 48-bit sequence numbers are in use */
static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
}

/* Reassembles the full 24- or 48-bit sequence number in host byte order */
static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
	/* dccph_seq is a 24-bit bitfield; the shift realigns it before the
	 * byte swap — NOTE(review): assumes the bitfield layout above, verify
	 * on each newly supported architecture */
	__u64 seq_nr = ntohl(dh->dccph_seq << 8);
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u64 seq_nr = ntohl(dh->dccph_seq);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif

	if (dh->dccph_x != 0)
		seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);

	return seq_nr;
}

/**
 * struct dccp_hdr_request - Connection initiation request header
 *
 * @dccph_req_service - Service to which the client app wants to connect
 *
 * Options, if any, follow this header on the wire (and must be a multiple
 * of 32 bits); they are not part of this struct.
 */
struct dccp_hdr_request {
	__u32	dccph_req_service;
};

static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
{
	return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb));
}

/**
 * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
 *
 * @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR
 * @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR
 */
struct dccp_hdr_ack_bits {
	__u32	dccph_reserved1:8,
		dccph_ack_nr_high:24;
	__u32	dccph_ack_nr_low;
};

static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
{
	return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb));
}

/* Reassembles the full 48-bit acknowledgment number in host byte order */
static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
{
	const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
#if defined(__LITTLE_ENDIAN_BITFIELD)
	return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#elif defined(__BIG_ENDIAN_BITFIELD)
	return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
#else
#error "Adjust your <asm/byteorder.h> defines"
#endif
}

/**
 * struct dccp_hdr_response - Connection initiation response header
 *
 * @dccph_resp_ack - acknowledgment bits, contain GSR
 * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
 *
 * Options, if any, follow this header on the wire (and must be a multiple
 * of 32 bits); they are not part of this struct.
 */
struct dccp_hdr_response {
	struct dccp_hdr_ack_bits	dccph_resp_ack;
	__u32				dccph_resp_service;
};

static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
{
	return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb));
}

/**
 * struct dccp_hdr_reset - Unconditionally shut down a connection
 *
 * @dccph_reset_ack - acknowledgment bits
 * @dccph_reset_code - one of %dccp_reset_codes
 * @dccph_reset_data - up to 3 bytes of additional error information
 */
struct dccp_hdr_reset {
	struct dccp_hdr_ack_bits	dccph_reset_ack;
	__u8				dccph_reset_code,
					dccph_reset_data[3];
};

static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
{
	return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb));
}

enum dccp_pkt_type {
	DCCP_PKT_REQUEST = 0,
	DCCP_PKT_RESPONSE,
	DCCP_PKT_DATA,
	DCCP_PKT_ACK,
	DCCP_PKT_DATAACK,
	DCCP_PKT_CLOSEREQ,
	DCCP_PKT_CLOSE,
	DCCP_PKT_RESET,
	DCCP_PKT_SYNC,
	DCCP_PKT_SYNCACK,
	DCCP_PKT_INVALID,
};

#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID

/* Length of the type-specific part of the header; DCCP-Data has none,
 * everything else ends with either ack bits or a request/response/reset
 * trailer */
static inline unsigned int dccp_packet_hdr_len(const __u8 type)
{
	if (type == DCCP_PKT_DATA)
		return 0;
	if (type == DCCP_PKT_DATAACK	||
	    type == DCCP_PKT_ACK	||
	    type == DCCP_PKT_SYNC	||
	    type == DCCP_PKT_SYNCACK	||
	    type == DCCP_PKT_CLOSE	||
	    type == DCCP_PKT_CLOSEREQ)
		return sizeof(struct dccp_hdr_ack_bits);
	if (type == DCCP_PKT_REQUEST)
		return sizeof(struct dccp_hdr_request);
	if (type == DCCP_PKT_RESPONSE)
		return sizeof(struct dccp_hdr_response);
	return sizeof(struct dccp_hdr_reset);
}

/* Total DCCP header length: generic part plus the type-specific part */
static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
{
	return dccp_basic_hdr_len(skb) +
	       dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type);
}

enum dccp_reset_codes {
	DCCP_RESET_CODE_UNSPECIFIED = 0,
	DCCP_RESET_CODE_CLOSED,
	DCCP_RESET_CODE_ABORTED,
	DCCP_RESET_CODE_NO_CONNECTION,
	DCCP_RESET_CODE_PACKET_ERROR,
	DCCP_RESET_CODE_OPTION_ERROR,
	DCCP_RESET_CODE_MANDATORY_ERROR,
	DCCP_RESET_CODE_CONNECTION_REFUSED,
	DCCP_RESET_CODE_BAD_SERVICE_CODE,
	DCCP_RESET_CODE_TOO_BUSY,
	DCCP_RESET_CODE_BAD_INIT_COOKIE,
	DCCP_RESET_CODE_AGGRESSION_PENALTY,
};

/* DCCP options */
enum {
	DCCPO_PADDING = 0,
	DCCPO_MANDATORY = 1,
	DCCPO_MIN_RESERVED = 3,
	DCCPO_MAX_RESERVED = 31,
	DCCPO_NDP_COUNT = 37,
	DCCPO_ACK_VECTOR_0 = 38,
	DCCPO_ACK_VECTOR_1 = 39,
	DCCPO_TIMESTAMP = 41,
	DCCPO_TIMESTAMP_ECHO = 42,
	DCCPO_ELAPSED_TIME = 43,
	DCCPO_MAX = 45,
	DCCPO_MIN_CCID_SPECIFIC = 128,
	DCCPO_MAX_CCID_SPECIFIC = 255,
};

/* DCCP features */
enum {
	DCCPF_RESERVED = 0,
	DCCPF_SEQUENCE_WINDOW = 3,
	DCCPF_SEND_ACK_VECTOR = 6,
	DCCPF_SEND_NDP_COUNT = 7,
	/* 10-127 reserved */
	DCCPF_MIN_CCID_SPECIFIC = 128,
	DCCPF_MAX_CCID_SPECIFIC = 255,
};

/* initial values for each feature */
#define DCCPF_INITIAL_SEQUENCE_WINDOW		100
/* FIXME: for now we're using CCID 3 (TFRC) */
#define DCCPF_INITIAL_CCID			3
#define DCCPF_INITIAL_SEND_ACK_VECTOR		0
/* FIXME: for now we're default to 1 but it should really be 0 */
#define DCCPF_INITIAL_SEND_NDP_COUNT		1

#define DCCP_NDP_LIMIT 0xFFFFFF

/**
 * struct dccp_options - option values for a DCCP connection
 * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
 * @dccpo_ccid - Congestion Control Id (CCID) (section 10)
 * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
 * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
 */
struct dccp_options {
	__u64	dccpo_sequence_window;
	__u8	dccpo_ccid;
	__u8	dccpo_send_ack_vector;
	__u8	dccpo_send_ndp_count;
};

extern void __dccp_options_init(struct dccp_options *dccpo);
extern void dccp_options_init(struct dccp_options *dccpo);
extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);

/**
 * struct dccp_request_sock - represent DCCP-specific connection request state
 *
 * @dreq_inet_rsk - generic inet request sock; must be first
 * @dreq_iss - initial sequence number sent
 * @dreq_isr - initial sequence number received
 * @dreq_service - service code present on the DCCP-Request
 */
struct dccp_request_sock {
	struct inet_request_sock dreq_inet_rsk;
	__u64			 dreq_iss;
	__u64			 dreq_isr;
	__u32			 dreq_service;
};

static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
{
	return (struct dccp_request_sock *)req;
}

/* Read about the ECN nonce to see why it is 253 */
#define DCCP_MAX_ACK_VECTOR_LEN 253

struct dccp_options_received {
	u32	dccpor_ndp:24,
		dccpor_ack_vector_len:8;
	u32	dccpor_ack_vector_idx:10;
	/* 22 bits hole, try to pack */
	u32	dccpor_timestamp;
	u32	dccpor_timestamp_echo;
	u32	dccpor_elapsed_time;
};

struct ccid;

enum dccp_role {
	DCCP_ROLE_UNDEFINED,
	DCCP_ROLE_LISTEN,
	DCCP_ROLE_CLIENT,
	DCCP_ROLE_SERVER,
};

/**
 * struct dccp_sock - DCCP socket state
 *
 * @dccps_swl - sequence number window low
 * @dccps_swh - sequence number window high
 * @dccps_awl - acknowledgement number window low
 * @dccps_awh - acknowledgement number window high
 * @dccps_iss - initial sequence number sent
 * @dccps_isr - initial sequence number received
 * @dccps_osr - first OPEN sequence number received
 * @dccps_gss - greatest sequence number sent
 * @dccps_gsr - greatest valid sequence number received
 * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
 * @dccps_service - service code for this connection
 * @dccps_timestamp_time - time of latest TIMESTAMP option
 * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
 * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
 * @dccps_pmtu_cookie - Last pmtu seen by socket
 * @dccps_mss_cache - cached maximum segment size
 * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it
 * @dccps_role - Role of this sock, one of %dccp_role
 * @dccps_ndp_count - number of Non Data Packets since last data packet
 * @dccps_options - negotiated feature/option values for this connection
 * @dccps_hc_rx_ackpkts - receiver half connection acked packets
 * @dccps_hc_rx_ccid_private - per-CCID private state, receiver half connection
 * @dccps_hc_tx_ccid_private - per-CCID private state, sender half connection
 * @dccps_hc_rx_ccid - CCID operations used by the receiver half connection
 * @dccps_hc_tx_ccid - CCID operations used by the sender half connection
 * @dccps_options_received - option values parsed from the latest packet
 */
struct dccp_sock {
	/* inet_connection_sock has to be the first member of dccp_sock */
	struct inet_connection_sock	dccps_inet_connection;
	__u64				dccps_swl;
	__u64				dccps_swh;
	__u64				dccps_awl;
	__u64				dccps_awh;
	__u64				dccps_iss;
	__u64				dccps_isr;
	__u64				dccps_osr;
	__u64				dccps_gss;
	__u64				dccps_gsr;
	__u64				dccps_gar;
	unsigned long			dccps_service;
	unsigned long			dccps_timestamp_time;
	__u32				dccps_timestamp_echo;
	__u32				dccps_avg_packet_size;
	unsigned long			dccps_ndp_count;
	__u16				dccps_ext_header_len;
	__u32				dccps_pmtu_cookie;
	__u32				dccps_mss_cache;
	struct dccp_options		dccps_options;
	struct dccp_ackpkts		*dccps_hc_rx_ackpkts;
	void				*dccps_hc_rx_ccid_private;
	void				*dccps_hc_tx_ccid_private;
	struct ccid			*dccps_hc_rx_ccid;
	struct ccid			*dccps_hc_tx_ccid;
	struct dccp_options_received	dccps_options_received;
	enum dccp_role			dccps_role:2;
};

/* Relies on inet_connection_sock being the first member of dccp_sock */
static inline struct dccp_sock *dccp_sk(const struct sock *sk)
{
	return (struct dccp_sock *)sk;
}

/* Human-readable role of @sk, mainly for debug logging */
static inline const char *dccp_role(const struct sock *sk)
{
	switch (dccp_sk(sk)->dccps_role) {
	case DCCP_ROLE_UNDEFINED: return "undefined";
	case DCCP_ROLE_LISTEN:	  return "listen";
	case DCCP_ROLE_SERVER:	  return "server";
	case DCCP_ROLE_CLIENT:	  return "client";
	}
	return NULL;
}

#endif /* _LINUX_DCCP_H */
+1
include/linux/in.h
··· 32 32 IPPROTO_PUP = 12, /* PUP protocol */ 33 33 IPPROTO_UDP = 17, /* User Datagram Protocol */ 34 34 IPPROTO_IDP = 22, /* XNS IDP protocol */ 35 + IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ 35 36 IPPROTO_RSVP = 46, /* RSVP protocol */ 36 37 IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ 37 38
+1
include/linux/net.h
··· 84 84 SOCK_RAW = 3, 85 85 SOCK_RDM = 4, 86 86 SOCK_SEQPACKET = 5, 87 + SOCK_DCCP = 6, 87 88 SOCK_PACKET = 10, 88 89 }; 89 90
+1
include/linux/socket.h
··· 271 271 #define SOL_IRDA 266 272 272 #define SOL_NETBEUI 267 273 273 #define SOL_LLC 268 274 + #define SOL_DCCP 269 274 275 275 276 /* IPX options */ 276 277 #define IPX_TYPE 1
+1
net/Kconfig
··· 147 147 148 148 endif 149 149 150 + source "net/dccp/Kconfig" 150 151 source "net/sctp/Kconfig" 151 152 source "net/atm/Kconfig" 152 153 source "net/bridge/Kconfig"
+1
net/Makefile
··· 42 42 obj-$(CONFIG_DECNET) += decnet/ 43 43 obj-$(CONFIG_ECONET) += econet/ 44 44 obj-$(CONFIG_VLAN_8021Q) += 8021q/ 45 + obj-$(CONFIG_IP_DCCP) += dccp/ 45 46 obj-$(CONFIG_IP_SCTP) += sctp/ 46 47 47 48 ifeq ($(CONFIG_NET),y)
+24
net/dccp/Kconfig
# DCCP protocol configuration (net/dccp)
menu "DCCP Configuration (EXPERIMENTAL)"
	depends on INET && EXPERIMENTAL

config IP_DCCP
	tristate "The DCCP Protocol (EXPERIMENTAL)"
	---help---
	  Datagram Congestion Control Protocol

	  From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.

	  The Datagram Congestion Control Protocol (DCCP) is a transport
	  protocol that implements bidirectional, unicast connections of
	  congestion-controlled, unreliable datagrams. It should be suitable
	  for use by applications such as streaming media, Internet telephony,
	  and on-line games

	  To compile this protocol support as a module, choose M here: the
	  module will be called dccp.

	  If in doubt, say N.

source "net/dccp/ccids/Kconfig"

endmenu
+5
net/dccp/Makefile
# Makefile for the DCCP protocol core (net/dccp)

obj-$(CONFIG_IP_DCCP) += dccp.o

# objects composing the dccp module
dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o

# congestion control (CCID) modules live in their own subdirectory
obj-y += ccids/
+139
net/dccp/ccid.c
··· 1 + /* 2 + * net/dccp/ccid.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 + * 7 + * CCID infrastructure 8 + * 9 + * This program is free software; you can redistribute it and/or modify it 10 + * under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + */ 13 + 14 + #include "ccid.h" 15 + 16 + static struct ccid *ccids[CCID_MAX]; 17 + #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) 18 + static atomic_t ccids_lockct = ATOMIC_INIT(0); 19 + static DEFINE_SPINLOCK(ccids_lock); 20 + 21 + /* 22 + * The strategy is: modifications ccids vector are short, do not sleep and 23 + * veeery rare, but read access should be free of any exclusive locks. 24 + */ 25 + static void ccids_write_lock(void) 26 + { 27 + spin_lock(&ccids_lock); 28 + while (atomic_read(&ccids_lockct) != 0) { 29 + spin_unlock(&ccids_lock); 30 + yield(); 31 + spin_lock(&ccids_lock); 32 + } 33 + } 34 + 35 + static inline void ccids_write_unlock(void) 36 + { 37 + spin_unlock(&ccids_lock); 38 + } 39 + 40 + static inline void ccids_read_lock(void) 41 + { 42 + atomic_inc(&ccids_lockct); 43 + spin_unlock_wait(&ccids_lock); 44 + } 45 + 46 + static inline void ccids_read_unlock(void) 47 + { 48 + atomic_dec(&ccids_lockct); 49 + } 50 + 51 + #else 52 + #define ccids_write_lock() do { } while(0) 53 + #define ccids_write_unlock() do { } while(0) 54 + #define ccids_read_lock() do { } while(0) 55 + #define ccids_read_unlock() do { } while(0) 56 + #endif 57 + 58 + int ccid_register(struct ccid *ccid) 59 + { 60 + int err; 61 + 62 + if (ccid->ccid_init == NULL) 63 + return -1; 64 + 65 + ccids_write_lock(); 66 + err = -EEXIST; 67 + if (ccids[ccid->ccid_id] == NULL) { 68 + ccids[ccid->ccid_id] = ccid; 69 + err = 0; 70 + } 71 + ccids_write_unlock(); 72 + if (err == 0) 73 + pr_info("CCID: Registered CCID %d (%s)\n", 74 + ccid->ccid_id, ccid->ccid_name); 75 + return err; 76 + } 77 + 78 + 
EXPORT_SYMBOL_GPL(ccid_register); 79 + 80 + int ccid_unregister(struct ccid *ccid) 81 + { 82 + ccids_write_lock(); 83 + ccids[ccid->ccid_id] = NULL; 84 + ccids_write_unlock(); 85 + pr_info("CCID: Unregistered CCID %d (%s)\n", 86 + ccid->ccid_id, ccid->ccid_name); 87 + return 0; 88 + } 89 + 90 + EXPORT_SYMBOL_GPL(ccid_unregister); 91 + 92 + struct ccid *ccid_init(unsigned char id, struct sock *sk) 93 + { 94 + struct ccid *ccid; 95 + 96 + #ifdef CONFIG_KMOD 97 + if (ccids[id] == NULL) 98 + request_module("net-dccp-ccid-%d", id); 99 + #endif 100 + ccids_read_lock(); 101 + 102 + ccid = ccids[id]; 103 + if (ccid == NULL) 104 + goto out; 105 + 106 + if (!try_module_get(ccid->ccid_owner)) 107 + goto out_err; 108 + 109 + if (ccid->ccid_init(sk) != 0) 110 + goto out_module_put; 111 + out: 112 + ccids_read_unlock(); 113 + return ccid; 114 + out_module_put: 115 + module_put(ccid->ccid_owner); 116 + out_err: 117 + ccid = NULL; 118 + goto out; 119 + } 120 + 121 + EXPORT_SYMBOL_GPL(ccid_init); 122 + 123 + void ccid_exit(struct ccid *ccid, struct sock *sk) 124 + { 125 + if (ccid == NULL) 126 + return; 127 + 128 + ccids_read_lock(); 129 + 130 + if (ccids[ccid->ccid_id] != NULL) { 131 + if (ccid->ccid_exit != NULL) 132 + ccid->ccid_exit(sk); 133 + module_put(ccid->ccid_owner); 134 + } 135 + 136 + ccids_read_unlock(); 137 + } 138 + 139 + EXPORT_SYMBOL_GPL(ccid_exit);
+156
net/dccp/ccid.h
#ifndef _CCID_H
#define _CCID_H
/*
 *  net/dccp/ccid.h
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  CCID infrastructure
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */

#include <net/sock.h>
#include <linux/dccp.h>
#include <linux/list.h>
#include <linux/module.h>

/* Size of the registered-CCID table (see net/dccp/ccid.c) */
#define CCID_MAX 255

/**
 * struct ccid - operations of a Congestion Control ID module
 *
 * All hooks except ccid_init are optional; the wrappers below treat a NULL
 * hook as a no-op.  ccid_hc_rx_* hooks drive the receiver half-connection,
 * ccid_hc_tx_* hooks the sender half-connection.
 */
struct ccid {
	unsigned char	ccid_id;	/* index into the CCID table */
	const char	*ccid_name;	/* human readable name, for logging */
	struct module	*ccid_owner;	/* refcounted across ccid_init/exit */
	int		(*ccid_init)(struct sock *sk);	/* mandatory */
	void		(*ccid_exit)(struct sock *sk);
	int		(*ccid_hc_rx_init)(struct sock *sk);
	int		(*ccid_hc_tx_init)(struct sock *sk);
	void		(*ccid_hc_rx_exit)(struct sock *sk);
	void		(*ccid_hc_tx_exit)(struct sock *sk);
	void		(*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
	int		(*ccid_hc_rx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	void		(*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
	void		(*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
	void		(*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
	int		(*ccid_hc_tx_parse_options)(struct sock *sk,
						    unsigned char option,
						    unsigned char len, u16 idx,
						    unsigned char* value);
	int		(*ccid_hc_tx_send_packet)(struct sock *sk,
						  struct sk_buff *skb, int len,
						  long *delay);
	void		(*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
};

extern int ccid_register(struct ccid *ccid);
extern int ccid_unregister(struct ccid *ccid);

extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
extern void ccid_exit(struct ccid *ccid, struct sock *sk);

/* Take an extra reference on the CCID's owning module */
static inline void __ccid_get(struct ccid *ccid)
{
	__module_get(ccid->ccid_owner);
}

/* Ask the TX CCID about sending @skb; returns 0 when the hook is absent,
 * otherwise whatever the CCID's send_packet hook returns */
static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
					 struct sk_buff *skb, int len,
					 long *delay)
{
	int rc = 0;
	if (ccid->ccid_hc_tx_send_packet != NULL)
		rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
	return rc;
}

/* Notify the TX CCID that a packet of @len bytes was sent */
static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
					  int more, int len)
{
	if (ccid->ccid_hc_tx_packet_sent != NULL)
		ccid->ccid_hc_tx_packet_sent(sk, more, len);
}

/* Initialize the receiver half-connection state; 0 when the hook is absent */
static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
{
	int rc = 0;
	if (ccid->ccid_hc_rx_init != NULL)
		rc = ccid->ccid_hc_rx_init(sk);
	return rc;
}

/* Initialize the sender half-connection state; 0 when the hook is absent */
static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
{
	int rc = 0;
	if (ccid->ccid_hc_tx_init != NULL)
		rc = ccid->ccid_hc_tx_init(sk);
	return rc;
}

/* Tear down the receiver half-connection state */
static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_rx_exit != NULL)
		ccid->ccid_hc_rx_exit(sk);
}

/* Tear down the sender half-connection state */
static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
{
	if (ccid->ccid_hc_tx_exit != NULL)
		ccid->ccid_hc_tx_exit(sk);
}

/* Hand an incoming packet to the receiver half-connection */
static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_hc_rx_packet_recv != NULL)
		ccid->ccid_hc_rx_packet_recv(sk, skb);
}

/* Hand an incoming packet to the sender half-connection */
static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
					  struct sk_buff *skb)
{
	if (ccid->ccid_hc_tx_packet_recv != NULL)
		ccid->ccid_hc_tx_packet_recv(sk, skb);
}

/* Let the TX CCID parse a CCID-specific option; 0 when the hook is absent */
static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char* value)
{
	int rc = 0;
	if (ccid->ccid_hc_tx_parse_options != NULL)
		rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
	return rc;
}

/* Let the RX CCID parse a CCID-specific option; 0 when the hook is absent */
static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
					   unsigned char option,
					   unsigned char len, u16 idx,
					   unsigned char* value)
{
	int rc = 0;
	if (ccid->ccid_hc_rx_parse_options != NULL)
		rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
	return rc;
}

/* Let the TX CCID insert its options into an outgoing packet */
static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
					     struct sk_buff *skb)
{
	if (ccid->ccid_hc_tx_insert_options != NULL)
		ccid->ccid_hc_tx_insert_options(sk, skb);
}

/* Let the RX CCID insert its options into an outgoing packet */
static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
					     struct sk_buff *skb)
{
	if (ccid->ccid_hc_rx_insert_options != NULL)
		ccid->ccid_hc_rx_insert_options(sk, skb);
}
#endif /* _CCID_H */
+25
net/dccp/ccids/Kconfig
··· 1 + menu "DCCP CCIDs Configuration (EXPERIMENTAL)" 2 + depends on IP_DCCP && EXPERIMENTAL 3 + 4 + config IP_DCCP_CCID3 5 + tristate "CCID3 (TFRC) (EXPERIMENTAL)" 6 + depends on IP_DCCP 7 + ---help--- 8 + CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based 9 + rate-controlled congestion control mechanism. TFRC is designed to 10 + be reasonably fair when competing for bandwidth with TCP-like flows, 11 + where a flow is "reasonably fair" if its sending rate is generally 12 + within a factor of two of the sending rate of a TCP flow under the 13 + same conditions. However, TFRC has a much lower variation of 14 + throughput over time compared with TCP, which makes CCID 3 more 15 + suitable than CCID 2 for applications such as streaming media where a 16 + relatively smooth sending rate is of importance. 17 + 18 + CCID 3 is further described in [CCID 3 PROFILE]. The TFRC 19 + congestion control algorithms were initially described in RFC 3448. 20 + 21 + This text was extracted from draft-ietf-dccp-spec-11.txt. 22 + 23 + If in doubt, say M. 24 + 25 + endmenu
+3
net/dccp/ccids/Makefile
# Makefile for DCCP congestion control modules (net/dccp/ccids)

obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o

# CCID3 (TFRC) module objects
dccp_ccid3-y := ccid3.o
+2164
net/dccp/ccids/ccid3.c
··· 1 + /* 2 + * net/dccp/ccids/ccid3.c 3 + * 4 + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 5 + * 6 + * An implementation of the DCCP protocol 7 + * 8 + * This code has been developed by the University of Waikato WAND 9 + * research group. For further information please see http://www.wand.net.nz/ 10 + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz 11 + * 12 + * This code also uses code from Lulea University, rereleased as GPL by its 13 + * authors: 14 + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon 15 + * 16 + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft 17 + * and to make it work as a loadable module in the DCCP stack written by 18 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. 19 + * 20 + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 21 + * 22 + * This program is free software; you can redistribute it and/or modify 23 + * it under the terms of the GNU General Public License as published by 24 + * the Free Software Foundation; either version 2 of the License, or 25 + * (at your option) any later version. 26 + * 27 + * This program is distributed in the hope that it will be useful, 28 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 29 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 + * GNU General Public License for more details. 31 + * 32 + * You should have received a copy of the GNU General Public License 33 + * along with this program; if not, write to the Free Software 34 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 35 + */ 36 + 37 + #include "../ccid.h" 38 + #include "../dccp.h" 39 + #include "ccid3.h" 40 + 41 + #ifdef CCID3_DEBUG 42 + extern int ccid3_debug; 43 + 44 + #define ccid3_pr_debug(format, a...) \ 45 + do { if (ccid3_debug) \ 46 + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ 47 + } while (0) 48 + #else 49 + #define ccid3_pr_debug(format, a...) 
50 + #endif 51 + 52 + #define TFRC_MIN_PACKET_SIZE 16 53 + #define TFRC_STD_PACKET_SIZE 256 54 + #define TFRC_MAX_PACKET_SIZE 65535 55 + 56 + #define USEC_IN_SEC 1000000 57 + 58 + #define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) 59 + /* two seconds as per CCID3 spec 11 */ 60 + 61 + #define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) 62 + /* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ 63 + 64 + #define TFRC_WIN_COUNT_PER_RTT 4 65 + #define TFRC_WIN_COUNT_LIMIT 16 66 + 67 + #define TFRC_MAX_BACK_OFF_TIME 64 68 + /* above is in seconds */ 69 + 70 + #define TFRC_SMALLEST_P 40 71 + 72 + #define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ 73 + 74 + /* Number of later packets received before one is considered lost */ 75 + #define TFRC_RECV_NUM_LATE_LOSS 3 76 + 77 + enum ccid3_options { 78 + TFRC_OPT_LOSS_EVENT_RATE = 192, 79 + TFRC_OPT_LOSS_INTERVALS = 193, 80 + TFRC_OPT_RECEIVE_RATE = 194, 81 + }; 82 + 83 + static int ccid3_debug; 84 + 85 + static kmem_cache_t *ccid3_tx_hist_slab; 86 + static kmem_cache_t *ccid3_rx_hist_slab; 87 + static kmem_cache_t *ccid3_loss_interval_hist_slab; 88 + 89 + static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) 90 + { 91 + struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); 92 + 93 + if (entry != NULL) 94 + entry->ccid3htx_sent = 0; 95 + 96 + return entry; 97 + } 98 + 99 + static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) 100 + { 101 + if (entry != NULL) 102 + kmem_cache_free(ccid3_tx_hist_slab, entry); 103 + } 104 + 105 + static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, 106 + struct sk_buff *skb, 107 + int prio) 108 + { 109 + struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); 110 + 111 + if (entry != NULL) { 112 + const struct dccp_hdr *dh = dccp_hdr(skb); 113 + 114 + entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 115 + entry->ccid3hrx_win_count = 
dh->dccph_ccval; 116 + entry->ccid3hrx_type = dh->dccph_type; 117 + entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; 118 + do_gettimeofday(&(entry->ccid3hrx_tstamp)); 119 + } 120 + 121 + return entry; 122 + } 123 + 124 + static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) 125 + { 126 + if (entry != NULL) 127 + kmem_cache_free(ccid3_rx_hist_slab, entry); 128 + } 129 + 130 + static void ccid3_rx_history_delete(struct list_head *hist) 131 + { 132 + struct ccid3_rx_hist_entry *entry, *next; 133 + 134 + list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { 135 + list_del_init(&entry->ccid3hrx_node); 136 + kmem_cache_free(ccid3_rx_hist_slab, entry); 137 + } 138 + } 139 + 140 + static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) 141 + { 142 + return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); 143 + } 144 + 145 + static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) 146 + { 147 + if (entry != NULL) 148 + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); 149 + } 150 + 151 + static void ccid3_loss_interval_history_delete(struct list_head *hist) 152 + { 153 + struct ccid3_loss_interval_hist_entry *entry, *next; 154 + 155 + list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { 156 + list_del_init(&entry->ccid3lih_node); 157 + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); 158 + } 159 + } 160 + 161 + static int ccid3_init(struct sock *sk) 162 + { 163 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 164 + return 0; 165 + } 166 + 167 + static void ccid3_exit(struct sock *sk) 168 + { 169 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 170 + } 171 + 172 + /* TFRC sender states */ 173 + enum ccid3_hc_tx_states { 174 + TFRC_SSTATE_NO_SENT = 1, 175 + TFRC_SSTATE_NO_FBACK, 176 + TFRC_SSTATE_FBACK, 177 + TFRC_SSTATE_TERM, 178 + }; 179 + 180 + #ifdef CCID3_DEBUG 181 + static const char 
*ccid3_tx_state_name(enum ccid3_hc_tx_states state)
{
	/* Human-readable names for the TFRC sender states, debug builds only */
	static char *ccid3_state_names[] = {
	[TFRC_SSTATE_NO_SENT]  = "NO_SENT",
	[TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
	[TFRC_SSTATE_FBACK]    = "FBACK",
	[TFRC_SSTATE_TERM]     = "TERM",
	};

	return ccid3_state_names[state];
}
#endif

/*
 * Record a sender state transition, tracing old -> new state.
 * A transition to the current state is flagged with WARN_ON.
 */
static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;

	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
		       dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state));
	WARN_ON(state == oldstate);
	hctx->ccid3hctx_state = state;
}

/*
 * *result = large - small.
 * NOTE(review): assumes large >= small and that both inputs are normalized
 * (tv_usec < USEC_IN_SEC); the result is wrong otherwise — confirm callers.
 */
static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) {

	result->tv_sec = large.tv_sec-small.tv_sec;
	if (large.tv_usec < small.tv_usec) {
		(result->tv_sec)--;
		result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec;
	} else
		result->tv_usec = large.tv_usec-small.tv_usec;
}

/* Renormalize a timeval whose tv_usec overflowed by less than one second */
static inline void timeval_fix(struct timeval *tv) {
	if (tv->tv_usec >= USEC_IN_SEC) {
		tv->tv_sec++;
		tv->tv_usec -= USEC_IN_SEC;
	}
}

/* returns the difference in usecs between timeval passed in and current time */
static inline u32 now_delta(struct timeval tv) {
	struct timeval now;

	do_gettimeofday(&now);
	return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec);
}

/* Number of rows in the f(p) lookup table below */
#define CALCX_ARRSIZE 500

/* Scaled p value at which the two table columns switch over */
#define CALCX_SPLIT 50000
/* equivalent to 0.05 */

/*
 * Two-column fixed-point lookup table for f(p) as used by ccid3_calc_x
 * (see the derivation in the comment preceding that function): column [1]
 * is consulted for p < 0.05 and column [0] for p >= 0.05.
 */
static const u32 calcx_lookup[CALCX_ARRSIZE][2] = {
	{ 37172 , 8172 },
	{ 53499 , 11567 },
	{ 66664 , 14180 },
	{ 78298 , 16388 },
	{ 89021 , 18339 },
	{ 99147 , 20108 },
	{ 108858
, 21738 }, 244 + { 118273 , 23260 }, 245 + { 127474 , 24693 }, 246 + { 136520 , 26052 }, 247 + { 145456 , 27348 }, 248 + { 154316 , 28589 }, 249 + { 163130 , 29783 }, 250 + { 171919 , 30935 }, 251 + { 180704 , 32049 }, 252 + { 189502 , 33130 }, 253 + { 198328 , 34180 }, 254 + { 207194 , 35202 }, 255 + { 216114 , 36198 }, 256 + { 225097 , 37172 }, 257 + { 234153 , 38123 }, 258 + { 243294 , 39055 }, 259 + { 252527 , 39968 }, 260 + { 261861 , 40864 }, 261 + { 271305 , 41743 }, 262 + { 280866 , 42607 }, 263 + { 290553 , 43457 }, 264 + { 300372 , 44293 }, 265 + { 310333 , 45117 }, 266 + { 320441 , 45929 }, 267 + { 330705 , 46729 }, 268 + { 341131 , 47518 }, 269 + { 351728 , 48297 }, 270 + { 362501 , 49066 }, 271 + { 373460 , 49826 }, 272 + { 384609 , 50577 }, 273 + { 395958 , 51320 }, 274 + { 407513 , 52054 }, 275 + { 419281 , 52780 }, 276 + { 431270 , 53499 }, 277 + { 443487 , 54211 }, 278 + { 455940 , 54916 }, 279 + { 468635 , 55614 }, 280 + { 481581 , 56306 }, 281 + { 494785 , 56991 }, 282 + { 508254 , 57671 }, 283 + { 521996 , 58345 }, 284 + { 536019 , 59014 }, 285 + { 550331 , 59677 }, 286 + { 564939 , 60335 }, 287 + { 579851 , 60988 }, 288 + { 595075 , 61636 }, 289 + { 610619 , 62279 }, 290 + { 626491 , 62918 }, 291 + { 642700 , 63553 }, 292 + { 659253 , 64183 }, 293 + { 676158 , 64809 }, 294 + { 693424 , 65431 }, 295 + { 711060 , 66050 }, 296 + { 729073 , 66664 }, 297 + { 747472 , 67275 }, 298 + { 766266 , 67882 }, 299 + { 785464 , 68486 }, 300 + { 805073 , 69087 }, 301 + { 825103 , 69684 }, 302 + { 845562 , 70278 }, 303 + { 866460 , 70868 }, 304 + { 887805 , 71456 }, 305 + { 909606 , 72041 }, 306 + { 931873 , 72623 }, 307 + { 954614 , 73202 }, 308 + { 977839 , 73778 }, 309 + { 1001557 , 74352 }, 310 + { 1025777 , 74923 }, 311 + { 1050508 , 75492 }, 312 + { 1075761 , 76058 }, 313 + { 1101544 , 76621 }, 314 + { 1127867 , 77183 }, 315 + { 1154739 , 77741 }, 316 + { 1182172 , 78298 }, 317 + { 1210173 , 78852 }, 318 + { 1238753 , 79405 }, 319 + { 1267922 , 79955 }, 
320 + { 1297689 , 80503 }, 321 + { 1328066 , 81049 }, 322 + { 1359060 , 81593 }, 323 + { 1390684 , 82135 }, 324 + { 1422947 , 82675 }, 325 + { 1455859 , 83213 }, 326 + { 1489430 , 83750 }, 327 + { 1523671 , 84284 }, 328 + { 1558593 , 84817 }, 329 + { 1594205 , 85348 }, 330 + { 1630518 , 85878 }, 331 + { 1667543 , 86406 }, 332 + { 1705290 , 86932 }, 333 + { 1743770 , 87457 }, 334 + { 1782994 , 87980 }, 335 + { 1822973 , 88501 }, 336 + { 1863717 , 89021 }, 337 + { 1905237 , 89540 }, 338 + { 1947545 , 90057 }, 339 + { 1990650 , 90573 }, 340 + { 2034566 , 91087 }, 341 + { 2079301 , 91600 }, 342 + { 2124869 , 92111 }, 343 + { 2171279 , 92622 }, 344 + { 2218543 , 93131 }, 345 + { 2266673 , 93639 }, 346 + { 2315680 , 94145 }, 347 + { 2365575 , 94650 }, 348 + { 2416371 , 95154 }, 349 + { 2468077 , 95657 }, 350 + { 2520707 , 96159 }, 351 + { 2574271 , 96660 }, 352 + { 2628782 , 97159 }, 353 + { 2684250 , 97658 }, 354 + { 2740689 , 98155 }, 355 + { 2798110 , 98651 }, 356 + { 2856524 , 99147 }, 357 + { 2915944 , 99641 }, 358 + { 2976382 , 100134 }, 359 + { 3037850 , 100626 }, 360 + { 3100360 , 101117 }, 361 + { 3163924 , 101608 }, 362 + { 3228554 , 102097 }, 363 + { 3294263 , 102586 }, 364 + { 3361063 , 103073 }, 365 + { 3428966 , 103560 }, 366 + { 3497984 , 104045 }, 367 + { 3568131 , 104530 }, 368 + { 3639419 , 105014 }, 369 + { 3711860 , 105498 }, 370 + { 3785467 , 105980 }, 371 + { 3860253 , 106462 }, 372 + { 3936229 , 106942 }, 373 + { 4013410 , 107422 }, 374 + { 4091808 , 107902 }, 375 + { 4171435 , 108380 }, 376 + { 4252306 , 108858 }, 377 + { 4334431 , 109335 }, 378 + { 4417825 , 109811 }, 379 + { 4502501 , 110287 }, 380 + { 4588472 , 110762 }, 381 + { 4675750 , 111236 }, 382 + { 4764349 , 111709 }, 383 + { 4854283 , 112182 }, 384 + { 4945564 , 112654 }, 385 + { 5038206 , 113126 }, 386 + { 5132223 , 113597 }, 387 + { 5227627 , 114067 }, 388 + { 5324432 , 114537 }, 389 + { 5422652 , 115006 }, 390 + { 5522299 , 115474 }, 391 + { 5623389 , 115942 }, 392 + { 5725934 , 
116409 }, 393 + { 5829948 , 116876 }, 394 + { 5935446 , 117342 }, 395 + { 6042439 , 117808 }, 396 + { 6150943 , 118273 }, 397 + { 6260972 , 118738 }, 398 + { 6372538 , 119202 }, 399 + { 6485657 , 119665 }, 400 + { 6600342 , 120128 }, 401 + { 6716607 , 120591 }, 402 + { 6834467 , 121053 }, 403 + { 6953935 , 121514 }, 404 + { 7075025 , 121976 }, 405 + { 7197752 , 122436 }, 406 + { 7322131 , 122896 }, 407 + { 7448175 , 123356 }, 408 + { 7575898 , 123815 }, 409 + { 7705316 , 124274 }, 410 + { 7836442 , 124733 }, 411 + { 7969291 , 125191 }, 412 + { 8103877 , 125648 }, 413 + { 8240216 , 126105 }, 414 + { 8378321 , 126562 }, 415 + { 8518208 , 127018 }, 416 + { 8659890 , 127474 }, 417 + { 8803384 , 127930 }, 418 + { 8948702 , 128385 }, 419 + { 9095861 , 128840 }, 420 + { 9244875 , 129294 }, 421 + { 9395760 , 129748 }, 422 + { 9548529 , 130202 }, 423 + { 9703198 , 130655 }, 424 + { 9859782 , 131108 }, 425 + { 10018296 , 131561 }, 426 + { 10178755 , 132014 }, 427 + { 10341174 , 132466 }, 428 + { 10505569 , 132917 }, 429 + { 10671954 , 133369 }, 430 + { 10840345 , 133820 }, 431 + { 11010757 , 134271 }, 432 + { 11183206 , 134721 }, 433 + { 11357706 , 135171 }, 434 + { 11534274 , 135621 }, 435 + { 11712924 , 136071 }, 436 + { 11893673 , 136520 }, 437 + { 12076536 , 136969 }, 438 + { 12261527 , 137418 }, 439 + { 12448664 , 137867 }, 440 + { 12637961 , 138315 }, 441 + { 12829435 , 138763 }, 442 + { 13023101 , 139211 }, 443 + { 13218974 , 139658 }, 444 + { 13417071 , 140106 }, 445 + { 13617407 , 140553 }, 446 + { 13819999 , 140999 }, 447 + { 14024862 , 141446 }, 448 + { 14232012 , 141892 }, 449 + { 14441465 , 142339 }, 450 + { 14653238 , 142785 }, 451 + { 14867346 , 143230 }, 452 + { 15083805 , 143676 }, 453 + { 15302632 , 144121 }, 454 + { 15523842 , 144566 }, 455 + { 15747453 , 145011 }, 456 + { 15973479 , 145456 }, 457 + { 16201939 , 145900 }, 458 + { 16432847 , 146345 }, 459 + { 16666221 , 146789 }, 460 + { 16902076 , 147233 }, 461 + { 17140429 , 147677 }, 462 + { 17381297 , 
148121 }, 463 + { 17624696 , 148564 }, 464 + { 17870643 , 149007 }, 465 + { 18119154 , 149451 }, 466 + { 18370247 , 149894 }, 467 + { 18623936 , 150336 }, 468 + { 18880241 , 150779 }, 469 + { 19139176 , 151222 }, 470 + { 19400759 , 151664 }, 471 + { 19665007 , 152107 }, 472 + { 19931936 , 152549 }, 473 + { 20201564 , 152991 }, 474 + { 20473907 , 153433 }, 475 + { 20748982 , 153875 }, 476 + { 21026807 , 154316 }, 477 + { 21307399 , 154758 }, 478 + { 21590773 , 155199 }, 479 + { 21876949 , 155641 }, 480 + { 22165941 , 156082 }, 481 + { 22457769 , 156523 }, 482 + { 22752449 , 156964 }, 483 + { 23049999 , 157405 }, 484 + { 23350435 , 157846 }, 485 + { 23653774 , 158287 }, 486 + { 23960036 , 158727 }, 487 + { 24269236 , 159168 }, 488 + { 24581392 , 159608 }, 489 + { 24896521 , 160049 }, 490 + { 25214642 , 160489 }, 491 + { 25535772 , 160929 }, 492 + { 25859927 , 161370 }, 493 + { 26187127 , 161810 }, 494 + { 26517388 , 162250 }, 495 + { 26850728 , 162690 }, 496 + { 27187165 , 163130 }, 497 + { 27526716 , 163569 }, 498 + { 27869400 , 164009 }, 499 + { 28215234 , 164449 }, 500 + { 28564236 , 164889 }, 501 + { 28916423 , 165328 }, 502 + { 29271815 , 165768 }, 503 + { 29630428 , 166208 }, 504 + { 29992281 , 166647 }, 505 + { 30357392 , 167087 }, 506 + { 30725779 , 167526 }, 507 + { 31097459 , 167965 }, 508 + { 31472452 , 168405 }, 509 + { 31850774 , 168844 }, 510 + { 32232445 , 169283 }, 511 + { 32617482 , 169723 }, 512 + { 33005904 , 170162 }, 513 + { 33397730 , 170601 }, 514 + { 33792976 , 171041 }, 515 + { 34191663 , 171480 }, 516 + { 34593807 , 171919 }, 517 + { 34999428 , 172358 }, 518 + { 35408544 , 172797 }, 519 + { 35821174 , 173237 }, 520 + { 36237335 , 173676 }, 521 + { 36657047 , 174115 }, 522 + { 37080329 , 174554 }, 523 + { 37507197 , 174993 }, 524 + { 37937673 , 175433 }, 525 + { 38371773 , 175872 }, 526 + { 38809517 , 176311 }, 527 + { 39250924 , 176750 }, 528 + { 39696012 , 177190 }, 529 + { 40144800 , 177629 }, 530 + { 40597308 , 178068 }, 531 + { 41053553 
, 178507 }, 532 + { 41513554 , 178947 }, 533 + { 41977332 , 179386 }, 534 + { 42444904 , 179825 }, 535 + { 42916290 , 180265 }, 536 + { 43391509 , 180704 }, 537 + { 43870579 , 181144 }, 538 + { 44353520 , 181583 }, 539 + { 44840352 , 182023 }, 540 + { 45331092 , 182462 }, 541 + { 45825761 , 182902 }, 542 + { 46324378 , 183342 }, 543 + { 46826961 , 183781 }, 544 + { 47333531 , 184221 }, 545 + { 47844106 , 184661 }, 546 + { 48358706 , 185101 }, 547 + { 48877350 , 185541 }, 548 + { 49400058 , 185981 }, 549 + { 49926849 , 186421 }, 550 + { 50457743 , 186861 }, 551 + { 50992759 , 187301 }, 552 + { 51531916 , 187741 }, 553 + { 52075235 , 188181 }, 554 + { 52622735 , 188622 }, 555 + { 53174435 , 189062 }, 556 + { 53730355 , 189502 }, 557 + { 54290515 , 189943 }, 558 + { 54854935 , 190383 }, 559 + { 55423634 , 190824 }, 560 + { 55996633 , 191265 }, 561 + { 56573950 , 191706 }, 562 + { 57155606 , 192146 }, 563 + { 57741621 , 192587 }, 564 + { 58332014 , 193028 }, 565 + { 58926806 , 193470 }, 566 + { 59526017 , 193911 }, 567 + { 60129666 , 194352 }, 568 + { 60737774 , 194793 }, 569 + { 61350361 , 195235 }, 570 + { 61967446 , 195677 }, 571 + { 62589050 , 196118 }, 572 + { 63215194 , 196560 }, 573 + { 63845897 , 197002 }, 574 + { 64481179 , 197444 }, 575 + { 65121061 , 197886 }, 576 + { 65765563 , 198328 }, 577 + { 66414705 , 198770 }, 578 + { 67068508 , 199213 }, 579 + { 67726992 , 199655 }, 580 + { 68390177 , 200098 }, 581 + { 69058085 , 200540 }, 582 + { 69730735 , 200983 }, 583 + { 70408147 , 201426 }, 584 + { 71090343 , 201869 }, 585 + { 71777343 , 202312 }, 586 + { 72469168 , 202755 }, 587 + { 73165837 , 203199 }, 588 + { 73867373 , 203642 }, 589 + { 74573795 , 204086 }, 590 + { 75285124 , 204529 }, 591 + { 76001380 , 204973 }, 592 + { 76722586 , 205417 }, 593 + { 77448761 , 205861 }, 594 + { 78179926 , 206306 }, 595 + { 78916102 , 206750 }, 596 + { 79657310 , 207194 }, 597 + { 80403571 , 207639 }, 598 + { 81154906 , 208084 }, 599 + { 81911335 , 208529 }, 600 + { 
82672880 , 208974 }, 601 + { 83439562 , 209419 }, 602 + { 84211402 , 209864 }, 603 + { 84988421 , 210309 }, 604 + { 85770640 , 210755 }, 605 + { 86558080 , 211201 }, 606 + { 87350762 , 211647 }, 607 + { 88148708 , 212093 }, 608 + { 88951938 , 212539 }, 609 + { 89760475 , 212985 }, 610 + { 90574339 , 213432 }, 611 + { 91393551 , 213878 }, 612 + { 92218133 , 214325 }, 613 + { 93048107 , 214772 }, 614 + { 93883493 , 215219 }, 615 + { 94724314 , 215666 }, 616 + { 95570590 , 216114 }, 617 + { 96422343 , 216561 }, 618 + { 97279594 , 217009 }, 619 + { 98142366 , 217457 }, 620 + { 99010679 , 217905 }, 621 + { 99884556 , 218353 }, 622 + { 100764018 , 218801 }, 623 + { 101649086 , 219250 }, 624 + { 102539782 , 219698 }, 625 + { 103436128 , 220147 }, 626 + { 104338146 , 220596 }, 627 + { 105245857 , 221046 }, 628 + { 106159284 , 221495 }, 629 + { 107078448 , 221945 }, 630 + { 108003370 , 222394 }, 631 + { 108934074 , 222844 }, 632 + { 109870580 , 223294 }, 633 + { 110812910 , 223745 }, 634 + { 111761087 , 224195 }, 635 + { 112715133 , 224646 }, 636 + { 113675069 , 225097 }, 637 + { 114640918 , 225548 }, 638 + { 115612702 , 225999 }, 639 + { 116590442 , 226450 }, 640 + { 117574162 , 226902 }, 641 + { 118563882 , 227353 }, 642 + { 119559626 , 227805 }, 643 + { 120561415 , 228258 }, 644 + { 121569272 , 228710 }, 645 + { 122583219 , 229162 }, 646 + { 123603278 , 229615 }, 647 + { 124629471 , 230068 }, 648 + { 125661822 , 230521 }, 649 + { 126700352 , 230974 }, 650 + { 127745083 , 231428 }, 651 + { 128796039 , 231882 }, 652 + { 129853241 , 232336 }, 653 + { 130916713 , 232790 }, 654 + { 131986475 , 233244 }, 655 + { 133062553 , 233699 }, 656 + { 134144966 , 234153 }, 657 + { 135233739 , 234608 }, 658 + { 136328894 , 235064 }, 659 + { 137430453 , 235519 }, 660 + { 138538440 , 235975 }, 661 + { 139652876 , 236430 }, 662 + { 140773786 , 236886 }, 663 + { 141901190 , 237343 }, 664 + { 143035113 , 237799 }, 665 + { 144175576 , 238256 }, 666 + { 145322604 , 238713 }, 667 + { 146476218 , 
239170 }, 668 + { 147636442 , 239627 }, 669 + { 148803298 , 240085 }, 670 + { 149976809 , 240542 }, 671 + { 151156999 , 241000 }, 672 + { 152343890 , 241459 }, 673 + { 153537506 , 241917 }, 674 + { 154737869 , 242376 }, 675 + { 155945002 , 242835 }, 676 + { 157158929 , 243294 }, 677 + { 158379673 , 243753 }, 678 + { 159607257 , 244213 }, 679 + { 160841704 , 244673 }, 680 + { 162083037 , 245133 }, 681 + { 163331279 , 245593 }, 682 + { 164586455 , 246054 }, 683 + { 165848586 , 246514 }, 684 + { 167117696 , 246975 }, 685 + { 168393810 , 247437 }, 686 + { 169676949 , 247898 }, 687 + { 170967138 , 248360 }, 688 + { 172264399 , 248822 }, 689 + { 173568757 , 249284 }, 690 + { 174880235 , 249747 }, 691 + { 176198856 , 250209 }, 692 + { 177524643 , 250672 }, 693 + { 178857621 , 251136 }, 694 + { 180197813 , 251599 }, 695 + { 181545242 , 252063 }, 696 + { 182899933 , 252527 }, 697 + { 184261908 , 252991 }, 698 + { 185631191 , 253456 }, 699 + { 187007807 , 253920 }, 700 + { 188391778 , 254385 }, 701 + { 189783129 , 254851 }, 702 + { 191181884 , 255316 }, 703 + { 192588065 , 255782 }, 704 + { 194001698 , 256248 }, 705 + { 195422805 , 256714 }, 706 + { 196851411 , 257181 }, 707 + { 198287540 , 257648 }, 708 + { 199731215 , 258115 }, 709 + { 201182461 , 258582 }, 710 + { 202641302 , 259050 }, 711 + { 204107760 , 259518 }, 712 + { 205581862 , 259986 }, 713 + { 207063630 , 260454 }, 714 + { 208553088 , 260923 }, 715 + { 210050262 , 261392 }, 716 + { 211555174 , 261861 }, 717 + { 213067849 , 262331 }, 718 + { 214588312 , 262800 }, 719 + { 216116586 , 263270 }, 720 + { 217652696 , 263741 }, 721 + { 219196666 , 264211 }, 722 + { 220748520 , 264682 }, 723 + { 222308282 , 265153 }, 724 + { 223875978 , 265625 }, 725 + { 225451630 , 266097 }, 726 + { 227035265 , 266569 }, 727 + { 228626905 , 267041 }, 728 + { 230226576 , 267514 }, 729 + { 231834302 , 267986 }, 730 + { 233450107 , 268460 }, 731 + { 235074016 , 268933 }, 732 + { 236706054 , 269407 }, 733 + { 238346244 , 269881 }, 734 + { 
239994613 , 270355 }, 735 + { 241651183 , 270830 }, 736 + { 243315981 , 271305 } 737 + }; 738 + 739 + /* Calculate the send rate as per section 3.1 of RFC3448 740 + 741 + Returns send rate in bytes per second 742 + 743 + Integer maths and lookups are used as not allowed floating point in kernel 744 + 745 + The function for Xcalc as per section 3.1 of RFC3448 is: 746 + 747 + X = s 748 + ------------------------------------------------------------- 749 + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) 750 + 751 + where 752 + X is the trasmit rate in bytes/second 753 + s is the packet size in bytes 754 + R is the round trip time in seconds 755 + p is the loss event rate, between 0 and 1.0, of the number of loss events 756 + as a fraction of the number of packets transmitted 757 + t_RTO is the TCP retransmission timeout value in seconds 758 + b is the number of packets acknowledged by a single TCP acknowledgement 759 + 760 + we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: 761 + 762 + X = s 763 + ----------------------------------------------------------------------- 764 + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) 765 + 766 + 767 + which we can break down into: 768 + 769 + X = s 770 + -------- 771 + R * f(p) 772 + 773 + where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) 774 + 775 + Function parameters: 776 + s - bytes 777 + R - RTT in usecs 778 + p - loss rate (decimal fraction multiplied by 1,000,000) 779 + 780 + Returns Xcalc in bytes per second 781 + 782 + DON'T alter this code unless you run test cases against it as the code 783 + has been manipulated to stop underflow/overlow. 
 */
static u32 ccid3_calc_x(u16 s, u32 R, u32 p)
{
	int index;
	u32 f;
	u64 tmp1, tmp2;

	/* pick the lookup row; the table resolution differs on either side
	 * of CALCX_SPLIT (p == 0.05, scaled by 1e6) */
	if (p < CALCX_SPLIT)
		index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1;
	else
		index = (p / (1000000 / CALCX_ARRSIZE)) - 1;

	if (index < 0)
		/* p should be 0 unless there is a bug in my code */
		index = 0;

	if (R == 0)
		R = 1; /* RTT can't be zero or else divide by zero */

	BUG_ON(index >= CALCX_ARRSIZE);

	/* column [0] holds f(p) for p >= 0.05, column [1] for p < 0.05 */
	if (p >= CALCX_SPLIT)
		f = calcx_lookup[index][0];
	else
		f = calcx_lookup[index][1];

	/* X = s / (R * f(p)), staged so the intermediates stay in 64 bits */
	tmp1 = ((u64)s * 100000000);
	tmp2 = ((u64)R * (u64)f);
	do_div(tmp2,10000);
	do_div(tmp1,tmp2);
	/* don't alter above math unless you test due to overflow on 32 bit */

	return (u32)tmp1;
}

/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
{
	if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK)
		return;
	/* if no feedback spec says t_ipi is 1 second (set elsewhere and then
	 * doubles after every no feedback timer (separate function) */

	/* X is the divisor below; clamp it away from zero first */
	if (hctx->ccid3hctx_x < 10) {
		ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n");
		hctx->ccid3hctx_x = 10;
	}
	hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000)
				/ (hctx->ccid3hctx_x / 10);
	/* reason for above maths with 10 in there is to avoid 32 bit
	 * overflow for jumbo packets */

}

/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */
static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
{
	hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN);

}

/*
 * Update X by
 *    If (p > 0)
 *       x_calc = calcX(s, R, p);
 *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
 *    Else
 *       If (now - tld >= R)
 *          X = max(min(2 * X, 2 * X_recv), s / R);
 *          tld = now;
 */
static void ccid3_hc_tx_update_x(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;

	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {	/* to avoid large error in calcX */
		hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s,
						      hctx->ccid3hctx_rtt,
						      hctx->ccid3hctx_p);
		hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv),
					  hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME);
	} else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) {
		u32 rtt = hctx->ccid3hctx_rtt;
		if (rtt < 10) {
			rtt = 10;
		} /* avoid divide by zero below */

		hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x),
					  (hctx->ccid3hctx_s * 100000) / (rtt / 10));
		/* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */
		do_gettimeofday(&hctx->ccid3hctx_t_ld);
	}

	/* X is used as a divisor elsewhere; never let it reach zero */
	if (hctx->ccid3hctx_x == 0) {
		ccid3_pr_debug("ccid3hctx_x = 0!\n");
		hctx->ccid3hctx_x = 1;
	}
}

/*
 * No-feedback timer: fires when no feedback arrived within the scheduled
 * interval and backs off the allowed sending rate, then re-arms itself.
 */
static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;
	struct dccp_sock *dp = dccp_sk(sk);
	unsigned long next_tmout = 0;
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	u32 rtt;

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk)) {
		/* Try again later.
		 */
		/* XXX: set some sensible MIB */
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5);
		goto out;
	}

	ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
		       ccid3_tx_state_name(hctx->ccid3hctx_state));

	/* X is used as a divisor below; clamp it away from zero first */
	if (hctx->ccid3hctx_x < 10) {
		ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n");
		hctx->ccid3hctx_x = 10;
	}

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_TERM:
		goto out;
	case TFRC_SSTATE_NO_FBACK:
		/* Halve send rate */
		hctx->ccid3hctx_x /= 2;
		/* but never drop below the s / t_mbi floor */
		if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME))
			hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;

		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n",
			       dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state),
			       hctx->ccid3hctx_x);
		/* next timeout: max(2 * s / X, initial timeout) */
		next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000)
				   / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT);
		/* do above maths with 100000 and 10 to prevent overflow on 32 bit */
		/* FIXME - not sure above calculation is correct. See section 5 of CCID3 11
		 * should adjust tx_t_ipi and double that to achieve it really */
		break;
	case TFRC_SSTATE_FBACK:
		/* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */
		rtt = hctx->ccid3hctx_rtt;
		if (rtt < 10)
			rtt = 10;
		/* stop divide by zero below */
		if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >=
					      4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) {
			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk,
				       ccid3_tx_state_name(hctx->ccid3hctx_state));
			/* Halve sending rate */

			/*  If (X_calc > 2 * X_recv)
			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
			 *  Else
			 *    X_recv = X_calc / 4;
			 */
			BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0);

			/* check also if p is zero -> x_calc is infinity? */
			if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
							       hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
			else
				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;

			/* Update sending rate */
			ccid3_hc_tx_update_x(sk);
		}
		if (hctx->ccid3hctx_x == 0) {
			ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n");
			hctx->ccid3hctx_x = 10;
		}
		/* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
		next_tmout = max_t(u32, inet_csk(sk)->icsk_rto,
				   2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10));
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		goto out;
	}

	/* re-arm the timer (at least one jiffy out) and mark the sender idle
	 * until the next transmission */
	sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
		       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
	hctx->ccid3hctx_idle = 1;
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

/*
 * Decide whether a DATA/DATAACK packet may go out now.
 * Returns 0 when the packet may be sent, -EAGAIN with *delay set (in msecs,
 * per the conversion comment below) when the caller must wait, -ENOBUFS
 * when no history entry could be allocated, -ENOTCONN/-EINVAL otherwise.
 */
static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb,
				   int len, long *delay)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_tx_hist_entry *new_packet = NULL;
	struct timeval now;
	int rc = -ENOTCONN;

//	ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len);
	/*
	 * check if pure ACK or Terminating */
	/* XXX: We only call this function for DATA and DATAACK, on, these packets can have
	 * zero length, but why the comment about "pure ACK"?
	 */
	if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
		goto out;

	/* See if last packet allocated was not sent */
	if (!list_empty(&hctx->ccid3hctx_hist))
		new_packet = list_entry(hctx->ccid3hctx_hist.next,
					struct ccid3_tx_hist_entry, ccid3htx_node);

	if (new_packet == NULL || new_packet->ccid3htx_sent) {
		new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC);

		rc = -ENOBUFS;
		if (new_packet == NULL) {
			ccid3_pr_debug("%s, sk=%p, not enough mem to add "
				       "to history, send refused\n", dccp_role(sk), sk);
			goto out;
		}

		list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist);
	}

	do_gettimeofday(&now);

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* very first packet: arm the no-feedback timer, start the
		 * window counter, and allow the send immediately */
		ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk,
			       dp->dccps_gss);

		hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
		hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk;
		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
		hctx->ccid3hctx_last_win_count = 0;
		hctx->ccid3hctx_t_last_win_count = now;
		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
		hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;

		/* Set nominal send time for initial packet */
		hctx->ccid3hctx_t_nom = now;
		(hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
		timeval_fix(&(hctx->ccid3hctx_t_nom));
		ccid3_calc_new_delta(hctx);
		rc = 0;
		break;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
		/* compare the nominal send time with now: a positive *delay
		 * means the packet is early and the caller must wait */
		*delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta);
		ccid3_pr_debug("send_packet delay=%ld\n",*delay);
		*delay /= -1000;
		/* divide by -1000 is to convert to ms and get sign right */
		rc = *delay > 0 ? -EAGAIN : 0;
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		rc = -EINVAL;
		break;
	}

	/* Can we send? if so add options and add to packet history */
	if (rc == 0)
		new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
out:
	return rc;
}

/*
 * Called after a packet went out: timestamp the pending TX history entry,
 * advance the window counter when at least a quarter RTT has elapsed, and
 * schedule the nominal time of the next transmission.
 */
static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_tx_hist_entry *packet = NULL;
	struct timeval now;

//	ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len);
	BUG_ON(hctx == NULL);

	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
		ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
			       dccp_role(sk), sk);
		return;
	}

	do_gettimeofday(&now);

	/* check if we have sent a data packet */
	if (len > 0) {
		unsigned long quarter_rtt;

		if (list_empty(&hctx->ccid3hctx_hist)) {
			printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__);
			return;
		}
		/* the entry allocated in send_packet is at the list head */
		packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node);
		if (packet->ccid3htx_sent) {
			printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__);
			return;
		}
		packet->ccid3htx_tstamp = now;
		packet->ccid3htx_seqno = dp->dccps_gss;
//		ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno);

		/*
		 * Check if win_count have changed */
		/* COMPLIANCE_BEGIN
		 * Algorithm in "8.1. Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt
		 */
		quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4);
		if (quarter_rtt > 0) {
			hctx->ccid3hctx_t_last_win_count = now;
			hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count +
							  min_t(unsigned long, quarter_rtt, 5)) % 16;
			ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n",
				       dccp_role(sk), sk,
				       packet->ccid3htx_win_count,
				       hctx->ccid3hctx_last_win_count);
		}
		/* COMPLIANCE_END */
#if 0
		ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n",
			       dccp_role(sk), sk,
			       packet->ccid3htx_seqno,
			       packet->ccid3htx_win_count);
#endif
		hctx->ccid3hctx_idle = 0;
		packet->ccid3htx_sent = 1;
	} else
		ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
			       dccp_role(sk), sk, dp->dccps_gss);

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* if first wasn't pure ack */
		if (len != 0)
			printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n",
			       __FUNCTION__, dccp_role(sk));
		return;
	case TFRC_SSTATE_NO_FBACK:
	case TFRC_SSTATE_FBACK:
		if (len > 0) {
			/* push the nominal send time forward by one t_ipi */
			hctx->ccid3hctx_t_nom = now;
			ccid3_calc_new_t_ipi(hctx);
			ccid3_calc_new_delta(hctx);
			(hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
			timeval_fix(&(hctx->ccid3hctx_t_nom));
		}
		break;
	default:
		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
		       __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
		dump_stack();
		break;
	}
}

/*
 * Sender-side processing of an incoming ACK/DATAACK: take an RTT sample
 * against the TX history entry being acknowledged and fold the receiver's
 * rate/loss feedback into the sender state.
 */
static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
	struct ccid3_options_received *opt_recv;
	struct ccid3_tx_hist_entry *entry, *next, *packet;
	unsigned long next_tmout;
	u16 t_elapsed;
	u32 pinv;
	u32 x_recv;
	u32 r_sample;
#if 0
	ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
		       dccp_role(sk), sk, dccp_state_name(sk->sk_state),
		       skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
#endif
	if (hctx == NULL)
		return;

	if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
		ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk);
		return;
	}

	/* we are only interested in ACKs */
	if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
	      DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
		return;

	/* pull out the feedback carried in the just-parsed options:
	 * elapsed time, receive rate and (inverse) loss event rate */
	opt_recv = &hctx->ccid3hctx_options_received;

	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
	x_recv = opt_recv->ccid3or_receive_rate;
	pinv = opt_recv->ccid3or_loss_event_rate;

	switch (hctx->ccid3hctx_state) {
	case TFRC_SSTATE_NO_SENT:
		/* FIXME: what to do here?
*/ 1187 + return; 1188 + case TFRC_SSTATE_NO_FBACK: 1189 + case TFRC_SSTATE_FBACK: 1190 + /* Calculate new round trip sample by 1191 + * R_sample = (now - t_recvdata) - t_delay */ 1192 + /* get t_recvdata from history */ 1193 + packet = NULL; 1194 + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) 1195 + if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { 1196 + packet = entry; 1197 + break; 1198 + } 1199 + 1200 + if (packet == NULL) { 1201 + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", 1202 + dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, 1203 + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); 1204 + return; 1205 + } 1206 + 1207 + /* Update RTT */ 1208 + r_sample = now_delta(packet->ccid3htx_tstamp); 1209 + /* FIXME: */ 1210 + // r_sample -= usecs_to_jiffies(t_elapsed * 10); 1211 + 1212 + /* Update RTT estimate by 1213 + * If (No feedback recv) 1214 + * R = R_sample; 1215 + * Else 1216 + * R = q * R + (1 - q) * R_sample; 1217 + * 1218 + * q is a constant, RFC 3448 recomments 0.9 1219 + */ 1220 + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { 1221 + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); 1222 + hctx->ccid3hctx_rtt = r_sample; 1223 + } else 1224 + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; 1225 + 1226 + /* 1227 + * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent 1228 + * implemention of the new window count. 
1229 + */ 1230 + if (hctx->ccid3hctx_rtt < 4) 1231 + hctx->ccid3hctx_rtt = 4; 1232 + 1233 + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", 1234 + dccp_role(sk), sk, 1235 + hctx->ccid3hctx_rtt, 1236 + r_sample); 1237 + 1238 + /* Update timeout interval */ 1239 + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); 1240 + 1241 + /* Update receive rate */ 1242 + hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ 1243 + 1244 + /* Update loss event rate */ 1245 + if (pinv == ~0 || pinv == 0) 1246 + hctx->ccid3hctx_p = 0; 1247 + else { 1248 + hctx->ccid3hctx_p = 1000000 / pinv; 1249 + 1250 + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { 1251 + hctx->ccid3hctx_p = TFRC_SMALLEST_P; 1252 + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); 1253 + } 1254 + } 1255 + 1256 + /* unschedule no feedback timer */ 1257 + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 1258 + 1259 + /* Update sending rate */ 1260 + ccid3_hc_tx_update_x(sk); 1261 + 1262 + /* Update next send time */ 1263 + if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { 1264 + (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; 1265 + (hctx->ccid3hctx_t_nom).tv_sec--; 1266 + } 1267 + /* FIXME - if no feedback then t_ipi can go > 1 second */ 1268 + (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; 1269 + ccid3_calc_new_t_ipi(hctx); 1270 + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; 1271 + timeval_fix(&(hctx->ccid3hctx_t_nom)); 1272 + ccid3_calc_new_delta(hctx); 1273 + 1274 + /* remove all packets older than the one acked from history */ 1275 + #if 0 1276 + FIXME! 
1277 + list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { 1278 + list_del_init(&entry->ccid3htx_node); 1279 + ccid3_tx_hist_entry_delete(entry); 1280 + } 1281 + #endif 1282 + if (hctx->ccid3hctx_x < 10) { 1283 + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); 1284 + hctx->ccid3hctx_x = 10; 1285 + } 1286 + /* to prevent divide by zero below */ 1287 + 1288 + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ 1289 + next_tmout = max(inet_csk(sk)->icsk_rto, 1290 + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); 1291 + /* maths with 100000 and 10 is to prevent overflow with 32 bit */ 1292 + 1293 + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", 1294 + dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); 1295 + 1296 + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 1297 + jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); 1298 + 1299 + /* set idle flag */ 1300 + hctx->ccid3hctx_idle = 1; 1301 + break; 1302 + default: 1303 + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", 1304 + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); 1305 + dump_stack(); 1306 + break; 1307 + } 1308 + } 1309 + 1310 + static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) 1311 + { 1312 + const struct dccp_sock *dp = dccp_sk(sk); 1313 + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 1314 + 1315 + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 1316 + return; 1317 + 1318 + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; 1319 + } 1320 + 1321 + static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 1322 + unsigned char len, u16 idx, unsigned char *value) 1323 + { 1324 + int rc = 0; 1325 + struct dccp_sock *dp = dccp_sk(sk); 1326 + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 1327 + struct ccid3_options_received 
*opt_recv; 1328 + 1329 + if (hctx == NULL) 1330 + return 0; 1331 + 1332 + opt_recv = &hctx->ccid3hctx_options_received; 1333 + 1334 + if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { 1335 + opt_recv->ccid3or_seqno = dp->dccps_gsr; 1336 + opt_recv->ccid3or_loss_event_rate = ~0; 1337 + opt_recv->ccid3or_loss_intervals_idx = 0; 1338 + opt_recv->ccid3or_loss_intervals_len = 0; 1339 + opt_recv->ccid3or_receive_rate = 0; 1340 + } 1341 + 1342 + switch (option) { 1343 + case TFRC_OPT_LOSS_EVENT_RATE: 1344 + if (len != 4) { 1345 + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", 1346 + dccp_role(sk), sk); 1347 + rc = -EINVAL; 1348 + } else { 1349 + opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); 1350 + ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", 1351 + dccp_role(sk), sk, 1352 + opt_recv->ccid3or_loss_event_rate); 1353 + } 1354 + break; 1355 + case TFRC_OPT_LOSS_INTERVALS: 1356 + opt_recv->ccid3or_loss_intervals_idx = idx; 1357 + opt_recv->ccid3or_loss_intervals_len = len; 1358 + ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", 1359 + dccp_role(sk), sk, 1360 + opt_recv->ccid3or_loss_intervals_idx, 1361 + opt_recv->ccid3or_loss_intervals_len); 1362 + break; 1363 + case TFRC_OPT_RECEIVE_RATE: 1364 + if (len != 4) { 1365 + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", 1366 + dccp_role(sk), sk); 1367 + rc = -EINVAL; 1368 + } else { 1369 + opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); 1370 + ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", 1371 + dccp_role(sk), sk, 1372 + opt_recv->ccid3or_receive_rate); 1373 + } 1374 + break; 1375 + } 1376 + 1377 + return rc; 1378 + } 1379 + 1380 + static int ccid3_hc_tx_init(struct sock *sk) 1381 + { 1382 + struct dccp_sock *dp = dccp_sk(sk); 1383 + struct ccid3_hc_tx_sock *hctx; 1384 + 1385 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 1386 + 1387 + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); 1388 + if (hctx == NULL) 1389 + return 
-ENOMEM; 1390 + 1391 + memset(hctx, 0, sizeof(*hctx)); 1392 + 1393 + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && 1394 + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) 1395 + hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; 1396 + else 1397 + hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; 1398 + 1399 + hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ 1400 + hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ 1401 + inet_csk(sk)->icsk_rto = USEC_IN_SEC; 1402 + hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; 1403 + INIT_LIST_HEAD(&hctx->ccid3hctx_hist); 1404 + init_timer(&hctx->ccid3hctx_no_feedback_timer); 1405 + 1406 + return 0; 1407 + } 1408 + 1409 + static void ccid3_hc_tx_exit(struct sock *sk) 1410 + { 1411 + struct dccp_sock *dp = dccp_sk(sk); 1412 + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; 1413 + struct ccid3_tx_hist_entry *entry, *next; 1414 + 1415 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 1416 + BUG_ON(hctx == NULL); 1417 + 1418 + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); 1419 + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); 1420 + 1421 + /* Empty packet history */ 1422 + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { 1423 + list_del_init(&entry->ccid3htx_node); 1424 + ccid3_tx_hist_entry_delete(entry); 1425 + } 1426 + 1427 + kfree(dp->dccps_hc_tx_ccid_private); 1428 + dp->dccps_hc_tx_ccid_private = NULL; 1429 + } 1430 + 1431 + /* 1432 + * RX Half Connection methods 1433 + */ 1434 + 1435 + /* TFRC receiver states */ 1436 + enum ccid3_hc_rx_states { 1437 + TFRC_RSTATE_NO_DATA = 1, 1438 + TFRC_RSTATE_DATA, 1439 + TFRC_RSTATE_TERM = 127, 1440 + }; 1441 + 1442 + #ifdef CCID3_DEBUG 1443 + static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 1444 + { 1445 + static char *ccid3_rx_state_names[] = { 1446 + [TFRC_RSTATE_NO_DATA] = "NO_DATA", 1447 + [TFRC_RSTATE_DATA] = "DATA", 1448 + 
[TFRC_RSTATE_TERM] = "TERM", 1449 + }; 1450 + 1451 + return ccid3_rx_state_names[state]; 1452 + } 1453 + #endif 1454 + 1455 + static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) 1456 + { 1457 + struct dccp_sock *dp = dccp_sk(sk); 1458 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1459 + enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; 1460 + 1461 + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", 1462 + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); 1463 + WARN_ON(state == oldstate); 1464 + hcrx->ccid3hcrx_state = state; 1465 + } 1466 + 1467 + static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) 1468 + { 1469 + struct dccp_sock *dp = dccp_sk(sk); 1470 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1471 + struct ccid3_rx_hist_entry *entry, *next; 1472 + u8 num_later = 0; 1473 + 1474 + if (list_empty(&hcrx->ccid3hcrx_hist)) 1475 + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); 1476 + else { 1477 + u64 seqno = packet->ccid3hrx_seqno; 1478 + struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, 1479 + struct ccid3_rx_hist_entry, 1480 + ccid3hrx_node); 1481 + if (after48(seqno, iter->ccid3hrx_seqno)) 1482 + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); 1483 + else { 1484 + if (iter->ccid3hrx_type == DCCP_PKT_DATA || 1485 + iter->ccid3hrx_type == DCCP_PKT_DATAACK) 1486 + num_later = 1; 1487 + 1488 + list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1489 + if (after48(seqno, iter->ccid3hrx_seqno)) { 1490 + list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); 1491 + goto trim_history; 1492 + } 1493 + 1494 + if (iter->ccid3hrx_type == DCCP_PKT_DATA || 1495 + iter->ccid3hrx_type == DCCP_PKT_DATAACK) 1496 + num_later++; 1497 + 1498 + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { 1499 + ccid3_rx_hist_entry_delete(packet); 1500 + ccid3_pr_debug("%s, sk=%p, packet(%llu) already 
lost!\n", 1501 + dccp_role(sk), sk, seqno); 1502 + return 1; 1503 + } 1504 + } 1505 + 1506 + if (num_later < TFRC_RECV_NUM_LATE_LOSS) 1507 + list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); 1508 + /* FIXME: else what? should we destroy the packet like above? */ 1509 + } 1510 + } 1511 + 1512 + trim_history: 1513 + /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ 1514 + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; 1515 + 1516 + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { 1517 + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1518 + if (num_later == 0) { 1519 + list_del_init(&entry->ccid3hrx_node); 1520 + ccid3_rx_hist_entry_delete(entry); 1521 + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1522 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) 1523 + --num_later; 1524 + } 1525 + } else { 1526 + int step = 0; 1527 + u8 win_count = 0; /* Not needed, but lets shut up gcc */ 1528 + int tmp; 1529 + /* 1530 + * We have no loss interval history so we need at least one 1531 + * rtt:s of data packets to approximate rtt. 
1532 + */ 1533 + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1534 + if (num_later == 0) { 1535 + switch (step) { 1536 + case 0: 1537 + step = 1; 1538 + /* OK, find next data packet */ 1539 + num_later = 1; 1540 + break; 1541 + case 1: 1542 + step = 2; 1543 + /* OK, find next data packet */ 1544 + num_later = 1; 1545 + win_count = entry->ccid3hrx_win_count; 1546 + break; 1547 + case 2: 1548 + tmp = win_count - entry->ccid3hrx_win_count; 1549 + if (tmp < 0) 1550 + tmp += TFRC_WIN_COUNT_LIMIT; 1551 + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { 1552 + /* we have found a packet older than one rtt 1553 + * remove the rest */ 1554 + step = 3; 1555 + } else /* OK, find next data packet */ 1556 + num_later = 1; 1557 + break; 1558 + case 3: 1559 + list_del_init(&entry->ccid3hrx_node); 1560 + ccid3_rx_hist_entry_delete(entry); 1561 + break; 1562 + } 1563 + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1564 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) 1565 + --num_later; 1566 + } 1567 + } 1568 + 1569 + return 0; 1570 + } 1571 + 1572 + static void ccid3_hc_rx_send_feedback(struct sock *sk) 1573 + { 1574 + struct dccp_sock *dp = dccp_sk(sk); 1575 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1576 + struct ccid3_rx_hist_entry *entry, *packet; 1577 + 1578 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 1579 + 1580 + switch (hcrx->ccid3hcrx_state) { 1581 + case TFRC_RSTATE_NO_DATA: 1582 + hcrx->ccid3hcrx_x_recv = 0; 1583 + break; 1584 + case TFRC_RSTATE_DATA: { 1585 + u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); 1586 + 1587 + if (delta == 0) 1588 + delta = 1; /* to prevent divide by zero */ 1589 + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; 1590 + } 1591 + break; 1592 + default: 1593 + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", 1594 + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); 1595 + dump_stack(); 1596 + return; 1597 + } 1598 + 1599 + packet = NULL; 
1600 + list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) 1601 + if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1602 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { 1603 + packet = entry; 1604 + break; 1605 + } 1606 + 1607 + if (packet == NULL) { 1608 + printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", 1609 + __FUNCTION__, dccp_role(sk), sk); 1610 + dump_stack(); 1611 + return; 1612 + } 1613 + 1614 + do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); 1615 + hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; 1616 + hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; 1617 + hcrx->ccid3hcrx_bytes_recv = 0; 1618 + 1619 + /* Convert to multiples of 10us */ 1620 + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; 1621 + if (hcrx->ccid3hcrx_p == 0) 1622 + hcrx->ccid3hcrx_pinv = ~0; 1623 + else 1624 + hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; 1625 + dccp_send_ack(sk); 1626 + } 1627 + 1628 + static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 1629 + { 1630 + const struct dccp_sock *dp = dccp_sk(sk); 1631 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1632 + 1633 + if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 1634 + return; 1635 + 1636 + if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) 1637 + dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); 1638 + 1639 + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { 1640 + const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); 1641 + const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); 1642 + 1643 + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); 1644 + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); 1645 + } 1646 + 1647 + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; 1648 + } 1649 + 1650 + /* Weights used to calculate loss event rate */ 1651 + /* 1652 + * These are 
integers as per section 8 of RFC3448. We can then divide by 4 * 1653 + * when we use it. 1654 + */ 1655 + const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, }; 1656 + 1657 + /* 1658 + * args: fvalue - function value to match 1659 + * returns: p closest to that value 1660 + * 1661 + * both fvalue and p are multiplied by 1,000,000 to use ints 1662 + */ 1663 + u32 calcx_reverse_lookup(u32 fvalue) { 1664 + int ctr = 0; 1665 + int small; 1666 + 1667 + if (fvalue < calcx_lookup[0][1]) 1668 + return 0; 1669 + if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) 1670 + small = 1; 1671 + else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0]) 1672 + return 1000000; 1673 + else 1674 + small = 0; 1675 + while (fvalue > calcx_lookup[ctr][small]) 1676 + ctr++; 1677 + if (small) 1678 + return (CALCX_SPLIT * ctr / CALCX_ARRSIZE); 1679 + else 1680 + return (1000000 * ctr / CALCX_ARRSIZE) ; 1681 + } 1682 + 1683 + /* calculate first loss interval 1684 + * 1685 + * returns estimated loss interval in usecs */ 1686 + 1687 + static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) 1688 + { 1689 + struct dccp_sock *dp = dccp_sk(sk); 1690 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1691 + struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; 1692 + u32 rtt, delta, x_recv, fval, p, tmp2; 1693 + struct timeval tstamp, tmp_tv; 1694 + int interval = 0; 1695 + int win_count = 0; 1696 + int step = 0; 1697 + u64 tmp1; 1698 + 1699 + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1700 + if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1701 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { 1702 + tail = entry; 1703 + 1704 + switch (step) { 1705 + case 0: 1706 + tstamp = entry->ccid3hrx_tstamp; 1707 + win_count = entry->ccid3hrx_win_count; 1708 + step = 1; 1709 + break; 1710 + case 1: 1711 + interval = win_count - entry->ccid3hrx_win_count; 1712 + if (interval < 0) 1713 + interval += TFRC_WIN_COUNT_LIMIT; 1714 + if (interval > 4) 1715 + goto 
found; 1716 + break; 1717 + } 1718 + } 1719 + } 1720 + 1721 + if (step == 0) { 1722 + printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n", 1723 + __FUNCTION__, dccp_role(sk), sk); 1724 + return ~0; 1725 + } 1726 + 1727 + if (interval == 0) { 1728 + ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n", 1729 + dccp_role(sk), sk); 1730 + interval = 1; 1731 + } 1732 + found: 1733 + timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); 1734 + rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; 1735 + ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", 1736 + dccp_role(sk), sk, rtt); 1737 + if (rtt == 0) 1738 + rtt = 1; 1739 + 1740 + delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); 1741 + if (delta == 0) 1742 + delta = 1; 1743 + 1744 + x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; 1745 + 1746 + tmp1 = (u64)x_recv * (u64)rtt; 1747 + do_div(tmp1,10000000); 1748 + tmp2 = (u32)tmp1; 1749 + fval = (hcrx->ccid3hcrx_s * 100000) / tmp2; 1750 + /* do not alter order above or you will get overflow on 32 bit */ 1751 + p = calcx_reverse_lookup(fval); 1752 + ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\ 1753 + dccp_role(sk), sk, x_recv, p); 1754 + 1755 + if (p == 0) 1756 + return ~0; 1757 + else 1758 + return 1000000 / p; 1759 + } 1760 + 1761 + static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) 1762 + { 1763 + struct dccp_sock *dp = dccp_sk(sk); 1764 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1765 + struct ccid3_loss_interval_hist_entry *li_entry; 1766 + 1767 + if (seq_loss != DCCP_MAX_SEQNO + 1) { 1768 + ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n", 1769 + dccp_role(sk), sk, seq_loss, win_loss); 1770 + 1771 + if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { 1772 + struct ccid3_loss_interval_hist_entry *li_tail = NULL; 1773 + int i; 1774 + 1775 + ccid3_pr_debug("%s, 
sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk); 1776 + for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) { 1777 + li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC); 1778 + if (li_entry == NULL) { 1779 + ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); 1780 + ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n", 1781 + dccp_role(sk), sk); 1782 + return; 1783 + } 1784 + if (li_tail == NULL) 1785 + li_tail = li_entry; 1786 + list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist); 1787 + } 1788 + 1789 + li_entry->ccid3lih_seqno = seq_loss; 1790 + li_entry->ccid3lih_win_count = win_loss; 1791 + 1792 + li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk); 1793 + } 1794 + } 1795 + /* FIXME: find end of interval */ 1796 + } 1797 + 1798 + static void ccid3_hc_rx_detect_loss(struct sock *sk) 1799 + { 1800 + struct dccp_sock *dp = dccp_sk(sk); 1801 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1802 + struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; 1803 + struct ccid3_rx_hist_entry *a_loss = NULL; 1804 + struct ccid3_rx_hist_entry *b_loss = NULL; 1805 + u64 seq_loss = DCCP_MAX_SEQNO + 1; 1806 + u8 win_loss = 0; 1807 + u8 num_later = TFRC_RECV_NUM_LATE_LOSS; 1808 + 1809 + list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1810 + if (num_later == 0) { 1811 + b_loss = entry; 1812 + break; 1813 + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1814 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) 1815 + --num_later; 1816 + } 1817 + 1818 + if (b_loss == NULL) 1819 + goto out_update_li; 1820 + 1821 + a_next = b_next; 1822 + num_later = 1; 1823 + #if 0 1824 + FIXME MERGE GIT! 
1825 + list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1826 + if (num_later == 0) { 1827 + a_loss = entry; 1828 + break; 1829 + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || 1830 + entry->ccid3hrx_type == DCCP_PKT_DATAACK) 1831 + --num_later; 1832 + } 1833 + #endif 1834 + 1835 + if (a_loss == NULL) { 1836 + if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { 1837 + /* no loss event have occured yet */ 1838 + ccid3_pr_debug("%s, sk=%p, TODO: find a lost data " 1839 + "packet by comparing to initial seqno\n", 1840 + dccp_role(sk), sk); 1841 + goto out_update_li; 1842 + } else { 1843 + pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history", 1844 + __FUNCTION__, dccp_role(sk), sk); 1845 + return; 1846 + } 1847 + } 1848 + 1849 + /* Locate a lost data packet */ 1850 + entry = packet = b_loss; 1851 + #if 0 1852 + FIXME MERGE GIT! 1853 + list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { 1854 + u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); 1855 + 1856 + if (delta != 0) { 1857 + if (packet->ccid3hrx_type == DCCP_PKT_DATA || 1858 + packet->ccid3hrx_type == DCCP_PKT_DATAACK) 1859 + --delta; 1860 + /* 1861 + * FIXME: check this, probably this % usage is because 1862 + * in earlier drafts the ndp count was just 8 bits 1863 + * long, but now it cam be up to 24 bits long. 
1864 + */ 1865 + #if 0 1866 + if (delta % DCCP_NDP_LIMIT != 1867 + (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) 1868 + #endif 1869 + if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { 1870 + seq_loss = entry->ccid3hrx_seqno; 1871 + dccp_inc_seqno(&seq_loss); 1872 + } 1873 + } 1874 + packet = entry; 1875 + if (packet == a_loss) 1876 + break; 1877 + } 1878 + #endif 1879 + 1880 + if (seq_loss != DCCP_MAX_SEQNO + 1) 1881 + win_loss = a_loss->ccid3hrx_win_count; 1882 + 1883 + out_update_li: 1884 + ccid3_hc_rx_update_li(sk, seq_loss, win_loss); 1885 + } 1886 + 1887 + static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk) 1888 + { 1889 + struct dccp_sock *dp = dccp_sk(sk); 1890 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1891 + struct ccid3_loss_interval_hist_entry *li_entry, *li_next; 1892 + int i = 0; 1893 + u32 i_tot; 1894 + u32 i_tot0 = 0; 1895 + u32 i_tot1 = 0; 1896 + u32 w_tot = 0; 1897 + 1898 + list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) { 1899 + if (i < TFRC_RECV_IVAL_F_LENGTH) { 1900 + i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i]; 1901 + w_tot += ccid3_hc_rx_w[i]; 1902 + } 1903 + 1904 + if (i != 0) 1905 + i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1]; 1906 + 1907 + if (++i > TFRC_RECV_IVAL_F_LENGTH) 1908 + break; 1909 + } 1910 + 1911 + if (i != TFRC_RECV_IVAL_F_LENGTH) { 1912 + pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n", 1913 + __FUNCTION__, dccp_role(sk), sk); 1914 + return 0; 1915 + } 1916 + 1917 + i_tot = max(i_tot0, i_tot1); 1918 + 1919 + /* FIXME: Why do we do this? 
-Ian McDonald */ 1920 + if (i_tot * 4 < w_tot) 1921 + i_tot = w_tot * 4; 1922 + 1923 + return i_tot * 4 / w_tot; 1924 + } 1925 + 1926 + static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) 1927 + { 1928 + struct dccp_sock *dp = dccp_sk(sk); 1929 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 1930 + struct ccid3_rx_hist_entry *packet; 1931 + struct timeval now; 1932 + u8 win_count; 1933 + u32 p_prev; 1934 + int ins; 1935 + #if 0 1936 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", 1937 + dccp_role(sk), sk, dccp_state_name(sk->sk_state), 1938 + skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); 1939 + #endif 1940 + if (hcrx == NULL) 1941 + return; 1942 + 1943 + BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA || 1944 + hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA)); 1945 + 1946 + switch (DCCP_SKB_CB(skb)->dccpd_type) { 1947 + case DCCP_PKT_ACK: 1948 + if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 1949 + return; 1950 + case DCCP_PKT_DATAACK: 1951 + if (dp->dccps_options_received.dccpor_timestamp_echo == 0) 1952 + break; 1953 + p_prev = hcrx->ccid3hcrx_rtt; 1954 + do_gettimeofday(&now); 1955 + /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo - 1956 + usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10); 1957 + FIXME - I think above code is broken - have to look at options more, will also need 1958 + to fix pr_debug below */ 1959 + if (p_prev != hcrx->ccid3hcrx_rtt) 1960 + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n", 1961 + dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, 1962 + dp->dccps_options_received.dccpor_timestamp_echo, 1963 + dp->dccps_options_received.dccpor_elapsed_time); 1964 + break; 1965 + case DCCP_PKT_DATA: 1966 + break; 1967 + default: 1968 + ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n", 1969 + dccp_role(sk), sk, 1970 + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); 1971 + return; 1972 + } 1973 + 1974 + 
packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); 1975 + if (packet == NULL) { 1976 + ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", 1977 + dccp_role(sk), sk); 1978 + return; 1979 + } 1980 + 1981 + win_count = packet->ccid3hrx_win_count; 1982 + 1983 + ins = ccid3_hc_rx_add_hist(sk, packet); 1984 + 1985 + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) 1986 + return; 1987 + 1988 + switch (hcrx->ccid3hcrx_state) { 1989 + case TFRC_RSTATE_NO_DATA: 1990 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n", 1991 + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); 1992 + ccid3_hc_rx_send_feedback(sk); 1993 + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); 1994 + return; 1995 + case TFRC_RSTATE_DATA: 1996 + hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4; 1997 + if (ins == 0) { 1998 + do_gettimeofday(&now); 1999 + if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) { 2000 + hcrx->ccid3hcrx_tstamp_last_ack = now; 2001 + ccid3_hc_rx_send_feedback(sk); 2002 + } 2003 + return; 2004 + } 2005 + break; 2006 + default: 2007 + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", 2008 + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); 2009 + dump_stack(); 2010 + return; 2011 + } 2012 + 2013 + /* Dealing with packet loss */ 2014 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! 
Reacting...\n", 2015 + dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb); 2016 + 2017 + ccid3_hc_rx_detect_loss(sk); 2018 + p_prev = hcrx->ccid3hcrx_p; 2019 + 2020 + /* Calculate loss event rate */ 2021 + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) 2022 + /* Scaling up by 1000000 as fixed decimal */ 2023 + hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk); 2024 + 2025 + if (hcrx->ccid3hcrx_p > p_prev) { 2026 + ccid3_hc_rx_send_feedback(sk); 2027 + return; 2028 + } 2029 + } 2030 + 2031 + static int ccid3_hc_rx_init(struct sock *sk) 2032 + { 2033 + struct dccp_sock *dp = dccp_sk(sk); 2034 + struct ccid3_hc_rx_sock *hcrx; 2035 + 2036 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 2037 + 2038 + hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any()); 2039 + if (hcrx == NULL) 2040 + return -ENOMEM; 2041 + 2042 + memset(hcrx, 0, sizeof(*hcrx)); 2043 + 2044 + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && 2045 + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) 2046 + hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; 2047 + else 2048 + hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; 2049 + 2050 + hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 2051 + INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); 2052 + INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist); 2053 + 2054 + return 0; 2055 + } 2056 + 2057 + static void ccid3_hc_rx_exit(struct sock *sk) 2058 + { 2059 + struct dccp_sock *dp = dccp_sk(sk); 2060 + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; 2061 + 2062 + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); 2063 + 2064 + if (hcrx == NULL) 2065 + return; 2066 + 2067 + ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); 2068 + 2069 + /* Empty packet history */ 2070 + ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); 2071 + 2072 + /* Empty loss interval history */ 2073 + ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); 2074 + 2075 + kfree(dp->dccps_hc_rx_ccid_private); 2076 + 
dp->dccps_hc_rx_ccid_private = NULL; 2077 + } 2078 + 2079 + static struct ccid ccid3 = { 2080 + .ccid_id = 3, 2081 + .ccid_name = "ccid3", 2082 + .ccid_owner = THIS_MODULE, 2083 + .ccid_init = ccid3_init, 2084 + .ccid_exit = ccid3_exit, 2085 + .ccid_hc_tx_init = ccid3_hc_tx_init, 2086 + .ccid_hc_tx_exit = ccid3_hc_tx_exit, 2087 + .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet, 2088 + .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent, 2089 + .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv, 2090 + .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options, 2091 + .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options, 2092 + .ccid_hc_rx_init = ccid3_hc_rx_init, 2093 + .ccid_hc_rx_exit = ccid3_hc_rx_exit, 2094 + .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options, 2095 + .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, 2096 + }; 2097 + 2098 + module_param(ccid3_debug, int, 0444); 2099 + MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); 2100 + 2101 + static __init int ccid3_module_init(void) 2102 + { 2103 + int rc = -ENOMEM; 2104 + 2105 + ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", 2106 + sizeof(struct ccid3_tx_hist_entry), 0, 2107 + SLAB_HWCACHE_ALIGN, NULL, NULL); 2108 + if (ccid3_tx_hist_slab == NULL) 2109 + goto out; 2110 + 2111 + ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", 2112 + sizeof(struct ccid3_rx_hist_entry), 0, 2113 + SLAB_HWCACHE_ALIGN, NULL, NULL); 2114 + if (ccid3_rx_hist_slab == NULL) 2115 + goto out_free_tx_history; 2116 + 2117 + ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", 2118 + sizeof(struct ccid3_loss_interval_hist_entry), 0, 2119 + SLAB_HWCACHE_ALIGN, NULL, NULL); 2120 + if (ccid3_loss_interval_hist_slab == NULL) 2121 + goto out_free_rx_history; 2122 + 2123 + rc = ccid_register(&ccid3); 2124 + if (rc != 0) 2125 + goto out_free_loss_interval_history; 2126 + 2127 + out: 2128 + return rc; 2129 + out_free_loss_interval_history: 2130 + 
kmem_cache_destroy(ccid3_loss_interval_hist_slab); 2131 + ccid3_loss_interval_hist_slab = NULL; 2132 + out_free_rx_history: 2133 + kmem_cache_destroy(ccid3_rx_hist_slab); 2134 + ccid3_rx_hist_slab = NULL; 2135 + out_free_tx_history: 2136 + kmem_cache_destroy(ccid3_tx_hist_slab); 2137 + ccid3_tx_hist_slab = NULL; 2138 + goto out; 2139 + } 2140 + module_init(ccid3_module_init); 2141 + 2142 + static __exit void ccid3_module_exit(void) 2143 + { 2144 + ccid_unregister(&ccid3); 2145 + 2146 + if (ccid3_tx_hist_slab != NULL) { 2147 + kmem_cache_destroy(ccid3_tx_hist_slab); 2148 + ccid3_tx_hist_slab = NULL; 2149 + } 2150 + if (ccid3_rx_hist_slab != NULL) { 2151 + kmem_cache_destroy(ccid3_rx_hist_slab); 2152 + ccid3_rx_hist_slab = NULL; 2153 + } 2154 + if (ccid3_loss_interval_hist_slab != NULL) { 2155 + kmem_cache_destroy(ccid3_loss_interval_hist_slab); 2156 + ccid3_loss_interval_hist_slab = NULL; 2157 + } 2158 + } 2159 + module_exit(ccid3_module_exit); 2160 + 2161 + MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>"); 2162 + MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID"); 2163 + MODULE_LICENSE("GPL"); 2164 + MODULE_ALIAS("net-dccp-ccid-3");
+137
net/dccp/ccids/ccid3.h
··· 1 + /* 2 + * net/dccp/ccids/ccid3.h 3 + * 4 + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. 5 + * 6 + * An implementation of the DCCP protocol 7 + * 8 + * This code has been developed by the University of Waikato WAND 9 + * research group. For further information please see http://www.wand.net.nz/ 10 + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz 11 + * 12 + * This code also uses code from Lulea University, rereleased as GPL by its 13 + * authors: 14 + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon 15 + * 16 + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft 17 + * and to make it work as a loadable module in the DCCP stack written by 18 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. 19 + * 20 + * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> 21 + * 22 + * This program is free software; you can redistribute it and/or modify 23 + * it under the terms of the GNU General Public License as published by 24 + * the Free Software Foundation; either version 2 of the License, or 25 + * (at your option) any later version. 26 + * 27 + * This program is distributed in the hope that it will be useful, 28 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 29 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 30 + * GNU General Public License for more details. 31 + * 32 + * You should have received a copy of the GNU General Public License 33 + * along with this program; if not, write to the Free Software 34 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
35 + */ 36 + #ifndef _DCCP_CCID3_H_ 37 + #define _DCCP_CCID3_H_ 38 + 39 + #include <linux/types.h> 40 + #include <linux/list.h> 41 + #include <linux/timer.h> 42 + 43 + struct ccid3_tx_hist_entry { 44 + struct list_head ccid3htx_node; 45 + u64 ccid3htx_seqno:48, 46 + ccid3htx_win_count:8, 47 + ccid3htx_sent:1; 48 + struct timeval ccid3htx_tstamp; 49 + }; 50 + 51 + struct ccid3_options_received { 52 + u64 ccid3or_seqno:48, 53 + ccid3or_loss_intervals_idx:16; 54 + u16 ccid3or_loss_intervals_len; 55 + u32 ccid3or_loss_event_rate; 56 + u32 ccid3or_receive_rate; 57 + }; 58 + 59 + /** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block 60 + * 61 + * @ccid3hctx_state - Sender state 62 + * @ccid3hctx_x - Current sending rate 63 + * @ccid3hctx_x_recv - Receive rate 64 + * @ccid3hctx_x_calc - Calculated send (?) rate 65 + * @ccid3hctx_s - Packet size 66 + * @ccid3hctx_rtt - Estimate of current round trip time in usecs 67 + * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 68 + * @ccid3hctx_last_win_count - Last window counter sent 69 + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent 70 + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer 71 + * @ccid3hctx_idle - FIXME 72 + * @ccid3hctx_t_ld - Time last doubled during slow start 73 + * @ccid3hctx_t_nom - Nominal send time of next packet 74 + * @ccid3hctx_t_ipi - Interpacket (send) interval 75 + * @ccid3hctx_delta - Send timer delta 76 + * @ccid3hctx_hist - Packet history 77 + */ 78 + struct ccid3_hc_tx_sock { 79 + u32 ccid3hctx_x; 80 + u32 ccid3hctx_x_recv; 81 + u32 ccid3hctx_x_calc; 82 + u16 ccid3hctx_s; 83 + u32 ccid3hctx_rtt; 84 + u32 ccid3hctx_p; 85 + u8 ccid3hctx_state; 86 + u8 ccid3hctx_last_win_count; 87 + u8 ccid3hctx_idle; 88 + struct timeval ccid3hctx_t_last_win_count; 89 + struct timer_list ccid3hctx_no_feedback_timer; 90 + struct timeval ccid3hctx_t_ld; 91 + struct timeval ccid3hctx_t_nom; 92 + u32 ccid3hctx_t_ipi; 93 
+ u32 ccid3hctx_delta; 94 + struct list_head ccid3hctx_hist; 95 + struct ccid3_options_received ccid3hctx_options_received; 96 + }; 97 + 98 + struct ccid3_loss_interval_hist_entry { 99 + struct list_head ccid3lih_node; 100 + u64 ccid3lih_seqno:48, 101 + ccid3lih_win_count:4; 102 + u32 ccid3lih_interval; 103 + }; 104 + 105 + struct ccid3_rx_hist_entry { 106 + struct list_head ccid3hrx_node; 107 + u64 ccid3hrx_seqno:48, 108 + ccid3hrx_win_count:4, 109 + ccid3hrx_type:4; 110 + u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ 111 + struct timeval ccid3hrx_tstamp; 112 + }; 113 + 114 + struct ccid3_hc_rx_sock { 115 + u64 ccid3hcrx_seqno_last_counter:48, 116 + ccid3hcrx_state:8, 117 + ccid3hcrx_last_counter:4; 118 + unsigned long ccid3hcrx_rtt; 119 + u32 ccid3hcrx_p; 120 + u32 ccid3hcrx_bytes_recv; 121 + struct timeval ccid3hcrx_tstamp_last_feedback; 122 + struct timeval ccid3hcrx_tstamp_last_ack; 123 + struct list_head ccid3hcrx_hist; 124 + struct list_head ccid3hcrx_loss_interval_hist; 125 + u16 ccid3hcrx_s; 126 + u32 ccid3hcrx_pinv; 127 + u32 ccid3hcrx_elapsed_time; 128 + u32 ccid3hcrx_x_recv; 129 + }; 130 + 131 + #define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ 132 + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) 133 + 134 + #define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ 135 + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) 136 + 137 + #endif /* _DCCP_CCID3_H_ */
+422
net/dccp/dccp.h
··· 1 + #ifndef _DCCP_H 2 + #define _DCCP_H 3 + /* 4 + * net/dccp/dccp.h 5 + * 6 + * An implementation of the DCCP protocol 7 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 8 + * 9 + * This program is free software; you can redistribute it and/or modify it 10 + * under the terms of the GNU General Public License version 2 as 11 + * published by the Free Software Foundation. 12 + */ 13 + 14 + #include <linux/dccp.h> 15 + #include <net/snmp.h> 16 + #include <net/sock.h> 17 + #include <net/tcp.h> 18 + 19 + #define DCCP_DEBUG 20 + 21 + #ifdef DCCP_DEBUG 22 + extern int dccp_debug; 23 + 24 + #define dccp_pr_debug(format, a...) \ 25 + do { if (dccp_debug) \ 26 + printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ 27 + } while (0) 28 + #define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) 29 + #else 30 + #define dccp_pr_debug(format, a...) 31 + #define dccp_pr_debug_cat(format, a...) 32 + #endif 33 + 34 + extern struct inet_hashinfo dccp_hashinfo; 35 + 36 + extern atomic_t dccp_orphan_count; 37 + extern int dccp_tw_count; 38 + extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); 39 + 40 + extern void dccp_time_wait(struct sock *sk, int state, int timeo); 41 + 42 + /* FIXME: Right size this */ 43 + #define DCCP_MAX_OPT_LEN 128 44 + 45 + #define DCCP_MAX_PACKET_HDR 32 46 + 47 + #define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) 48 + 49 + #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT 50 + * state, about 60 seconds */ 51 + 52 + /* draft-ietf-dccp-spec-11.txt initial RTO value */ 53 + #define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) 54 + 55 + /* Maximal interval between probes for local resources. */ 56 + #define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) 57 + 58 + #define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ 59 + 60 + extern struct proto dccp_v4_prot; 61 + 62 + /* is seq1 < seq2 ? 
*/ 63 + static inline const int before48(const u64 seq1, const u64 seq2) 64 + { 65 + return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; 66 + } 67 + 68 + /* is seq1 > seq2 ? */ 69 + static inline const int after48(const u64 seq1, const u64 seq2) 70 + { 71 + return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; 72 + } 73 + 74 + /* is seq2 <= seq1 <= seq3 ? */ 75 + static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) 76 + { 77 + return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); 78 + } 79 + 80 + static inline u64 max48(const u64 seq1, const u64 seq2) 81 + { 82 + return after48(seq1, seq2) ? seq1 : seq2; 83 + } 84 + 85 + enum { 86 + DCCP_MIB_NUM = 0, 87 + DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ 88 + DCCP_MIB_ESTABRESETS, /* EstabResets */ 89 + DCCP_MIB_CURRESTAB, /* CurrEstab */ 90 + DCCP_MIB_OUTSEGS, /* OutSegs */ 91 + DCCP_MIB_OUTRSTS, 92 + DCCP_MIB_ABORTONTIMEOUT, 93 + DCCP_MIB_TIMEOUTS, 94 + DCCP_MIB_ABORTFAILED, 95 + DCCP_MIB_PASSIVEOPENS, 96 + DCCP_MIB_ATTEMPTFAILS, 97 + DCCP_MIB_OUTDATAGRAMS, 98 + DCCP_MIB_INERRS, 99 + DCCP_MIB_OPTMANDATORYERROR, 100 + DCCP_MIB_INVALIDOPT, 101 + __DCCP_MIB_MAX 102 + }; 103 + 104 + #define DCCP_MIB_MAX __DCCP_MIB_MAX 105 + struct dccp_mib { 106 + unsigned long mibs[DCCP_MIB_MAX]; 107 + } __SNMP_MIB_ALIGN__; 108 + 109 + DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); 110 + #define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) 111 + #define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) 112 + #define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) 113 + #define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) 114 + #define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) 115 + #define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) 116 + 117 + extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); 118 + extern int dccp_retransmit_skb(struct 
sock *sk, struct sk_buff *skb); 119 + 120 + extern int dccp_send_response(struct sock *sk); 121 + extern void dccp_send_ack(struct sock *sk); 122 + extern void dccp_send_delayed_ack(struct sock *sk); 123 + extern void dccp_send_sync(struct sock *sk, u64 seq); 124 + 125 + extern void dccp_init_xmit_timers(struct sock *sk); 126 + static inline void dccp_clear_xmit_timers(struct sock *sk) 127 + { 128 + inet_csk_clear_xmit_timers(sk); 129 + } 130 + 131 + extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); 132 + 133 + extern const char *dccp_packet_name(const int type); 134 + extern const char *dccp_state_name(const int state); 135 + 136 + static inline void dccp_set_state(struct sock *sk, const int state) 137 + { 138 + const int oldstate = sk->sk_state; 139 + 140 + dccp_pr_debug("%s(%p) %-10.10s -> %s\n", 141 + dccp_role(sk), sk, 142 + dccp_state_name(oldstate), dccp_state_name(state)); 143 + WARN_ON(state == oldstate); 144 + 145 + switch (state) { 146 + case DCCP_OPEN: 147 + if (oldstate != DCCP_OPEN) 148 + DCCP_INC_STATS(DCCP_MIB_CURRESTAB); 149 + break; 150 + 151 + case DCCP_CLOSED: 152 + if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) 153 + DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); 154 + 155 + sk->sk_prot->unhash(sk); 156 + if (inet_csk(sk)->icsk_bind_hash != NULL && 157 + !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) 158 + inet_put_port(&dccp_hashinfo, sk); 159 + /* fall through */ 160 + default: 161 + if (oldstate == DCCP_OPEN) 162 + DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); 163 + } 164 + 165 + /* Change state AFTER socket is unhashed to avoid closed 166 + * socket sitting in hash tables. 
167 + */ 168 + sk->sk_state = state; 169 + } 170 + 171 + static inline void dccp_done(struct sock *sk) 172 + { 173 + dccp_set_state(sk, DCCP_CLOSED); 174 + dccp_clear_xmit_timers(sk); 175 + 176 + sk->sk_shutdown = SHUTDOWN_MASK; 177 + 178 + if (!sock_flag(sk, SOCK_DEAD)) 179 + sk->sk_state_change(sk); 180 + else 181 + inet_csk_destroy_sock(sk); 182 + } 183 + 184 + static inline void dccp_openreq_init(struct request_sock *req, 185 + struct dccp_sock *dp, 186 + struct sk_buff *skb) 187 + { 188 + /* 189 + * FIXME: fill in the other req fields from the DCCP options 190 + * received 191 + */ 192 + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; 193 + inet_rsk(req)->acked = 0; 194 + req->rcv_wnd = 0; 195 + } 196 + 197 + extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, 198 + struct sk_buff *skb); 199 + extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); 200 + 201 + extern struct sock *dccp_create_openreq_child(struct sock *sk, 202 + const struct request_sock *req, 203 + const struct sk_buff *skb); 204 + 205 + extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); 206 + 207 + extern void dccp_v4_err(struct sk_buff *skb, u32); 208 + 209 + extern int dccp_v4_rcv(struct sk_buff *skb); 210 + 211 + extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, 212 + struct sk_buff *skb, 213 + struct request_sock *req, 214 + struct dst_entry *dst); 215 + extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, 216 + struct request_sock *req, 217 + struct request_sock **prev); 218 + 219 + extern int dccp_child_process(struct sock *parent, struct sock *child, 220 + struct sk_buff *skb); 221 + extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, 222 + struct dccp_hdr *dh, unsigned len); 223 + extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, 224 + const struct dccp_hdr *dh, const unsigned len); 225 + 226 + extern void dccp_close(struct sock *sk, long timeout); 227 
+ extern struct sk_buff *dccp_make_response(struct sock *sk, 228 + struct dst_entry *dst, 229 + struct request_sock *req); 230 + 231 + extern int dccp_connect(struct sock *sk); 232 + extern int dccp_disconnect(struct sock *sk, int flags); 233 + extern int dccp_getsockopt(struct sock *sk, int level, int optname, 234 + char *optval, int *optlen); 235 + extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); 236 + extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 237 + size_t size); 238 + extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, 239 + struct msghdr *msg, size_t len, int nonblock, 240 + int flags, int *addr_len); 241 + extern int dccp_setsockopt(struct sock *sk, int level, int optname, 242 + char *optval, int optlen); 243 + extern void dccp_shutdown(struct sock *sk, int how); 244 + 245 + extern int dccp_v4_checksum(struct sk_buff *skb); 246 + 247 + extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); 248 + extern void dccp_send_close(struct sock *sk); 249 + 250 + struct dccp_skb_cb { 251 + __u8 dccpd_type; 252 + __u8 dccpd_reset_code; 253 + __u8 dccpd_service; 254 + __u8 dccpd_ccval; 255 + __u64 dccpd_seq; 256 + __u64 dccpd_ack_seq; 257 + int dccpd_opt_len; 258 + }; 259 + 260 + #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) 261 + 262 + static inline int dccp_non_data_packet(const struct sk_buff *skb) 263 + { 264 + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; 265 + 266 + return type == DCCP_PKT_ACK || 267 + type == DCCP_PKT_CLOSE || 268 + type == DCCP_PKT_CLOSEREQ || 269 + type == DCCP_PKT_RESET || 270 + type == DCCP_PKT_SYNC || 271 + type == DCCP_PKT_SYNCACK; 272 + } 273 + 274 + static inline int dccp_packet_without_ack(const struct sk_buff *skb) 275 + { 276 + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; 277 + 278 + return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; 279 + } 280 + 281 + #define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) 282 + #define 
DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) 283 + 284 + static inline void dccp_set_seqno(u64 *seqno, u64 value) 285 + { 286 + if (value > DCCP_MAX_SEQNO) 287 + value -= DCCP_MAX_SEQNO + 1; 288 + *seqno = value; 289 + } 290 + 291 + static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) 292 + { 293 + return ((seqno2 << 16) - (seqno1 << 16)) >> 16; 294 + } 295 + 296 + static inline void dccp_inc_seqno(u64 *seqno) 297 + { 298 + if (++*seqno > DCCP_MAX_SEQNO) 299 + *seqno = 0; 300 + } 301 + 302 + static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) 303 + { 304 + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); 305 + 306 + #if defined(__LITTLE_ENDIAN_BITFIELD) 307 + dh->dccph_seq = htonl((gss >> 32)) >> 8; 308 + #elif defined(__BIG_ENDIAN_BITFIELD) 309 + dh->dccph_seq = htonl((gss >> 32)); 310 + #else 311 + #error "Adjust your <asm/byteorder.h> defines" 312 + #endif 313 + dhx->dccph_seq_low = htonl(gss & 0xffffffff); 314 + } 315 + 316 + static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) 317 + { 318 + #if defined(__LITTLE_ENDIAN_BITFIELD) 319 + dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; 320 + #elif defined(__BIG_ENDIAN_BITFIELD) 321 + dhack->dccph_ack_nr_high = htonl((gsr >> 32)); 322 + #else 323 + #error "Adjust your <asm/byteorder.h> defines" 324 + #endif 325 + dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); 326 + } 327 + 328 + static inline void dccp_update_gsr(struct sock *sk, u64 seq) 329 + { 330 + struct dccp_sock *dp = dccp_sk(sk); 331 + u64 tmp_gsr; 332 + 333 + dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); 334 + dp->dccps_gsr = seq; 335 + dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); 336 + dccp_set_seqno(&dp->dccps_swh, 337 + dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); 338 + } 339 + 340 + static inline void dccp_update_gss(struct sock *sk, u64 seq) 341 + { 342 + struct dccp_sock 
*dp = dccp_sk(sk); 343 + u64 tmp_gss; 344 + 345 + dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); 346 + dp->dccps_awl = max48(tmp_gss, dp->dccps_iss); 347 + dp->dccps_awh = dp->dccps_gss = seq; 348 + } 349 + 350 + extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); 351 + extern void dccp_insert_option_elapsed_time(struct sock *sk, 352 + struct sk_buff *skb, 353 + u32 elapsed_time); 354 + extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, 355 + unsigned char option, 356 + const void *value, unsigned char len); 357 + 358 + extern struct socket *dccp_ctl_socket; 359 + 360 + #define DCCP_ACKPKTS_STATE_RECEIVED 0 361 + #define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) 362 + #define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) 363 + 364 + #define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ 365 + #define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ 366 + 367 + /** struct dccp_ackpkts - acknowledgeable packets 368 + * 369 + * This data structure is the one defined in the DCCP draft 370 + * Appendix A. 371 + * 372 + * @dccpap_buf_head - circular buffer head 373 + * @dccpap_buf_tail - circular buffer tail 374 + * @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head) 375 + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 376 + * 377 + * Additionally, the HC-Receiver must keep some information about the 378 + * Ack Vectors it has recently sent. For each packet sent carrying an 379 + * Ack Vector, it remembers four variables: 380 + * 381 + * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) 382 + * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. 383 + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) 384 + * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 
385 + * 386 + * @dccpap_buf_len - circular buffer length 387 + * @dccpap_buf - circular buffer of acknowledgeable packets 388 + */ 389 + struct dccp_ackpkts { 390 + unsigned int dccpap_buf_head; 391 + unsigned int dccpap_buf_tail; 392 + u64 dccpap_buf_ackno; 393 + u64 dccpap_ack_seqno; 394 + u64 dccpap_ack_ackno; 395 + unsigned int dccpap_ack_ptr; 396 + unsigned int dccpap_buf_vector_len; 397 + unsigned int dccpap_ack_vector_len; 398 + unsigned int dccpap_buf_len; 399 + unsigned long dccpap_time; 400 + u8 dccpap_buf_nonce; 401 + u8 dccpap_ack_nonce; 402 + u8 dccpap_buf[0]; 403 + }; 404 + 405 + extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); 406 + extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); 407 + extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); 408 + extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, 409 + struct sock *sk, u64 ackno); 410 + 411 + #ifdef DCCP_DEBUG 412 + extern void dccp_ackvector_print(const u64 ackno, 413 + const unsigned char *vector, int len); 414 + extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); 415 + #else 416 + static inline void dccp_ackvector_print(const u64 ackno, 417 + const unsigned char *vector, 418 + int len) { } 419 + static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } 420 + #endif 421 + 422 + #endif /* _DCCP_H */
+510
net/dccp/input.c
··· 1 + /* 2 + * net/dccp/input.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 + * 7 + * This program is free software; you can redistribute it and/or 8 + * modify it under the terms of the GNU General Public License 9 + * as published by the Free Software Foundation; either version 10 + * 2 of the License, or (at your option) any later version. 11 + */ 12 + 13 + #include <linux/config.h> 14 + #include <linux/dccp.h> 15 + #include <linux/skbuff.h> 16 + 17 + #include <net/sock.h> 18 + 19 + #include "ccid.h" 20 + #include "dccp.h" 21 + 22 + static void dccp_fin(struct sock *sk, struct sk_buff *skb) 23 + { 24 + sk->sk_shutdown |= RCV_SHUTDOWN; 25 + sock_set_flag(sk, SOCK_DONE); 26 + __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); 27 + __skb_queue_tail(&sk->sk_receive_queue, skb); 28 + skb_set_owner_r(skb, sk); 29 + sk->sk_data_ready(sk, 0); 30 + } 31 + 32 + static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) 33 + { 34 + switch (sk->sk_state) { 35 + case DCCP_PARTOPEN: 36 + case DCCP_OPEN: 37 + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); 38 + dccp_fin(sk, skb); 39 + dccp_set_state(sk, DCCP_CLOSED); 40 + break; 41 + } 42 + } 43 + 44 + static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) 45 + { 46 + /* 47 + * Step 7: Check for unexpected packet types 48 + * If (S.is_server and P.type == CloseReq) 49 + * Send Sync packet acknowledging P.seqno 50 + * Drop packet and return 51 + */ 52 + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { 53 + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); 54 + return; 55 + } 56 + 57 + switch (sk->sk_state) { 58 + case DCCP_PARTOPEN: 59 + case DCCP_OPEN: 60 + dccp_set_state(sk, DCCP_CLOSING); 61 + dccp_send_close(sk); 62 + break; 63 + } 64 + } 65 + 66 + static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) 67 + { 68 + struct dccp_sock *dp = dccp_sk(sk); 69 + 70 + if (dp->dccps_options.dccpo_send_ack_vector) 71 + 
dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, 72 + DCCP_SKB_CB(skb)->dccpd_ack_seq); 73 + } 74 + 75 + static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) 76 + { 77 + const struct dccp_hdr *dh = dccp_hdr(skb); 78 + struct dccp_sock *dp = dccp_sk(sk); 79 + u64 lswl = dp->dccps_swl; 80 + u64 lawl = dp->dccps_awl; 81 + 82 + /* 83 + * Step 5: Prepare sequence numbers for Sync 84 + * If P.type == Sync or P.type == SyncAck, 85 + * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, 86 + * / * P is valid, so update sequence number variables 87 + * accordingly. After this update, P will pass the tests 88 + * in Step 6. A SyncAck is generated if necessary in 89 + * Step 15 * / 90 + * Update S.GSR, S.SWL, S.SWH 91 + * Otherwise, 92 + * Drop packet and return 93 + */ 94 + if (dh->dccph_type == DCCP_PKT_SYNC || 95 + dh->dccph_type == DCCP_PKT_SYNCACK) { 96 + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && 97 + !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) 98 + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); 99 + else 100 + return -1; 101 + /* 102 + * Step 6: Check sequence numbers 103 + * Let LSWL = S.SWL and LAWL = S.AWL 104 + * If P.type == CloseReq or P.type == Close or P.type == Reset, 105 + * LSWL := S.GSR + 1, LAWL := S.GAR 106 + * If LSWL <= P.seqno <= S.SWH 107 + * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), 108 + * Update S.GSR, S.SWL, S.SWH 109 + * If P.type != Sync, 110 + * Update S.GAR 111 + * Otherwise, 112 + * Send Sync packet acknowledging P.seqno 113 + * Drop packet and return 114 + */ 115 + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || 116 + dh->dccph_type == DCCP_PKT_CLOSE || 117 + dh->dccph_type == DCCP_PKT_RESET) { 118 + lswl = dp->dccps_gsr; 119 + dccp_inc_seqno(&lswl); 120 + lawl = dp->dccps_gar; 121 + } 122 + 123 + if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && 124 + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || 125 + 
between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { 126 + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); 127 + 128 + if (dh->dccph_type != DCCP_PKT_SYNC && 129 + DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 130 + dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; 131 + } else { 132 + dccp_pr_debug("Step 6 failed, sending SYNC...\n"); 133 + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); 134 + return -1; 135 + } 136 + 137 + return 0; 138 + } 139 + 140 + int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, 141 + const struct dccp_hdr *dh, const unsigned len) 142 + { 143 + struct dccp_sock *dp = dccp_sk(sk); 144 + 145 + if (dccp_check_seqno(sk, skb)) 146 + goto discard; 147 + 148 + if (dccp_parse_options(sk, skb)) 149 + goto discard; 150 + 151 + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) 152 + dccp_event_ack_recv(sk, skb); 153 + 154 + /* 155 + * FIXME: check ECN to see if we should use 156 + * DCCP_ACKPKTS_STATE_ECN_MARKED 157 + */ 158 + if (dp->dccps_options.dccpo_send_ack_vector) { 159 + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; 160 + 161 + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, 162 + DCCP_SKB_CB(skb)->dccpd_seq, 163 + DCCP_ACKPKTS_STATE_RECEIVED)) { 164 + LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); 165 + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; 166 + inet_csk_schedule_ack(sk); 167 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); 168 + goto discard; 169 + } 170 + 171 + /* 172 + * FIXME: this activation is probably wrong, have to study more 173 + * TCP delack machinery and how it fits into DCCP draft, but 174 + * for now it kinda "works" 8) 175 + */ 176 + if (!inet_csk_ack_scheduled(sk)) { 177 + inet_csk_schedule_ack(sk); 178 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); 179 + } 180 + } 181 + 182 + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); 183 + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, 
sk, skb); 184 + 185 + switch (dccp_hdr(skb)->dccph_type) { 186 + case DCCP_PKT_DATAACK: 187 + case DCCP_PKT_DATA: 188 + /* 189 + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option 190 + * if it is. 191 + */ 192 + __skb_pull(skb, dh->dccph_doff * 4); 193 + __skb_queue_tail(&sk->sk_receive_queue, skb); 194 + skb_set_owner_r(skb, sk); 195 + sk->sk_data_ready(sk, 0); 196 + return 0; 197 + case DCCP_PKT_ACK: 198 + goto discard; 199 + case DCCP_PKT_RESET: 200 + /* 201 + * Step 9: Process Reset 202 + * If P.type == Reset, 203 + * Tear down connection 204 + * S.state := TIMEWAIT 205 + * Set TIMEWAIT timer 206 + * Drop packet and return 207 + */ 208 + dccp_fin(sk, skb); 209 + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); 210 + return 0; 211 + case DCCP_PKT_CLOSEREQ: 212 + dccp_rcv_closereq(sk, skb); 213 + goto discard; 214 + case DCCP_PKT_CLOSE: 215 + dccp_rcv_close(sk, skb); 216 + return 0; 217 + case DCCP_PKT_REQUEST: 218 + /* Step 7 219 + * or (S.is_server and P.type == Response) 220 + * or (S.is_client and P.type == Request) 221 + * or (S.state >= OPEN and P.type == Request 222 + * and P.seqno >= S.OSR) 223 + * or (S.state >= OPEN and P.type == Response 224 + * and P.seqno >= S.OSR) 225 + * or (S.state == RESPOND and P.type == Data), 226 + * Send Sync packet acknowledging P.seqno 227 + * Drop packet and return 228 + */ 229 + if (dp->dccps_role != DCCP_ROLE_LISTEN) 230 + goto send_sync; 231 + goto check_seq; 232 + case DCCP_PKT_RESPONSE: 233 + if (dp->dccps_role != DCCP_ROLE_CLIENT) 234 + goto send_sync; 235 + check_seq: 236 + if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { 237 + send_sync: 238 + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); 239 + } 240 + break; 241 + } 242 + 243 + DCCP_INC_STATS_BH(DCCP_MIB_INERRS); 244 + discard: 245 + __kfree_skb(skb); 246 + return 0; 247 + } 248 + 249 + static int dccp_rcv_request_sent_state_process(struct sock *sk, 250 + struct sk_buff *skb, 251 + const struct dccp_hdr *dh, 252 + const unsigned len) 
253 + { 254 + /* 255 + * Step 4: Prepare sequence numbers in REQUEST 256 + * If S.state == REQUEST, 257 + * If (P.type == Response or P.type == Reset) 258 + * and S.AWL <= P.ackno <= S.AWH, 259 + * / * Set sequence number variables corresponding to the 260 + * other endpoint, so P will pass the tests in Step 6 * / 261 + * Set S.GSR, S.ISR, S.SWL, S.SWH 262 + * / * Response processing continues in Step 10; Reset 263 + * processing continues in Step 9 * / 264 + */ 265 + if (dh->dccph_type == DCCP_PKT_RESPONSE) { 266 + const struct inet_connection_sock *icsk = inet_csk(sk); 267 + struct dccp_sock *dp = dccp_sk(sk); 268 + 269 + /* Stop the REQUEST timer */ 270 + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 271 + BUG_TRAP(sk->sk_send_head != NULL); 272 + __kfree_skb(sk->sk_send_head); 273 + sk->sk_send_head = NULL; 274 + 275 + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { 276 + dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", 277 + dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); 278 + goto out_invalid_packet; 279 + } 280 + 281 + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; 282 + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); 283 + 284 + if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || 285 + ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { 286 + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); 287 + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); 288 + /* FIXME: send appropriate RESET code */ 289 + goto out_invalid_packet; 290 + } 291 + 292 + dccp_sync_mss(sk, dp->dccps_pmtu_cookie); 293 + 294 + /* 295 + * Step 10: Process REQUEST state (second part) 296 + * If S.state == REQUEST, 297 + * / * If we get here, P is a valid Response from the server (see 298 + * Step 4), and we should move to PARTOPEN state. 
PARTOPEN 299 + * means send an Ack, don't send Data packets, retransmit 300 + * Acks periodically, and always include any Init Cookie from 301 + * the Response * / 302 + * S.state := PARTOPEN 303 + * Set PARTOPEN timer 304 + * Continue with S.state == PARTOPEN 305 + * / * Step 12 will send the Ack completing the three-way 306 + * handshake * / 307 + */ 308 + dccp_set_state(sk, DCCP_PARTOPEN); 309 + 310 + /* Make sure socket is routed, for correct metrics. */ 311 + inet_sk_rebuild_header(sk); 312 + 313 + if (!sock_flag(sk, SOCK_DEAD)) { 314 + sk->sk_state_change(sk); 315 + sk_wake_async(sk, 0, POLL_OUT); 316 + } 317 + 318 + if (sk->sk_write_pending || icsk->icsk_ack.pingpong || 319 + icsk->icsk_accept_queue.rskq_defer_accept) { 320 + /* Save one ACK. Data will be ready after 321 + * several ticks, if write_pending is set. 322 + * 323 + * It may be deleted, but with this feature tcpdumps 324 + * look so _wonderfully_ clever, that I was not able 325 + * to stand against the temptation 8) --ANK 326 + */ 327 + /* 328 + * OK, in DCCP we can as well do a similar trick, its 329 + * even in the draft, but there is no need for us to 330 + * schedule an ack here, as dccp_sendmsg does this for 331 + * us, also stated in the draft. -acme 332 + */ 333 + __kfree_skb(skb); 334 + return 0; 335 + } 336 + dccp_send_ack(sk); 337 + return -1; 338 + } 339 + 340 + out_invalid_packet: 341 + return 1; /* dccp_v4_do_rcv will send a reset, but... 
	      FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */
}

/*
 * Handle a packet arriving on a socket in RESPOND or PARTOPEN state.
 * A Reset clears the delayed-ack timer; an Ack or DataAck completes the
 * handshake and moves the socket to OPEN.  Returns 1 if the skb was queued
 * (by dccp_rcv_established, for a DataAck), 0 otherwise.
 */
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
						   struct sk_buff *skb,
						   const struct dccp_hdr *dh,
						   const unsigned len)
{
	int queued = 0;

	switch (dh->dccph_type) {
	case DCCP_PKT_RESET:
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		break;
	case DCCP_PKT_DATAACK:
	case DCCP_PKT_ACK:
		/*
		 * FIXME: we should be reseting the PARTOPEN (DELACK) timer here,
		 * but only if we haven't used the DELACK timer for something else,
		 * like sending a delayed ack for a TIMESTAMP echo, etc, for now
		 * were not clearing it, sending an extra ACK when there is nothing
		 * else to do in DELACK is not a big deal after all.
		 */

		/* Stop the PARTOPEN timer */
		if (sk->sk_state == DCCP_PARTOPEN)
			inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);

		dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
		dccp_set_state(sk, DCCP_OPEN);

		if (dh->dccph_type == DCCP_PKT_DATAACK) {
			dccp_rcv_established(sk, skb, dh, len);
			queued = 1; /* packet was queued (by dccp_rcv_established) */
		}
		break;
	}

	return queued;
}

/*
 * Per-state receive processing for sockets that are not in the OPEN fast
 * path (see dccp_v4_do_rcv).  Implements Steps 7-9 of the draft's packet
 * processing pseudocode plus per-state dispatch.
 *
 * Returns 0 when the skb has been consumed (queued or freed); a non-zero
 * return tells the caller to send a Reset (see the `reset:' label in
 * dccp_v4_do_rcv).
 */
int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			   struct dccp_hdr *dh, unsigned len)
{
	struct dccp_sock *dp = dccp_sk(sk);
	const int old_state = sk->sk_state;
	int queued = 0;

	if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) {
		if (dccp_check_seqno(sk, skb))
			goto discard;

		/*
		 * Step 8: Process options and mark acknowledgeable
		 */
		if (dccp_parse_options(sk, skb))
			goto discard;

		if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
			dccp_event_ack_recv(sk, skb);

		/* Let both half-connection CCIDs see the packet */
		ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
		ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);

		/*
		 * FIXME: check ECN to see if we should use
		 * DCCP_ACKPKTS_STATE_ECN_MARKED
		 */
		if (dp->dccps_options.dccpo_send_ack_vector) {
			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
					     DCCP_SKB_CB(skb)->dccpd_seq,
					     DCCP_ACKPKTS_STATE_RECEIVED))
				goto discard;
			/*
			 * FIXME: this activation is probably wrong, have to study more
			 * TCP delack machinery and how it fits into DCCP draft, but
			 * for now it kinda "works" 8)
			 */
			if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 &&
			    !inet_csk_ack_scheduled(sk)) {
				inet_csk_schedule_ack(sk);
				inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
			}
		}
	}

	/*
	 * Step 9: Process Reset
	 *	If P.type == Reset,
	 *		Tear down connection
	 *		S.state := TIMEWAIT
	 *		Set TIMEWAIT timer
	 *		Drop packet and return
	 */
	if (dh->dccph_type == DCCP_PKT_RESET) {
		/* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */
		dccp_fin(sk, skb);
		dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
		return 0;
	/*
	 * Step 7: Check for unexpected packet types
	 *	If (S.is_server and P.type == CloseReq)
	 *	    or (S.is_server and P.type == Response)
	 *	    or (S.is_client and P.type == Request)
	 *	    or (S.state == RESPOND and P.type == Data),
	 *		Send Sync packet acknowledging P.seqno
	 *		Drop packet and return
	 */
	} else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
		    (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
		   (dp->dccps_role == DCCP_ROLE_CLIENT &&
		    dh->dccph_type == DCCP_PKT_REQUEST) ||
		   (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
		dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
		goto discard;
	}

	switch (sk->sk_state) {
	case DCCP_CLOSED:
		return 1;	/* ask the caller to reset */

	case DCCP_LISTEN:
		if (dh->dccph_type == DCCP_PKT_ACK ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			return 1;

		if (dh->dccph_type == DCCP_PKT_RESET)
			goto discard;

		if (dh->dccph_type == DCCP_PKT_REQUEST) {
			if (dccp_v4_conn_request(sk, skb) < 0)
				return 1;

			/* FIXME: do congestion control initialization */
			goto discard;
		}
		goto discard;

	case DCCP_REQUESTING:
		/* FIXME: do congestion control initialization */

		queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
		if (queued >= 0)
			return queued;

		/* negative return means "free the skb here and return 0" */
		__kfree_skb(skb);
		return 0;

	case DCCP_RESPOND:
	case DCCP_PARTOPEN:
		queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len);
		break;
	}

	if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) {
		switch (old_state) {
		case DCCP_PARTOPEN:
			/* Handshake just completed: wake up writers */
			sk->sk_state_change(sk);
			sk_wake_async(sk, 0, POLL_OUT);
			break;
		}
	}

	if (!queued) {
discard:
		__kfree_skb(skb);
	}
	return 0;
}
+1289
net/dccp/ipv4.c
/*
 *  net/dccp/ipv4.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/icmp.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/random.h>

#include <net/icmp.h>
#include <net/inet_hashtables.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/xfrm.h>

#include "ccid.h"
#include "dccp.h"

/* All DCCP sockets live in this inet_hashinfo instance (listening,
 * bound, established and time-wait chains). */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
	.portalloc_lock	= SPIN_LOCK_UNLOCKED,
	.port_rover	= 1024 - 1,
};

/* Thin wrappers routing the generic inet hashing helpers at dccp_hashinfo */
static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
{
	return inet_csk_get_port(&dccp_hashinfo, sk, snum);
}

static void dccp_v4_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

static void dccp_v4_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

/*
 * Check that the 4-tuple (saddr, sport, daddr, dport) about to be used for a
 * connect is unique, and if so insert the socket into the established hash.
 * Returns 0 on success (and hands back any colliding time-wait socket via
 * *twp when twp != NULL), -EADDRNOTAVAIL if the identity is already taken.
 *
 * called with local bh disabled
 */
static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
				       struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const u32 daddr = inet->rcv_saddr;
	const u32 saddr = inet->daddr;
	const int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr)
	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport,
				      dccp_hashinfo.ehash_size);
	struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
	const struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
		tw = inet_twsk(sk2);

		if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
			goto not_unique;
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity. */
	inet->num = lport;
	inet->sport = htons(lport);
	sk->sk_hashent = hash;
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp != NULL) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw != NULL) {
		/* Silly. Should hash-dance instead... */
		dccp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}

	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}

/*
 * Bind a port for a connect operation and hash it.
 */
static int dccp_v4_hash_connect(struct sock *sk)
{
	const unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (snum == 0) {
		/* No local port yet: search the ephemeral range for one whose
		 * resulting 4-tuple is unique. */
		int rover;
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();

		/* TODO. Actually it is not so bad idea to remove
		 * dccp_hashinfo.portalloc_lock before next submission to Linus.
		 * As soon as we touch this place at all it is time to think.
		 *
		 * Now it protects single _advisory_ variable dccp_hashinfo.port_rover,
		 * hence it is mostly useless.
		 * Code will work nicely if we just delete it, but
		 * I am afraid in contented case it will work not better or
		 * even worse: another cpu just will hit the same bucket
		 * and spin there.
		 * So some cpu salt could remove both contention and
		 * memory pingpong. Any ideas how to do this in a nice way?
		 */
		spin_lock(&dccp_hashinfo.portalloc_lock);
		rover = dccp_hashinfo.port_rover;

		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == rover) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__dccp_v4_check_established(sk,
									 rover,
									 &tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover);
			if (tb == NULL) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		/* All locks still held and bhs disabled */
		dccp_hashinfo.port_rover = rover;
		spin_unlock(&dccp_hashinfo.portalloc_lock);

		inet_bind_hash(sk, tb, rover);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(rover);
			__inet_hash(&dccp_hashinfo, sk, 0);
		}
		spin_unlock(&head->lock);

		if (tw != NULL) {
			/* A colliding time-wait socket was recycled above */
			dccp_tw_deschedule(tw);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	/* Port already bound: either we are the only owner of the bind
	 * bucket, or we must prove 4-tuple uniqueness the hard way. */
	head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);
	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
		__inet_hash(&dccp_hashinfo, sk, 0);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __dccp_v4_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}

/*
 * Connect the socket: resolve a route to the destination, pick a local
 * port, choose the initial sequence number and kick off the handshake.
 * This is the DCCP counterpart of tcp_v4_connect.
 */
static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
			   int addr_len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct dccp_sock *dp = dccp_sk(sk);
	const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct rtable *rt;
	u32 daddr, nexthop;
	int tmp;
	int err;

	dp->dccps_role = DCCP_ROLE_CLIENT;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	if (inet->opt != NULL && inet->opt->srr) {
		/* Source routing: first hop comes from the option */
		if (daddr == 0)
			return -EINVAL;
		nexthop = inet->opt->faddr;
	}

	tmp = ip_route_connect(&rt, nexthop, inet->saddr,
			       RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			       IPPROTO_DCCP,
			       inet->sport, usin->sin_port, sk);
	if (tmp < 0)
		return tmp;

	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (inet->opt == NULL || !inet->opt->srr)
		daddr = rt->rt_dst;

	if (inet->saddr == 0)
		inet->saddr = rt->rt_src;
	inet->rcv_saddr = inet->saddr;

	inet->dport = usin->sin_port;
	inet->daddr = daddr;

	dp->dccps_ext_header_len = 0;
	if (inet->opt != NULL)
		dp->dccps_ext_header_len = inet->opt->optlen;
	/*
	 * Socket identity is still unknown (sport may be zero).
	 * However we set state to DCCP_REQUESTING and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	dccp_set_state(sk, DCCP_REQUESTING);
	err = dccp_v4_hash_connect(sk);
	if (err != 0)
		goto failure;

	err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
	if (err != 0)
		goto failure;

	/* OK, now commit destination to socket.  */
	sk_setup_caps(sk, &rt->u.dst);

	dp->dccps_gar =
		dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
							    inet->daddr,
							    inet->sport,
							    usin->sin_port);
	dccp_update_gss(sk, dp->dccps_iss);

	inet->id = dp->dccps_iss ^ jiffies;

	err = dccp_connect(sk);
	rt = NULL;
	if (err != 0)
		goto failure;
out:
	return err;
failure:
	/* This unhashes the socket and releases the local port, if necessary. */
	dccp_set_state(sk, DCCP_CLOSED);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->dport = 0;
	goto out;
}

/*
 * This routine does path mtu discovery as defined in RFC1191.
 */
static inline void dccp_do_pmtu_discovery(struct sock *sk,
					  const struct iphdr *iph,
					  u32 mtu)
{
	struct dst_entry *dst;
	const struct inet_sock *inet = inet_sk(sk);
	const struct dccp_sock *dp = dccp_sk(sk);

	/* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
	 * send out by Linux are always < 576bytes so they should go through
	 * unfragmented).
	 */
	if (sk->sk_state == DCCP_LISTEN)
		return;

	/* We don't check in the destentry if pmtu discovery is forbidden
	 * on this route. We just assume that no packet_to_big packets
	 * are send back when pmtu discovery is not active.
	 * There is a small race when the user changes this flag in the
	 * route, but I think that's acceptable.
	 */
	if ((dst = __sk_dst_check(sk, 0)) == NULL)
		return;

	dst->ops->update_pmtu(dst, mtu);

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    dp->dccps_pmtu_cookie > mtu) {
		dccp_sync_mss(sk, mtu);

		/*
		 * From: draft-ietf-dccp-spec-11.txt
		 *
		 *	DCCP-Sync packets are the best choice for upward probing,
		 *	since DCCP-Sync probes do not risk application data loss.
		 */
		dccp_send_sync(sk, dp->dccps_gsr);
	} /* else let the usual retransmit timer handle it */
}

/*
 * Send a stand-alone Ack for a received packet on behalf of the control
 * socket (used when no full socket exists for the flow, e.g. replying for
 * a request_sock).  Addresses and ports are mirrored from the received skb.
 */
static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
{
	int err;
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_ack_bits);
	struct sk_buff *skb;

	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
		return;

	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
	if (skb == NULL)
		return;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);

	skb->dst = dst_clone(rxskb->dst);

	skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_ack_len);

	/* Build DCCP header and checksum it. */
	dh->dccph_type	= DCCP_PKT_ACK;
	dh->dccph_sport = rxdh->dccph_dport;
	dh->dccph_dport = rxdh->dccph_sport;
	dh->dccph_doff	= dccp_hdr_ack_len / 4;
	dh->dccph_x	= 1;

	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);

	bh_lock_sock(dccp_ctl_socket->sk);
	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
				    rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
	bh_unlock_sock(dccp_ctl_socket->sk);

	if (err == NET_XMIT_CN || err == 0) {
		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
	}
}

static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	dccp_v4_ctl_send_ack(skb);
}

/*
 * Send a Response for a connection request.  Routes the request if no dst
 * was supplied.  Returns 0 on success, negative on failure.
 */
static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
				 struct dst_entry *dst)
{
	int err = -1;
	struct sk_buff *skb;

	/* First, grab a route. */

	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto out;

	skb = dccp_make_response(sk, dst, req);
	if (skb != NULL) {
		const struct inet_request_sock *ireq = inet_rsk(req);

		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		if (err == NET_XMIT_CN)
			err = 0;
	}

out:
	dst_release(dst);
	return err;
}

/*
 * This routine is called by the ICMP module when it gets some sort of error
 * condition. If err < 0 then the socket should be closed and the error
 * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
 * After adjustment header points to the first 8 bytes of the tcp header. We
 * need to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic".
When someone else
 * accesses the socket the ICMP is just dropped and for some paths there is no
 * check at all. A more general error queue to queue errors for later handling
 * is probably better.
 */
void dccp_v4_err(struct sk_buff *skb, u32 info)
{
	const struct iphdr *iph = (struct iphdr *)skb->data;
	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2));
	struct dccp_sock *dp;
	struct inet_sock *inet;
	const int type = skb->h.icmph->type;
	const int code = skb->h.icmph->code;
	struct sock *sk;
	__u64 seq;
	int err;

	/* Need at least the IP header plus 8 bytes of the offending packet */
	if (skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
			 iph->saddr, dh->dccph_sport, inet_iif(skb));
	if (sk == NULL) {
		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == DCCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 */
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == DCCP_CLOSED)
		goto out;

	dp = dccp_sk(sk);
	seq = dccp_hdr_seq(skb);
	if (sk->sk_state != DCCP_LISTEN &&
	    !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
		NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			if (!sock_owned_by_user(sk))
				dccp_do_pmtu_discovery(sk, iph, info);
			goto out;
		}

		err = icmp_err_convert[code].errno;
		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case DCCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;
		req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/*
		 * ICMPs are not backlogged, hence we cannot get an established
		 * socket here.
		 */
		BUG_TRAP(!req->sk);

		if (seq != dccp_rsk(req)->dreq_iss) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}
		/*
		 * Still in RESPOND, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case DCCP_REQUESTING:
	case DCCP_RESPOND:
		if (!sock_owned_by_user(sk)) {
			DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;

			sk->sk_error_report(sk);

			dccp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 * --ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else /* Only an error on timeout */
		sk->sk_err_soft = err;
out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code);

/*
 * Send a Reset with the given reset code on an existing (hashed) socket,
 * then shut down both half-connection CCIDs.
 */
int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
{
	struct sk_buff *skb;
	/*
	 * FIXME: what if rebuild_header fails?
	 * Should we be doing a rebuild_header here?
	 */
	int err = inet_sk_rebuild_header(sk);

	if (err != 0)
		return err;

	skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
	if (skb != NULL) {
		const struct dccp_sock *dp = dccp_sk(sk);
		const struct inet_sock *inet = inet_sk(sk);

		err = ip_build_and_send_pkt(skb, sk,
					    inet->saddr, inet->daddr, NULL);
		if (err == NET_XMIT_CN)
			err = 0;

		ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
		ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
	}

	return err;
}

/* Initial sequence number for a connection, derived from the flow's
 * addresses and ports (same scheme as TCP's secure ISN generation). */
static inline u64 dccp_v4_init_sequence(const struct sock *sk,
					const struct sk_buff *skb)
{
	return secure_dccp_sequence_number(skb->nh.iph->daddr,
					   skb->nh.iph->saddr,
					   dccp_hdr(skb)->dccph_dport,
					   dccp_hdr(skb)->dccph_sport);
}

/*
 * Handle a Request on a listening socket: allocate a request_sock, send a
 * Response and queue the request awaiting the final Ack.  Returns 0 on
 * success, -1 on drop.
 */
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct inet_request_sock *ireq;
	struct dccp_sock dp;
	struct
	       request_sock *req;
	struct dccp_request_sock *dreq;
	const __u32 saddr = skb->nh.iph->saddr;
	const __u32 daddr = skb->nh.iph->daddr;
	struct dst_entry *dst = NULL;

	/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
	if (((struct rtable *)skb->dst)->rt_flags &
	    (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	/*
	 * TW buckets are converted to open requests without
	 * limitations, they conserve resources and peer is
	 * evidently real one.
	 */
	if (inet_csk_reqsk_queue_is_full(sk))
		goto drop;

	/*
	 * Accept backlog is full. If we have already queued enough
	 * of warm entries in syn queue, drop request. It is better than
	 * clogging syn queue with openreqs with exponentially increasing
	 * timeout.
	 */
	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(sk->sk_prot->rsk_prot);
	if (req == NULL)
		goto drop;

	/* FIXME: process options */

	dccp_openreq_init(req, &dp, skb);

	ireq = inet_rsk(req);
	ireq->loc_addr = daddr;
	ireq->rmt_addr = saddr;
	/* FIXME: Merge Aristeu's option parsing code when ready */
	req->rcv_wnd = 100; /* Fake, option parsing will get the right value */
	ireq->opt = NULL;

	/*
	 * Step 3: Process LISTEN state
	 *
	 *	Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *
	 * In fact we defer setting S.GSR, S.SWL, S.SWH to
	 * dccp_create_openreq_child.
	 */
	dreq = dccp_rsk(req);
	dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
	dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
	dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;

	if (dccp_v4_send_response(sk, req, dst))
		goto drop_and_free;

	inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
	return 0;

drop_and_free:
	/*
	 * FIXME: should be reqsk_free after implementing req->rsk_ops
	 */
	__reqsk_free(req);
drop:
	DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
	return -1;
}

/*
 * The three way handshake has completed - we got a valid ACK or DATAACK -
 * now create the new socket.
 *
 * This is the equivalent of TCP's tcp_v4_syn_recv_sock
 */
struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
				       struct request_sock *req,
				       struct dst_entry *dst)
{
	struct inet_request_sock *ireq;
	struct inet_sock *newinet;
	struct dccp_sock *newdp;
	struct sock *newsk;

	if (sk_acceptq_is_full(sk))
		goto exit_overflow;

	if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
		goto exit;

	newsk = dccp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto exit;

	sk_setup_caps(newsk, dst);

	/* Copy the flow identity from the request into the child socket */
	newdp = dccp_sk(newsk);
	newinet = inet_sk(newsk);
	ireq = inet_rsk(req);
	newinet->daddr = ireq->rmt_addr;
	newinet->rcv_saddr = ireq->loc_addr;
	newinet->saddr = ireq->loc_addr;
	newinet->opt = ireq->opt;
	ireq->opt = NULL;	/* ownership moved to newinet */
	newinet->mc_index = inet_iif(skb);
	newinet->mc_ttl = skb->nh.iph->ttl;
	newinet->id = jiffies;

	dccp_sync_mss(newsk, dst_mtu(dst));

	__inet_hash(&dccp_hashinfo, newsk, 0);
	__inet_inherit_port(&dccp_hashinfo, sk, newsk);

	return newsk;

exit_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
exit:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	dst_release(dst);
	return NULL;
}

/*
 * Find the socket that should handle a packet arriving on a listening
 * socket: a pending request_sock, an already-established child, or the
 * listener itself.  Returns NULL when the packet should be discarded.
 */
static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	const struct dccp_hdr *dh = dccp_hdr(skb);
	const struct iphdr *iph = skb->nh.iph;
	struct sock *nsk;
	struct request_sock **prev;
	/* Find possible connection requests. */
	struct request_sock *req = inet_csk_search_req(sk, &prev,
						       dh->dccph_sport,
						       iph->saddr, iph->daddr);
	if (req != NULL)
		return dccp_check_req(sk, skb, req, prev);

	nsk = __inet_lookup_established(&dccp_hashinfo,
					iph->saddr, dh->dccph_sport,
					iph->daddr, ntohs(dh->dccph_dport),
					inet_iif(skb));
	if (nsk != NULL) {
		if (nsk->sk_state != DCCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

	return sk;
}

/*
 * Compute the DCCP checksum over the part of the packet selected by the
 * Checksum Coverage field (dccph_cscov == 0 means the whole packet).
 */
int dccp_v4_checksum(struct sk_buff *skb)
{
	struct dccp_hdr* dh = dccp_hdr(skb);
	int checksum_len;
	u32 tmp;

	if (dh->dccph_cscov == 0)
		checksum_len = skb->len;
	else {
		checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
		checksum_len = checksum_len < skb->len ?
			       checksum_len : skb->len;
	}

	tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
	return csum_fold(tmp);
}

/*
 * Verify the checksum of a received packet by recomputing it with the
 * checksum field zeroed.  Returns 0 if it matches, -1 otherwise.
 */
static int dccp_v4_verify_checksum(struct sk_buff *skb)
{
	struct dccp_hdr *th = dccp_hdr(skb);
	const u16 remote_checksum = th->dccph_checksum;
	u16 local_checksum;

	/* FIXME: don't mess with skb payload */
	th->dccph_checksum = 0;	/* zero it for computation */

	local_checksum = dccp_v4_checksum(skb);

	/* FIXME: don't mess with skb payload */
	th->dccph_checksum = remote_checksum;	/* put it back */

	return remote_checksum == local_checksum ? 0 : -1;
}

/*
 * Build a route for a reply to rxskb by swapping its source/destination
 * addresses and ports.  Returns NULL if no route could be found.
 */
static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
					   struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
			    .nl_u = { .ip4_u =
				      { .daddr = skb->nh.iph->saddr,
					.saddr = skb->nh.iph->daddr,
					.tos = RT_CONN_FLAGS(sk) } },
			    .proto = sk->sk_protocol,
			    .uli_u = { .ports =
				       { .sport = dccp_hdr(skb)->dccph_dport,
					 .dport = dccp_hdr(skb)->dccph_sport } } };

	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
		IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
		return NULL;
	}

	return &rt->u.dst;
}

/*
 * Send a Reset on behalf of the control socket in reply to rxskb, for
 * flows that have no full socket.  The reset code is taken from the
 * received skb's control block.
 */
void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
{
	int err;
	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
	const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
				       sizeof(struct dccp_hdr_ext) +
				       sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb;
	struct dst_entry *dst;

	/* Never send a reset in response to a reset. */
	if (rxdh->dccph_type == DCCP_PKT_RESET)
		return;

	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
		return;

	dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
	if (dst == NULL)
		return;

	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
	if (skb == NULL)
		goto out;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);
	skb->dst = dst_clone(dst);

	skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_hdr_reset_len);

	/* Build DCCP header and checksum it. */
	dh->dccph_type	= DCCP_PKT_RESET;
	dh->dccph_sport = rxdh->dccph_dport;
	dh->dccph_dport = rxdh->dccph_sport;
	dh->dccph_doff	= dccp_hdr_reset_len / 4;
	dh->dccph_x	= 1;
	dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code;

	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);

	dh->dccph_checksum = dccp_v4_checksum(skb);

	bh_lock_sock(dccp_ctl_socket->sk);
	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
				    rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
	bh_unlock_sock(dccp_ctl_socket->sk);

	if (err == NET_XMIT_CN || err == 0) {
		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
		DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
	}
out:
	dst_release(dst);
}

/*
 * Main receive dispatch for a socket: OPEN sockets take the fast path,
 * LISTEN sockets go through request/child lookup, everything else goes to
 * dccp_rcv_state_process.  A non-zero return from those tells us to send
 * a Reset (code NO_CONNECTION) and drop the packet.
 */
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct dccp_hdr *dh = dccp_hdr(skb);

	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
		if (dccp_rcv_established(sk, skb, dh, skb->len))
			goto reset;
		return 0;
	}

	/*
	 *  Step 3: Process LISTEN state
	 *     If S.state == LISTEN,
	 *	  If P.type == Request or P contains a valid Init Cookie option,
	 *	     * Must scan the packet's options to check for an Init
	 *		Cookie.  Only the Init Cookie is processed here,
	 *		however; other options are processed in Step 8.  This
	 *		scan need only be performed if the endpoint uses Init
	 *		Cookies *
	 *	     * Generate a new socket and switch to that socket *
	 *	     Set S := new socket for this port pair
	 *	     S.state = RESPOND
	 *	     Choose S.ISS (initial seqno) or set from Init Cookie
	 *	     Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
	 *	     Continue with S.state == RESPOND
	 *	     * A Response packet will be generated in Step 11 *
	 *	  Otherwise,
	 *	     Generate Reset(No Connection) unless P.type == Reset
	 *	     Drop packet and return
	 *
	 * NOTE: the check for the packet types is done in dccp_rcv_state_process
	 */
	if (sk->sk_state == DCCP_LISTEN) {
		struct sock *nsk = dccp_v4_hnd_req(sk, skb);

		if (nsk == NULL)
			goto discard;

		if (nsk != sk) {
			if (dccp_child_process(sk, nsk, skb))
				goto reset;
			return 0;
		}
	}

	if (dccp_rcv_state_process(sk, skb, dh, skb->len))
		goto reset;
	return 0;

reset:
	DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
	dccp_v4_ctl_send_reset(skb);
discard:
	kfree_skb(skb);
	return 0;
}

/*
 * Sanity-check an incoming packet before any socket lookup.  Returns 1
 * (drop) for non-host packets, truncated headers or unknown packet types.
 */
static inline int dccp_invalid_packet(struct sk_buff *skb)
{
	const struct dccp_hdr *dh;

	if (skb->pkt_type != PACKET_HOST)
		return 1;

	if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
		dccp_pr_debug("pskb_may_pull failed\n");
		return 1;
	}

	dh = dccp_hdr(skb);

	/* If the packet type is not understood, drop packet and return */
	if (dh->dccph_type >= DCCP_PKT_INVALID) {
		dccp_pr_debug("invalid packet type\n");
		return 1;
	}

	/*
	 * If P.Data Offset is too small for packet type, or too large for
	 * packet, drop packet and return
1000 + */ 1001 + if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { 1002 + dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); 1003 + return 1; 1004 + } 1005 + 1006 + if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { 1007 + dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); 1008 + return 1; 1009 + } 1010 + 1011 + dh = dccp_hdr(skb); 1012 + 1013 + /* 1014 + * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet 1015 + * has short sequence numbers), drop packet and return 1016 + */ 1017 + if (dh->dccph_x == 0 && 1018 + dh->dccph_type != DCCP_PKT_DATA && 1019 + dh->dccph_type != DCCP_PKT_ACK && 1020 + dh->dccph_type != DCCP_PKT_DATAACK) { 1021 + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", 1022 + dccp_packet_name(dh->dccph_type)); 1023 + return 1; 1024 + } 1025 + 1026 + /* If the header checksum is incorrect, drop packet and return */ 1027 + if (dccp_v4_verify_checksum(skb) < 0) { 1028 + dccp_pr_debug("header checksum is incorrect\n"); 1029 + return 1; 1030 + } 1031 + 1032 + return 0; 1033 + } 1034 + 1035 + /* this is called when real data arrives */ 1036 + int dccp_v4_rcv(struct sk_buff *skb) 1037 + { 1038 + const struct dccp_hdr *dh; 1039 + struct sock *sk; 1040 + int rc; 1041 + 1042 + /* Step 1: Check header basics: */ 1043 + 1044 + if (dccp_invalid_packet(skb)) 1045 + goto discard_it; 1046 + 1047 + dh = dccp_hdr(skb); 1048 + #if 0 1049 + /* 1050 + * Use something like this to simulate some DATA/DATAACK loss to test 1051 + * dccp_ackpkts_add, you'll get something like this on a session that 1052 + * sends 10 DATA/DATAACK packets: 1053 + * 1054 + * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| 1055 + * 1056 + * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet 1057 + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state 1058 + * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet 1059 + * 1060 + * 
So... 1061 + * 1062 + * 281473596467422 was received 1063 + * 281473596467421 was not received 1064 + * 281473596467420 was received 1065 + * 281473596467419 was not received 1066 + * 281473596467418 was received 1067 + * 281473596467417 was not received 1068 + * 281473596467416 was received 1069 + * 281473596467415 was not received 1070 + * 281473596467414 was received 1071 + * 281473596467413 was received (this one was the 3way handshake RESPONSE) 1072 + * 1073 + */ 1074 + if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { 1075 + static int discard = 0; 1076 + 1077 + if (discard) { 1078 + discard = 0; 1079 + goto discard_it; 1080 + } 1081 + discard = 1; 1082 + } 1083 + #endif 1084 + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); 1085 + DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; 1086 + 1087 + dccp_pr_debug("%8.8s " 1088 + "src=%u.%u.%u.%u@%-5d " 1089 + "dst=%u.%u.%u.%u@%-5d seq=%llu", 1090 + dccp_packet_name(dh->dccph_type), 1091 + NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), 1092 + NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), 1093 + DCCP_SKB_CB(skb)->dccpd_seq); 1094 + 1095 + if (dccp_packet_without_ack(skb)) { 1096 + DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; 1097 + dccp_pr_debug_cat("\n"); 1098 + } else { 1099 + DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); 1100 + dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); 1101 + } 1102 + 1103 + /* Step 2: 1104 + * Look up flow ID in table and get corresponding socket */ 1105 + sk = __inet_lookup(&dccp_hashinfo, 1106 + skb->nh.iph->saddr, dh->dccph_sport, 1107 + skb->nh.iph->daddr, ntohs(dh->dccph_dport), 1108 + inet_iif(skb)); 1109 + 1110 + /* 1111 + * Step 2: 1112 + * If no socket ... 
1113 + * Generate Reset(No Connection) unless P.type == Reset 1114 + * Drop packet and return 1115 + */ 1116 + if (sk == NULL) { 1117 + dccp_pr_debug("failed to look up flow ID in table and " 1118 + "get corresponding socket\n"); 1119 + goto no_dccp_socket; 1120 + } 1121 + 1122 + /* 1123 + * Step 2: 1124 + * ... or S.state == TIMEWAIT, 1125 + * Generate Reset(No Connection) unless P.type == Reset 1126 + * Drop packet and return 1127 + */ 1128 + 1129 + if (sk->sk_state == DCCP_TIME_WAIT) { 1130 + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); 1131 + goto discard_and_relse; 1132 + } 1133 + 1134 + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { 1135 + dccp_pr_debug("xfrm4_policy_check failed\n"); 1136 + goto discard_and_relse; 1137 + } 1138 + 1139 + if (sk_filter(sk, skb, 0)) { 1140 + dccp_pr_debug("sk_filter failed\n"); 1141 + goto discard_and_relse; 1142 + } 1143 + 1144 + skb->dev = NULL; 1145 + 1146 + bh_lock_sock(sk); 1147 + rc = 0; 1148 + if (!sock_owned_by_user(sk)) 1149 + rc = dccp_v4_do_rcv(sk, skb); 1150 + else 1151 + sk_add_backlog(sk, skb); 1152 + bh_unlock_sock(sk); 1153 + 1154 + sock_put(sk); 1155 + return rc; 1156 + 1157 + no_dccp_socket: 1158 + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 1159 + goto discard_it; 1160 + /* 1161 + * Step 2: 1162 + * Generate Reset(No Connection) unless P.type == Reset 1163 + * Drop packet and return 1164 + */ 1165 + if (dh->dccph_type != DCCP_PKT_RESET) { 1166 + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; 1167 + dccp_v4_ctl_send_reset(skb); 1168 + } 1169 + 1170 + discard_it: 1171 + /* Discard frame. 
*/ 1172 + kfree_skb(skb); 1173 + return 0; 1174 + 1175 + discard_and_relse: 1176 + sock_put(sk); 1177 + goto discard_it; 1178 + } 1179 + 1180 + static int dccp_v4_init_sock(struct sock *sk) 1181 + { 1182 + struct dccp_sock *dp = dccp_sk(sk); 1183 + static int dccp_ctl_socket_init = 1; 1184 + 1185 + dccp_options_init(&dp->dccps_options); 1186 + 1187 + if (dp->dccps_options.dccpo_send_ack_vector) { 1188 + dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, 1189 + GFP_KERNEL); 1190 + 1191 + if (dp->dccps_hc_rx_ackpkts == NULL) 1192 + return -ENOMEM; 1193 + } 1194 + 1195 + /* 1196 + * FIXME: We're hardcoding the CCID, and doing this at this point makes 1197 + * the listening (master) sock get CCID control blocks, which is not 1198 + * necessary, but for now, to not mess with the test userspace apps, 1199 + * lets leave it here, later the real solution is to do this in a 1200 + * setsockopt(CCIDs-I-want/accept). -acme 1201 + */ 1202 + if (likely(!dccp_ctl_socket_init)) { 1203 + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); 1204 + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); 1205 + if (dp->dccps_hc_rx_ccid == NULL || 1206 + dp->dccps_hc_tx_ccid == NULL) { 1207 + ccid_exit(dp->dccps_hc_rx_ccid, sk); 1208 + ccid_exit(dp->dccps_hc_tx_ccid, sk); 1209 + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); 1210 + dp->dccps_hc_rx_ackpkts = NULL; 1211 + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 1212 + return -ENOMEM; 1213 + } 1214 + } else 1215 + dccp_ctl_socket_init = 0; 1216 + 1217 + dccp_init_xmit_timers(sk); 1218 + sk->sk_state = DCCP_CLOSED; 1219 + dp->dccps_mss_cache = 536; 1220 + dp->dccps_role = DCCP_ROLE_UNDEFINED; 1221 + 1222 + return 0; 1223 + } 1224 + 1225 + int dccp_v4_destroy_sock(struct sock *sk) 1226 + { 1227 + struct dccp_sock *dp = dccp_sk(sk); 1228 + 1229 + /* 1230 + * DCCP doesn't use sk_qrite_queue, just sk_send_head 1231 + * for retransmissions 1232 + */ 1233 + if (sk->sk_send_head != NULL) { 
1234 + kfree_skb(sk->sk_send_head); 1235 + sk->sk_send_head = NULL; 1236 + } 1237 + 1238 + /* Clean up a referenced DCCP bind bucket. */ 1239 + if (inet_csk(sk)->icsk_bind_hash != NULL) 1240 + inet_put_port(&dccp_hashinfo, sk); 1241 + 1242 + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); 1243 + dp->dccps_hc_rx_ackpkts = NULL; 1244 + ccid_exit(dp->dccps_hc_rx_ccid, sk); 1245 + ccid_exit(dp->dccps_hc_tx_ccid, sk); 1246 + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; 1247 + 1248 + return 0; 1249 + } 1250 + 1251 + static void dccp_v4_reqsk_destructor(struct request_sock *req) 1252 + { 1253 + kfree(inet_rsk(req)->opt); 1254 + } 1255 + 1256 + static struct request_sock_ops dccp_request_sock_ops = { 1257 + .family = PF_INET, 1258 + .obj_size = sizeof(struct dccp_request_sock), 1259 + .rtx_syn_ack = dccp_v4_send_response, 1260 + .send_ack = dccp_v4_reqsk_send_ack, 1261 + .destructor = dccp_v4_reqsk_destructor, 1262 + .send_reset = dccp_v4_ctl_send_reset, 1263 + }; 1264 + 1265 + struct proto dccp_v4_prot = { 1266 + .name = "DCCP", 1267 + .owner = THIS_MODULE, 1268 + .close = dccp_close, 1269 + .connect = dccp_v4_connect, 1270 + .disconnect = dccp_disconnect, 1271 + .ioctl = dccp_ioctl, 1272 + .init = dccp_v4_init_sock, 1273 + .setsockopt = dccp_setsockopt, 1274 + .getsockopt = dccp_getsockopt, 1275 + .sendmsg = dccp_sendmsg, 1276 + .recvmsg = dccp_recvmsg, 1277 + .backlog_rcv = dccp_v4_do_rcv, 1278 + .hash = dccp_v4_hash, 1279 + .unhash = dccp_v4_unhash, 1280 + .accept = inet_csk_accept, 1281 + .get_port = dccp_v4_get_port, 1282 + .shutdown = dccp_shutdown, 1283 + .destroy = dccp_v4_destroy_sock, 1284 + .orphan_count = &dccp_orphan_count, 1285 + .max_header = MAX_DCCP_HEADER, 1286 + .obj_size = sizeof(struct dccp_sock), 1287 + .rsk_prot = &dccp_request_sock_ops, 1288 + .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ 1289 + };
+199
net/dccp/minisocks.c
··· 1 + /* 2 + * net/dccp/minisocks.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 + * 7 + * This program is free software; you can redistribute it and/or 8 + * modify it under the terms of the GNU General Public License 9 + * as published by the Free Software Foundation; either version 10 + * 2 of the License, or (at your option) any later version. 11 + */ 12 + 13 + #include <linux/config.h> 14 + #include <linux/dccp.h> 15 + #include <linux/skbuff.h> 16 + #include <linux/timer.h> 17 + 18 + #include <net/sock.h> 19 + #include <net/xfrm.h> 20 + #include <net/inet_timewait_sock.h> 21 + 22 + #include "ccid.h" 23 + #include "dccp.h" 24 + 25 + void dccp_time_wait(struct sock *sk, int state, int timeo) 26 + { 27 + /* FIXME: Implement */ 28 + dccp_pr_debug("Want to help? Start here\n"); 29 + dccp_set_state(sk, state); 30 + } 31 + 32 + /* This is for handling early-kills of TIME_WAIT sockets. */ 33 + void dccp_tw_deschedule(struct inet_timewait_sock *tw) 34 + { 35 + dccp_pr_debug("Want to help? 
Start here\n"); 36 + __inet_twsk_kill(tw, &dccp_hashinfo); 37 + } 38 + 39 + struct sock *dccp_create_openreq_child(struct sock *sk, 40 + const struct request_sock *req, 41 + const struct sk_buff *skb) 42 + { 43 + /* 44 + * Step 3: Process LISTEN state 45 + * 46 + * // Generate a new socket and switch to that socket 47 + * Set S := new socket for this port pair 48 + */ 49 + struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); 50 + 51 + if (newsk != NULL) { 52 + const struct dccp_request_sock *dreq = dccp_rsk(req); 53 + struct inet_connection_sock *newicsk = inet_csk(sk); 54 + struct dccp_sock *newdp = dccp_sk(newsk); 55 + 56 + newdp->dccps_hc_rx_ackpkts = NULL; 57 + newdp->dccps_role = DCCP_ROLE_SERVER; 58 + newicsk->icsk_rto = TCP_TIMEOUT_INIT; 59 + 60 + if (newdp->dccps_options.dccpo_send_ack_vector) { 61 + newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, 62 + GFP_ATOMIC); 63 + /* 64 + * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone 65 + * copied the master sock and left the CCID pointers for this child, 66 + * that is why we do the __ccid_get calls. 
67 + */ 68 + if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) 69 + goto out_free; 70 + } 71 + 72 + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || 73 + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { 74 + dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); 75 + ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); 76 + ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); 77 + out_free: 78 + /* It is still raw copy of parent, so invalidate 79 + * destructor and make plain sk_free() */ 80 + newsk->sk_destruct = NULL; 81 + sk_free(newsk); 82 + return NULL; 83 + } 84 + 85 + __ccid_get(newdp->dccps_hc_rx_ccid); 86 + __ccid_get(newdp->dccps_hc_tx_ccid); 87 + 88 + /* 89 + * Step 3: Process LISTEN state 90 + * 91 + * Choose S.ISS (initial seqno) or set from Init Cookie 92 + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie 93 + */ 94 + 95 + /* See dccp_v4_conn_request */ 96 + newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; 97 + 98 + newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; 99 + dccp_update_gsr(newsk, dreq->dreq_isr); 100 + 101 + newdp->dccps_iss = dreq->dreq_iss; 102 + dccp_update_gss(newsk, dreq->dreq_iss); 103 + 104 + dccp_init_xmit_timers(newsk); 105 + 106 + DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); 107 + } 108 + return newsk; 109 + } 110 + 111 + /* 112 + * Process an incoming packet for RESPOND sockets represented 113 + * as an request_sock. 
114 + */ 115 + struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, 116 + struct request_sock *req, 117 + struct request_sock **prev) 118 + { 119 + struct sock *child = NULL; 120 + 121 + /* Check for retransmitted REQUEST */ 122 + if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { 123 + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { 124 + struct dccp_request_sock *dreq = dccp_rsk(req); 125 + 126 + dccp_pr_debug("Retransmitted REQUEST\n"); 127 + /* Send another RESPONSE packet */ 128 + dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); 129 + dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); 130 + req->rsk_ops->rtx_syn_ack(sk, req, NULL); 131 + } 132 + /* Network Duplicate, discard packet */ 133 + return NULL; 134 + } 135 + 136 + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; 137 + 138 + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && 139 + dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) 140 + goto drop; 141 + 142 + /* Invalid ACK */ 143 + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { 144 + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", 145 + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); 146 + goto drop; 147 + } 148 + 149 + child = dccp_v4_request_recv_sock(sk, skb, req, NULL); 150 + if (child == NULL) 151 + goto listen_overflow; 152 + 153 + /* FIXME: deal with options */ 154 + 155 + inet_csk_reqsk_queue_unlink(sk, req, prev); 156 + inet_csk_reqsk_queue_removed(sk, req); 157 + inet_csk_reqsk_queue_add(sk, req, child); 158 + out: 159 + return child; 160 + listen_overflow: 161 + dccp_pr_debug("listen_overflow!\n"); 162 + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; 163 + drop: 164 + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) 165 + req->rsk_ops->send_reset(skb); 166 + 167 + inet_csk_reqsk_queue_drop(sk, req, prev); 168 + goto out; 169 + } 170 + 171 + /* 172 + * Queue segment on the new socket if the new socket is active, 173 
+ * otherwise we just shortcircuit this and continue with 174 + * the new socket. 175 + */ 176 + int dccp_child_process(struct sock *parent, struct sock *child, 177 + struct sk_buff *skb) 178 + { 179 + int ret = 0; 180 + const int state = child->sk_state; 181 + 182 + if (!sock_owned_by_user(child)) { 183 + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); 184 + 185 + /* Wakeup parent, send SIGIO */ 186 + if (state == DCCP_RESPOND && child->sk_state != state) 187 + parent->sk_data_ready(parent, 0); 188 + } else { 189 + /* Alas, it is possible again, because we do lookup 190 + * in main socket hash table and lock on listening 191 + * socket does not protect us more. 192 + */ 193 + sk_add_backlog(child, skb); 194 + } 195 + 196 + bh_unlock_sock(child); 197 + sock_put(child); 198 + return ret; 199 + }
+763
net/dccp/options.c
··· 1 + /* 2 + * net/dccp/options.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org> 6 + * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License 10 + * as published by the Free Software Foundation; either version 11 + * 2 of the License, or (at your option) any later version. 12 + */ 13 + #include <linux/config.h> 14 + #include <linux/dccp.h> 15 + #include <linux/module.h> 16 + #include <linux/types.h> 17 + #include <linux/kernel.h> 18 + #include <linux/skbuff.h> 19 + 20 + #include "ccid.h" 21 + #include "dccp.h" 22 + 23 + static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, 24 + struct sock *sk, 25 + const u64 ackno, 26 + const unsigned char len, 27 + const unsigned char *vector); 28 + 29 + /* stores the default values for new connection. may be changed with sysctl */ 30 + static const struct dccp_options dccpo_default_values = { 31 + .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, 32 + .dccpo_ccid = DCCPF_INITIAL_CCID, 33 + .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, 34 + .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, 35 + }; 36 + 37 + void dccp_options_init(struct dccp_options *dccpo) 38 + { 39 + memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); 40 + } 41 + 42 + static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) 43 + { 44 + u32 value = 0; 45 + 46 + if (len > 3) 47 + value += *bf++ << 24; 48 + if (len > 2) 49 + value += *bf++ << 16; 50 + if (len > 1) 51 + value += *bf++ << 8; 52 + if (len > 0) 53 + value += *bf; 54 + 55 + return value; 56 + } 57 + 58 + int dccp_parse_options(struct sock *sk, struct sk_buff *skb) 59 + { 60 + struct dccp_sock *dp = dccp_sk(sk); 61 + #ifdef DCCP_DEBUG 62 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
"CLIENT rx opt: " : 63 + "server rx opt: "; 64 + #endif 65 + const struct dccp_hdr *dh = dccp_hdr(skb); 66 + const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; 67 + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); 68 + unsigned char *opt_ptr = options; 69 + const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); 70 + struct dccp_options_received *opt_recv = &dp->dccps_options_received; 71 + unsigned char opt, len; 72 + unsigned char *value; 73 + 74 + memset(opt_recv, 0, sizeof(*opt_recv)); 75 + 76 + while (opt_ptr != opt_end) { 77 + opt = *opt_ptr++; 78 + len = 0; 79 + value = NULL; 80 + 81 + /* Check if this isn't a single byte option */ 82 + if (opt > DCCPO_MAX_RESERVED) { 83 + if (opt_ptr == opt_end) 84 + goto out_invalid_option; 85 + 86 + len = *opt_ptr++; 87 + if (len < 3) 88 + goto out_invalid_option; 89 + /* 90 + * Remove the type and len fields, leaving 91 + * just the value size 92 + */ 93 + len -= 2; 94 + value = opt_ptr; 95 + opt_ptr += len; 96 + 97 + if (opt_ptr > opt_end) 98 + goto out_invalid_option; 99 + } 100 + 101 + switch (opt) { 102 + case DCCPO_PADDING: 103 + break; 104 + case DCCPO_NDP_COUNT: 105 + if (len > 3) 106 + goto out_invalid_option; 107 + 108 + opt_recv->dccpor_ndp = dccp_decode_value_var(value, len); 109 + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); 110 + break; 111 + case DCCPO_ACK_VECTOR_0: 112 + if (len > DCCP_MAX_ACK_VECTOR_LEN) 113 + goto out_invalid_option; 114 + 115 + if (pkt_type == DCCP_PKT_DATA) 116 + continue; 117 + 118 + opt_recv->dccpor_ack_vector_len = len; 119 + opt_recv->dccpor_ack_vector_idx = value - options; 120 + 121 + dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", 122 + debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); 123 + dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, 124 + value, len); 125 + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, 126 + DCCP_SKB_CB(skb)->dccpd_ack_seq, 127 + len, value); 128 + break; 129 + 
case DCCPO_TIMESTAMP: 130 + if (len != 4) 131 + goto out_invalid_option; 132 + 133 + opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); 134 + 135 + dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; 136 + dp->dccps_timestamp_time = jiffies; 137 + 138 + dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", 139 + debug_prefix, opt_recv->dccpor_timestamp, 140 + DCCP_SKB_CB(skb)->dccpd_ack_seq); 141 + break; 142 + case DCCPO_TIMESTAMP_ECHO: 143 + if (len < 4 || len > 8) 144 + goto out_invalid_option; 145 + 146 + opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); 147 + 148 + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", 149 + debug_prefix, opt_recv->dccpor_timestamp_echo, 150 + len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, 151 + tcp_time_stamp - opt_recv->dccpor_timestamp_echo); 152 + 153 + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); 154 + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, 155 + opt_recv->dccpor_elapsed_time); 156 + break; 157 + case DCCPO_ELAPSED_TIME: 158 + if (len > 4) 159 + goto out_invalid_option; 160 + 161 + if (pkt_type == DCCP_PKT_DATA) 162 + continue; 163 + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); 164 + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, 165 + opt_recv->dccpor_elapsed_time); 166 + break; 167 + /* 168 + * From draft-ietf-dccp-spec-11.txt: 169 + * 170 + * Option numbers 128 through 191 are for options sent from the HC- 171 + * Sender to the HC-Receiver; option numbers 192 through 255 are for 172 + * options sent from the HC-Receiver to the HC-Sender. 173 + */ 174 + case 128 ... 191: { 175 + const u16 idx = value - options; 176 + 177 + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) 178 + goto out_invalid_option; 179 + } 180 + break; 181 + case 192 ... 
255: { 182 + const u16 idx = value - options; 183 + 184 + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) 185 + goto out_invalid_option; 186 + } 187 + break; 188 + default: 189 + pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", 190 + sk, opt, len); 191 + break; 192 + } 193 + } 194 + 195 + return 0; 196 + 197 + out_invalid_option: 198 + DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); 199 + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; 200 + pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); 201 + return -1; 202 + } 203 + 204 + static void dccp_encode_value_var(const u32 value, unsigned char *to, 205 + const unsigned int len) 206 + { 207 + if (len > 3) 208 + *to++ = (value & 0xFF000000) >> 24; 209 + if (len > 2) 210 + *to++ = (value & 0xFF0000) >> 16; 211 + if (len > 1) 212 + *to++ = (value & 0xFF00) >> 8; 213 + if (len > 0) 214 + *to++ = (value & 0xFF); 215 + } 216 + 217 + static inline int dccp_ndp_len(const int ndp) 218 + { 219 + return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 
2 : 3; 220 + } 221 + 222 + void dccp_insert_option(struct sock *sk, struct sk_buff *skb, 223 + const unsigned char option, 224 + const void *value, const unsigned char len) 225 + { 226 + unsigned char *to; 227 + 228 + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { 229 + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); 230 + return; 231 + } 232 + 233 + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; 234 + 235 + to = skb_push(skb, len + 2); 236 + *to++ = option; 237 + *to++ = len + 2; 238 + 239 + memcpy(to, value, len); 240 + } 241 + 242 + EXPORT_SYMBOL_GPL(dccp_insert_option); 243 + 244 + static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) 245 + { 246 + struct dccp_sock *dp = dccp_sk(sk); 247 + int ndp = dp->dccps_ndp_count; 248 + 249 + if (dccp_non_data_packet(skb)) 250 + ++dp->dccps_ndp_count; 251 + else 252 + dp->dccps_ndp_count = 0; 253 + 254 + if (ndp > 0) { 255 + unsigned char *ptr; 256 + const int ndp_len = dccp_ndp_len(ndp); 257 + const int len = ndp_len + 2; 258 + 259 + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) 260 + return; 261 + 262 + DCCP_SKB_CB(skb)->dccpd_opt_len += len; 263 + 264 + ptr = skb_push(skb, len); 265 + *ptr++ = DCCPO_NDP_COUNT; 266 + *ptr++ = len; 267 + dccp_encode_value_var(ndp, ptr, ndp_len); 268 + } 269 + } 270 + 271 + static inline int dccp_elapsed_time_len(const u32 elapsed_time) 272 + { 273 + return elapsed_time == 0 ? 0 : 274 + elapsed_time <= 0xFF ? 1 : 275 + elapsed_time <= 0xFFFF ? 2 : 276 + elapsed_time <= 0xFFFFFF ? 3 : 4; 277 + } 278 + 279 + void dccp_insert_option_elapsed_time(struct sock *sk, 280 + struct sk_buff *skb, 281 + u32 elapsed_time) 282 + { 283 + #ifdef DCCP_DEBUG 284 + struct dccp_sock *dp = dccp_sk(sk); 285 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
"CLIENT TX opt: " : 286 + "server TX opt: "; 287 + #endif 288 + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 289 + const int len = 2 + elapsed_time_len; 290 + unsigned char *to; 291 + 292 + /* If elapsed_time == 0... */ 293 + if (elapsed_time_len == 2) 294 + return; 295 + 296 + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { 297 + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); 298 + return; 299 + } 300 + 301 + DCCP_SKB_CB(skb)->dccpd_opt_len += len; 302 + 303 + to = skb_push(skb, len); 304 + *to++ = DCCPO_ELAPSED_TIME; 305 + *to++ = len; 306 + 307 + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); 308 + 309 + dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", 310 + debug_prefix, elapsed_time, 311 + len, DCCP_SKB_CB(skb)->dccpd_seq); 312 + } 313 + 314 + EXPORT_SYMBOL(dccp_insert_option_elapsed_time); 315 + 316 + static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) 317 + { 318 + struct dccp_sock *dp = dccp_sk(sk); 319 + #ifdef DCCP_DEBUG 320 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : 321 + "server TX opt: "; 322 + #endif 323 + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; 324 + int len = ap->dccpap_buf_vector_len + 2; 325 + const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; 326 + unsigned char *to, *from; 327 + 328 + if (elapsed_time != 0) 329 + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); 330 + 331 + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { 332 + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); 333 + return; 334 + } 335 + 336 + /* 337 + * XXX: now we have just one ack vector sent record, so 338 + * we have to wait for it to be cleared. 339 + * 340 + * Of course this is not acceptable, but this is just for 341 + * basic testing now. 
342 + */ 343 + if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) 344 + return; 345 + 346 + DCCP_SKB_CB(skb)->dccpd_opt_len += len; 347 + 348 + to = skb_push(skb, len); 349 + *to++ = DCCPO_ACK_VECTOR_0; 350 + *to++ = len; 351 + 352 + len = ap->dccpap_buf_vector_len; 353 + from = ap->dccpap_buf + ap->dccpap_buf_head; 354 + 355 + /* Check if buf_head wraps */ 356 + if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { 357 + const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; 358 + 359 + memcpy(to, from, tailsize); 360 + to += tailsize; 361 + len -= tailsize; 362 + from = ap->dccpap_buf; 363 + } 364 + 365 + memcpy(to, from, len); 366 + /* 367 + * From draft-ietf-dccp-spec-11.txt: 368 + * 369 + * For each acknowledgement it sends, the HC-Receiver will add an 370 + * acknowledgement record. ack_seqno will equal the HC-Receiver 371 + * sequence number it used for the ack packet; ack_ptr will equal 372 + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal 373 + * buf_nonce. 374 + * 375 + * This implemention uses just one ack record for now. 
376 + */ 377 + ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 378 + ap->dccpap_ack_ptr = ap->dccpap_buf_head; 379 + ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; 380 + ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; 381 + ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; 382 + 383 + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", 384 + debug_prefix, ap->dccpap_ack_vector_len, 385 + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); 386 + } 387 + 388 + static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) 389 + { 390 + const u32 now = htonl(tcp_time_stamp); 391 + dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); 392 + } 393 + 394 + static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) 395 + { 396 + struct dccp_sock *dp = dccp_sk(sk); 397 + #ifdef DCCP_DEBUG 398 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : 399 + "server TX opt: "; 400 + #endif 401 + u32 tstamp_echo; 402 + const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; 403 + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); 404 + const int len = 6 + elapsed_time_len; 405 + unsigned char *to; 406 + 407 + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { 408 + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); 409 + return; 410 + } 411 + 412 + DCCP_SKB_CB(skb)->dccpd_opt_len += len; 413 + 414 + to = skb_push(skb, len); 415 + *to++ = DCCPO_TIMESTAMP_ECHO; 416 + *to++ = len; 417 + 418 + tstamp_echo = htonl(dp->dccps_timestamp_echo); 419 + memcpy(to, &tstamp_echo, 4); 420 + to += 4; 421 + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); 422 + 423 + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", 424 + debug_prefix, dp->dccps_timestamp_echo, 425 + len, DCCP_SKB_CB(skb)->dccpd_seq); 426 + 427 + dp->dccps_timestamp_echo = 0; 428 + dp->dccps_timestamp_time = 0; 429 + } 
430 + 431 + void dccp_insert_options(struct sock *sk, struct sk_buff *skb) 432 + { 433 + struct dccp_sock *dp = dccp_sk(sk); 434 + 435 + DCCP_SKB_CB(skb)->dccpd_opt_len = 0; 436 + 437 + if (dp->dccps_options.dccpo_send_ndp_count) 438 + dccp_insert_option_ndp(sk, skb); 439 + 440 + if (!dccp_packet_without_ack(skb)) { 441 + if (dp->dccps_options.dccpo_send_ack_vector && 442 + dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) 443 + dccp_insert_option_ack_vector(sk, skb); 444 + 445 + dccp_insert_option_timestamp(sk, skb); 446 + if (dp->dccps_timestamp_echo != 0) 447 + dccp_insert_option_timestamp_echo(sk, skb); 448 + } 449 + 450 + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); 451 + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); 452 + 453 + /* XXX: insert other options when appropriate */ 454 + 455 + if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { 456 + /* The length of all options has to be a multiple of 4 */ 457 + int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; 458 + 459 + if (padding != 0) { 460 + padding = 4 - padding; 461 + memset(skb_push(skb, padding), 0, padding); 462 + DCCP_SKB_CB(skb)->dccpd_opt_len += padding; 463 + } 464 + } 465 + } 466 + 467 + struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) 468 + { 469 + struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); 470 + 471 + if (ap != NULL) { 472 + #ifdef DCCP_DEBUG 473 + memset(ap->dccpap_buf, 0xFF, len); 474 + #endif 475 + ap->dccpap_buf_len = len; 476 + ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; 477 + ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; 478 + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; 479 + ap->dccpap_ack_ptr = 0; 480 + ap->dccpap_time = 0; 481 + ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; 482 + } 483 + 484 + return ap; 485 + } 486 + 487 + void dccp_ackpkts_free(struct dccp_ackpkts *ap) 488 + { 489 + if (ap != NULL) { 490 + #ifdef DCCP_DEBUG 
491 + memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); 492 + #endif 493 + kfree(ap); 494 + } 495 + } 496 + 497 + static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, 498 + const unsigned int index) 499 + { 500 + return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; 501 + } 502 + 503 + static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, 504 + const unsigned int index) 505 + { 506 + return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; 507 + } 508 + 509 + /* 510 + * If several packets are missing, the HC-Receiver may prefer to enter multiple 511 + * bytes with run length 0, rather than a single byte with a larger run length; 512 + * this simplifies table updates if one of the missing packets arrives. 513 + */ 514 + static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, 515 + const unsigned int packets, 516 + const unsigned char state) 517 + { 518 + unsigned int gap; 519 + signed long new_head; 520 + 521 + if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) 522 + return -ENOBUFS; 523 + 524 + gap = packets - 1; 525 + new_head = ap->dccpap_buf_head - packets; 526 + 527 + if (new_head < 0) { 528 + if (gap > 0) { 529 + memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, 530 + gap + new_head + 1); 531 + gap = -new_head; 532 + } 533 + new_head += ap->dccpap_buf_len; 534 + } 535 + 536 + ap->dccpap_buf_head = new_head; 537 + 538 + if (gap > 0) 539 + memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, 540 + DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); 541 + 542 + ap->dccpap_buf[ap->dccpap_buf_head] = state; 543 + ap->dccpap_buf_vector_len += packets; 544 + return 0; 545 + } 546 + 547 + /* 548 + * Implements the draft-ietf-dccp-spec-11.txt Appendix A 549 + */ 550 + int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) 551 + { 552 + /* 553 + * Check at the right places if the buffer is full, if it is, tell the 554 + * caller to start dropping packets till the HC-Sender acks our ACK 555 + * vectors, when we 
will free up space in dccpap_buf. 556 + * 557 + * We may well decide to do buffer compression, etc, but for now lets 558 + * just drop. 559 + * 560 + * From Appendix A: 561 + * 562 + * Of course, the circular buffer may overflow, either when the HC- 563 + * Sender is sending data at a very high rate, when the HC-Receiver's 564 + * acknowledgements are not reaching the HC-Sender, or when the HC- 565 + * Sender is forgetting to acknowledge those acks (so the HC-Receiver 566 + * is unable to clean up old state). In this case, the HC-Receiver 567 + * should either compress the buffer (by increasing run lengths when 568 + * possible), transfer its state to a larger buffer, or, as a last 569 + * resort, drop all received packets, without processing them 570 + * whatsoever, until its buffer shrinks again. 571 + */ 572 + 573 + /* See if this is the first ackno being inserted */ 574 + if (ap->dccpap_buf_vector_len == 0) { 575 + ap->dccpap_buf[ap->dccpap_buf_head] = state; 576 + ap->dccpap_buf_vector_len = 1; 577 + } else if (after48(ackno, ap->dccpap_buf_ackno)) { 578 + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); 579 + 580 + /* 581 + * Look if the state of this packet is the same as the previous ackno 582 + * and if so if we can bump the head len. 583 + */ 584 + if (delta == 1 && 585 + dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && 586 + dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) 587 + ap->dccpap_buf[ap->dccpap_buf_head]++; 588 + else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) 589 + return -ENOBUFS; 590 + } else { 591 + /* 592 + * A.1.2. Old Packets 593 + * 594 + * When a packet with Sequence Number S arrives, and S <= buf_ackno, 595 + * the HC-Receiver will scan the table for the byte corresponding to S. 596 + * (Indexing structures could reduce the complexity of this scan.) 
597 + */ 598 + u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); 599 + unsigned int index = ap->dccpap_buf_head; 600 + 601 + while (1) { 602 + const u8 len = dccp_ackpkts_len(ap, index); 603 + const u8 state = dccp_ackpkts_state(ap, index); 604 + /* 605 + * valid packets not yet in dccpap_buf have a reserved entry, with 606 + * a len equal to 0 607 + */ 608 + if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && 609 + len == 0 && delta == 0) { /* Found our reserved seat! */ 610 + dccp_pr_debug("Found %llu reserved seat!\n", ackno); 611 + ap->dccpap_buf[index] = state; 612 + goto out; 613 + } 614 + /* len == 0 means one packet */ 615 + if (delta < len + 1) 616 + goto out_duplicate; 617 + 618 + delta -= len + 1; 619 + if (++index == ap->dccpap_buf_len) 620 + index = 0; 621 + } 622 + } 623 + 624 + ap->dccpap_buf_ackno = ackno; 625 + ap->dccpap_time = jiffies; 626 + out: 627 + dccp_pr_debug(""); 628 + dccp_ackpkts_print(ap); 629 + return 0; 630 + 631 + out_duplicate: 632 + /* Duplicate packet */ 633 + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); 634 + return -EILSEQ; 635 + } 636 + 637 + #ifdef DCCP_DEBUG 638 + void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) 639 + { 640 + if (!dccp_debug) 641 + return; 642 + 643 + printk("ACK vector len=%d, ackno=%llu |", len, ackno); 644 + 645 + while (len--) { 646 + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; 647 + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); 648 + 649 + printk("%d,%d|", state, rl); 650 + ++vector; 651 + } 652 + 653 + printk("\n"); 654 + } 655 + 656 + void dccp_ackpkts_print(const struct dccp_ackpkts *ap) 657 + { 658 + dccp_ackvector_print(ap->dccpap_buf_ackno, 659 + ap->dccpap_buf + ap->dccpap_buf_head, 660 + ap->dccpap_buf_vector_len); 661 + } 662 + #endif 663 + 664 + static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) 665 + { 666 + /* 667 + * As we're keeping track of the ack vector size 668 + * 
(dccpap_buf_vector_len) and the sent ack vector size 669 + * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but 670 + * keep this code here as in the future we'll implement a vector of ack 671 + * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme 672 + */ 673 + #if 0 674 + ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; 675 + if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) 676 + ap->dccpap_buf_tail -= ap->dccpap_buf_len; 677 + #endif 678 + ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; 679 + } 680 + 681 + void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, 682 + u64 ackno) 683 + { 684 + /* Check if we actually sent an ACK vector */ 685 + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) 686 + return; 687 + 688 + if (ackno == ap->dccpap_ack_seqno) { 689 + #ifdef DCCP_DEBUG 690 + struct dccp_sock *dp = dccp_sk(sk); 691 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : 692 + "server rx ack: "; 693 + #endif 694 + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", 695 + debug_prefix, 1, 696 + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); 697 + dccp_ackpkts_trow_away_ack_record(ap); 698 + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; 699 + } 700 + } 701 + 702 + static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, 703 + struct sock *sk, u64 ackno, 704 + const unsigned char len, 705 + const unsigned char *vector) 706 + { 707 + unsigned char i; 708 + 709 + /* Check if we actually sent an ACK vector */ 710 + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) 711 + return; 712 + /* 713 + * We're in the receiver half connection, so if the received an ACK vector 714 + * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. 715 + * 716 + * Extra explanation with example: 717 + * 718 + * if we received an ACK vector with ackno 50, it can only be acking 719 + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). 
720 + */ 721 + // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); 722 + if (before48(ackno, ap->dccpap_ack_seqno)) { 723 + // dccp_pr_debug_cat("yes\n"); 724 + return; 725 + } 726 + // dccp_pr_debug_cat("no\n"); 727 + 728 + i = len; 729 + while (i--) { 730 + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); 731 + u64 ackno_end_rl; 732 + 733 + dccp_set_seqno(&ackno_end_rl, ackno - rl); 734 + 735 + // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno); 736 + if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { 737 + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; 738 + // dccp_pr_debug_cat("yes\n"); 739 + 740 + if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { 741 + #ifdef DCCP_DEBUG 742 + struct dccp_sock *dp = dccp_sk(sk); 743 + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : 744 + "server rx ack: "; 745 + #endif 746 + dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", 747 + debug_prefix, len, 748 + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); 749 + dccp_ackpkts_trow_away_ack_record(ap); 750 + } 751 + /* 752 + * If dccpap_ack_seqno was not received, no problem we'll 753 + * send another ACK vector. 754 + */ 755 + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; 756 + break; 757 + } 758 + // dccp_pr_debug_cat("no\n"); 759 + 760 + dccp_set_seqno(&ackno, ackno_end_rl - 1); 761 + ++vector; 762 + } 763 + }
+406
net/dccp/output.c
/*
 *  net/dccp/output.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/skbuff.h>

#include <net/sock.h>

#include "ccid.h"
#include "dccp.h"

/* An ack just went out: a pending delayed-ack timer is now redundant */
static inline void dccp_event_ack_sent(struct sock *sk)
{
	inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}

/*
 * All SKB's seen here are completely headerless. It is our
 * job to build the DCCP header, and pass the packet down to
 * IP so it can do the same plus pass the packet off to the
 * device.
 *
 * Bumps dccps_gss, inserts options, builds and checksums the header,
 * then hands the packet to ip_queue_xmit().  Returns 0 or a negative
 * error (NET_XMIT_CN is mapped to 0, see comment at the bottom);
 * -ENOBUFS for a NULL skb.
 */
int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (likely(skb != NULL)) {
		const struct inet_sock *inet = inet_sk(sk);
		struct dccp_sock *dp = dccp_sk(sk);
		struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
		struct dccp_hdr *dh;
		/* XXX For now we're using only 48 bits sequence numbers */
		const int dccp_header_size = sizeof(*dh) +
					     sizeof(struct dccp_hdr_ext) +
					     dccp_packet_hdr_len(dcb->dccpd_type);
		int err, set_ack = 1;
		u64 ackno = dp->dccps_gsr;

		/*
		 * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right
		 * thing to do here...
		 */
		dccp_inc_seqno(&dp->dccps_gss);

		dcb->dccpd_seq = dp->dccps_gss;
		dccp_insert_options(sk, skb);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_DATA:
			/* Pure data packets carry no acknowledgement number */
			set_ack = 0;
			break;
		case DCCP_PKT_SYNC:
		case DCCP_PKT_SYNCACK:
			/* SYNC/SYNCACK ack the seqno stashed by the caller */
			ackno = dcb->dccpd_seq;
			break;
		}

		skb->h.raw = skb_push(skb, dccp_header_size);
		dh = dccp_hdr(skb);
		/* Data packets are not cloned as they are never retransmitted */
		if (skb_cloned(skb))
			skb_set_owner_w(skb, sk);

		/* Build DCCP header and checksum it. */
		memset(dh, 0, dccp_header_size);
		dh->dccph_type = dcb->dccpd_type;
		dh->dccph_sport = inet->sport;
		dh->dccph_dport = inet->dport;
		/* doff is in 32-bit words, covering header + options */
		dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
		dh->dccph_ccval = dcb->dccpd_ccval;
		/* XXX For now we're using only 48 bits sequence numbers */
		dh->dccph_x = 1;

		dp->dccps_awh = dp->dccps_gss;
		dccp_hdr_set_seq(dh, dp->dccps_gss);
		if (set_ack)
			dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);

		switch (dcb->dccpd_type) {
		case DCCP_PKT_REQUEST:
			dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service;
			break;
		case DCCP_PKT_RESET:
			dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code;
			break;
		}

		dh->dccph_checksum = dccp_v4_checksum(skb);

		if (dcb->dccpd_type == DCCP_PKT_ACK ||
		    dcb->dccpd_type == DCCP_PKT_DATAACK)
			dccp_event_ack_sent(sk);

		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);

		err = ip_queue_xmit(skb, 0);
		if (err <= 0)
			return err;

		/* NET_XMIT_CN is special. It does not guarantee,
		 * that this packet is lost. It tells that device
		 * is about to start to drop packets or already
		 * drops some packets of the same priority and
		 * invokes us to send less aggressively.
		 */
		return err == NET_XMIT_CN ? 0 : err;
	}
	return -ENOBUFS;
}

/*
 * Recompute and cache the MSS for path MTU @pmtu: strip IP + DCCP generic
 * + extended-seqno headers, transport option overhead, and a rough option
 * budget (see FIXME below).  Returns the cached value.
 */
unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int mss_now;

	/*
	 * FIXME: we really should be using the af_specific thing to support
	 * IPv6.
	 * mss_now = pmtu - tp->af_specific->net_header_len -
	 *           sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
	 */
	mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);

	/* Now subtract optional transport overhead */
	mss_now -= dp->dccps_ext_header_len;

	/*
	 * FIXME: this should come from the CCID infrastructure, where, say,
	 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
	 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
	 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
	 * make it a multiple of 4
	 */

	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;

	/* And store cached results */
	dp->dccps_pmtu_cookie = pmtu;
	dp->dccps_mss_cache = mss_now;

	return mss_now;
}

/*
 * Re-send @skb after revalidating the route.  A cloned skb is deep-copied
 * first so the original queued copy stays intact.
 */
int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
{
	if (inet_sk_rebuild_header(sk) != 0)
		return -EHOSTUNREACH; /* Routing failure or similar. */

	return dccp_transmit_skb(sk, (skb_cloned(skb) ?
				      pskb_copy(skb, GFP_ATOMIC):
				      skb_clone(skb, GFP_ATOMIC)));
}

/*
 * Build a RESPONSE packet for a not-yet-accepted connection @req.
 * Seq/ack come from the request sock (dreq_iss/dreq_isr), not from the
 * listening socket.  Returns the skb or NULL on allocation failure;
 * caller owns the skb.
 */
struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
				   struct request_sock *req)
{
	struct dccp_hdr *dh;
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_response);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					   dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
	DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_rsk(req)->rmt_port;
	dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESPONSE;
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);

	dh->dccph_checksum = dccp_v4_checksum(skb);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

/*
 * Build a RESET packet with reset code @code for an established socket;
 * bumps dccps_gss for the new packet.  Returns the skb or NULL on
 * allocation failure; caller owns the skb.
 */
struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
				const enum dccp_reset_codes code)
	
{
	struct dccp_hdr *dh;
	struct dccp_sock *dp = dccp_sk(sk);
	const int dccp_header_size = sizeof(struct dccp_hdr) +
				     sizeof(struct dccp_hdr_ext) +
				     sizeof(struct dccp_hdr_reset);
	struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN +
					   dccp_header_size, 1,
					   GFP_ATOMIC);
	if (skb == NULL)
		return NULL;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size);

	skb->dst = dst_clone(dst);
	skb->csum = 0;

	dccp_inc_seqno(&dp->dccps_gss);

	DCCP_SKB_CB(skb)->dccpd_reset_code = code;
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
	DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss;
	dccp_insert_options(sk, skb);

	skb->h.raw = skb_push(skb, dccp_header_size);

	dh = dccp_hdr(skb);
	memset(dh, 0, dccp_header_size);

	dh->dccph_sport = inet_sk(sk)->sport;
	dh->dccph_dport = inet_sk(sk)->dport;
	dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
	dh->dccph_type = DCCP_PKT_RESET;
	dh->dccph_x = 1;
	dccp_hdr_set_seq(dh, dp->dccps_gss);
	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);

	dccp_hdr_reset(skb)->dccph_reset_code = code;

	dh->dccph_checksum = dccp_v4_checksum(skb);

	DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
	return skb;
}

/*
 * Do all connect socket setups that can be done AF independent.
 */
static inline void dccp_connect_init(struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_get(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	sk->sk_err = 0;
	sock_reset_flag(sk, SOCK_DONE);
	
	dccp_sync_mss(sk, dst_mtu(dst));

	/*
	 * FIXME: set dp->{dccps_swh,dccps_swl}, with
	 * something like dccp_inc_seq
	 */

	icsk->icsk_retransmits = 0;
}

/*
 * Active open: queue and transmit the initial REQUEST and arm the
 * retransmit timer.  Returns 0 or -ENOBUFS if the skb allocation fails.
 */
int dccp_connect(struct sock *sk)
{
	struct sk_buff *skb;
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_connect_init(sk);

	skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
	if (unlikely(skb == NULL))
		return -ENOBUFS;

	/* Reserve space for headers. */
	skb_reserve(skb, MAX_DCCP_HEADER);

	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
	/* FIXME: set service to something meaningful, coming
	 * from userspace*/
	DCCP_SKB_CB(skb)->dccpd_service = 0;
	skb->csum = 0;
	skb_set_owner_w(skb, sk);

	BUG_TRAP(sk->sk_send_head == NULL);
	sk->sk_send_head = skb;
	/* Transmit a clone; the original stays queued for retransmission */
	dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
	DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the REQUEST until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
	return 0;
}

/*
 * Send an immediate ACK.  On allocation failure, falls back to
 * (re)arming the delayed-ack timer so the ack is retried later.
 */
void dccp_send_ack(struct sock *sk)
{
	/* If we have been reset, we may not send again. */
	if (sk->sk_state != DCCP_CLOSED) {
		struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);

		if (skb == NULL) {
			inet_csk_schedule_ack(sk);
			inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
			return;
		}

		/* Reserve space for headers */
		skb_reserve(skb, MAX_DCCP_HEADER);
		skb->csum = 0;
		DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
		skb_set_owner_w(skb, sk);
		dccp_transmit_skb(sk, skb);
	}
}

EXPORT_SYMBOL_GPL(dccp_send_ack);

/*
 * Schedule an ack to go out within ~2s, sending immediately if the
 * delayed-ack timer was blocked; never extends an earlier deadline.
 */
void dccp_send_delayed_ack(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	/*
	 * FIXME: tune this timer. elapsed time fixes the skew, so no problem
	 * with using 2s, and active senders also piggyback the ACK into a
	 * DATAACK packet, so this is really for quiescent senders.
	 */
	unsigned long timeout = jiffies + 2 * HZ;

	/* Use new timeout only if there wasn't a older one earlier. */
	if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
		/* If delack timer was blocked or is about to expire,
		 * send ACK now.
		 *
		 * FIXME: check the "about to expire" part
		 */
		if (icsk->icsk_ack.blocked) {
			dccp_send_ack(sk);
			return;
		}

		if (!time_before(timeout, icsk->icsk_ack.timeout))
			timeout = icsk->icsk_ack.timeout;
	}
	icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
	icsk->icsk_ack.timeout = timeout;
	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
}

/*
 * Send a SYNC acknowledging @seq.  Best-effort: silently dropped on
 * allocation failure (see FIXME).
 */
void dccp_send_sync(struct sock *sk, u64 seq)
{
	/*
	 * We are not putting this on the write queue, so
	 * dccp_transmit_skb() will set the ownership to this
	 * sock.
	 */
	struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);

	if (skb == NULL)
		/* FIXME: how to make sure the sync is sent? */
		return;

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, MAX_DCCP_HEADER);
	skb->csum = 0;
	DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC;
	DCCP_SKB_CB(skb)->dccpd_seq = seq;

	skb_set_owner_w(skb, sk);
	dccp_transmit_skb(sk, skb);
}

/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
 * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
 * any circumstances.
 */
void dccp_send_close(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;

	/* Socket is locked, keep trying until memory is available. */
	for (;;) {
		skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL);
		if (skb != NULL)
			break;
		yield();
	}

	/* Reserve space for headers and prepare control bits. */
	skb_reserve(skb, sk->sk_prot->max_header);
	skb->csum = 0;
	/* The client sends CLOSE; the server asks the client to close */
	DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ;

	skb_set_owner_w(skb, sk);
	dccp_transmit_skb(sk, skb);

	ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
}
+818
net/dccp/proto.c
··· 1 + /* 2 + * net/dccp/proto.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 + * 7 + * This program is free software; you can redistribute it and/or modify it 8 + * under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #include <linux/config.h> 13 + #include <linux/dccp.h> 14 + #include <linux/module.h> 15 + #include <linux/types.h> 16 + #include <linux/sched.h> 17 + #include <linux/kernel.h> 18 + #include <linux/skbuff.h> 19 + #include <linux/netdevice.h> 20 + #include <linux/in.h> 21 + #include <linux/if_arp.h> 22 + #include <linux/init.h> 23 + #include <linux/random.h> 24 + #include <net/checksum.h> 25 + 26 + #include <net/inet_common.h> 27 + #include <net/ip.h> 28 + #include <net/protocol.h> 29 + #include <net/sock.h> 30 + #include <net/xfrm.h> 31 + 32 + #include <asm/semaphore.h> 33 + #include <linux/spinlock.h> 34 + #include <linux/timer.h> 35 + #include <linux/delay.h> 36 + #include <linux/poll.h> 37 + #include <linux/dccp.h> 38 + 39 + #include "ccid.h" 40 + #include "dccp.h" 41 + 42 + DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); 43 + 44 + atomic_t dccp_orphan_count = ATOMIC_INIT(0); 45 + 46 + static struct net_protocol dccp_protocol = { 47 + .handler = dccp_v4_rcv, 48 + .err_handler = dccp_v4_err, 49 + }; 50 + 51 + const char *dccp_packet_name(const int type) 52 + { 53 + static const char *dccp_packet_names[] = { 54 + [DCCP_PKT_REQUEST] = "REQUEST", 55 + [DCCP_PKT_RESPONSE] = "RESPONSE", 56 + [DCCP_PKT_DATA] = "DATA", 57 + [DCCP_PKT_ACK] = "ACK", 58 + [DCCP_PKT_DATAACK] = "DATAACK", 59 + [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", 60 + [DCCP_PKT_CLOSE] = "CLOSE", 61 + [DCCP_PKT_RESET] = "RESET", 62 + [DCCP_PKT_SYNC] = "SYNC", 63 + [DCCP_PKT_SYNCACK] = "SYNCACK", 64 + }; 65 + 66 + if (type >= DCCP_NR_PKT_TYPES) 67 + return "INVALID"; 68 + else 69 + return dccp_packet_names[type]; 70 + } 71 + 72 + 
EXPORT_SYMBOL_GPL(dccp_packet_name); 73 + 74 + const char *dccp_state_name(const int state) 75 + { 76 + static char *dccp_state_names[] = { 77 + [DCCP_OPEN] = "OPEN", 78 + [DCCP_REQUESTING] = "REQUESTING", 79 + [DCCP_PARTOPEN] = "PARTOPEN", 80 + [DCCP_LISTEN] = "LISTEN", 81 + [DCCP_RESPOND] = "RESPOND", 82 + [DCCP_CLOSING] = "CLOSING", 83 + [DCCP_TIME_WAIT] = "TIME_WAIT", 84 + [DCCP_CLOSED] = "CLOSED", 85 + }; 86 + 87 + if (state >= DCCP_MAX_STATES) 88 + return "INVALID STATE!"; 89 + else 90 + return dccp_state_names[state]; 91 + } 92 + 93 + EXPORT_SYMBOL_GPL(dccp_state_name); 94 + 95 + static inline int dccp_listen_start(struct sock *sk) 96 + { 97 + dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; 98 + return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); 99 + } 100 + 101 + int dccp_disconnect(struct sock *sk, int flags) 102 + { 103 + struct inet_connection_sock *icsk = inet_csk(sk); 104 + struct inet_sock *inet = inet_sk(sk); 105 + int err = 0; 106 + const int old_state = sk->sk_state; 107 + 108 + if (old_state != DCCP_CLOSED) 109 + dccp_set_state(sk, DCCP_CLOSED); 110 + 111 + /* ABORT function of RFC793 */ 112 + if (old_state == DCCP_LISTEN) { 113 + inet_csk_listen_stop(sk); 114 + /* FIXME: do the active reset thing */ 115 + } else if (old_state == DCCP_REQUESTING) 116 + sk->sk_err = ECONNRESET; 117 + 118 + dccp_clear_xmit_timers(sk); 119 + __skb_queue_purge(&sk->sk_receive_queue); 120 + if (sk->sk_send_head != NULL) { 121 + __kfree_skb(sk->sk_send_head); 122 + sk->sk_send_head = NULL; 123 + } 124 + 125 + inet->dport = 0; 126 + 127 + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 128 + inet_reset_saddr(sk); 129 + 130 + sk->sk_shutdown = 0; 131 + sock_reset_flag(sk, SOCK_DONE); 132 + 133 + icsk->icsk_backoff = 0; 134 + inet_csk_delack_init(sk); 135 + __sk_dst_reset(sk); 136 + 137 + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); 138 + 139 + sk->sk_error_report(sk); 140 + return err; 141 + } 142 + 143 + int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) 144 + { 145 + 
dccp_pr_debug("entry\n"); 146 + return -ENOIOCTLCMD; 147 + } 148 + 149 + int dccp_setsockopt(struct sock *sk, int level, int optname, 150 + char *optval, int optlen) 151 + { 152 + dccp_pr_debug("entry\n"); 153 + 154 + if (level != SOL_DCCP) 155 + return ip_setsockopt(sk, level, optname, optval, optlen); 156 + 157 + return -EOPNOTSUPP; 158 + } 159 + 160 + int dccp_getsockopt(struct sock *sk, int level, int optname, 161 + char *optval, int *optlen) 162 + { 163 + dccp_pr_debug("entry\n"); 164 + 165 + if (level != SOL_DCCP) 166 + return ip_getsockopt(sk, level, optname, optval, optlen); 167 + 168 + return -EOPNOTSUPP; 169 + } 170 + 171 + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 172 + size_t len) 173 + { 174 + const struct dccp_sock *dp = dccp_sk(sk); 175 + const int flags = msg->msg_flags; 176 + const int noblock = flags & MSG_DONTWAIT; 177 + struct sk_buff *skb; 178 + int rc, size; 179 + long timeo; 180 + 181 + if (len > dp->dccps_mss_cache) 182 + return -EMSGSIZE; 183 + 184 + lock_sock(sk); 185 + 186 + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 187 + 188 + /* 189 + * We have to use sk_stream_wait_connect here to set sk_write_pending, 190 + * so that the trick in dccp_rcv_request_sent_state_process. 191 + */ 192 + /* Wait for a connection to finish. 
*/ 193 + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) 194 + if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) 195 + goto out_err; 196 + 197 + size = sk->sk_prot->max_header + len; 198 + release_sock(sk); 199 + skb = sock_alloc_send_skb(sk, size, noblock, &rc); 200 + lock_sock(sk); 201 + 202 + if (skb == NULL) 203 + goto out_release; 204 + 205 + skb_reserve(skb, sk->sk_prot->max_header); 206 + rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 207 + if (rc == 0) { 208 + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); 209 + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; 210 + long delay; 211 + 212 + /* 213 + * XXX: This is just to match the Waikato tree CA interaction 214 + * points, after the CCID3 code is stable and I have a better 215 + * understanding of behaviour I'll change this to look more like 216 + * TCP. 217 + */ 218 + while (1) { 219 + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, 220 + skb, len, &delay); 221 + if (rc == 0) 222 + break; 223 + if (rc != -EAGAIN) 224 + goto out_discard; 225 + if (delay > timeo) 226 + goto out_discard; 227 + release_sock(sk); 228 + delay = schedule_timeout(delay); 229 + lock_sock(sk); 230 + timeo -= delay; 231 + if (signal_pending(current)) 232 + goto out_interrupted; 233 + rc = -EPIPE; 234 + if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) 235 + goto out_discard; 236 + } 237 + 238 + if (sk->sk_state == DCCP_PARTOPEN) { 239 + /* See 8.1.5. 
Handshake Completion */ 240 + inet_csk_schedule_ack(sk); 241 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 242 + dcb->dccpd_type = DCCP_PKT_DATAACK; 243 + /* FIXME: we really should have a dccps_ack_pending or use icsk */ 244 + } else if (inet_csk_ack_scheduled(sk) || 245 + (dp->dccps_options.dccpo_send_ack_vector && 246 + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && 247 + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) 248 + dcb->dccpd_type = DCCP_PKT_DATAACK; 249 + else 250 + dcb->dccpd_type = DCCP_PKT_DATA; 251 + dccp_transmit_skb(sk, skb); 252 + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); 253 + } else { 254 + out_discard: 255 + kfree_skb(skb); 256 + } 257 + out_release: 258 + release_sock(sk); 259 + return rc ? : len; 260 + out_err: 261 + rc = sk_stream_error(sk, flags, rc); 262 + goto out_release; 263 + out_interrupted: 264 + rc = sock_intr_errno(timeo); 265 + goto out_discard; 266 + } 267 + 268 + EXPORT_SYMBOL(dccp_sendmsg); 269 + 270 + int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 271 + size_t len, int nonblock, int flags, int *addr_len) 272 + { 273 + const struct dccp_hdr *dh; 274 + int copied = 0; 275 + unsigned long used; 276 + int err; 277 + int target; /* Read at least this many bytes */ 278 + long timeo; 279 + 280 + lock_sock(sk); 281 + 282 + err = -ENOTCONN; 283 + if (sk->sk_state == DCCP_LISTEN) 284 + goto out; 285 + 286 + timeo = sock_rcvtimeo(sk, nonblock); 287 + 288 + /* Urgent data needs to be handled specially. */ 289 + if (flags & MSG_OOB) 290 + goto recv_urg; 291 + 292 + /* FIXME */ 293 + #if 0 294 + seq = &tp->copied_seq; 295 + if (flags & MSG_PEEK) { 296 + peek_seq = tp->copied_seq; 297 + seq = &peek_seq; 298 + } 299 + #endif 300 + 301 + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 302 + 303 + do { 304 + struct sk_buff *skb; 305 + u32 offset; 306 + 307 + /* FIXME */ 308 + #if 0 309 + /* Are we at urgent data? 
Stop if we have read anything or have SIGURG pending. */ 310 + if (tp->urg_data && tp->urg_seq == *seq) { 311 + if (copied) 312 + break; 313 + if (signal_pending(current)) { 314 + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; 315 + break; 316 + } 317 + } 318 + #endif 319 + 320 + /* Next get a buffer. */ 321 + 322 + skb = skb_peek(&sk->sk_receive_queue); 323 + do { 324 + if (!skb) 325 + break; 326 + 327 + offset = 0; 328 + dh = dccp_hdr(skb); 329 + 330 + if (dh->dccph_type == DCCP_PKT_DATA || 331 + dh->dccph_type == DCCP_PKT_DATAACK) 332 + goto found_ok_skb; 333 + 334 + if (dh->dccph_type == DCCP_PKT_RESET || 335 + dh->dccph_type == DCCP_PKT_CLOSE) { 336 + dccp_pr_debug("found fin ok!\n"); 337 + goto found_fin_ok; 338 + } 339 + dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); 340 + BUG_TRAP(flags & MSG_PEEK); 341 + skb = skb->next; 342 + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); 343 + 344 + /* Well, if we have backlog, try to process it now yet. */ 345 + if (copied >= target && !sk->sk_backlog.tail) 346 + break; 347 + 348 + if (copied) { 349 + if (sk->sk_err || 350 + sk->sk_state == DCCP_CLOSED || 351 + (sk->sk_shutdown & RCV_SHUTDOWN) || 352 + !timeo || 353 + signal_pending(current) || 354 + (flags & MSG_PEEK)) 355 + break; 356 + } else { 357 + if (sock_flag(sk, SOCK_DONE)) 358 + break; 359 + 360 + if (sk->sk_err) { 361 + copied = sock_error(sk); 362 + break; 363 + } 364 + 365 + if (sk->sk_shutdown & RCV_SHUTDOWN) 366 + break; 367 + 368 + if (sk->sk_state == DCCP_CLOSED) { 369 + if (!sock_flag(sk, SOCK_DONE)) { 370 + /* This occurs when user tries to read 371 + * from never connected socket. 
372 + */ 373 + copied = -ENOTCONN; 374 + break; 375 + } 376 + break; 377 + } 378 + 379 + if (!timeo) { 380 + copied = -EAGAIN; 381 + break; 382 + } 383 + 384 + if (signal_pending(current)) { 385 + copied = sock_intr_errno(timeo); 386 + break; 387 + } 388 + } 389 + 390 + /* FIXME: cleanup_rbuf(sk, copied); */ 391 + 392 + if (copied >= target) { 393 + /* Do not sleep, just process backlog. */ 394 + release_sock(sk); 395 + lock_sock(sk); 396 + } else 397 + sk_wait_data(sk, &timeo); 398 + 399 + continue; 400 + 401 + found_ok_skb: 402 + /* Ok so how much can we use? */ 403 + used = skb->len - offset; 404 + if (len < used) 405 + used = len; 406 + 407 + if (!(flags & MSG_TRUNC)) { 408 + err = skb_copy_datagram_iovec(skb, offset, 409 + msg->msg_iov, used); 410 + if (err) { 411 + /* Exception. Bailout! */ 412 + if (!copied) 413 + copied = -EFAULT; 414 + break; 415 + } 416 + } 417 + 418 + copied += used; 419 + len -= used; 420 + 421 + /* FIXME: tcp_rcv_space_adjust(sk); */ 422 + 423 + //skip_copy: 424 + if (used + offset < skb->len) 425 + continue; 426 + 427 + if (!(flags & MSG_PEEK)) 428 + sk_eat_skb(sk, skb); 429 + continue; 430 + found_fin_ok: 431 + if (!(flags & MSG_PEEK)) 432 + sk_eat_skb(sk, skb); 433 + break; 434 + 435 + } while (len > 0); 436 + 437 + /* According to UNIX98, msg_name/msg_namelen are ignored 438 + * on connected socket. I was just happy when found this 8) --ANK 439 + */ 440 + 441 + /* Clean up data we have read: This will do ACK frames. 
*/ 442 + /* FIXME: cleanup_rbuf(sk, copied); */ 443 + 444 + release_sock(sk); 445 + return copied; 446 + 447 + out: 448 + release_sock(sk); 449 + return err; 450 + 451 + recv_urg: 452 + /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ 453 + goto out; 454 + } 455 + 456 + static int inet_dccp_listen(struct socket *sock, int backlog) 457 + { 458 + struct sock *sk = sock->sk; 459 + unsigned char old_state; 460 + int err; 461 + 462 + lock_sock(sk); 463 + 464 + err = -EINVAL; 465 + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) 466 + goto out; 467 + 468 + old_state = sk->sk_state; 469 + if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) 470 + goto out; 471 + 472 + /* Really, if the socket is already in listen state 473 + * we can only allow the backlog to be adjusted. 474 + */ 475 + if (old_state != DCCP_LISTEN) { 476 + /* 477 + * FIXME: here it probably should be sk->sk_prot->listen_start 478 + * see tcp_listen_start 479 + */ 480 + err = dccp_listen_start(sk); 481 + if (err) 482 + goto out; 483 + } 484 + sk->sk_max_ack_backlog = backlog; 485 + err = 0; 486 + 487 + out: 488 + release_sock(sk); 489 + return err; 490 + } 491 + 492 + static const unsigned char dccp_new_state[] = { 493 + /* current state: new state: action: */ 494 + [0] = DCCP_CLOSED, 495 + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, 496 + [DCCP_REQUESTING] = DCCP_CLOSED, 497 + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, 498 + [DCCP_LISTEN] = DCCP_CLOSED, 499 + [DCCP_RESPOND] = DCCP_CLOSED, 500 + [DCCP_CLOSING] = DCCP_CLOSED, 501 + [DCCP_TIME_WAIT] = DCCP_CLOSED, 502 + [DCCP_CLOSED] = DCCP_CLOSED, 503 + }; 504 + 505 + static int dccp_close_state(struct sock *sk) 506 + { 507 + const int next = dccp_new_state[sk->sk_state]; 508 + const int ns = next & DCCP_STATE_MASK; 509 + 510 + if (ns != sk->sk_state) 511 + dccp_set_state(sk, ns); 512 + 513 + return next & DCCP_ACTION_FIN; 514 + } 515 + 516 + void dccp_close(struct sock *sk, long timeout) 517 + { 518 + 
struct sk_buff *skb; 519 + 520 + lock_sock(sk); 521 + 522 + sk->sk_shutdown = SHUTDOWN_MASK; 523 + 524 + if (sk->sk_state == DCCP_LISTEN) { 525 + dccp_set_state(sk, DCCP_CLOSED); 526 + 527 + /* Special case. */ 528 + inet_csk_listen_stop(sk); 529 + 530 + goto adjudge_to_death; 531 + } 532 + 533 + /* 534 + * We need to flush the recv. buffs. We do this only on the 535 + * descriptor close, not protocol-sourced closes, because the 536 + *reader process may not have drained the data yet! 537 + */ 538 + /* FIXME: check for unread data */ 539 + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { 540 + __kfree_skb(skb); 541 + } 542 + 543 + if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 544 + /* Check zero linger _after_ checking for unread data. */ 545 + sk->sk_prot->disconnect(sk, 0); 546 + } else if (dccp_close_state(sk)) { 547 + dccp_send_close(sk); 548 + } 549 + 550 + sk_stream_wait_close(sk, timeout); 551 + 552 + adjudge_to_death: 553 + release_sock(sk); 554 + /* 555 + * Now socket is owned by kernel and we acquire BH lock 556 + * to finish close. No need to check for user refs. 557 + */ 558 + local_bh_disable(); 559 + bh_lock_sock(sk); 560 + BUG_TRAP(!sock_owned_by_user(sk)); 561 + 562 + sock_hold(sk); 563 + sock_orphan(sk); 564 + 565 + if (sk->sk_state != DCCP_CLOSED) 566 + dccp_set_state(sk, DCCP_CLOSED); 567 + 568 + atomic_inc(&dccp_orphan_count); 569 + if (sk->sk_state == DCCP_CLOSED) 570 + inet_csk_destroy_sock(sk); 571 + 572 + /* Otherwise, socket is reprieved until protocol close. 
*/ 573 + 574 + bh_unlock_sock(sk); 575 + local_bh_enable(); 576 + sock_put(sk); 577 + } 578 + 579 + void dccp_shutdown(struct sock *sk, int how) 580 + { 581 + dccp_pr_debug("entry\n"); 582 + } 583 + 584 + struct proto_ops inet_dccp_ops = { 585 + .family = PF_INET, 586 + .owner = THIS_MODULE, 587 + .release = inet_release, 588 + .bind = inet_bind, 589 + .connect = inet_stream_connect, 590 + .socketpair = sock_no_socketpair, 591 + .accept = inet_accept, 592 + .getname = inet_getname, 593 + .poll = sock_no_poll, 594 + .ioctl = inet_ioctl, 595 + .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ 596 + .shutdown = inet_shutdown, 597 + .setsockopt = sock_common_setsockopt, 598 + .getsockopt = sock_common_getsockopt, 599 + .sendmsg = inet_sendmsg, 600 + .recvmsg = sock_common_recvmsg, 601 + .mmap = sock_no_mmap, 602 + .sendpage = sock_no_sendpage, 603 + }; 604 + 605 + extern struct net_proto_family inet_family_ops; 606 + 607 + static struct inet_protosw dccp_v4_protosw = { 608 + .type = SOCK_DCCP, 609 + .protocol = IPPROTO_DCCP, 610 + .prot = &dccp_v4_prot, 611 + .ops = &inet_dccp_ops, 612 + .capability = -1, 613 + .no_check = 0, 614 + .flags = 0, 615 + }; 616 + 617 + /* 618 + * This is the global socket data structure used for responding to 619 + * the Out-of-the-blue (OOTB) packets. A control sock will be created 620 + * for this socket at the initialization time. 
621 + */ 622 + struct socket *dccp_ctl_socket; 623 + 624 + static char dccp_ctl_socket_err_msg[] __initdata = 625 + KERN_ERR "DCCP: Failed to create the control socket.\n"; 626 + 627 + static int __init dccp_ctl_sock_init(void) 628 + { 629 + int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, 630 + &dccp_ctl_socket); 631 + if (rc < 0) 632 + printk(dccp_ctl_socket_err_msg); 633 + else { 634 + dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; 635 + inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; 636 + 637 + /* Unhash it so that IP input processing does not even 638 + * see it, we do not wish this socket to see incoming 639 + * packets. 640 + */ 641 + dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); 642 + } 643 + 644 + return rc; 645 + } 646 + 647 + static void __exit dccp_ctl_sock_exit(void) 648 + { 649 + if (dccp_ctl_socket != NULL) 650 + sock_release(dccp_ctl_socket); 651 + } 652 + 653 + static int __init init_dccp_v4_mibs(void) 654 + { 655 + int rc = -ENOMEM; 656 + 657 + dccp_statistics[0] = alloc_percpu(struct dccp_mib); 658 + if (dccp_statistics[0] == NULL) 659 + goto out; 660 + 661 + dccp_statistics[1] = alloc_percpu(struct dccp_mib); 662 + if (dccp_statistics[1] == NULL) 663 + goto out_free_one; 664 + 665 + rc = 0; 666 + out: 667 + return rc; 668 + out_free_one: 669 + free_percpu(dccp_statistics[0]); 670 + dccp_statistics[0] = NULL; 671 + goto out; 672 + 673 + } 674 + 675 + static int thash_entries; 676 + module_param(thash_entries, int, 0444); 677 + MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); 678 + 679 + int dccp_debug; 680 + module_param(dccp_debug, int, 0444); 681 + MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); 682 + 683 + static int __init dccp_init(void) 684 + { 685 + unsigned long goal; 686 + int ehash_order, bhash_order, i; 687 + int rc = proto_register(&dccp_v4_prot, 1); 688 + 689 + if (rc) 690 + goto out; 691 + 692 + dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", 693 + sizeof(struct 
inet_bind_bucket), 694 + 0, SLAB_HWCACHE_ALIGN, 695 + NULL, NULL); 696 + if (!dccp_hashinfo.bind_bucket_cachep) 697 + goto out_proto_unregister; 698 + 699 + /* 700 + * Size and allocate the main established and bind bucket 701 + * hash tables. 702 + * 703 + * The methodology is similar to that of the buffer cache. 704 + */ 705 + if (num_physpages >= (128 * 1024)) 706 + goal = num_physpages >> (21 - PAGE_SHIFT); 707 + else 708 + goal = num_physpages >> (23 - PAGE_SHIFT); 709 + 710 + if (thash_entries) 711 + goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; 712 + for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) 713 + ; 714 + do { 715 + dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / 716 + sizeof(struct inet_ehash_bucket); 717 + dccp_hashinfo.ehash_size >>= 1; 718 + while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) 719 + dccp_hashinfo.ehash_size--; 720 + dccp_hashinfo.ehash = (struct inet_ehash_bucket *) 721 + __get_free_pages(GFP_ATOMIC, ehash_order); 722 + } while (!dccp_hashinfo.ehash && --ehash_order > 0); 723 + 724 + if (!dccp_hashinfo.ehash) { 725 + printk(KERN_CRIT "Failed to allocate DCCP " 726 + "established hash table\n"); 727 + goto out_free_bind_bucket_cachep; 728 + } 729 + 730 + for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { 731 + rwlock_init(&dccp_hashinfo.ehash[i].lock); 732 + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); 733 + } 734 + 735 + bhash_order = ehash_order; 736 + 737 + do { 738 + dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / 739 + sizeof(struct inet_bind_hashbucket); 740 + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) 741 + continue; 742 + dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) 743 + __get_free_pages(GFP_ATOMIC, bhash_order); 744 + } while (!dccp_hashinfo.bhash && --bhash_order >= 0); 745 + 746 + if (!dccp_hashinfo.bhash) { 747 + printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); 748 + goto 
out_free_dccp_ehash; 749 + } 750 + 751 + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { 752 + spin_lock_init(&dccp_hashinfo.bhash[i].lock); 753 + INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); 754 + } 755 + 756 + if (init_dccp_v4_mibs()) 757 + goto out_free_dccp_bhash; 758 + 759 + rc = -EAGAIN; 760 + if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) 761 + goto out_free_dccp_v4_mibs; 762 + 763 + inet_register_protosw(&dccp_v4_protosw); 764 + 765 + rc = dccp_ctl_sock_init(); 766 + if (rc) 767 + goto out_unregister_protosw; 768 + out: 769 + return rc; 770 + out_unregister_protosw: 771 + inet_unregister_protosw(&dccp_v4_protosw); 772 + inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); 773 + out_free_dccp_v4_mibs: 774 + free_percpu(dccp_statistics[0]); 775 + free_percpu(dccp_statistics[1]); 776 + dccp_statistics[0] = dccp_statistics[1] = NULL; 777 + out_free_dccp_bhash: 778 + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 779 + dccp_hashinfo.bhash = NULL; 780 + out_free_dccp_ehash: 781 + free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); 782 + dccp_hashinfo.ehash = NULL; 783 + out_free_bind_bucket_cachep: 784 + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 785 + dccp_hashinfo.bind_bucket_cachep = NULL; 786 + out_proto_unregister: 787 + proto_unregister(&dccp_v4_prot); 788 + goto out; 789 + } 790 + 791 + static const char dccp_del_proto_err_msg[] __exitdata = 792 + KERN_ERR "can't remove dccp net_protocol\n"; 793 + 794 + static void __exit dccp_fini(void) 795 + { 796 + dccp_ctl_sock_exit(); 797 + 798 + inet_unregister_protosw(&dccp_v4_protosw); 799 + 800 + if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) 801 + printk(dccp_del_proto_err_msg); 802 + 803 + /* Free the control endpoint. 
*/ 804 + sock_release(dccp_ctl_socket); 805 + 806 + proto_unregister(&dccp_v4_prot); 807 + 808 + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 809 + } 810 + 811 + module_init(dccp_init); 812 + module_exit(dccp_fini); 813 + 814 + /* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ 815 + MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); 816 + MODULE_LICENSE("GPL"); 817 + MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>"); 818 + MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
+249
net/dccp/timer.c
··· 1 + /* 2 + * net/dccp/timer.c 3 + * 4 + * An implementation of the DCCP protocol 5 + * Arnaldo Carvalho de Melo <acme@conectiva.com.br> 6 + * 7 + * This program is free software; you can redistribute it and/or 8 + * modify it under the terms of the GNU General Public License 9 + * as published by the Free Software Foundation; either version 10 + * 2 of the License, or (at your option) any later version. 11 + */ 12 + 13 + #include <linux/config.h> 14 + #include <linux/dccp.h> 15 + #include <linux/skbuff.h> 16 + 17 + #include "dccp.h" 18 + 19 + static void dccp_write_timer(unsigned long data); 20 + static void dccp_keepalive_timer(unsigned long data); 21 + static void dccp_delack_timer(unsigned long data); 22 + 23 + void dccp_init_xmit_timers(struct sock *sk) 24 + { 25 + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, 26 + &dccp_keepalive_timer); 27 + } 28 + 29 + static void dccp_write_err(struct sock *sk) 30 + { 31 + sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; 32 + sk->sk_error_report(sk); 33 + 34 + dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); 35 + dccp_done(sk); 36 + DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); 37 + } 38 + 39 + /* A write timeout has occurred. Process the after effects. */ 40 + static int dccp_write_timeout(struct sock *sk) 41 + { 42 + const struct inet_connection_sock *icsk = inet_csk(sk); 43 + int retry_until; 44 + 45 + if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { 46 + if (icsk->icsk_retransmits != 0) 47 + dst_negative_advice(&sk->sk_dst_cache); 48 + retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; 49 + } else { 50 + if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { 51 + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black 52 + hole detection. :-( 53 + 54 + It is place to make it. It is not made. I do not want 55 + to make it. It is disguisting. It does not work in any 56 + case. 
Let me to cite the same draft, which requires for 57 + us to implement this: 58 + 59 + "The one security concern raised by this memo is that ICMP black holes 60 + are often caused by over-zealous security administrators who block 61 + all ICMP messages. It is vitally important that those who design and 62 + deploy security systems understand the impact of strict filtering on 63 + upper-layer protocols. The safest web site in the world is worthless 64 + if most TCP implementations cannot transfer data from it. It would 65 + be far nicer to have all of the black holes fixed rather than fixing 66 + all of the TCP implementations." 67 + 68 + Golden words :-). 69 + */ 70 + 71 + dst_negative_advice(&sk->sk_dst_cache); 72 + } 73 + 74 + retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; 75 + /* 76 + * FIXME: see tcp_write_timout and tcp_out_of_resources 77 + */ 78 + } 79 + 80 + if (icsk->icsk_retransmits >= retry_until) { 81 + /* Has it gone just too far? */ 82 + dccp_write_err(sk); 83 + return 1; 84 + } 85 + return 0; 86 + } 87 + 88 + /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ 89 + static void dccp_delack_timer(unsigned long data) 90 + { 91 + struct sock *sk = (struct sock *)data; 92 + struct inet_connection_sock *icsk = inet_csk(sk); 93 + 94 + bh_lock_sock(sk); 95 + if (sock_owned_by_user(sk)) { 96 + /* Try again later. 
*/ 97 + icsk->icsk_ack.blocked = 1; 98 + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); 99 + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); 100 + goto out; 101 + } 102 + 103 + if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) 104 + goto out; 105 + if (time_after(icsk->icsk_ack.timeout, jiffies)) { 106 + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); 107 + goto out; 108 + } 109 + 110 + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; 111 + 112 + if (inet_csk_ack_scheduled(sk)) { 113 + if (!icsk->icsk_ack.pingpong) { 114 + /* Delayed ACK missed: inflate ATO. */ 115 + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); 116 + } else { 117 + /* Delayed ACK missed: leave pingpong mode and 118 + * deflate ATO. 119 + */ 120 + icsk->icsk_ack.pingpong = 0; 121 + icsk->icsk_ack.ato = TCP_ATO_MIN; 122 + } 123 + dccp_send_ack(sk); 124 + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); 125 + } 126 + out: 127 + bh_unlock_sock(sk); 128 + sock_put(sk); 129 + } 130 + 131 + /* 132 + * The DCCP retransmit timer. 133 + */ 134 + static void dccp_retransmit_timer(struct sock *sk) 135 + { 136 + struct inet_connection_sock *icsk = inet_csk(sk); 137 + 138 + /* 139 + * sk->sk_send_head has to have one skb with 140 + * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP 141 + * packet types (REQUEST, RESPONSE, the ACK in the 3way hanshake 142 + * (PARTOPEN timer), etc). 143 + */ 144 + BUG_TRAP(sk->sk_send_head != NULL); 145 + 146 + /* 147 + * More than than 4MSL (8 minutes) has passed, a RESET(aborted) was 148 + * sent, no need to retransmit, this sock is dead. 149 + */ 150 + if (dccp_write_timeout(sk)) 151 + goto out; 152 + 153 + /* 154 + * We want to know the number of packets retransmitted, not the 155 + * total number of retransmissions of clones of original packets. 
156 + */ 157 + if (icsk->icsk_retransmits == 0) 158 + DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); 159 + 160 + if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { 161 + /* 162 + * Retransmission failed because of local congestion, 163 + * do not backoff. 164 + */ 165 + if (icsk->icsk_retransmits == 0) 166 + icsk->icsk_retransmits = 1; 167 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 168 + min(icsk->icsk_rto, 169 + TCP_RESOURCE_PROBE_INTERVAL), 170 + TCP_RTO_MAX); 171 + goto out; 172 + } 173 + 174 + icsk->icsk_backoff++; 175 + icsk->icsk_retransmits++; 176 + 177 + icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); 178 + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 179 + if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) 180 + __sk_dst_reset(sk); 181 + out:; 182 + } 183 + 184 + static void dccp_write_timer(unsigned long data) 185 + { 186 + struct sock *sk = (struct sock *)data; 187 + struct inet_connection_sock *icsk = inet_csk(sk); 188 + int event = 0; 189 + 190 + bh_lock_sock(sk); 191 + if (sock_owned_by_user(sk)) { 192 + /* Try again later */ 193 + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); 194 + goto out; 195 + } 196 + 197 + if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) 198 + goto out; 199 + 200 + if (time_after(icsk->icsk_timeout, jiffies)) { 201 + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); 202 + goto out; 203 + } 204 + 205 + event = icsk->icsk_pending; 206 + icsk->icsk_pending = 0; 207 + 208 + switch (event) { 209 + case ICSK_TIME_RETRANS: 210 + dccp_retransmit_timer(sk); 211 + break; 212 + } 213 + out: 214 + bh_unlock_sock(sk); 215 + sock_put(sk); 216 + } 217 + 218 + /* 219 + * Timer for listening sockets 220 + */ 221 + static void dccp_response_timer(struct sock *sk) 222 + { 223 + struct inet_connection_sock *icsk = inet_csk(sk); 224 + const int max_retries = icsk->icsk_syn_retries ? 
: TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; 225 + 226 + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, 227 + DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); 228 + } 229 + 230 + static void dccp_keepalive_timer(unsigned long data) 231 + { 232 + struct sock *sk = (struct sock *)data; 233 + 234 + /* Only process if socket is not in use. */ 235 + bh_lock_sock(sk); 236 + if (sock_owned_by_user(sk)) { 237 + /* Try again later. */ 238 + inet_csk_reset_keepalive_timer(sk, HZ / 20); 239 + goto out; 240 + } 241 + 242 + if (sk->sk_state == DCCP_LISTEN) { 243 + dccp_response_timer(sk); 244 + goto out; 245 + } 246 + out: 247 + bh_unlock_sock(sk); 248 + sock_put(sk); 249 + }