Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

[ICSK]: Move generalised functions from tcp to inet_connection_sock

This also improves reqsk_queue_prune and renames it to
inet_csk_reqsk_queue_prune: the function deals with inet_connection_sock
and inet_request_sock objects, not just with request_sock ones, and thus
belongs in inet_connection_sock.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
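
The mechanical effect at each call site: an open-coded computation of
max_retries plus a call into the request_sock layer becomes a single call to
the icsk-level helper, which derives the retry limit itself. A before/after
sketch of the tcp_synack_timer() change, taken from the net/ipv4/tcp_timer.c
hunk below:

    /* Before: caller fetches icsk and computes max_retries by hand. */
    static void tcp_synack_timer(struct sock *sk)
    {
            struct inet_connection_sock *icsk = inet_csk(sk);
            const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;

            reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
                              TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
    }

    /* After: the helper hides both the accept queue and the retry policy. */
    static void tcp_synack_timer(struct sock *sk)
    {
            inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
                                       TCP_TIMEOUT_INIT, TCP_RTO_MAX);
    }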

Authored by Arnaldo Carvalho de Melo, committed by David S. Miller
a019d6fe 7c657876

7 files changed, 224 insertions(+), 226 deletions(-)

include/net/inet_connection_sock.h (+7)

···
         reqsk_free(req);
 }
 
+extern void inet_csk_reqsk_queue_prune(struct sock *parent,
+                                       const unsigned long interval,
+                                       const unsigned long timeout,
+                                       const unsigned long max_rto);
+
+extern void inet_csk_destroy_sock(struct sock *sk);
+extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
 extern void inet_csk_listen_stop(struct sock *sk);
 
 #endif /* _INET_CONNECTION_SOCK_H */

include/net/request_sock.h (-4)

···
         write_unlock(&queue->syn_wait_lock);
 }
 
-extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
-                              const unsigned long interval, const unsigned long timeout,
-                              const unsigned long max_rto, int max_retries);
-
 #endif /* _REQUEST_SOCK_H */

include/net/tcp.h (-6)

···
                                     size_t len, int nonblock,
                                     int flags, int *addr_len);
 
-extern int inet_csk_listen_start(struct sock *sk,
-                                 const int nr_table_entries);
-
 extern void tcp_parse_options(struct sk_buff *skb,
                               struct tcp_options_received *opt_rx,
                               int estab);
···
 {
         tp->snd_wl1 = seq;
 }
-
-extern void inet_csk_destroy_sock(struct sock *sk);
-
 
 /*
  * Calculate(/check) TCP checksum

net/dccp/timer.c (+1 -5)

···
  */
 static void dccp_response_timer(struct sock *sk)
 {
-        struct inet_connection_sock *icsk = inet_csk(sk);
-        const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
-
-        reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-                          DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
+        inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
 }
 
 static void dccp_keepalive_timer(unsigned long data)
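
Note the behavioural nuance this buys DCCP: its old code fell back to the
compile-time TCP_SYNACK_RETRIES constant, with a FIXME asking for the sysctl.
Inside the shared helper the fallback is now sysctl_tcp_synack_retries for
every caller, via the GNU ?: extension (a ? : b evaluates to a when a is
non-zero, else b):

    /* From inet_csk_reqsk_queue_prune() below: a per-socket override wins,
     * otherwise the TCP sysctl supplies the default.
     */
    int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;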

net/ipv4/inet_connection_sock.c (+214)

···
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/xfrm.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
···
         inet_csk_reqsk_queue_added(sk, timeout);
 }
 
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+                                const unsigned long interval,
+                                const unsigned long timeout,
+                                const unsigned long max_rto)
+{
+        struct inet_connection_sock *icsk = inet_csk(parent);
+        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+        struct listen_sock *lopt = queue->listen_opt;
+        int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+        int thresh = max_retries;
+        unsigned long now = jiffies;
+        struct request_sock **reqp, *req;
+        int i, budget;
+
+        if (lopt == NULL || lopt->qlen == 0)
+                return;
+
+        /* Normally all the openreqs are young and become mature
+         * (i.e. converted to established socket) for first timeout.
+         * If synack was not acknowledged for 3 seconds, it means
+         * one of the following things: synack was lost, ack was lost,
+         * rtt is high or nobody planned to ack (i.e. synflood).
+         * When server is a bit loaded, queue is populated with old
+         * open requests, reducing effective size of queue.
+         * When server is well loaded, queue size reduces to zero
+         * after several minutes of work. It is not synflood,
+         * it is normal operation. The solution is pruning
+         * too old entries overriding normal timeout, when
+         * situation becomes dangerous.
+         *
+         * Essentially, we reserve half of room for young
+         * embrions; and abort old ones without pity, if old
+         * ones are about to clog our table.
+         */
+        if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
+                int young = (lopt->qlen_young << 1);
+
+                while (thresh > 2) {
+                        if (lopt->qlen < young)
+                                break;
+                        thresh--;
+                        young <<= 1;
+                }
+        }
+
+        if (queue->rskq_defer_accept)
+                max_retries = queue->rskq_defer_accept;
+
+        budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+        i = lopt->clock_hand;
+
+        do {
+                reqp = &lopt->syn_table[i];
+                while ((req = *reqp) != NULL) {
+                        if (time_after_eq(now, req->expires)) {
+                                if ((req->retrans < thresh ||
+                                     (inet_rsk(req)->acked && req->retrans < max_retries))
+                                    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+                                        unsigned long timeo;
+
+                                        if (req->retrans++ == 0)
+                                                lopt->qlen_young--;
+                                        timeo = min((timeout << req->retrans), max_rto);
+                                        req->expires = now + timeo;
+                                        reqp = &req->dl_next;
+                                        continue;
+                                }
+
+                                /* Drop this request */
+                                inet_csk_reqsk_queue_unlink(parent, req, reqp);
+                                reqsk_queue_removed(queue, req);
+                                reqsk_free(req);
+                                continue;
+                        }
+                        reqp = &req->dl_next;
+                }
+
+                i = (i + 1) & (lopt->nr_table_entries - 1);
+
+        } while (--budget > 0);
+
+        lopt->clock_hand = i;
+
+        if (lopt->qlen)
+                inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
 
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
                             const unsigned int __nocast priority)
···
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all. Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+        BUG_TRAP(sk->sk_state == TCP_CLOSE);
+        BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+        /* It cannot be in hash table! */
+        BUG_TRAP(sk_unhashed(sk));
+
+        /* If it has not 0 inet_sk(sk)->num, it must be bound */
+        BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+        sk->sk_prot->destroy(sk);
+
+        sk_stream_kill_queues(sk);
+
+        xfrm_sk_free_policy(sk);
+
+        sk_refcnt_debug_release(sk);
+
+        atomic_dec(sk->sk_prot->orphan_count);
+        sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+        struct inet_sock *inet = inet_sk(sk);
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+        if (rc != 0)
+                return rc;
+
+        sk->sk_max_ack_backlog = 0;
+        sk->sk_ack_backlog = 0;
+        inet_csk_delack_init(sk);
+
+        /* There is race window here: we announce ourselves listening,
+         * but this transition is still not validated by get_port().
+         * It is OK, because this socket enters to hash table only
+         * after validation is complete.
+         */
+        sk->sk_state = TCP_LISTEN;
+        if (!sk->sk_prot->get_port(sk, inet->num)) {
+                inet->sport = htons(inet->num);
+
+                sk_dst_reset(sk);
+                sk->sk_prot->hash(sk);
+
+                return 0;
+        }
+
+        sk->sk_state = TCP_CLOSE;
+        __reqsk_queue_destroy(&icsk->icsk_accept_queue);
+        return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        struct request_sock *acc_req;
+        struct request_sock *req;
+
+        inet_csk_delete_keepalive_timer(sk);
+
+        /* make all the listen_opt local to us */
+        acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+        /* Following specs, it would be better either to send FIN
+         * (and enter FIN-WAIT-1, it is normal close)
+         * or to send active reset (abort).
+         * Certainly, it is pretty dangerous while synflood, but it is
+         * bad justification for our negligence 8)
+         * To be honest, we are not able to make either
+         * of the variants now. --ANK
+         */
+        reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+        while ((req = acc_req) != NULL) {
+                struct sock *child = req->sk;
+
+                acc_req = req->dl_next;
+
+                local_bh_disable();
+                bh_lock_sock(child);
+                BUG_TRAP(!sock_owned_by_user(child));
+                sock_hold(child);
+
+                sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+                sock_orphan(child);
+
+                atomic_inc(sk->sk_prot->orphan_count);
+
+                inet_csk_destroy_sock(child);
+
+                bh_unlock_sock(child);
+                local_bh_enable();
+                sock_put(child);
+
+                sk_acceptq_removed(sk);
+                __reqsk_free(req);
+        }
+        BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
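
The "reserve half of room for young embrions" comment compresses a lot. A
minimal user-space model of the same threshold computation (prune_thresh() is
a hypothetical name, and the plain-int parameters stand in for the listen_sock
fields):

    /* Illustrative only, not part of the patch: once the SYN queue is at
     * least half full, lower the retry threshold (never below 2) until the
     * young, never-retransmitted entries would account for at least half of
     * the entries we are prepared to keep.
     */
    static int prune_thresh(int qlen, int qlen_young, int max_qlen_log,
                            int max_retries)
    {
            int thresh = max_retries;

            if (qlen >> (max_qlen_log - 1)) {       /* queue >= half full */
                    int young = qlen_young << 1;

                    while (thresh > 2) {
                            if (qlen < young)
                                    break;
                            thresh--;
                            young <<= 1;
                    }
            }
            return thresh;
    }

For example, with max_retries = 5, qlen = 1024, qlen_young = 64 and
max_qlen_log = 10 (a 1024-entry table at capacity, 6.25% young), thresh falls
all the way to 2, so any request already retransmitted twice is dropped on its
next expiry (barring the acked/defer-accept escape in the full code).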

net/ipv4/tcp.c (-120)

···
         return put_user(answ, (int __user *)arg);
 }
 
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
-{
-        struct inet_sock *inet = inet_sk(sk);
-        struct inet_connection_sock *icsk = inet_csk(sk);
-        int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
-
-        if (rc != 0)
-                return rc;
-
-        sk->sk_max_ack_backlog = 0;
-        sk->sk_ack_backlog = 0;
-        inet_csk_delack_init(sk);
-
-        /* There is race window here: we announce ourselves listening,
-         * but this transition is still not validated by get_port().
-         * It is OK, because this socket enters to hash table only
-         * after validation is complete.
-         */
-        sk->sk_state = TCP_LISTEN;
-        if (!sk->sk_prot->get_port(sk, inet->num)) {
-                inet->sport = htons(inet->num);
-
-                sk_dst_reset(sk);
-                sk->sk_prot->hash(sk);
-
-                return 0;
-        }
-
-        sk->sk_state = TCP_CLOSE;
-        __reqsk_queue_destroy(&icsk->icsk_accept_queue);
-        return -EADDRINUSE;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
-
-/*
- * This routine closes sockets which have been at least partially
- * opened, but not yet accepted.
- */
-void inet_csk_listen_stop(struct sock *sk)
-{
-        struct inet_connection_sock *icsk = inet_csk(sk);
-        struct request_sock *acc_req;
-        struct request_sock *req;
-
-        inet_csk_delete_keepalive_timer(sk);
-
-        /* make all the listen_opt local to us */
-        acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
-
-        /* Following specs, it would be better either to send FIN
-         * (and enter FIN-WAIT-1, it is normal close)
-         * or to send active reset (abort).
-         * Certainly, it is pretty dangerous while synflood, but it is
-         * bad justification for our negligence 8)
-         * To be honest, we are not able to make either
-         * of the variants now. --ANK
-         */
-        reqsk_queue_destroy(&icsk->icsk_accept_queue);
-
-        while ((req = acc_req) != NULL) {
-                struct sock *child = req->sk;
-
-                acc_req = req->dl_next;
-
-                local_bh_disable();
-                bh_lock_sock(child);
-                BUG_TRAP(!sock_owned_by_user(child));
-                sock_hold(child);
-
-                sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-                sock_orphan(child);
-
-                atomic_inc(sk->sk_prot->orphan_count);
-
-                inet_csk_destroy_sock(child);
-
-                bh_unlock_sock(child);
-                local_bh_enable();
-                sock_put(child);
-
-                sk_acceptq_removed(sk);
-                __reqsk_free(req);
-        }
-        BUG_TRAP(!sk->sk_ack_backlog);
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
-
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
         TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
···
         }
 }
 
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all. Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void inet_csk_destroy_sock(struct sock *sk)
-{
-        BUG_TRAP(sk->sk_state == TCP_CLOSE);
-        BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
-        /* It cannot be in hash table! */
-        BUG_TRAP(sk_unhashed(sk));
-
-        /* If it has not 0 inet_sk(sk)->num, it must be bound */
-        BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
-
-        sk->sk_prot->destroy(sk);
-
-        sk_stream_kill_queues(sk);
-
-        xfrm_sk_free_policy(sk);
-
-        sk_refcnt_debug_release(sk);
-
-        atomic_dec(sk->sk_prot->orphan_count);
-        sock_put(sk);
-}
-
 void tcp_close(struct sock *sk, long timeout)
 {
         struct sk_buff *skb;
···
 }
 
 EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(inet_csk_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);

net/ipv4/tcp_timer.c (+2 -91)

···
         sock_put(sk);
 }
 
-void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
-                       const unsigned long interval, const unsigned long timeout,
-                       const unsigned long max_rto, int max_retries)
-{
-        struct inet_connection_sock *icsk = inet_csk(parent);
-        struct listen_sock *lopt = queue->listen_opt;
-        int thresh = max_retries;
-        unsigned long now = jiffies;
-        struct request_sock **reqp, *req;
-        int i, budget;
-
-        if (lopt == NULL || lopt->qlen == 0)
-                return;
-
-        /* Normally all the openreqs are young and become mature
-         * (i.e. converted to established socket) for first timeout.
-         * If synack was not acknowledged for 3 seconds, it means
-         * one of the following things: synack was lost, ack was lost,
-         * rtt is high or nobody planned to ack (i.e. synflood).
-         * When server is a bit loaded, queue is populated with old
-         * open requests, reducing effective size of queue.
-         * When server is well loaded, queue size reduces to zero
-         * after several minutes of work. It is not synflood,
-         * it is normal operation. The solution is pruning
-         * too old entries overriding normal timeout, when
-         * situation becomes dangerous.
-         *
-         * Essentially, we reserve half of room for young
-         * embrions; and abort old ones without pity, if old
-         * ones are about to clog our table.
-         */
-        if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
-                int young = (lopt->qlen_young << 1);
-
-                while (thresh > 2) {
-                        if (lopt->qlen < young)
-                                break;
-                        thresh--;
-                        young <<= 1;
-                }
-        }
-
-        if (queue->rskq_defer_accept)
-                max_retries = queue->rskq_defer_accept;
-
-        budget = 2 * (lopt->nr_table_entries / (timeout / interval));
-        i = lopt->clock_hand;
-
-        do {
-                reqp = &lopt->syn_table[i];
-                while ((req = *reqp) != NULL) {
-                        if (time_after_eq(now, req->expires)) {
-                                if ((req->retrans < thresh ||
-                                     (inet_rsk(req)->acked && req->retrans < max_retries))
-                                    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
-                                        unsigned long timeo;
-
-                                        if (req->retrans++ == 0)
-                                                lopt->qlen_young--;
-                                        timeo = min((timeout << req->retrans), max_rto);
-                                        req->expires = now + timeo;
-                                        reqp = &req->dl_next;
-                                        continue;
-                                }
-
-                                /* Drop this request */
-                                inet_csk_reqsk_queue_unlink(parent, req, reqp);
-                                reqsk_queue_removed(&icsk->icsk_accept_queue, req);
-                                reqsk_free(req);
-                                continue;
-                        }
-                        reqp = &req->dl_next;
-                }
-
-                i = (i + 1) & (lopt->nr_table_entries - 1);
-
-        } while (--budget > 0);
-
-        lopt->clock_hand = i;
-
-        if (lopt->qlen)
-                inet_csk_reset_keepalive_timer(parent, interval);
-}
-
-EXPORT_SYMBOL_GPL(reqsk_queue_prune);
-
 /*
  * Timer for listening sockets
  */
 
 static void tcp_synack_timer(struct sock *sk)
 {
-        struct inet_connection_sock *icsk = inet_csk(sk);
-        const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-
-        reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-                          TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
+        inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+                                   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)