Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp: introduce tcp_data_queue_ofo

Split tcp_data_queue() into two parts for better readability.

tcp_data_queue_ofo() is responsible for queueing incoming skb into out
of order queue.

Change code layout so that the skb_set_owner_r() is performed only if
skb is not dropped.

This is a preliminary patch for the following patch, "reduce
out_of_order memory use".

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: H.K. Jerry Chu <hkchu@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
e86b2919 de128804

+115 -99
+115 -99
net/ipv4/tcp_input.c
··· 4446 4446 return 0; 4447 4447 } 4448 4448 4449 + static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) 4450 + { 4451 + struct tcp_sock *tp = tcp_sk(sk); 4452 + struct sk_buff *skb1; 4453 + u32 seq, end_seq; 4454 + 4455 + TCP_ECN_check_ce(tp, skb); 4456 + 4457 + if (tcp_try_rmem_schedule(sk, skb->truesize)) { 4458 + /* TODO: should increment a counter */ 4459 + __kfree_skb(skb); 4460 + return; 4461 + } 4462 + 4463 + /* Disable header prediction. */ 4464 + tp->pred_flags = 0; 4465 + inet_csk_schedule_ack(sk); 4466 + 4467 + SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", 4468 + tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4469 + 4470 + skb1 = skb_peek_tail(&tp->out_of_order_queue); 4471 + if (!skb1) { 4472 + /* Initial out of order segment, build 1 SACK. */ 4473 + if (tcp_is_sack(tp)) { 4474 + tp->rx_opt.num_sacks = 1; 4475 + tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; 4476 + tp->selective_acks[0].end_seq = 4477 + TCP_SKB_CB(skb)->end_seq; 4478 + } 4479 + __skb_queue_head(&tp->out_of_order_queue, skb); 4480 + goto end; 4481 + } 4482 + 4483 + seq = TCP_SKB_CB(skb)->seq; 4484 + end_seq = TCP_SKB_CB(skb)->end_seq; 4485 + 4486 + if (seq == TCP_SKB_CB(skb1)->end_seq) { 4487 + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4488 + 4489 + if (!tp->rx_opt.num_sacks || 4490 + tp->selective_acks[0].end_seq != seq) 4491 + goto add_sack; 4492 + 4493 + /* Common case: data arrive in order after hole. */ 4494 + tp->selective_acks[0].end_seq = end_seq; 4495 + goto end; 4496 + } 4497 + 4498 + /* Find place to insert this segment. */ 4499 + while (1) { 4500 + if (!after(TCP_SKB_CB(skb1)->seq, seq)) 4501 + break; 4502 + if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { 4503 + skb1 = NULL; 4504 + break; 4505 + } 4506 + skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); 4507 + } 4508 + 4509 + /* Do skb overlap to previous one? 
*/ 4510 + if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { 4511 + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4512 + /* All the bits are present. Drop. */ 4513 + __kfree_skb(skb); 4514 + skb = NULL; 4515 + tcp_dsack_set(sk, seq, end_seq); 4516 + goto add_sack; 4517 + } 4518 + if (after(seq, TCP_SKB_CB(skb1)->seq)) { 4519 + /* Partial overlap. */ 4520 + tcp_dsack_set(sk, seq, 4521 + TCP_SKB_CB(skb1)->end_seq); 4522 + } else { 4523 + if (skb_queue_is_first(&tp->out_of_order_queue, 4524 + skb1)) 4525 + skb1 = NULL; 4526 + else 4527 + skb1 = skb_queue_prev( 4528 + &tp->out_of_order_queue, 4529 + skb1); 4530 + } 4531 + } 4532 + if (!skb1) 4533 + __skb_queue_head(&tp->out_of_order_queue, skb); 4534 + else 4535 + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4536 + 4537 + /* And clean segments covered by new one as whole. */ 4538 + while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { 4539 + skb1 = skb_queue_next(&tp->out_of_order_queue, skb); 4540 + 4541 + if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) 4542 + break; 4543 + if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4544 + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, 4545 + end_seq); 4546 + break; 4547 + } 4548 + __skb_unlink(skb1, &tp->out_of_order_queue); 4549 + tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, 4550 + TCP_SKB_CB(skb1)->end_seq); 4551 + __kfree_skb(skb1); 4552 + } 4553 + 4554 + add_sack: 4555 + if (tcp_is_sack(tp)) 4556 + tcp_sack_new_ofo_skb(sk, seq, end_seq); 4557 + end: 4558 + if (skb) 4559 + skb_set_owner_r(skb, sk); 4560 + } 4561 + 4562 + 4449 4563 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) 4450 4564 { 4451 4565 const struct tcphdr *th = tcp_hdr(skb); ··· 4675 4561 goto queue_and_out; 4676 4562 } 4677 4563 4678 - TCP_ECN_check_ce(tp, skb); 4679 - 4680 - if (tcp_try_rmem_schedule(sk, skb->truesize)) 4681 - goto drop; 4682 - 4683 - /* Disable header prediction. 
*/ 4684 - tp->pred_flags = 0; 4685 - inet_csk_schedule_ack(sk); 4686 - 4687 - SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", 4688 - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); 4689 - 4690 - skb_set_owner_r(skb, sk); 4691 - 4692 - if (!skb_peek(&tp->out_of_order_queue)) { 4693 - /* Initial out of order segment, build 1 SACK. */ 4694 - if (tcp_is_sack(tp)) { 4695 - tp->rx_opt.num_sacks = 1; 4696 - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; 4697 - tp->selective_acks[0].end_seq = 4698 - TCP_SKB_CB(skb)->end_seq; 4699 - } 4700 - __skb_queue_head(&tp->out_of_order_queue, skb); 4701 - } else { 4702 - struct sk_buff *skb1 = skb_peek_tail(&tp->out_of_order_queue); 4703 - u32 seq = TCP_SKB_CB(skb)->seq; 4704 - u32 end_seq = TCP_SKB_CB(skb)->end_seq; 4705 - 4706 - if (seq == TCP_SKB_CB(skb1)->end_seq) { 4707 - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4708 - 4709 - if (!tp->rx_opt.num_sacks || 4710 - tp->selective_acks[0].end_seq != seq) 4711 - goto add_sack; 4712 - 4713 - /* Common case: data arrive in order after hole. */ 4714 - tp->selective_acks[0].end_seq = end_seq; 4715 - return; 4716 - } 4717 - 4718 - /* Find place to insert this segment. */ 4719 - while (1) { 4720 - if (!after(TCP_SKB_CB(skb1)->seq, seq)) 4721 - break; 4722 - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { 4723 - skb1 = NULL; 4724 - break; 4725 - } 4726 - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); 4727 - } 4728 - 4729 - /* Do skb overlap to previous one? */ 4730 - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { 4731 - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4732 - /* All the bits are present. Drop. */ 4733 - __kfree_skb(skb); 4734 - tcp_dsack_set(sk, seq, end_seq); 4735 - goto add_sack; 4736 - } 4737 - if (after(seq, TCP_SKB_CB(skb1)->seq)) { 4738 - /* Partial overlap. 
*/ 4739 - tcp_dsack_set(sk, seq, 4740 - TCP_SKB_CB(skb1)->end_seq); 4741 - } else { 4742 - if (skb_queue_is_first(&tp->out_of_order_queue, 4743 - skb1)) 4744 - skb1 = NULL; 4745 - else 4746 - skb1 = skb_queue_prev( 4747 - &tp->out_of_order_queue, 4748 - skb1); 4749 - } 4750 - } 4751 - if (!skb1) 4752 - __skb_queue_head(&tp->out_of_order_queue, skb); 4753 - else 4754 - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); 4755 - 4756 - /* And clean segments covered by new one as whole. */ 4757 - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { 4758 - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); 4759 - 4760 - if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) 4761 - break; 4762 - if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { 4763 - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, 4764 - end_seq); 4765 - break; 4766 - } 4767 - __skb_unlink(skb1, &tp->out_of_order_queue); 4768 - tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, 4769 - TCP_SKB_CB(skb1)->end_seq); 4770 - __kfree_skb(skb1); 4771 - } 4772 - 4773 - add_sack: 4774 - if (tcp_is_sack(tp)) 4775 - tcp_sack_new_ofo_skb(sk, seq, end_seq); 4776 - } 4564 + tcp_data_queue_ofo(sk, skb); 4777 4565 } 4778 4566 4779 4567 static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,