Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sctp: Add GSO support

SCTP has this peculiarity that its packets cannot be just segmented to
(P)MTU. Its chunks must be contained in IP segments, padding respected.
So we can't just generate a big skb, set gso_size to the fragmentation
point and deliver it to IP layer.

This patch takes a different approach. SCTP will now build a skb as it
would be if it was received using GRO. That is, there will be a cover
skb with protocol headers and children ones containing the actual
segments, already segmented to a way that respects SCTP RFCs.

With that, we can tell skb_segment() to just split based on frag_list,
trusting its sizes are already in accordance.

This way SCTP can benefit from GSO and instead of passing several
packets through the stack, it can pass a single large packet.

v2:
- Added support for receiving GSO frames, as requested by Dave Miller.
- Clear skb->cb if packet is GSO (otherwise it's not used by SCTP)
- Added heuristics similar to what we have in TCP for not generating
single GSO packets that fill cwnd.
v3:
- consider sctphdr size in skb_gso_transport_seglen()
- rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for
unsupported GSO")

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Tested-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Marcelo Ricardo Leitner and committed by
David S. Miller
90017acc 3acb50c1

+428 -125
+5 -2
include/linux/netdev_features.h
··· 53 53 * headers in software. 54 54 */ 55 55 NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ 56 + NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ 56 57 /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ 57 - NETIF_F_GSO_TUNNEL_REMCSUM_BIT, 58 + NETIF_F_GSO_SCTP_BIT, 58 59 59 60 NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ 60 61 NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ ··· 129 128 #define NETIF_F_TSO_MANGLEID __NETIF_F(TSO_MANGLEID) 130 129 #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) 131 130 #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) 131 + #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) 132 132 #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) 133 133 #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) 134 134 #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) ··· 168 166 NETIF_F_FSO) 169 167 170 168 /* List of features with software fallbacks. */ 171 - #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO) 169 + #define NETIF_F_GSO_SOFTWARE (NETIF_F_ALL_TSO | NETIF_F_UFO | \ 170 + NETIF_F_GSO_SCTP) 172 171 173 172 /* 174 173 * If one device supports one of these features, then enable them
+1
include/linux/netdevice.h
··· 4012 4012 BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); 4013 4013 BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); 4014 4014 BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); 4015 + BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); 4015 4016 4016 4017 return (features & feature) == feature; 4017 4018 }
+2
include/linux/skbuff.h
··· 487 487 SKB_GSO_PARTIAL = 1 << 13, 488 488 489 489 SKB_GSO_TUNNEL_REMCSUM = 1 << 14, 490 + 491 + SKB_GSO_SCTP = 1 << 15, 490 492 }; 491 493 492 494 #if BITS_PER_LONG > 32
+4
include/net/sctp/sctp.h
··· 186 186 int sctp_remaddr_proc_init(struct net *net); 187 187 void sctp_remaddr_proc_exit(struct net *net); 188 188 189 + /* 190 + * sctp/offload.c 191 + */ 192 + int sctp_offload_init(void); 189 193 190 194 /* 191 195 * Module global variables
+5
include/net/sctp/structs.h
··· 566 566 /* This points to the sk_buff containing the actual data. */ 567 567 struct sk_buff *skb; 568 568 569 + /* In case of GSO packets, this will store the head one */ 570 + struct sk_buff *head_skb; 571 + 569 572 /* These are the SCTP headers by reverse order in a packet. 570 573 * Note that some of these may happen more than once. In that 571 574 * case, we point at the "current" one, whatever that means ··· 699 696 size_t overhead; 700 697 /* This is the total size of all chunks INCLUDING padding. */ 701 698 size_t size; 699 + /* This is the maximum size this packet may have */ 700 + size_t max_size; 702 701 703 702 /* The packet is destined for this transport address. 704 703 * The function we finally use to pass down to the next lower
+1
net/core/ethtool.c
··· 89 89 [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", 90 90 [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", 91 91 [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", 92 + [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", 92 93 93 94 [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", 94 95 [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
+3
net/core/skbuff.c
··· 49 49 #include <linux/slab.h> 50 50 #include <linux/tcp.h> 51 51 #include <linux/udp.h> 52 + #include <linux/sctp.h> 52 53 #include <linux/netdevice.h> 53 54 #ifdef CONFIG_NET_CLS_ACT 54 55 #include <net/pkt_sched.h> ··· 4384 4383 thlen += inner_tcp_hdrlen(skb); 4385 4384 } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { 4386 4385 thlen = tcp_hdrlen(skb); 4386 + } else if (unlikely(shinfo->gso_type & SKB_GSO_SCTP)) { 4387 + thlen = sizeof(struct sctphdr); 4387 4388 } 4388 4389 /* UFO sets gso_size to the size of the fragmentation 4389 4390 * payload, i.e. the size of the L4 (UDP) header is already
+2 -1
net/sctp/Makefile
··· 11 11 transport.o chunk.o sm_make_chunk.o ulpevent.o \ 12 12 inqueue.o outqueue.o ulpqueue.o \ 13 13 tsnmap.o bind_addr.o socket.o primitive.o \ 14 - output.o input.o debug.o ssnmap.o auth.o 14 + output.o input.o debug.o ssnmap.o auth.o \ 15 + offload.o 15 16 16 17 sctp_probe-y := probe.o 17 18
+11 -1
net/sctp/input.c
··· 139 139 skb->csum_valid = 0; /* Previous value not applicable */ 140 140 if (skb_csum_unnecessary(skb)) 141 141 __skb_decr_checksum_unnecessary(skb); 142 - else if (!sctp_checksum_disable && sctp_rcv_checksum(net, skb) < 0) 142 + else if (!sctp_checksum_disable && 143 + !(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) && 144 + sctp_rcv_checksum(net, skb) < 0) 143 145 goto discard_it; 144 146 skb->csum_valid = 1; 145 147 ··· 1176 1174 struct sctp_transport **transportp) 1177 1175 { 1178 1176 sctp_chunkhdr_t *ch; 1177 + 1178 + /* We do not allow GSO frames here as we need to linearize and 1179 + * then cannot guarantee frame boundaries. This shouldn't be an 1180 + * issue as packets hitting this are mostly INIT or INIT-ACK and 1181 + * those cannot be on GSO-style anyway. 1182 + */ 1183 + if ((skb_shinfo(skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) 1184 + return NULL; 1179 1185 1180 1186 if (skb_linearize(skb)) 1181 1187 return NULL;
+44 -9
net/sctp/inqueue.c
··· 138 138 if (chunk->singleton || 139 139 chunk->end_of_packet || 140 140 chunk->pdiscard) { 141 + if (chunk->head_skb == chunk->skb) { 142 + chunk->skb = skb_shinfo(chunk->skb)->frag_list; 143 + goto new_skb; 144 + } 145 + if (chunk->skb->next) { 146 + chunk->skb = chunk->skb->next; 147 + goto new_skb; 148 + } 149 + 150 + if (chunk->head_skb) 151 + chunk->skb = chunk->head_skb; 141 152 sctp_chunk_free(chunk); 142 153 chunk = queue->in_progress = NULL; 143 154 } else { ··· 166 155 167 156 next_chunk: 168 157 /* Is the queue empty? */ 169 - if (list_empty(&queue->in_chunk_list)) 158 + entry = sctp_list_dequeue(&queue->in_chunk_list); 159 + if (!entry) 170 160 return NULL; 171 161 172 - entry = queue->in_chunk_list.next; 173 162 chunk = list_entry(entry, struct sctp_chunk, list); 174 - list_del_init(entry); 175 163 176 164 /* Linearize if it's not GSO */ 177 - if (skb_is_nonlinear(chunk->skb)) { 165 + if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) != SKB_GSO_SCTP && 166 + skb_is_nonlinear(chunk->skb)) { 178 167 if (skb_linearize(chunk->skb)) { 179 168 __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); 180 169 sctp_chunk_free(chunk); ··· 185 174 chunk->sctp_hdr = sctp_hdr(chunk->skb); 186 175 } 187 176 188 - queue->in_progress = chunk; 177 + if ((skb_shinfo(chunk->skb)->gso_type & SKB_GSO_SCTP) == SKB_GSO_SCTP) { 178 + /* GSO-marked skbs but without frags, handle 179 + * them normally 180 + */ 181 + if (skb_shinfo(chunk->skb)->frag_list) 182 + chunk->head_skb = chunk->skb; 189 183 190 - /* This is the first chunk in the packet. 
*/ 191 - chunk->singleton = 1; 192 - ch = (sctp_chunkhdr_t *) chunk->skb->data; 193 - chunk->data_accepted = 0; 184 + /* skbs with "cover letter" */ 185 + if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) 186 + chunk->skb = skb_shinfo(chunk->skb)->frag_list; 187 + 188 + if (WARN_ON(!chunk->skb)) { 189 + __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); 190 + sctp_chunk_free(chunk); 191 + goto next_chunk; 192 + } 193 + } 194 194 195 195 if (chunk->asoc) 196 196 sock_rps_save_rxhash(chunk->asoc->base.sk, chunk->skb); 197 + 198 + queue->in_progress = chunk; 199 + 200 + new_skb: 201 + /* This is the first chunk in the packet. */ 202 + ch = (sctp_chunkhdr_t *) chunk->skb->data; 203 + chunk->singleton = 1; 204 + chunk->data_accepted = 0; 205 + chunk->pdiscard = 0; 206 + chunk->auth = 0; 207 + chunk->has_asconf = 0; 208 + chunk->end_of_packet = 0; 209 + chunk->ecn_ce_done = 0; 197 210 } 198 211 199 212 chunk->chunk_hdr = ch;
+98
net/sctp/offload.c
··· 1 + /* 2 + * sctp_offload - GRO/GSO Offloading for SCTP 3 + * 4 + * Copyright (C) 2015, Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License as published by 8 + * the Free Software Foundation; either version 2 of the License, or 9 + * (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + */ 16 + 17 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 + 19 + #include <linux/kernel.h> 20 + #include <linux/kprobes.h> 21 + #include <linux/socket.h> 22 + #include <linux/sctp.h> 23 + #include <linux/proc_fs.h> 24 + #include <linux/vmalloc.h> 25 + #include <linux/module.h> 26 + #include <linux/kfifo.h> 27 + #include <linux/time.h> 28 + #include <net/net_namespace.h> 29 + 30 + #include <linux/skbuff.h> 31 + #include <net/sctp/sctp.h> 32 + #include <net/sctp/checksum.h> 33 + #include <net/protocol.h> 34 + 35 + static __le32 sctp_gso_make_checksum(struct sk_buff *skb) 36 + { 37 + skb->ip_summed = CHECKSUM_NONE; 38 + return sctp_compute_cksum(skb, skb_transport_offset(skb)); 39 + } 40 + 41 + static struct sk_buff *sctp_gso_segment(struct sk_buff *skb, 42 + netdev_features_t features) 43 + { 44 + struct sk_buff *segs = ERR_PTR(-EINVAL); 45 + struct sctphdr *sh; 46 + 47 + sh = sctp_hdr(skb); 48 + if (!pskb_may_pull(skb, sizeof(*sh))) 49 + goto out; 50 + 51 + __skb_pull(skb, sizeof(*sh)); 52 + 53 + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { 54 + /* Packet is from an untrusted source, reset gso_segs. 
*/ 55 + struct skb_shared_info *pinfo = skb_shinfo(skb); 56 + struct sk_buff *frag_iter; 57 + 58 + pinfo->gso_segs = 0; 59 + if (skb->len != skb->data_len) { 60 + /* Means we have chunks in here too */ 61 + pinfo->gso_segs++; 62 + } 63 + 64 + skb_walk_frags(skb, frag_iter) 65 + pinfo->gso_segs++; 66 + 67 + segs = NULL; 68 + goto out; 69 + } 70 + 71 + segs = skb_segment(skb, features | NETIF_F_HW_CSUM); 72 + if (IS_ERR(segs)) 73 + goto out; 74 + 75 + /* All that is left is update SCTP CRC if necessary */ 76 + if (!(features & NETIF_F_SCTP_CRC)) { 77 + for (skb = segs; skb; skb = skb->next) { 78 + if (skb->ip_summed == CHECKSUM_PARTIAL) { 79 + sh = sctp_hdr(skb); 80 + sh->checksum = sctp_gso_make_checksum(skb); 81 + } 82 + } 83 + } 84 + 85 + out: 86 + return segs; 87 + } 88 + 89 + static const struct net_offload sctp_offload = { 90 + .callbacks = { 91 + .gso_segment = sctp_gso_segment, 92 + }, 93 + }; 94 + 95 + int __init sctp_offload_init(void) 96 + { 97 + return inet_add_offload(&sctp_offload, IPPROTO_SCTP); 98 + }
+247 -112
net/sctp/output.c
··· 84 84 struct sctp_packet *sctp_packet_config(struct sctp_packet *packet, 85 85 __u32 vtag, int ecn_capable) 86 86 { 87 - struct sctp_chunk *chunk = NULL; 87 + struct sctp_transport *tp = packet->transport; 88 + struct sctp_association *asoc = tp->asoc; 88 89 89 90 pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag); 90 91 91 92 packet->vtag = vtag; 92 93 94 + if (asoc && tp->dst) { 95 + struct sock *sk = asoc->base.sk; 96 + 97 + rcu_read_lock(); 98 + if (__sk_dst_get(sk) != tp->dst) { 99 + dst_hold(tp->dst); 100 + sk_setup_caps(sk, tp->dst); 101 + } 102 + 103 + if (sk_can_gso(sk)) { 104 + struct net_device *dev = tp->dst->dev; 105 + 106 + packet->max_size = dev->gso_max_size; 107 + } else { 108 + packet->max_size = asoc->pathmtu; 109 + } 110 + rcu_read_unlock(); 111 + 112 + } else { 113 + packet->max_size = tp->pathmtu; 114 + } 115 + 93 116 if (ecn_capable && sctp_packet_empty(packet)) { 94 - chunk = sctp_get_ecne_prepend(packet->transport->asoc); 117 + struct sctp_chunk *chunk; 95 118 96 119 /* If there a is a prepend chunk stick it on the list before 97 120 * any other chunks get appended. 98 121 */ 122 + chunk = sctp_get_ecne_prepend(asoc); 99 123 if (chunk) 100 124 sctp_packet_append_chunk(packet, chunk); 101 125 } ··· 405 381 struct sctp_transport *tp = packet->transport; 406 382 struct sctp_association *asoc = tp->asoc; 407 383 struct sctphdr *sh; 408 - struct sk_buff *nskb; 384 + struct sk_buff *nskb = NULL, *head = NULL; 409 385 struct sctp_chunk *chunk, *tmp; 410 386 struct sock *sk; 411 387 int err = 0; 412 388 int padding; /* How much padding do we need? */ 389 + int pkt_size; 413 390 __u8 has_data = 0; 391 + int gso = 0; 392 + int pktcount = 0; 414 393 struct dst_entry *dst; 415 394 unsigned char *auth = NULL; /* pointer to auth in skb data */ 416 395 ··· 427 400 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); 428 401 sk = chunk->skb->sk; 429 402 430 - /* Allocate the new skb. 
*/ 431 - nskb = alloc_skb(packet->size + MAX_HEADER, gfp); 432 - if (!nskb) 403 + /* Allocate the head skb, or main one if not in GSO */ 404 + if (packet->size > tp->pathmtu && !packet->ipfragok) { 405 + if (sk_can_gso(sk)) { 406 + gso = 1; 407 + pkt_size = packet->overhead; 408 + } else { 409 + /* If this happens, we trash this packet and try 410 + * to build a new one, hopefully correct this 411 + * time. Application may notice this error. 412 + */ 413 + pr_err_once("Trying to GSO but underlying device doesn't support it."); 414 + goto nomem; 415 + } 416 + } else { 417 + pkt_size = packet->size; 418 + } 419 + head = alloc_skb(pkt_size + MAX_HEADER, gfp); 420 + if (!head) 433 421 goto nomem; 422 + if (gso) { 423 + NAPI_GRO_CB(head)->last = head; 424 + skb_shinfo(head)->gso_type = sk->sk_gso_type; 425 + } 434 426 435 427 /* Make sure the outbound skb has enough header room reserved. */ 436 - skb_reserve(nskb, packet->overhead + MAX_HEADER); 428 + skb_reserve(head, packet->overhead + MAX_HEADER); 437 429 438 430 /* Set the owning socket so that we know where to get the 439 431 * destination IP address. 440 432 */ 441 - sctp_packet_set_owner_w(nskb, sk); 433 + sctp_packet_set_owner_w(head, sk); 442 434 443 435 if (!sctp_transport_dst_check(tp)) { 444 436 sctp_transport_route(tp, NULL, sctp_sk(sk)); ··· 468 422 dst = dst_clone(tp->dst); 469 423 if (!dst) 470 424 goto no_route; 471 - skb_dst_set(nskb, dst); 425 + skb_dst_set(head, dst); 472 426 473 427 /* Build the SCTP header. 
*/ 474 - sh = (struct sctphdr *)skb_push(nskb, sizeof(struct sctphdr)); 475 - skb_reset_transport_header(nskb); 428 + sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr)); 429 + skb_reset_transport_header(head); 476 430 sh->source = htons(packet->source_port); 477 431 sh->dest = htons(packet->destination_port); 478 432 ··· 487 441 sh->vtag = htonl(packet->vtag); 488 442 sh->checksum = 0; 489 443 490 - /** 491 - * 6.10 Bundling 492 - * 493 - * An endpoint bundles chunks by simply including multiple 494 - * chunks in one outbound SCTP packet. ... 495 - */ 496 - 497 - /** 498 - * 3.2 Chunk Field Descriptions 499 - * 500 - * The total length of a chunk (including Type, Length and 501 - * Value fields) MUST be a multiple of 4 bytes. If the length 502 - * of the chunk is not a multiple of 4 bytes, the sender MUST 503 - * pad the chunk with all zero bytes and this padding is not 504 - * included in the chunk length field. The sender should 505 - * never pad with more than 3 bytes. 506 - * 507 - * [This whole comment explains WORD_ROUND() below.] 508 - */ 509 - 510 444 pr_debug("***sctp_transmit_packet***\n"); 511 445 512 - list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 513 - list_del_init(&chunk->list); 514 - if (sctp_chunk_is_data(chunk)) { 515 - /* 6.3.1 C4) When data is in flight and when allowed 516 - * by rule C5, a new RTT measurement MUST be made each 517 - * round trip. Furthermore, new RTT measurements 518 - * SHOULD be made no more than once per round-trip 519 - * for a given destination transport address. 520 - */ 446 + do { 447 + /* Set up convenience variables... */ 448 + chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list); 449 + pktcount++; 521 450 522 - if (!chunk->resent && !tp->rto_pending) { 523 - chunk->rtt_in_progress = 1; 524 - tp->rto_pending = 1; 451 + /* Calculate packet size, so it fits in PMTU. Leave 452 + * other chunks for the next packets. 
453 + */ 454 + if (gso) { 455 + pkt_size = packet->overhead; 456 + list_for_each_entry(chunk, &packet->chunk_list, list) { 457 + int padded = WORD_ROUND(chunk->skb->len); 458 + 459 + if (pkt_size + padded > tp->pathmtu) 460 + break; 461 + pkt_size += padded; 525 462 } 526 463 527 - has_data = 1; 464 + /* Allocate a new skb. */ 465 + nskb = alloc_skb(pkt_size + MAX_HEADER, gfp); 466 + if (!nskb) 467 + goto nomem; 468 + 469 + /* Make sure the outbound skb has enough header 470 + * room reserved. 471 + */ 472 + skb_reserve(nskb, packet->overhead + MAX_HEADER); 473 + } else { 474 + nskb = head; 528 475 } 529 476 530 - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; 531 - if (padding) 532 - memset(skb_put(chunk->skb, padding), 0, padding); 533 - 534 - /* if this is the auth chunk that we are adding, 535 - * store pointer where it will be added and put 536 - * the auth into the packet. 477 + /** 478 + * 3.2 Chunk Field Descriptions 479 + * 480 + * The total length of a chunk (including Type, Length and 481 + * Value fields) MUST be a multiple of 4 bytes. If the length 482 + * of the chunk is not a multiple of 4 bytes, the sender MUST 483 + * pad the chunk with all zero bytes and this padding is not 484 + * included in the chunk length field. The sender should 485 + * never pad with more than 3 bytes. 486 + * 487 + * [This whole comment explains WORD_ROUND() below.] 537 488 */ 538 - if (chunk == packet->auth) 539 - auth = skb_tail_pointer(nskb); 540 489 541 - memcpy(skb_put(nskb, chunk->skb->len), 490 + pkt_size -= packet->overhead; 491 + list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { 492 + list_del_init(&chunk->list); 493 + if (sctp_chunk_is_data(chunk)) { 494 + /* 6.3.1 C4) When data is in flight and when allowed 495 + * by rule C5, a new RTT measurement MUST be made each 496 + * round trip. Furthermore, new RTT measurements 497 + * SHOULD be made no more than once per round-trip 498 + * for a given destination transport address. 
499 + */ 500 + 501 + if (!chunk->resent && !tp->rto_pending) { 502 + chunk->rtt_in_progress = 1; 503 + tp->rto_pending = 1; 504 + } 505 + 506 + has_data = 1; 507 + } 508 + 509 + padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; 510 + if (padding) 511 + memset(skb_put(chunk->skb, padding), 0, padding); 512 + 513 + /* if this is the auth chunk that we are adding, 514 + * store pointer where it will be added and put 515 + * the auth into the packet. 516 + */ 517 + if (chunk == packet->auth) 518 + auth = skb_tail_pointer(nskb); 519 + 520 + memcpy(skb_put(nskb, chunk->skb->len), 542 521 chunk->skb->data, chunk->skb->len); 543 522 544 - pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, " 545 - "rtt_in_progress:%d\n", chunk, 546 - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), 547 - chunk->has_tsn ? "TSN" : "No TSN", 548 - chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, 549 - ntohs(chunk->chunk_hdr->length), chunk->skb->len, 550 - chunk->rtt_in_progress); 523 + pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n", 524 + chunk, 525 + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)), 526 + chunk->has_tsn ? "TSN" : "No TSN", 527 + chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0, 528 + ntohs(chunk->chunk_hdr->length), chunk->skb->len, 529 + chunk->rtt_in_progress); 551 530 552 - /* 553 - * If this is a control chunk, this is our last 554 - * reference. Free data chunks after they've been 555 - * acknowledged or have failed. 531 + /* If this is a control chunk, this is our last 532 + * reference. Free data chunks after they've been 533 + * acknowledged or have failed. 534 + * Re-queue auth chunks if needed. 
535 + */ 536 + pkt_size -= WORD_ROUND(chunk->skb->len); 537 + 538 + if (chunk == packet->auth && !list_empty(&packet->chunk_list)) 539 + list_add(&chunk->list, &packet->chunk_list); 540 + else if (!sctp_chunk_is_data(chunk)) 541 + sctp_chunk_free(chunk); 542 + 543 + if (!pkt_size) 544 + break; 545 + } 546 + 547 + /* SCTP-AUTH, Section 6.2 548 + * The sender MUST calculate the MAC as described in RFC2104 [2] 549 + * using the hash function H as described by the MAC Identifier and 550 + * the shared association key K based on the endpoint pair shared key 551 + * described by the shared key identifier. The 'data' used for the 552 + * computation of the AUTH-chunk is given by the AUTH chunk with its 553 + * HMAC field set to zero (as shown in Figure 6) followed by all 554 + * chunks that are placed after the AUTH chunk in the SCTP packet. 556 555 */ 557 - if (!sctp_chunk_is_data(chunk)) 558 - sctp_chunk_free(chunk); 559 - } 556 + if (auth) 557 + sctp_auth_calculate_hmac(asoc, nskb, 558 + (struct sctp_auth_chunk *)auth, 559 + gfp); 560 560 561 - /* SCTP-AUTH, Section 6.2 562 - * The sender MUST calculate the MAC as described in RFC2104 [2] 563 - * using the hash function H as described by the MAC Identifier and 564 - * the shared association key K based on the endpoint pair shared key 565 - * described by the shared key identifier. The 'data' used for the 566 - * computation of the AUTH-chunk is given by the AUTH chunk with its 567 - * HMAC field set to zero (as shown in Figure 6) followed by all 568 - * chunks that are placed after the AUTH chunk in the SCTP packet. 
569 - */ 570 - if (auth) 571 - sctp_auth_calculate_hmac(asoc, nskb, 572 - (struct sctp_auth_chunk *)auth, 573 - gfp); 561 + if (!gso) 562 + break; 563 + 564 + if (skb_gro_receive(&head, nskb)) 565 + goto nomem; 566 + nskb = NULL; 567 + if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= 568 + sk->sk_gso_max_segs)) 569 + goto nomem; 570 + } while (!list_empty(&packet->chunk_list)); 574 571 575 572 /* 2) Calculate the Adler-32 checksum of the whole packet, 576 573 * including the SCTP common header and all the ··· 621 532 * 622 533 * Note: Adler-32 is no longer applicable, as has been replaced 623 534 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. 535 + * 536 + * If it's a GSO packet, it's postponed to sctp_skb_segment. 624 537 */ 625 - if (!sctp_checksum_disable) { 626 - if (!(dst->dev->features & NETIF_F_SCTP_CRC) || 627 - (dst_xfrm(dst) != NULL) || packet->ipfragok) { 628 - sh->checksum = sctp_compute_cksum(nskb, 0); 538 + if (!sctp_checksum_disable || gso) { 539 + if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) || 540 + dst_xfrm(dst) || packet->ipfragok)) { 541 + sh->checksum = sctp_compute_cksum(head, 0); 629 542 } else { 630 543 /* no need to seed pseudo checksum for SCTP */ 631 - nskb->ip_summed = CHECKSUM_PARTIAL; 632 - nskb->csum_start = skb_transport_header(nskb) - nskb->head; 633 - nskb->csum_offset = offsetof(struct sctphdr, checksum); 544 + head->ip_summed = CHECKSUM_PARTIAL; 545 + head->csum_start = skb_transport_header(head) - head->head; 546 + head->csum_offset = offsetof(struct sctphdr, checksum); 634 547 } 635 548 } 636 549 ··· 648 557 * Note: The works for IPv6 layer checks this bit too later 649 558 * in transmission. See IP6_ECN_flow_xmit(). 650 559 */ 651 - tp->af_specific->ecn_capable(nskb->sk); 560 + tp->af_specific->ecn_capable(sk); 652 561 653 562 /* Set up the IP options. */ 654 563 /* BUG: not implemented ··· 657 566 658 567 /* Dump that on IP! 
*/ 659 568 if (asoc) { 660 - asoc->stats.opackets++; 569 + asoc->stats.opackets += pktcount; 661 570 if (asoc->peer.last_sent_to != tp) 662 571 /* Considering the multiple CPU scenario, this is a 663 572 * "correcter" place for last_sent_to. --xguo ··· 680 589 } 681 590 } 682 591 683 - pr_debug("***sctp_transmit_packet*** skb->len:%d\n", nskb->len); 592 + pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len); 684 593 685 - nskb->ignore_df = packet->ipfragok; 686 - tp->af_specific->sctp_xmit(nskb, tp); 594 + if (gso) { 595 + /* Cleanup our debris for IP stacks */ 596 + memset(head->cb, 0, max(sizeof(struct inet_skb_parm), 597 + sizeof(struct inet6_skb_parm))); 598 + 599 + skb_shinfo(head)->gso_segs = pktcount; 600 + skb_shinfo(head)->gso_size = GSO_BY_FRAGS; 601 + 602 + /* We have to refresh this in case we are xmiting to 603 + * more than one transport at a time 604 + */ 605 + rcu_read_lock(); 606 + if (__sk_dst_get(sk) != tp->dst) { 607 + dst_hold(tp->dst); 608 + sk_setup_caps(sk, tp->dst); 609 + } 610 + rcu_read_unlock(); 611 + } 612 + head->ignore_df = packet->ipfragok; 613 + tp->af_specific->sctp_xmit(head, tp); 687 614 688 615 out: 689 616 sctp_packet_reset(packet); 690 617 return err; 691 618 no_route: 692 - kfree_skb(nskb); 619 + kfree_skb(head); 620 + if (nskb != head) 621 + kfree_skb(nskb); 693 622 694 623 if (asoc) 695 624 IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); ··· 862 751 struct sctp_chunk *chunk, 863 752 u16 chunk_len) 864 753 { 865 - size_t psize; 866 - size_t pmtu; 867 - int too_big; 754 + size_t psize, pmtu; 868 755 sctp_xmit_t retval = SCTP_XMIT_OK; 869 756 870 757 psize = packet->size; 871 - pmtu = ((packet->transport->asoc) ? 
872 - (packet->transport->asoc->pathmtu) : 873 - (packet->transport->pathmtu)); 874 - 875 - too_big = (psize + chunk_len > pmtu); 758 + if (packet->transport->asoc) 759 + pmtu = packet->transport->asoc->pathmtu; 760 + else 761 + pmtu = packet->transport->pathmtu; 876 762 877 763 /* Decide if we need to fragment or resubmit later. */ 878 - if (too_big) { 879 - /* It's OK to fragmet at IP level if any one of the following 764 + if (psize + chunk_len > pmtu) { 765 + /* It's OK to fragment at IP level if any one of the following 880 766 * is true: 881 - * 1. The packet is empty (meaning this chunk is greater 882 - * the MTU) 883 - * 2. The chunk we are adding is a control chunk 884 - * 3. The packet doesn't have any data in it yet and data 885 - * requires authentication. 767 + * 1. The packet is empty (meaning this chunk is greater 768 + * the MTU) 769 + * 2. The packet doesn't have any data in it yet and data 770 + * requires authentication. 886 771 */ 887 - if (sctp_packet_empty(packet) || !sctp_chunk_is_data(chunk) || 772 + if (sctp_packet_empty(packet) || 888 773 (!packet->has_data && chunk->auth)) { 889 774 /* We no longer do re-fragmentation. 890 775 * Just fragment at the IP layer, if we 891 776 * actually hit this condition 892 777 */ 893 778 packet->ipfragok = 1; 894 - } else { 895 - retval = SCTP_XMIT_PMTU_FULL; 779 + goto out; 896 780 } 781 + 782 + /* It is also okay to fragment if the chunk we are 783 + * adding is a control chunk, but only if current packet 784 + * is not a GSO one otherwise it causes fragmentation of 785 + * a large frame. So in this case we allow the 786 + * fragmentation by forcing it to be in a new packet. 
787 + */ 788 + if (!sctp_chunk_is_data(chunk) && packet->has_data) 789 + retval = SCTP_XMIT_PMTU_FULL; 790 + 791 + if (psize + chunk_len > packet->max_size) 792 + /* Hit GSO/PMTU limit, gotta flush */ 793 + retval = SCTP_XMIT_PMTU_FULL; 794 + 795 + if (!packet->transport->burst_limited && 796 + psize + chunk_len > (packet->transport->cwnd >> 1)) 797 + /* Do not allow a single GSO packet to use more 798 + * than half of cwnd. 799 + */ 800 + retval = SCTP_XMIT_PMTU_FULL; 801 + 802 + if (packet->transport->burst_limited && 803 + psize + chunk_len > (packet->transport->burst_limited >> 1)) 804 + /* Do not allow a single GSO packet to use more 805 + * than half of original cwnd. 806 + */ 807 + retval = SCTP_XMIT_PMTU_FULL; 808 + /* Otherwise it will fit in the GSO packet */ 897 809 } 898 810 811 + out: 899 812 return retval; 900 813 }
+3
net/sctp/protocol.c
··· 1516 1516 if (status) 1517 1517 goto err_v6_add_protocol; 1518 1518 1519 + if (sctp_offload_init() < 0) 1520 + pr_crit("%s: Cannot add SCTP protocol offload\n", __func__); 1521 + 1519 1522 out: 1520 1523 return status; 1521 1524 err_v6_add_protocol:
+2
net/sctp/socket.c
··· 4003 4003 return -ESOCKTNOSUPPORT; 4004 4004 } 4005 4005 4006 + sk->sk_gso_type = SKB_GSO_SCTP; 4007 + 4006 4008 /* Initialize default send parameters. These parameters can be 4007 4009 * modified with the SCTP_DEFAULT_SEND_PARAM socket option. 4008 4010 */