Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.36 600 lines 15 kB view raw
1/* 2 * linux/net/ipv4/inet_lro.c 3 * 4 * Large Receive Offload (ipv4 / tcp) 5 * 6 * (C) Copyright IBM Corp. 2007 7 * 8 * Authors: 9 * Jan-Bernd Themann <themann@de.ibm.com> 10 * Christoph Raisch <raisch@de.ibm.com> 11 * 12 * 13 * This program is free software; you can redistribute it and/or modify 14 * it under the terms of the GNU General Public License as published by 15 * the Free Software Foundation; either version 2, or (at your option) 16 * any later version. 17 * 18 * This program is distributed in the hope that it will be useful, 19 * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 * GNU General Public License for more details. 22 * 23 * You should have received a copy of the GNU General Public License 24 * along with this program; if not, write to the Free Software 25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 26 */ 27 28 29#include <linux/module.h> 30#include <linux/if_vlan.h> 31#include <linux/inet_lro.h> 32 33MODULE_LICENSE("GPL"); 34MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>"); 35MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)"); 36 37#define TCP_HDR_LEN(tcph) (tcph->doff << 2) 38#define IP_HDR_LEN(iph) (iph->ihl << 2) 39#define TCP_PAYLOAD_LENGTH(iph, tcph) \ 40 (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph)) 41 42#define IPH_LEN_WO_OPTIONS 5 43#define TCPH_LEN_WO_OPTIONS 5 44#define TCPH_LEN_W_TIMESTAMP 8 45 46#define LRO_MAX_PG_HLEN 64 47 48#define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; } 49 50/* 51 * Basic tcp checks whether packet is suitable for LRO 52 */ 53 54static int lro_tcp_ip_check(struct iphdr *iph, struct tcphdr *tcph, 55 int len, struct net_lro_desc *lro_desc) 56{ 57 /* check ip header: don't aggregate padded frames */ 58 if (ntohs(iph->tot_len) != len) 59 return -1; 60 61 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0) 62 return -1; 63 64 if (iph->ihl != IPH_LEN_WO_OPTIONS) 65 return -1; 66 67 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack || 68 tcph->rst || tcph->syn || tcph->fin) 69 return -1; 70 71 if (INET_ECN_is_ce(ipv4_get_dsfield(iph))) 72 return -1; 73 74 if (tcph->doff != TCPH_LEN_WO_OPTIONS && 75 tcph->doff != TCPH_LEN_W_TIMESTAMP) 76 return -1; 77 78 /* check tcp options (only timestamp allowed) */ 79 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) { 80 __be32 *topt = (__be32 *)(tcph + 1); 81 82 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) 83 | (TCPOPT_TIMESTAMP << 8) 84 | TCPOLEN_TIMESTAMP)) 85 return -1; 86 87 /* timestamp should be in right order */ 88 topt++; 89 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval), 90 ntohl(*topt))) 91 return -1; 92 93 /* timestamp reply should not be zero */ 94 topt++; 95 if (*topt == 0) 96 return -1; 97 } 98 99 return 0; 100} 101 102static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc) 103{ 104 struct iphdr *iph = lro_desc->iph; 105 struct tcphdr *tcph = lro_desc->tcph; 106 __be32 *p; 107 __wsum tcp_hdr_csum; 108 109 tcph->ack_seq = lro_desc->tcp_ack; 110 tcph->window = lro_desc->tcp_window; 111 112 if (lro_desc->tcp_saw_tstamp) { 113 p = (__be32 *)(tcph + 1); 114 *(p+2) = lro_desc->tcp_rcv_tsecr; 115 } 116 117 iph->tot_len = htons(lro_desc->ip_tot_len); 118 119 iph->check = 0; 120 iph->check = ip_fast_csum((u8 *)lro_desc->iph, iph->ihl); 121 122 tcph->check = 0; 123 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0); 124 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum); 125 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, 126 lro_desc->ip_tot_len - 127 IP_HDR_LEN(iph), IPPROTO_TCP, 128 lro_desc->data_csum); 129} 130 131static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len) 132{ 133 __wsum tcp_csum; 134 __wsum tcp_hdr_csum; 135 __wsum tcp_ps_hdr_csum; 136 137 tcp_csum = ~csum_unfold(tcph->check); 138 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum); 139 140 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 141 len + TCP_HDR_LEN(tcph), 142 IPPROTO_TCP, 0); 143 144 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum), 145 tcp_ps_hdr_csum); 146} 147 148static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb, 149 struct iphdr *iph, struct tcphdr *tcph, 150 u16 vlan_tag, struct vlan_group *vgrp) 151{ 152 int nr_frags; 153 __be32 *ptr; 154 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); 155 156 nr_frags = skb_shinfo(skb)->nr_frags; 157 lro_desc->parent = skb; 158 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]); 159 lro_desc->iph = iph; 160 lro_desc->tcph = tcph; 161 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len; 162 lro_desc->tcp_ack = tcph->ack_seq; 163 lro_desc->tcp_window = tcph->window; 164 165 lro_desc->pkt_aggr_cnt = 1; 166 lro_desc->ip_tot_len = ntohs(iph->tot_len); 167 168 if (tcph->doff == 8) { 169 ptr = (__be32 *)(tcph+1); 170 lro_desc->tcp_saw_tstamp = 1; 171 lro_desc->tcp_rcv_tsval = *(ptr+1); 172 lro_desc->tcp_rcv_tsecr = *(ptr+2); 173 } 174 175 lro_desc->mss = tcp_data_len; 176 lro_desc->vgrp = vgrp; 177 lro_desc->vlan_tag = vlan_tag; 178 lro_desc->active = 1; 179 180 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph, 181 tcp_data_len); 182} 183 184static inline void lro_clear_desc(struct net_lro_desc *lro_desc) 185{ 186 memset(lro_desc, 0, sizeof(struct net_lro_desc)); 187} 188 189static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph, 190 struct tcphdr *tcph, int tcp_data_len) 191{ 192 struct sk_buff *parent = lro_desc->parent; 193 __be32 *topt; 194 195 lro_desc->pkt_aggr_cnt++; 196 lro_desc->ip_tot_len += tcp_data_len; 197 lro_desc->tcp_next_seq += tcp_data_len; 198 lro_desc->tcp_window = tcph->window; 199 lro_desc->tcp_ack = tcph->ack_seq; 200 201 /* don't update tcp_rcv_tsval, would not work with PAWS */ 202 if (lro_desc->tcp_saw_tstamp) { 203 topt = (__be32 *) (tcph + 1); 204 lro_desc->tcp_rcv_tsecr = *(topt + 2); 205 } 206 207 lro_desc->data_csum = csum_block_add(lro_desc->data_csum, 208 lro_tcp_data_csum(iph, tcph, 209 tcp_data_len), 210 parent->len); 211 212 parent->len += tcp_data_len; 213 parent->data_len += tcp_data_len; 214 if (tcp_data_len > lro_desc->mss) 215 lro_desc->mss = tcp_data_len; 216} 217 218static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb, 219 struct iphdr *iph, struct tcphdr *tcph) 220{ 221 struct sk_buff *parent = lro_desc->parent; 222 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); 223 224 lro_add_common(lro_desc, iph, tcph, tcp_data_len); 225 226 skb_pull(skb, (skb->len - tcp_data_len)); 227 parent->truesize += skb->truesize; 228 229 if (lro_desc->last_skb) 230 lro_desc->last_skb->next = skb; 231 else 232 skb_shinfo(parent)->frag_list = skb; 233 234 lro_desc->last_skb = skb; 235} 236 237static void lro_add_frags(struct net_lro_desc *lro_desc, 238 int len, int hlen, int truesize, 239 struct skb_frag_struct *skb_frags, 240 struct iphdr *iph, struct tcphdr *tcph) 241{ 242 struct sk_buff *skb = lro_desc->parent; 243 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph); 244 245 lro_add_common(lro_desc, iph, tcph, tcp_data_len); 246 247 skb->truesize += truesize; 248 249 skb_frags[0].page_offset += hlen; 250 skb_frags[0].size -= hlen; 251 252 while (tcp_data_len > 0) { 253 *(lro_desc->next_frag) = *skb_frags; 254 tcp_data_len -= skb_frags->size; 255 lro_desc->next_frag++; 256 skb_frags++; 257 skb_shinfo(skb)->nr_frags++; 258 } 259} 260 261static int lro_check_tcp_conn(struct net_lro_desc *lro_desc, 262 struct iphdr *iph, 263 struct tcphdr *tcph) 264{ 265 if ((lro_desc->iph->saddr != iph->saddr) || 266 (lro_desc->iph->daddr != iph->daddr) || 267 (lro_desc->tcph->source != tcph->source) || 268 (lro_desc->tcph->dest != tcph->dest)) 269 return -1; 270 return 0; 271} 272 273static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr, 274 struct net_lro_desc *lro_arr, 275 struct iphdr *iph, 276 struct tcphdr *tcph) 277{ 278 struct net_lro_desc *lro_desc = NULL; 279 struct net_lro_desc *tmp; 280 int max_desc = lro_mgr->max_desc; 281 int i; 282 283 for (i = 0; i < max_desc; i++) { 284 tmp = &lro_arr[i]; 285 if (tmp->active) 286 if (!lro_check_tcp_conn(tmp, iph, tcph)) { 287 lro_desc = tmp; 288 goto out; 289 } 290 } 291 292 for (i = 0; i < max_desc; i++) { 293 if (!lro_arr[i].active) { 294 lro_desc = &lro_arr[i]; 295 goto out; 296 } 297 } 298 299 LRO_INC_STATS(lro_mgr, no_desc); 300out: 301 return lro_desc; 302} 303 304static void lro_flush(struct net_lro_mgr *lro_mgr, 305 struct net_lro_desc *lro_desc) 306{ 307 if (lro_desc->pkt_aggr_cnt > 1) 308 lro_update_tcp_ip_header(lro_desc); 309 310 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss; 311 312 if (lro_desc->vgrp) { 313 if (lro_mgr->features & LRO_F_NAPI) 314 vlan_hwaccel_receive_skb(lro_desc->parent, 315 lro_desc->vgrp, 316 lro_desc->vlan_tag); 317 else 318 vlan_hwaccel_rx(lro_desc->parent, 319 lro_desc->vgrp, 320 lro_desc->vlan_tag); 321 322 } else { 323 if (lro_mgr->features & LRO_F_NAPI) 324 netif_receive_skb(lro_desc->parent); 325 else 326 netif_rx(lro_desc->parent); 327 } 328 329 LRO_INC_STATS(lro_mgr, flushed); 330 lro_clear_desc(lro_desc); 331} 332 333static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb, 334 struct vlan_group *vgrp, u16 vlan_tag, void *priv) 335{ 336 struct net_lro_desc *lro_desc; 337 struct iphdr *iph; 338 struct tcphdr *tcph; 339 u64 flags; 340 int vlan_hdr_len = 0; 341 342 if (!lro_mgr->get_skb_header || 343 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph, 344 &flags, priv)) 345 goto out; 346 347 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) 348 goto out; 349 350 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); 351 if (!lro_desc) 352 goto out; 353 354 if ((skb->protocol == htons(ETH_P_8021Q)) && 355 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) 356 vlan_hdr_len = VLAN_HLEN; 357 358 if (!lro_desc->active) { /* start new lro session */ 359 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL)) 360 goto out; 361 362 skb->ip_summed = lro_mgr->ip_summed_aggr; 363 lro_init_desc(lro_desc, skb, iph, tcph, vlan_tag, vgrp); 364 LRO_INC_STATS(lro_mgr, aggregated); 365 return 0; 366 } 367 368 if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) 369 goto out2; 370 371 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc)) 372 goto out2; 373 374 lro_add_packet(lro_desc, skb, iph, tcph); 375 LRO_INC_STATS(lro_mgr, aggregated); 376 377 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) || 378 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) 379 lro_flush(lro_mgr, lro_desc); 380 381 return 0; 382 383out2: /* send aggregated SKBs to stack */ 384 lro_flush(lro_mgr, lro_desc); 385 386out: 387 return 1; 388} 389 390 391static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, 392 struct skb_frag_struct *frags, 393 int len, int true_size, 394 void *mac_hdr, 395 int hlen, __wsum sum, 396 u32 ip_summed) 397{ 398 struct sk_buff *skb; 399 struct skb_frag_struct *skb_frags; 400 int data_len = len; 401 int hdr_len = min(len, hlen); 402 403 skb = netdev_alloc_skb(lro_mgr->dev, hlen + lro_mgr->frag_align_pad); 404 if (!skb) 405 return NULL; 406 407 skb_reserve(skb, lro_mgr->frag_align_pad); 408 skb->len = len; 409 skb->data_len = len - hdr_len; 410 skb->truesize += true_size; 411 skb->tail += hdr_len; 412 413 memcpy(skb->data, mac_hdr, hdr_len); 414 415 skb_frags = skb_shinfo(skb)->frags; 416 while (data_len > 0) { 417 *skb_frags = *frags; 418 data_len -= frags->size; 419 skb_frags++; 420 frags++; 421 skb_shinfo(skb)->nr_frags++; 422 } 423 424 skb_shinfo(skb)->frags[0].page_offset += hdr_len; 425 skb_shinfo(skb)->frags[0].size -= hdr_len; 426 427 skb->ip_summed = ip_summed; 428 skb->csum = sum; 429 skb->protocol = eth_type_trans(skb, lro_mgr->dev); 430 return skb; 431} 432 433static struct sk_buff *__lro_proc_segment(struct net_lro_mgr *lro_mgr, 434 struct skb_frag_struct *frags, 435 int len, int true_size, 436 struct vlan_group *vgrp, 437 u16 vlan_tag, void *priv, __wsum sum) 438{ 439 struct net_lro_desc *lro_desc; 440 struct iphdr *iph; 441 struct tcphdr *tcph; 442 struct sk_buff *skb; 443 u64 flags; 444 void *mac_hdr; 445 int mac_hdr_len; 446 int hdr_len = LRO_MAX_PG_HLEN; 447 int vlan_hdr_len = 0; 448 449 if (!lro_mgr->get_frag_header || 450 lro_mgr->get_frag_header(frags, (void *)&mac_hdr, (void *)&iph, 451 (void *)&tcph, &flags, priv)) { 452 mac_hdr = page_address(frags->page) + frags->page_offset; 453 goto out1; 454 } 455 456 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP)) 457 goto out1; 458 459 hdr_len = (int)((void *)(tcph) + TCP_HDR_LEN(tcph) - mac_hdr); 460 mac_hdr_len = (int)((void *)(iph) - mac_hdr); 461 462 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); 463 if (!lro_desc) 464 goto out1; 465 466 if (!lro_desc->active) { /* start new lro session */ 467 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, NULL)) 468 goto out1; 469 470 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, 471 hdr_len, 0, lro_mgr->ip_summed_aggr); 472 if (!skb) 473 goto out; 474 475 if ((skb->protocol == htons(ETH_P_8021Q)) && 476 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID)) 477 vlan_hdr_len = VLAN_HLEN; 478 479 iph = (void *)(skb->data + vlan_hdr_len); 480 tcph = (void *)((u8 *)skb->data + vlan_hdr_len 481 + IP_HDR_LEN(iph)); 482 483 lro_init_desc(lro_desc, skb, iph, tcph, 0, NULL); 484 LRO_INC_STATS(lro_mgr, aggregated); 485 return NULL; 486 } 487 488 if (lro_desc->tcp_next_seq != ntohl(tcph->seq)) 489 goto out2; 490 491 if (lro_tcp_ip_check(iph, tcph, len - mac_hdr_len, lro_desc)) 492 goto out2; 493 494 lro_add_frags(lro_desc, len, hdr_len, true_size, frags, iph, tcph); 495 LRO_INC_STATS(lro_mgr, aggregated); 496 497 if ((skb_shinfo(lro_desc->parent)->nr_frags >= lro_mgr->max_aggr) || 498 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu)) 499 lro_flush(lro_mgr, lro_desc); 500 501 return NULL; 502 503out2: /* send aggregated packets to the stack */ 504 lro_flush(lro_mgr, lro_desc); 505 506out1: /* Original packet has to be posted to the stack */ 507 skb = lro_gen_skb(lro_mgr, frags, len, true_size, mac_hdr, 508 hdr_len, sum, lro_mgr->ip_summed); 509out: 510 return skb; 511} 512 513void lro_receive_skb(struct net_lro_mgr *lro_mgr, 514 struct sk_buff *skb, 515 void *priv) 516{ 517 if (__lro_proc_skb(lro_mgr, skb, NULL, 0, priv)) { 518 if (lro_mgr->features & LRO_F_NAPI) 519 netif_receive_skb(skb); 520 else 521 netif_rx(skb); 522 } 523} 524EXPORT_SYMBOL(lro_receive_skb); 525 526void lro_vlan_hwaccel_receive_skb(struct net_lro_mgr *lro_mgr, 527 struct sk_buff *skb, 528 struct vlan_group *vgrp, 529 u16 vlan_tag, 530 void *priv) 531{ 532 if (__lro_proc_skb(lro_mgr, skb, vgrp, vlan_tag, priv)) { 533 if (lro_mgr->features & LRO_F_NAPI) 534 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); 535 else 536 vlan_hwaccel_rx(skb, vgrp, vlan_tag); 537 } 538} 539EXPORT_SYMBOL(lro_vlan_hwaccel_receive_skb); 540 541void lro_receive_frags(struct net_lro_mgr *lro_mgr, 542 struct skb_frag_struct *frags, 543 int len, int true_size, void *priv, __wsum sum) 544{ 545 struct sk_buff *skb; 546 547 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, NULL, 0, 548 priv, sum); 549 if (!skb) 550 return; 551 552 if (lro_mgr->features & LRO_F_NAPI) 553 netif_receive_skb(skb); 554 else 555 netif_rx(skb); 556} 557EXPORT_SYMBOL(lro_receive_frags); 558 559void lro_vlan_hwaccel_receive_frags(struct net_lro_mgr *lro_mgr, 560 struct skb_frag_struct *frags, 561 int len, int true_size, 562 struct vlan_group *vgrp, 563 u16 vlan_tag, void *priv, __wsum sum) 564{ 565 struct sk_buff *skb; 566 567 skb = __lro_proc_segment(lro_mgr, frags, len, true_size, vgrp, 568 vlan_tag, priv, sum); 569 if (!skb) 570 return; 571 572 if (lro_mgr->features & LRO_F_NAPI) 573 vlan_hwaccel_receive_skb(skb, vgrp, vlan_tag); 574 else 575 vlan_hwaccel_rx(skb, vgrp, vlan_tag); 576} 577EXPORT_SYMBOL(lro_vlan_hwaccel_receive_frags); 578 579void lro_flush_all(struct net_lro_mgr *lro_mgr) 580{ 581 int i; 582 struct net_lro_desc *lro_desc = lro_mgr->lro_arr; 583 584 for (i = 0; i < lro_mgr->max_desc; i++) { 585 if (lro_desc[i].active) 586 lro_flush(lro_mgr, &lro_desc[i]); 587 } 588} 589EXPORT_SYMBOL(lro_flush_all); 590 591void lro_flush_pkt(struct net_lro_mgr *lro_mgr, 592 struct iphdr *iph, struct tcphdr *tcph) 593{ 594 struct net_lro_desc *lro_desc; 595 596 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph); 597 if (lro_desc->active) 598 lro_flush(lro_mgr, lro_desc); 599} 600EXPORT_SYMBOL(lro_flush_pkt);