Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

GRO: Add support for TCP with fixed IPv4 ID field, limit tunnel IP ID values

This patch does two things.

First, it allows TCP GRO to aggregate frames with a fixed IPv4 ID field. As
a result, we should now be able to aggregate flows that were converted from
IPv6 to IPv4. In addition, this gives us more flexibility for future
implementations of segmentation, as we may be able to use a fixed IP ID when
segmenting the flow.

Second, it places limitations on the outer IPv4 ID field in the case of
tunneled frames. Specifically, it forces the outer IP ID to increment by 1
unless the DF bit is set in the outer IPv4 header. This way we can avoid
creating overlapping series of IP IDs on frames that could possibly be
fragmented if the frame goes through GRO and is then resegmented via GSO.

Signed-off-by: Alexander Duyck <aduyck@mirantis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Alexander Duyck and committed by David S. Miller
1530545e cbc53e08

+54 -11
+4 -1
include/linux/netdevice.h
··· 2121 2121 /* Used in GRE, set in fou/gue_gro_receive */ 2122 2122 u8 is_fou:1; 2123 2123 2124 - /* 6 bit hole */ 2124 + /* Used to determine if flush_id can be ignored */ 2125 + u8 is_atomic:1; 2126 + 2127 + /* 5 bit hole */ 2125 2128 2126 2129 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 2127 2130 __wsum csum;
+1
net/core/dev.c
··· 4462 4462 NAPI_GRO_CB(skb)->free = 0; 4463 4463 NAPI_GRO_CB(skb)->encap_mark = 0; 4464 4464 NAPI_GRO_CB(skb)->is_fou = 0; 4465 + NAPI_GRO_CB(skb)->is_atomic = 1; 4465 4466 NAPI_GRO_CB(skb)->gro_remcsum_start = 0; 4466 4467 4467 4468 /* Setup for GRO checksum validation */
+28 -7
net/ipv4/af_inet.c
··· 1328 1328 1329 1329 for (p = *head; p; p = p->next) { 1330 1330 struct iphdr *iph2; 1331 + u16 flush_id; 1331 1332 1332 1333 if (!NAPI_GRO_CB(p)->same_flow) 1333 1334 continue; ··· 1352 1351 (iph->tos ^ iph2->tos) | 1353 1352 ((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)); 1354 1353 1355 - /* Save the IP ID check to be included later when we get to 1356 - * the transport layer so only the inner most IP ID is checked. 1357 - * This is because some GSO/TSO implementations do not 1358 - * correctly increment the IP ID for the outer hdrs. 1359 - */ 1360 - NAPI_GRO_CB(p)->flush_id = 1361 - ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); 1362 1354 NAPI_GRO_CB(p)->flush |= flush; 1355 + 1356 + /* We need to store of the IP ID check to be included later 1357 + * when we can verify that this packet does in fact belong 1358 + * to a given flow. 1359 + */ 1360 + flush_id = (u16)(id - ntohs(iph2->id)); 1361 + 1362 + /* This bit of code makes it much easier for us to identify 1363 + * the cases where we are doing atomic vs non-atomic IP ID 1364 + * checks. Specifically an atomic check can return IP ID 1365 + * values 0 - 0xFFFF, while a non-atomic check can only 1366 + * return 0 or 0xFFFF. 1367 + */ 1368 + if (!NAPI_GRO_CB(p)->is_atomic || 1369 + !(iph->frag_off & htons(IP_DF))) { 1370 + flush_id ^= NAPI_GRO_CB(p)->count; 1371 + flush_id = flush_id ? 0xFFFF : 0; 1372 + } 1373 + 1374 + /* If the previous IP ID value was based on an atomic 1375 + * datagram we can overwrite the value and ignore it. 1376 + */ 1377 + if (NAPI_GRO_CB(skb)->is_atomic) 1378 + NAPI_GRO_CB(p)->flush_id = flush_id; 1379 + else 1380 + NAPI_GRO_CB(p)->flush_id |= flush_id; 1363 1381 } 1364 1382 1383 + NAPI_GRO_CB(skb)->is_atomic = !!(iph->frag_off & htons(IP_DF)); 1365 1384 NAPI_GRO_CB(skb)->flush |= flush; 1366 1385 skb_set_network_header(skb, off); 1367 1386 /* The above will be needed by the transport layer if there is one
+15 -1
net/ipv4/tcp_offload.c
··· 239 239 240 240 found: 241 241 /* Include the IP ID check below from the inner most IP hdr */ 242 - flush = NAPI_GRO_CB(p)->flush | NAPI_GRO_CB(p)->flush_id; 242 + flush = NAPI_GRO_CB(p)->flush; 243 243 flush |= (__force int)(flags & TCP_FLAG_CWR); 244 244 flush |= (__force int)((flags ^ tcp_flag_word(th2)) & 245 245 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH)); ··· 247 247 for (i = sizeof(*th); i < thlen; i += 4) 248 248 flush |= *(u32 *)((u8 *)th + i) ^ 249 249 *(u32 *)((u8 *)th2 + i); 250 + 251 + /* When we receive our second frame we can made a decision on if we 252 + * continue this flow as an atomic flow with a fixed ID or if we use 253 + * an incrementing ID. 254 + */ 255 + if (NAPI_GRO_CB(p)->flush_id != 1 || 256 + NAPI_GRO_CB(p)->count != 1 || 257 + !NAPI_GRO_CB(p)->is_atomic) 258 + flush |= NAPI_GRO_CB(p)->flush_id; 259 + else 260 + NAPI_GRO_CB(p)->is_atomic = false; 250 261 251 262 mss = skb_shinfo(p)->gso_size; 252 263 ··· 326 315 th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr, 327 316 iph->daddr, 0); 328 317 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; 318 + 319 + if (NAPI_GRO_CB(skb)->is_atomic) 320 + skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; 329 321 330 322 return tcp_gro_complete(skb); 331 323 }
+6 -2
net/ipv6/ip6_offload.c
··· 240 240 NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); 241 241 NAPI_GRO_CB(p)->flush |= flush; 242 242 243 - /* Clear flush_id, there's really no concept of ID in IPv6. */ 244 - NAPI_GRO_CB(p)->flush_id = 0; 243 + /* If the previous IP ID value was based on an atomic 244 + * datagram we can overwrite the value and ignore it. 245 + */ 246 + if (NAPI_GRO_CB(skb)->is_atomic) 247 + NAPI_GRO_CB(p)->flush_id = 0; 245 248 } 246 249 250 + NAPI_GRO_CB(skb)->is_atomic = true; 247 251 NAPI_GRO_CB(skb)->flush |= flush; 248 252 249 253 skb_gro_postpull_rcsum(skb, iph, nlen);