at v6.17-rc7 604 lines 16 kB view raw
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2 3#ifndef _NET_GRO_H 4#define _NET_GRO_H 5 6#include <linux/indirect_call_wrapper.h> 7#include <linux/ip.h> 8#include <linux/ipv6.h> 9#include <net/ip6_checksum.h> 10#include <linux/skbuff.h> 11#include <net/udp.h> 12#include <net/hotdata.h> 13 14/* This should be increased if a protocol with a bigger head is added. */ 15#define GRO_MAX_HEAD (MAX_HEADER + 128) 16 17struct napi_gro_cb { 18 union { 19 struct { 20 /* Virtual address of skb_shinfo(skb)->frags[0].page + offset. */ 21 void *frag0; 22 23 /* Length of frag0. */ 24 unsigned int frag0_len; 25 }; 26 27 struct { 28 /* used in skb_gro_receive() slow path */ 29 struct sk_buff *last; 30 31 /* jiffies when first packet was created/queued */ 32 unsigned long age; 33 }; 34 }; 35 36 /* This indicates where we are processing relative to skb->data. */ 37 int data_offset; 38 39 /* This is non-zero if the packet cannot be merged with the new skb. */ 40 u16 flush; 41 42 /* Number of segments aggregated. */ 43 u16 count; 44 45 /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */ 46 u16 proto; 47 48 u16 pad; 49 50/* Used in napi_gro_cb::free */ 51#define NAPI_GRO_FREE 1 52#define NAPI_GRO_FREE_STOLEN_HEAD 2 53 /* portion of the cb set to zero at every gro iteration */ 54 struct_group(zeroed, 55 56 /* Start offset for remote checksum offload */ 57 u16 gro_remcsum_start; 58 59 /* This is non-zero if the packet may be of the same flow. */ 60 u8 same_flow:1; 61 62 /* Used in tunnel GRO receive */ 63 u8 encap_mark:1; 64 65 /* GRO checksum is valid */ 66 u8 csum_valid:1; 67 68 /* Number of checksums via CHECKSUM_UNNECESSARY */ 69 u8 csum_cnt:3; 70 71 /* Free the skb? */ 72 u8 free:2; 73 74 /* Used in foo-over-udp, set in udp[46]_gro_receive */ 75 u8 is_ipv6:1; 76 77 /* Used in GRE, set in fou/gue_gro_receive */ 78 u8 is_fou:1; 79 80 /* Used to determine if ipid_offset can be ignored */ 81 u8 ip_fixedid:1; 82 83 /* Number of gro_receive callbacks this packet already went through */ 84 u8 recursion_counter:4; 85 86 /* GRO is done by frag_list pointer chaining. */ 87 u8 is_flist:1; 88 ); 89 90 /* used to support CHECKSUM_COMPLETE for tunneling protocols */ 91 __wsum csum; 92 93 /* L3 offsets */ 94 union { 95 struct { 96 u16 network_offset; 97 u16 inner_network_offset; 98 }; 99 u16 network_offsets[2]; 100 }; 101}; 102 103#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) 104 105#define GRO_RECURSION_LIMIT 15 106static inline int gro_recursion_inc_test(struct sk_buff *skb) 107{ 108 return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; 109} 110 111typedef struct sk_buff *(*gro_receive_t)(struct list_head *, struct sk_buff *); 112static inline struct sk_buff *call_gro_receive(gro_receive_t cb, 113 struct list_head *head, 114 struct sk_buff *skb) 115{ 116 if (unlikely(gro_recursion_inc_test(skb))) { 117 NAPI_GRO_CB(skb)->flush |= 1; 118 return NULL; 119 } 120 121 return cb(head, skb); 122} 123 124typedef struct sk_buff *(*gro_receive_sk_t)(struct sock *, struct list_head *, 125 struct sk_buff *); 126static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, 127 struct sock *sk, 128 struct list_head *head, 129 struct sk_buff *skb) 130{ 131 if (unlikely(gro_recursion_inc_test(skb))) { 132 NAPI_GRO_CB(skb)->flush |= 1; 133 return NULL; 134 } 135 136 return cb(sk, head, skb); 137} 138 139static inline unsigned int skb_gro_offset(const struct sk_buff *skb) 140{ 141 return NAPI_GRO_CB(skb)->data_offset; 142} 143 144static inline unsigned int skb_gro_len(const struct sk_buff *skb) 145{ 146 return skb->len - NAPI_GRO_CB(skb)->data_offset; 147} 148 149static inline void skb_gro_pull(struct sk_buff *skb, unsigned int len) 150{ 151 NAPI_GRO_CB(skb)->data_offset += len; 152} 153 154static inline void *skb_gro_header_fast(const struct sk_buff *skb, 155 unsigned int offset) 156{ 157 return NAPI_GRO_CB(skb)->frag0 + offset; 158} 159 160static inline bool skb_gro_may_pull(const struct sk_buff *skb, 161 unsigned int hlen) 162{ 163 return likely(hlen <= NAPI_GRO_CB(skb)->frag0_len); 164} 165 166static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, 167 unsigned int offset) 168{ 169 if (!pskb_may_pull(skb, hlen)) 170 return NULL; 171 172 return skb->data + offset; 173} 174 175static inline void *skb_gro_header(struct sk_buff *skb, unsigned int hlen, 176 unsigned int offset) 177{ 178 void *ptr; 179 180 ptr = skb_gro_header_fast(skb, offset); 181 if (!skb_gro_may_pull(skb, hlen)) 182 ptr = skb_gro_header_slow(skb, hlen, offset); 183 return ptr; 184} 185 186static inline int skb_gro_receive_network_offset(const struct sk_buff *skb) 187{ 188 return NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark]; 189} 190 191static inline void *skb_gro_network_header(const struct sk_buff *skb) 192{ 193 if (skb_gro_may_pull(skb, skb_gro_offset(skb))) 194 return skb_gro_header_fast(skb, skb_gro_receive_network_offset(skb)); 195 196 return skb->data + skb_gro_receive_network_offset(skb); 197} 198 199static inline __wsum inet_gro_compute_pseudo(const struct sk_buff *skb, 200 int proto) 201{ 202 const struct iphdr *iph = skb_gro_network_header(skb); 203 204 return csum_tcpudp_nofold(iph->saddr, iph->daddr, 205 skb_gro_len(skb), proto, 0); 206} 207 208static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, 209 const void *start, unsigned int len) 210{ 211 if (NAPI_GRO_CB(skb)->csum_valid) 212 NAPI_GRO_CB(skb)->csum = wsum_negate(csum_partial(start, len, 213 wsum_negate(NAPI_GRO_CB(skb)->csum))); 214} 215 216/* GRO checksum functions. These are logical equivalents of the normal 217 * checksum functions (in skbuff.h) except that they operate on the GRO 218 * offsets and fields in sk_buff. 219 */ 220 221__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); 222 223static inline bool skb_at_gro_remcsum_start(struct sk_buff *skb) 224{ 225 return (NAPI_GRO_CB(skb)->gro_remcsum_start == skb_gro_offset(skb)); 226} 227 228static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, 229 bool zero_okay, 230 __sum16 check) 231{ 232 return ((skb->ip_summed != CHECKSUM_PARTIAL || 233 skb_checksum_start_offset(skb) < 234 skb_gro_offset(skb)) && 235 !skb_at_gro_remcsum_start(skb) && 236 NAPI_GRO_CB(skb)->csum_cnt == 0 && 237 (!zero_okay || check)); 238} 239 240static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, 241 __wsum psum) 242{ 243 if (NAPI_GRO_CB(skb)->csum_valid && 244 !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) 245 return 0; 246 247 NAPI_GRO_CB(skb)->csum = psum; 248 249 return __skb_gro_checksum_complete(skb); 250} 251 252static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) 253{ 254 if (NAPI_GRO_CB(skb)->csum_cnt > 0) { 255 /* Consume a checksum from CHECKSUM_UNNECESSARY */ 256 NAPI_GRO_CB(skb)->csum_cnt--; 257 } else { 258 /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we 259 * verified a new top level checksum or an encapsulated one 260 * during GRO. This saves work if we fallback to normal path. 261 */ 262 __skb_incr_checksum_unnecessary(skb); 263 } 264} 265 266#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ 267 compute_pseudo) \ 268({ \ 269 __sum16 __ret = 0; \ 270 if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ 271 __ret = __skb_gro_checksum_validate_complete(skb, \ 272 compute_pseudo(skb, proto)); \ 273 if (!__ret) \ 274 skb_gro_incr_csum_unnecessary(skb); \ 275 __ret; \ 276}) 277 278#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ 279 __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) 280 281#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ 282 compute_pseudo) \ 283 __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) 284 285#define skb_gro_checksum_simple_validate(skb) \ 286 __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) 287 288static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) 289{ 290 return (NAPI_GRO_CB(skb)->csum_cnt == 0 && 291 !NAPI_GRO_CB(skb)->csum_valid); 292} 293 294static inline void __skb_gro_checksum_convert(struct sk_buff *skb, 295 __wsum pseudo) 296{ 297 NAPI_GRO_CB(skb)->csum = ~pseudo; 298 NAPI_GRO_CB(skb)->csum_valid = 1; 299} 300 301#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \ 302do { \ 303 if (__skb_gro_checksum_convert_check(skb)) \ 304 __skb_gro_checksum_convert(skb, \ 305 compute_pseudo(skb, proto)); \ 306} while (0) 307 308struct gro_remcsum { 309 int offset; 310 __wsum delta; 311}; 312 313static inline void skb_gro_remcsum_init(struct gro_remcsum *grc) 314{ 315 grc->offset = 0; 316 grc->delta = 0; 317} 318 319static inline void *skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, 320 unsigned int off, size_t hdrlen, 321 int start, int offset, 322 struct gro_remcsum *grc, 323 bool nopartial) 324{ 325 __wsum delta; 326 size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); 327 328 BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); 329 330 if (!nopartial) { 331 NAPI_GRO_CB(skb)->gro_remcsum_start = off + hdrlen + start; 332 return ptr; 333 } 334 335 ptr = skb_gro_header(skb, off + plen, off); 336 if (!ptr) 337 return NULL; 338 339 delta = remcsum_adjust(ptr + hdrlen, NAPI_GRO_CB(skb)->csum, 340 start, offset); 341 342 /* Adjust skb->csum since we changed the packet */ 343 NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); 344 345 grc->offset = off + hdrlen + offset; 346 grc->delta = delta; 347 348 return ptr; 349} 350 351static inline void skb_gro_remcsum_cleanup(struct sk_buff *skb, 352 struct gro_remcsum *grc) 353{ 354 void *ptr; 355 size_t plen = grc->offset + sizeof(u16); 356 357 if (!grc->delta) 358 return; 359 360 ptr = skb_gro_header(skb, plen, grc->offset); 361 if (!ptr) 362 return; 363 364 remcsum_unadjust((__sum16 *)ptr, grc->delta); 365} 366 367#ifdef CONFIG_XFRM_OFFLOAD 368static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 369{ 370 if (PTR_ERR(pp) != -EINPROGRESS) 371 NAPI_GRO_CB(skb)->flush |= flush; 372} 373static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 374 struct sk_buff *pp, 375 int flush, 376 struct gro_remcsum *grc) 377{ 378 if (PTR_ERR(pp) != -EINPROGRESS) { 379 NAPI_GRO_CB(skb)->flush |= flush; 380 skb_gro_remcsum_cleanup(skb, grc); 381 skb->remcsum_offload = 0; 382 } 383} 384#else 385static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff *pp, int flush) 386{ 387 NAPI_GRO_CB(skb)->flush |= flush; 388} 389static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb, 390 struct sk_buff *pp, 391 int flush, 392 struct gro_remcsum *grc) 393{ 394 NAPI_GRO_CB(skb)->flush |= flush; 395 skb_gro_remcsum_cleanup(skb, grc); 396 skb->remcsum_offload = 0; 397} 398#endif 399 400INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *, 401 struct sk_buff *)); 402INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); 403INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, 404 struct sk_buff *)); 405INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); 406 407INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp4_gro_receive(struct list_head *, 408 struct sk_buff *)); 409INDIRECT_CALLABLE_DECLARE(int udp4_gro_complete(struct sk_buff *, int)); 410 411INDIRECT_CALLABLE_DECLARE(struct sk_buff *udp6_gro_receive(struct list_head *, 412 struct sk_buff *)); 413INDIRECT_CALLABLE_DECLARE(int udp6_gro_complete(struct sk_buff *, int)); 414 415#define indirect_call_gro_receive_inet(cb, f2, f1, head, skb) \ 416({ \ 417 unlikely(gro_recursion_inc_test(skb)) ? \ 418 NAPI_GRO_CB(skb)->flush |= 1, NULL : \ 419 INDIRECT_CALL_INET(cb, f2, f1, head, skb); \ 420}) 421 422struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, 423 struct udphdr *uh, struct sock *sk); 424int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); 425 426static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) 427{ 428 struct udphdr *uh; 429 unsigned int hlen, off; 430 431 off = skb_gro_offset(skb); 432 hlen = off + sizeof(*uh); 433 uh = skb_gro_header(skb, hlen, off); 434 435 return uh; 436} 437 438static inline __wsum ip6_gro_compute_pseudo(const struct sk_buff *skb, 439 int proto) 440{ 441 const struct ipv6hdr *iph = skb_gro_network_header(skb); 442 443 return ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, 444 skb_gro_len(skb), proto, 0)); 445} 446 447static inline int inet_gro_flush(const struct iphdr *iph, const struct iphdr *iph2, 448 struct sk_buff *p, bool outer) 449{ 450 const u32 id = ntohl(*(__be32 *)&iph->id); 451 const u32 id2 = ntohl(*(__be32 *)&iph2->id); 452 const u16 ipid_offset = (id >> 16) - (id2 >> 16); 453 const u16 count = NAPI_GRO_CB(p)->count; 454 const u32 df = id & IP_DF; 455 int flush; 456 457 /* All fields must match except length and checksum. */ 458 flush = (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | (df ^ (id2 & IP_DF)); 459 460 if (flush | (outer && df)) 461 return flush; 462 463 /* When we receive our second frame we can make a decision on if we 464 * continue this flow as an atomic flow with a fixed ID or if we use 465 * an incrementing ID. 466 */ 467 if (count == 1 && df && !ipid_offset) 468 NAPI_GRO_CB(p)->ip_fixedid = true; 469 470 return ipid_offset ^ (count * !NAPI_GRO_CB(p)->ip_fixedid); 471} 472 473static inline int ipv6_gro_flush(const struct ipv6hdr *iph, const struct ipv6hdr *iph2) 474{ 475 /* <Version:4><Traffic_Class:8><Flow_Label:20> */ 476 __be32 first_word = *(__be32 *)iph ^ *(__be32 *)iph2; 477 478 /* Flush if Traffic Class fields are different. */ 479 return !!((first_word & htonl(0x0FF00000)) | 480 (__force __be32)(iph->hop_limit ^ iph2->hop_limit)); 481} 482 483static inline int __gro_receive_network_flush(const void *th, const void *th2, 484 struct sk_buff *p, const u16 diff, 485 bool outer) 486{ 487 const void *nh = th - diff; 488 const void *nh2 = th2 - diff; 489 490 if (((struct iphdr *)nh)->version == 6) 491 return ipv6_gro_flush(nh, nh2); 492 else 493 return inet_gro_flush(nh, nh2, p, outer); 494} 495 496static inline int gro_receive_network_flush(const void *th, const void *th2, 497 struct sk_buff *p) 498{ 499 const bool encap_mark = NAPI_GRO_CB(p)->encap_mark; 500 int off = skb_transport_offset(p); 501 int flush; 502 503 flush = __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->network_offset, encap_mark); 504 if (encap_mark) 505 flush |= __gro_receive_network_flush(th, th2, p, off - NAPI_GRO_CB(p)->inner_network_offset, false); 506 507 return flush; 508} 509 510int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb); 511int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb); 512void __gro_flush(struct gro_node *gro, bool flush_old); 513 514static inline void gro_flush(struct gro_node *gro, bool flush_old) 515{ 516 if (!gro->bitmask) 517 return; 518 519 __gro_flush(gro, flush_old); 520} 521 522static inline void napi_gro_flush(struct napi_struct *napi, bool flush_old) 523{ 524 gro_flush(&napi->gro, flush_old); 525} 526 527/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ 528static inline void gro_normal_list(struct gro_node *gro) 529{ 530 if (!gro->rx_count) 531 return; 532 netif_receive_skb_list_internal(&gro->rx_list); 533 INIT_LIST_HEAD(&gro->rx_list); 534 gro->rx_count = 0; 535} 536 537static inline void gro_flush_normal(struct gro_node *gro, bool flush_old) 538{ 539 gro_flush(gro, flush_old); 540 gro_normal_list(gro); 541} 542 543/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, 544 * pass the whole batch up to the stack. 545 */ 546static inline void gro_normal_one(struct gro_node *gro, struct sk_buff *skb, 547 int segs) 548{ 549 list_add_tail(&skb->list, &gro->rx_list); 550 gro->rx_count += segs; 551 if (gro->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch)) 552 gro_normal_list(gro); 553} 554 555void gro_init(struct gro_node *gro); 556void gro_cleanup(struct gro_node *gro); 557 558/* This function is the alternative of 'inet_iif' and 'inet_sdif' 559 * functions in case we can not rely on fields of IPCB. 560 * 561 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 562 * The caller must hold the RCU read lock. 563 */ 564static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 565{ 566 *iif = inet_iif(skb) ?: skb->dev->ifindex; 567 *sdif = 0; 568 569#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 570 if (netif_is_l3_slave(skb->dev)) { 571 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 572 573 *sdif = *iif; 574 *iif = master ? master->ifindex : 0; 575 } 576#endif 577} 578 579/* This function is the alternative of 'inet6_iif' and 'inet6_sdif' 580 * functions in case we can not rely on fields of IP6CB. 581 * 582 * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized. 583 * The caller must hold the RCU read lock. 584 */ 585static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif) 586{ 587 /* using skb->dev->ifindex because skb_dst(skb) is not initialized */ 588 *iif = skb->dev->ifindex; 589 *sdif = 0; 590 591#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) 592 if (netif_is_l3_slave(skb->dev)) { 593 struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev); 594 595 *sdif = *iif; 596 *iif = master ? master->ifindex : 0; 597 } 598#endif 599} 600 601struct packet_offload *gro_find_receive_by_type(__be16 type); 602struct packet_offload *gro_find_complete_by_type(__be16 type); 603 604#endif /* _NET_GRO_H */