at v2.6.15 38 kB view raw
1/* 2 * Definitions for the 'struct sk_buff' memory handlers. 3 * 4 * Authors: 5 * Alan Cox, <gw4pts@gw4pts.ampr.org> 6 * Florian La Roche, <rzsfl@rz.uni-sb.de> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14#ifndef _LINUX_SKBUFF_H 15#define _LINUX_SKBUFF_H 16 17#include <linux/config.h> 18#include <linux/kernel.h> 19#include <linux/compiler.h> 20#include <linux/time.h> 21#include <linux/cache.h> 22 23#include <asm/atomic.h> 24#include <asm/types.h> 25#include <linux/spinlock.h> 26#include <linux/mm.h> 27#include <linux/highmem.h> 28#include <linux/poll.h> 29#include <linux/net.h> 30#include <linux/textsearch.h> 31#include <net/checksum.h> 32 33#define HAVE_ALLOC_SKB /* For the drivers to know */ 34#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 35#define SLAB_SKB /* Slabified skbuffs */ 36 37#define CHECKSUM_NONE 0 38#define CHECKSUM_HW 1 39#define CHECKSUM_UNNECESSARY 2 40 41#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ 42 ~(SMP_CACHE_BYTES - 1)) 43#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \ 44 sizeof(struct skb_shared_info)) & \ 45 ~(SMP_CACHE_BYTES - 1)) 46#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) 47#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) 48 49/* A. Checksumming of received packets by device. 50 * 51 * NONE: device failed to checksum this packet. 52 * skb->csum is undefined. 53 * 54 * UNNECESSARY: device parsed packet and wouldbe verified checksum. 55 * skb->csum is undefined. 56 * It is bad option, but, unfortunately, many of vendors do this. 57 * Apparently with secret goal to sell you new device, when you 58 * will add new protocol to your host. F.e. IPv6. 8) 59 * 60 * HW: the most generic way. Device supplied checksum of _all_ 61 * the packet as seen by netif_rx in skb->csum. 62 * NOTE: Even if device supports only some protocols, but 63 * is able to produce some skb->csum, it MUST use HW, 64 * not UNNECESSARY. 65 * 66 * B. Checksumming on output. 67 * 68 * NONE: skb is checksummed by protocol or csum is not required. 69 * 70 * HW: device is required to csum packet as seen by hard_start_xmit 71 * from skb->h.raw to the end and to record the checksum 72 * at skb->h.raw+skb->csum. 73 * 74 * Device must show its capabilities in dev->features, set 75 * at device setup time. 76 * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 77 * everything. 78 * NETIF_F_NO_CSUM - loopback or reliable single hop media. 79 * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 80 * TCP/UDP over IPv4. Sigh. Vendors like this 81 * way by an unknown reason. Though, see comment above 82 * about CHECKSUM_UNNECESSARY. 8) 83 * 84 * Any questions? No questions, good. --ANK 85 */ 86 87struct net_device; 88 89#ifdef CONFIG_NETFILTER 90struct nf_conntrack { 91 atomic_t use; 92 void (*destroy)(struct nf_conntrack *); 93}; 94 95#ifdef CONFIG_BRIDGE_NETFILTER 96struct nf_bridge_info { 97 atomic_t use; 98 struct net_device *physindev; 99 struct net_device *physoutdev; 100#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) 101 struct net_device *netoutdev; 102#endif 103 unsigned int mask; 104 unsigned long data[32 / sizeof(unsigned long)]; 105}; 106#endif 107 108#endif 109 110struct sk_buff_head { 111 /* These two members must be first. */ 112 struct sk_buff *next; 113 struct sk_buff *prev; 114 115 __u32 qlen; 116 spinlock_t lock; 117}; 118 119struct sk_buff; 120 121/* To allow 64K frame to be packed as single skb without frag_list */ 122#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) 123 124typedef struct skb_frag_struct skb_frag_t; 125 126struct skb_frag_struct { 127 struct page *page; 128 __u16 page_offset; 129 __u16 size; 130}; 131 132/* This data is invariant across clones and lives at 133 * the end of the header data, ie. at skb->end. 134 */ 135struct skb_shared_info { 136 atomic_t dataref; 137 unsigned int nr_frags; 138 unsigned short tso_size; 139 unsigned short tso_segs; 140 unsigned short ufo_size; 141 unsigned int ip6_frag_id; 142 struct sk_buff *frag_list; 143 skb_frag_t frags[MAX_SKB_FRAGS]; 144}; 145 146/* We divide dataref into two halves. The higher 16 bits hold references 147 * to the payload part of skb->data. The lower 16 bits hold references to 148 * the entire skb->data. It is up to the users of the skb to agree on 149 * where the payload starts. 150 * 151 * All users must obey the rule that the skb->data reference count must be 152 * greater than or equal to the payload reference count. 153 * 154 * Holding a reference to the payload part means that the user does not 155 * care about modifications to the header part of skb->data. 156 */ 157#define SKB_DATAREF_SHIFT 16 158#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) 159 160struct skb_timeval { 161 u32 off_sec; 162 u32 off_usec; 163}; 164 165 166enum { 167 SKB_FCLONE_UNAVAILABLE, 168 SKB_FCLONE_ORIG, 169 SKB_FCLONE_CLONE, 170}; 171 172/** 173 * struct sk_buff - socket buffer 174 * @next: Next buffer in list 175 * @prev: Previous buffer in list 176 * @sk: Socket we are owned by 177 * @tstamp: Time we arrived 178 * @dev: Device we arrived on/are leaving by 179 * @input_dev: Device we arrived on 180 * @h: Transport layer header 181 * @nh: Network layer header 182 * @mac: Link layer header 183 * @dst: destination entry 184 * @sp: the security path, used for xfrm 185 * @cb: Control buffer. Free for use by every layer. Put private vars here 186 * @len: Length of actual data 187 * @data_len: Data length 188 * @mac_len: Length of link layer header 189 * @csum: Checksum 190 * @local_df: allow local fragmentation 191 * @cloned: Head may be cloned (check refcnt to be sure) 192 * @nohdr: Payload reference only, must not modify header 193 * @pkt_type: Packet class 194 * @fclone: skbuff clone status 195 * @ip_summed: Driver fed us an IP checksum 196 * @priority: Packet queueing priority 197 * @users: User count - see {datagram,tcp}.c 198 * @protocol: Packet protocol from driver 199 * @truesize: Buffer size 200 * @head: Head of buffer 201 * @data: Data head pointer 202 * @tail: Tail pointer 203 * @end: End pointer 204 * @destructor: Destruct function 205 * @nfmark: Can be used for communication between hooks 206 * @nfct: Associated connection, if any 207 * @ipvs_property: skbuff is owned by ipvs 208 * @nfctinfo: Relationship of this skb to the connection 209 * @nfct_reasm: netfilter conntrack re-assembly pointer 210 * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 211 * @tc_index: Traffic control index 212 * @tc_verd: traffic control verdict 213 */ 214 215struct sk_buff { 216 /* These two members must be first. */ 217 struct sk_buff *next; 218 struct sk_buff *prev; 219 220 struct sock *sk; 221 struct skb_timeval tstamp; 222 struct net_device *dev; 223 struct net_device *input_dev; 224 225 union { 226 struct tcphdr *th; 227 struct udphdr *uh; 228 struct icmphdr *icmph; 229 struct igmphdr *igmph; 230 struct iphdr *ipiph; 231 struct ipv6hdr *ipv6h; 232 unsigned char *raw; 233 } h; 234 235 union { 236 struct iphdr *iph; 237 struct ipv6hdr *ipv6h; 238 struct arphdr *arph; 239 unsigned char *raw; 240 } nh; 241 242 union { 243 unsigned char *raw; 244 } mac; 245 246 struct dst_entry *dst; 247 struct sec_path *sp; 248 249 /* 250 * This is the control buffer. It is free to use for every 251 * layer. Please put your private variables there. If you 252 * want to keep them across layers you have to do a skb_clone() 253 * first. This is owned by whoever has the skb queued ATM. 254 */ 255 char cb[40]; 256 257 unsigned int len, 258 data_len, 259 mac_len, 260 csum; 261 __u32 priority; 262 __u8 local_df:1, 263 cloned:1, 264 ip_summed:2, 265 nohdr:1, 266 nfctinfo:3; 267 __u8 pkt_type:3, 268 fclone:2, 269 ipvs_property:1; 270 __be16 protocol; 271 272 void (*destructor)(struct sk_buff *skb); 273#ifdef CONFIG_NETFILTER 274 __u32 nfmark; 275 struct nf_conntrack *nfct; 276#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 277 struct sk_buff *nfct_reasm; 278#endif 279#ifdef CONFIG_BRIDGE_NETFILTER 280 struct nf_bridge_info *nf_bridge; 281#endif 282#endif /* CONFIG_NETFILTER */ 283#ifdef CONFIG_NET_SCHED 284 __u16 tc_index; /* traffic control index */ 285#ifdef CONFIG_NET_CLS_ACT 286 __u16 tc_verd; /* traffic control verdict */ 287#endif 288#endif 289 290 291 /* These elements must be at the end, see alloc_skb() for details. */ 292 unsigned int truesize; 293 atomic_t users; 294 unsigned char *head, 295 *data, 296 *tail, 297 *end; 298}; 299 300#ifdef __KERNEL__ 301/* 302 * Handling routines are only of interest to the kernel 303 */ 304#include <linux/slab.h> 305 306#include <asm/system.h> 307 308extern void __kfree_skb(struct sk_buff *skb); 309extern struct sk_buff *__alloc_skb(unsigned int size, 310 gfp_t priority, int fclone); 311static inline struct sk_buff *alloc_skb(unsigned int size, 312 gfp_t priority) 313{ 314 return __alloc_skb(size, priority, 0); 315} 316 317static inline struct sk_buff *alloc_skb_fclone(unsigned int size, 318 gfp_t priority) 319{ 320 return __alloc_skb(size, priority, 1); 321} 322 323extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 324 unsigned int size, 325 gfp_t priority); 326extern void kfree_skbmem(struct sk_buff *skb); 327extern struct sk_buff *skb_clone(struct sk_buff *skb, 328 gfp_t priority); 329extern struct sk_buff *skb_copy(const struct sk_buff *skb, 330 gfp_t priority); 331extern struct sk_buff *pskb_copy(struct sk_buff *skb, 332 gfp_t gfp_mask); 333extern int pskb_expand_head(struct sk_buff *skb, 334 int nhead, int ntail, 335 gfp_t gfp_mask); 336extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, 337 unsigned int headroom); 338extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 339 int newheadroom, int newtailroom, 340 gfp_t priority); 341extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); 342#define dev_kfree_skb(a) kfree_skb(a) 343extern void skb_over_panic(struct sk_buff *skb, int len, 344 void *here); 345extern void skb_under_panic(struct sk_buff *skb, int len, 346 void *here); 347 348extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, 349 int getfrag(void *from, char *to, int offset, 350 int len,int odd, struct sk_buff *skb), 351 void *from, int length); 352 353struct skb_seq_state 354{ 355 __u32 lower_offset; 356 __u32 upper_offset; 357 __u32 frag_idx; 358 __u32 stepped_offset; 359 struct sk_buff *root_skb; 360 struct sk_buff *cur_skb; 361 __u8 *frag_data; 362}; 363 364extern void skb_prepare_seq_read(struct sk_buff *skb, 365 unsigned int from, unsigned int to, 366 struct skb_seq_state *st); 367extern unsigned int skb_seq_read(unsigned int consumed, const u8 **data, 368 struct skb_seq_state *st); 369extern void skb_abort_seq_read(struct skb_seq_state *st); 370 371extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, 372 unsigned int to, struct ts_config *config, 373 struct ts_state *state); 374 375/* Internal */ 376#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 377 378/** 379 * skb_queue_empty - check if a queue is empty 380 * @list: queue head 381 * 382 * Returns true if the queue is empty, false otherwise. 383 */ 384static inline int skb_queue_empty(const struct sk_buff_head *list) 385{ 386 return list->next == (struct sk_buff *)list; 387} 388 389/** 390 * skb_get - reference buffer 391 * @skb: buffer to reference 392 * 393 * Makes another reference to a socket buffer and returns a pointer 394 * to the buffer. 395 */ 396static inline struct sk_buff *skb_get(struct sk_buff *skb) 397{ 398 atomic_inc(&skb->users); 399 return skb; 400} 401 402/* 403 * If users == 1, we are the only owner and are can avoid redundant 404 * atomic change. 405 */ 406 407/** 408 * kfree_skb - free an sk_buff 409 * @skb: buffer to free 410 * 411 * Drop a reference to the buffer and free it if the usage count has 412 * hit zero. 413 */ 414static inline void kfree_skb(struct sk_buff *skb) 415{ 416 if (likely(atomic_read(&skb->users) == 1)) 417 smp_rmb(); 418 else if (likely(!atomic_dec_and_test(&skb->users))) 419 return; 420 __kfree_skb(skb); 421} 422 423/** 424 * skb_cloned - is the buffer a clone 425 * @skb: buffer to check 426 * 427 * Returns true if the buffer was generated with skb_clone() and is 428 * one of multiple shared copies of the buffer. Cloned buffers are 429 * shared data so must not be written to under normal circumstances. 430 */ 431static inline int skb_cloned(const struct sk_buff *skb) 432{ 433 return skb->cloned && 434 (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1; 435} 436 437/** 438 * skb_header_cloned - is the header a clone 439 * @skb: buffer to check 440 * 441 * Returns true if modifying the header part of the buffer requires 442 * the data to be copied. 443 */ 444static inline int skb_header_cloned(const struct sk_buff *skb) 445{ 446 int dataref; 447 448 if (!skb->cloned) 449 return 0; 450 451 dataref = atomic_read(&skb_shinfo(skb)->dataref); 452 dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT); 453 return dataref != 1; 454} 455 456/** 457 * skb_header_release - release reference to header 458 * @skb: buffer to operate on 459 * 460 * Drop a reference to the header part of the buffer. This is done 461 * by acquiring a payload reference. You must not read from the header 462 * part of skb->data after this. 463 */ 464static inline void skb_header_release(struct sk_buff *skb) 465{ 466 BUG_ON(skb->nohdr); 467 skb->nohdr = 1; 468 atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); 469} 470 471/** 472 * skb_shared - is the buffer shared 473 * @skb: buffer to check 474 * 475 * Returns true if more than one person has a reference to this 476 * buffer. 477 */ 478static inline int skb_shared(const struct sk_buff *skb) 479{ 480 return atomic_read(&skb->users) != 1; 481} 482 483/** 484 * skb_share_check - check if buffer is shared and if so clone it 485 * @skb: buffer to check 486 * @pri: priority for memory allocation 487 * 488 * If the buffer is shared the buffer is cloned and the old copy 489 * drops a reference. A new clone with a single reference is returned. 490 * If the buffer is not shared the original buffer is returned. When 491 * being called from interrupt status or with spinlocks held pri must 492 * be GFP_ATOMIC. 493 * 494 * NULL is returned on a memory allocation failure. 495 */ 496static inline struct sk_buff *skb_share_check(struct sk_buff *skb, 497 gfp_t pri) 498{ 499 might_sleep_if(pri & __GFP_WAIT); 500 if (skb_shared(skb)) { 501 struct sk_buff *nskb = skb_clone(skb, pri); 502 kfree_skb(skb); 503 skb = nskb; 504 } 505 return skb; 506} 507 508/* 509 * Copy shared buffers into a new sk_buff. We effectively do COW on 510 * packets to handle cases where we have a local reader and forward 511 * and a couple of other messy ones. The normal one is tcpdumping 512 * a packet thats being forwarded. 513 */ 514 515/** 516 * skb_unshare - make a copy of a shared buffer 517 * @skb: buffer to check 518 * @pri: priority for memory allocation 519 * 520 * If the socket buffer is a clone then this function creates a new 521 * copy of the data, drops a reference count on the old copy and returns 522 * the new copy with the reference count at 1. If the buffer is not a clone 523 * the original buffer is returned. When called with a spinlock held or 524 * from interrupt state @pri must be %GFP_ATOMIC 525 * 526 * %NULL is returned on a memory allocation failure. 527 */ 528static inline struct sk_buff *skb_unshare(struct sk_buff *skb, 529 gfp_t pri) 530{ 531 might_sleep_if(pri & __GFP_WAIT); 532 if (skb_cloned(skb)) { 533 struct sk_buff *nskb = skb_copy(skb, pri); 534 kfree_skb(skb); /* Free our shared copy */ 535 skb = nskb; 536 } 537 return skb; 538} 539 540/** 541 * skb_peek 542 * @list_: list to peek at 543 * 544 * Peek an &sk_buff. Unlike most other operations you _MUST_ 545 * be careful with this one. A peek leaves the buffer on the 546 * list and someone else may run off with it. You must hold 547 * the appropriate locks or have a private queue to do this. 548 * 549 * Returns %NULL for an empty list or a pointer to the head element. 550 * The reference count is not incremented and the reference is therefore 551 * volatile. Use with caution. 552 */ 553static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 554{ 555 struct sk_buff *list = ((struct sk_buff *)list_)->next; 556 if (list == (struct sk_buff *)list_) 557 list = NULL; 558 return list; 559} 560 561/** 562 * skb_peek_tail 563 * @list_: list to peek at 564 * 565 * Peek an &sk_buff. Unlike most other operations you _MUST_ 566 * be careful with this one. A peek leaves the buffer on the 567 * list and someone else may run off with it. You must hold 568 * the appropriate locks or have a private queue to do this. 569 * 570 * Returns %NULL for an empty list or a pointer to the tail element. 571 * The reference count is not incremented and the reference is therefore 572 * volatile. Use with caution. 573 */ 574static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 575{ 576 struct sk_buff *list = ((struct sk_buff *)list_)->prev; 577 if (list == (struct sk_buff *)list_) 578 list = NULL; 579 return list; 580} 581 582/** 583 * skb_queue_len - get queue length 584 * @list_: list to measure 585 * 586 * Return the length of an &sk_buff queue. 587 */ 588static inline __u32 skb_queue_len(const struct sk_buff_head *list_) 589{ 590 return list_->qlen; 591} 592 593static inline void skb_queue_head_init(struct sk_buff_head *list) 594{ 595 spin_lock_init(&list->lock); 596 list->prev = list->next = (struct sk_buff *)list; 597 list->qlen = 0; 598} 599 600/* 601 * Insert an sk_buff at the start of a list. 602 * 603 * The "__skb_xxxx()" functions are the non-atomic ones that 604 * can only be called with interrupts disabled. 605 */ 606 607/** 608 * __skb_queue_after - queue a buffer at the list head 609 * @list: list to use 610 * @prev: place after this buffer 611 * @newsk: buffer to queue 612 * 613 * Queue a buffer int the middle of a list. This function takes no locks 614 * and you must therefore hold required locks before calling it. 615 * 616 * A buffer cannot be placed on two lists at the same time. 617 */ 618static inline void __skb_queue_after(struct sk_buff_head *list, 619 struct sk_buff *prev, 620 struct sk_buff *newsk) 621{ 622 struct sk_buff *next; 623 list->qlen++; 624 625 next = prev->next; 626 newsk->next = next; 627 newsk->prev = prev; 628 next->prev = prev->next = newsk; 629} 630 631/** 632 * __skb_queue_head - queue a buffer at the list head 633 * @list: list to use 634 * @newsk: buffer to queue 635 * 636 * Queue a buffer at the start of a list. This function takes no locks 637 * and you must therefore hold required locks before calling it. 638 * 639 * A buffer cannot be placed on two lists at the same time. 640 */ 641extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); 642static inline void __skb_queue_head(struct sk_buff_head *list, 643 struct sk_buff *newsk) 644{ 645 __skb_queue_after(list, (struct sk_buff *)list, newsk); 646} 647 648/** 649 * __skb_queue_tail - queue a buffer at the list tail 650 * @list: list to use 651 * @newsk: buffer to queue 652 * 653 * Queue a buffer at the end of a list. This function takes no locks 654 * and you must therefore hold required locks before calling it. 655 * 656 * A buffer cannot be placed on two lists at the same time. 657 */ 658extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); 659static inline void __skb_queue_tail(struct sk_buff_head *list, 660 struct sk_buff *newsk) 661{ 662 struct sk_buff *prev, *next; 663 664 list->qlen++; 665 next = (struct sk_buff *)list; 666 prev = next->prev; 667 newsk->next = next; 668 newsk->prev = prev; 669 next->prev = prev->next = newsk; 670} 671 672 673/** 674 * __skb_dequeue - remove from the head of the queue 675 * @list: list to dequeue from 676 * 677 * Remove the head of the list. This function does not take any locks 678 * so must be used with appropriate locks held only. The head item is 679 * returned or %NULL if the list is empty. 680 */ 681extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); 682static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 683{ 684 struct sk_buff *next, *prev, *result; 685 686 prev = (struct sk_buff *) list; 687 next = prev->next; 688 result = NULL; 689 if (next != prev) { 690 result = next; 691 next = next->next; 692 list->qlen--; 693 next->prev = prev; 694 prev->next = next; 695 result->next = result->prev = NULL; 696 } 697 return result; 698} 699 700 701/* 702 * Insert a packet on a list. 703 */ 704extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); 705static inline void __skb_insert(struct sk_buff *newsk, 706 struct sk_buff *prev, struct sk_buff *next, 707 struct sk_buff_head *list) 708{ 709 newsk->next = next; 710 newsk->prev = prev; 711 next->prev = prev->next = newsk; 712 list->qlen++; 713} 714 715/* 716 * Place a packet after a given packet in a list. 717 */ 718extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); 719static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) 720{ 721 __skb_insert(newsk, old, old->next, list); 722} 723 724/* 725 * remove sk_buff from list. _Must_ be called atomically, and with 726 * the list known.. 727 */ 728extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); 729static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 730{ 731 struct sk_buff *next, *prev; 732 733 list->qlen--; 734 next = skb->next; 735 prev = skb->prev; 736 skb->next = skb->prev = NULL; 737 next->prev = prev; 738 prev->next = next; 739} 740 741 742/* XXX: more streamlined implementation */ 743 744/** 745 * __skb_dequeue_tail - remove from the tail of the queue 746 * @list: list to dequeue from 747 * 748 * Remove the tail of the list. This function does not take any locks 749 * so must be used with appropriate locks held only. The tail item is 750 * returned or %NULL if the list is empty. 751 */ 752extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); 753static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 754{ 755 struct sk_buff *skb = skb_peek_tail(list); 756 if (skb) 757 __skb_unlink(skb, list); 758 return skb; 759} 760 761 762static inline int skb_is_nonlinear(const struct sk_buff *skb) 763{ 764 return skb->data_len; 765} 766 767static inline unsigned int skb_headlen(const struct sk_buff *skb) 768{ 769 return skb->len - skb->data_len; 770} 771 772static inline int skb_pagelen(const struct sk_buff *skb) 773{ 774 int i, len = 0; 775 776 for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) 777 len += skb_shinfo(skb)->frags[i].size; 778 return len + skb_headlen(skb); 779} 780 781static inline void skb_fill_page_desc(struct sk_buff *skb, int i, 782 struct page *page, int off, int size) 783{ 784 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 785 786 frag->page = page; 787 frag->page_offset = off; 788 frag->size = size; 789 skb_shinfo(skb)->nr_frags = i + 1; 790} 791 792#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) 793#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) 794#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) 795 796/* 797 * Add data to an sk_buff 798 */ 799static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 800{ 801 unsigned char *tmp = skb->tail; 802 SKB_LINEAR_ASSERT(skb); 803 skb->tail += len; 804 skb->len += len; 805 return tmp; 806} 807 808/** 809 * skb_put - add data to a buffer 810 * @skb: buffer to use 811 * @len: amount of data to add 812 * 813 * This function extends the used data area of the buffer. If this would 814 * exceed the total buffer size the kernel will panic. A pointer to the 815 * first byte of the extra data is returned. 816 */ 817static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 818{ 819 unsigned char *tmp = skb->tail; 820 SKB_LINEAR_ASSERT(skb); 821 skb->tail += len; 822 skb->len += len; 823 if (unlikely(skb->tail>skb->end)) 824 skb_over_panic(skb, len, current_text_addr()); 825 return tmp; 826} 827 828static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 829{ 830 skb->data -= len; 831 skb->len += len; 832 return skb->data; 833} 834 835/** 836 * skb_push - add data to the start of a buffer 837 * @skb: buffer to use 838 * @len: amount of data to add 839 * 840 * This function extends the used data area of the buffer at the buffer 841 * start. If this would exceed the total buffer headroom the kernel will 842 * panic. A pointer to the first byte of the extra data is returned. 843 */ 844static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 845{ 846 skb->data -= len; 847 skb->len += len; 848 if (unlikely(skb->data<skb->head)) 849 skb_under_panic(skb, len, current_text_addr()); 850 return skb->data; 851} 852 853static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) 854{ 855 skb->len -= len; 856 BUG_ON(skb->len < skb->data_len); 857 return skb->data += len; 858} 859 860/** 861 * skb_pull - remove data from the start of a buffer 862 * @skb: buffer to use 863 * @len: amount of data to remove 864 * 865 * This function removes data from the start of a buffer, returning 866 * the memory to the headroom. A pointer to the next data in the buffer 867 * is returned. Once the data has been pulled future pushes will overwrite 868 * the old data. 869 */ 870static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) 871{ 872 return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); 873} 874 875extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); 876 877static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) 878{ 879 if (len > skb_headlen(skb) && 880 !__pskb_pull_tail(skb, len-skb_headlen(skb))) 881 return NULL; 882 skb->len -= len; 883 return skb->data += len; 884} 885 886static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) 887{ 888 return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); 889} 890 891static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 892{ 893 if (likely(len <= skb_headlen(skb))) 894 return 1; 895 if (unlikely(len > skb->len)) 896 return 0; 897 return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL; 898} 899 900/** 901 * skb_headroom - bytes at buffer head 902 * @skb: buffer to check 903 * 904 * Return the number of bytes of free space at the head of an &sk_buff. 905 */ 906static inline int skb_headroom(const struct sk_buff *skb) 907{ 908 return skb->data - skb->head; 909} 910 911/** 912 * skb_tailroom - bytes at buffer end 913 * @skb: buffer to check 914 * 915 * Return the number of bytes of free space at the tail of an sk_buff 916 */ 917static inline int skb_tailroom(const struct sk_buff *skb) 918{ 919 return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; 920} 921 922/** 923 * skb_reserve - adjust headroom 924 * @skb: buffer to alter 925 * @len: bytes to move 926 * 927 * Increase the headroom of an empty &sk_buff by reducing the tail 928 * room. This is only allowed for an empty buffer. 929 */ 930static inline void skb_reserve(struct sk_buff *skb, unsigned int len) 931{ 932 skb->data += len; 933 skb->tail += len; 934} 935 936/* 937 * CPUs often take a performance hit when accessing unaligned memory 938 * locations. The actual performance hit varies, it can be small if the 939 * hardware handles it or large if we have to take an exception and fix it 940 * in software. 941 * 942 * Since an ethernet header is 14 bytes network drivers often end up with 943 * the IP header at an unaligned offset. The IP header can be aligned by 944 * shifting the start of the packet by 2 bytes. Drivers should do this 945 * with: 946 * 947 * skb_reserve(NET_IP_ALIGN); 948 * 949 * The downside to this alignment of the IP header is that the DMA is now 950 * unaligned. On some architectures the cost of an unaligned DMA is high 951 * and this cost outweighs the gains made by aligning the IP header. 952 * 953 * Since this trade off varies between architectures, we allow NET_IP_ALIGN 954 * to be overridden. 955 */ 956#ifndef NET_IP_ALIGN 957#define NET_IP_ALIGN 2 958#endif 959 960extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 961 962static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 963{ 964 if (!skb->data_len) { 965 skb->len = len; 966 skb->tail = skb->data + len; 967 } else 968 ___pskb_trim(skb, len, 0); 969} 970 971/** 972 * skb_trim - remove end from a buffer 973 * @skb: buffer to alter 974 * @len: new length 975 * 976 * Cut the length of a buffer down by removing data from the tail. If 977 * the buffer is already under the length specified it is not modified. 978 */ 979static inline void skb_trim(struct sk_buff *skb, unsigned int len) 980{ 981 if (skb->len > len) 982 __skb_trim(skb, len); 983} 984 985 986static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 987{ 988 if (!skb->data_len) { 989 skb->len = len; 990 skb->tail = skb->data+len; 991 return 0; 992 } 993 return ___pskb_trim(skb, len, 1); 994} 995 996static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 997{ 998 return (len < skb->len) ? __pskb_trim(skb, len) : 0; 999} 1000 1001/** 1002 * skb_orphan - orphan a buffer 1003 * @skb: buffer to orphan 1004 * 1005 * If a buffer currently has an owner then we call the owner's 1006 * destructor function and make the @skb unowned. The buffer continues 1007 * to exist but is no longer charged to its former owner. 1008 */ 1009static inline void skb_orphan(struct sk_buff *skb) 1010{ 1011 if (skb->destructor) 1012 skb->destructor(skb); 1013 skb->destructor = NULL; 1014 skb->sk = NULL; 1015} 1016 1017/** 1018 * __skb_queue_purge - empty a list 1019 * @list: list to empty 1020 * 1021 * Delete all buffers on an &sk_buff list. Each buffer is removed from 1022 * the list and one reference dropped. This function does not take the 1023 * list lock and the caller must hold the relevant locks to use it. 1024 */ 1025extern void skb_queue_purge(struct sk_buff_head *list); 1026static inline void __skb_queue_purge(struct sk_buff_head *list) 1027{ 1028 struct sk_buff *skb; 1029 while ((skb = __skb_dequeue(list)) != NULL) 1030 kfree_skb(skb); 1031} 1032 1033#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB 1034/** 1035 * __dev_alloc_skb - allocate an skbuff for sending 1036 * @length: length to allocate 1037 * @gfp_mask: get_free_pages mask, passed to alloc_skb 1038 * 1039 * Allocate a new &sk_buff and assign it a usage count of one. The 1040 * buffer has unspecified headroom built in. Users should allocate 1041 * the headroom they think they need without accounting for the 1042 * built in space. The built in space is used for optimisations. 1043 * 1044 * %NULL is returned in there is no free memory. 1045 */ 1046static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 1047 gfp_t gfp_mask) 1048{ 1049 struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); 1050 if (likely(skb)) 1051 skb_reserve(skb, 16); 1052 return skb; 1053} 1054#else 1055extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); 1056#endif 1057 1058/** 1059 * dev_alloc_skb - allocate an skbuff for sending 1060 * @length: length to allocate 1061 * 1062 * Allocate a new &sk_buff and assign it a usage count of one. The 1063 * buffer has unspecified headroom built in. Users should allocate 1064 * the headroom they think they need without accounting for the 1065 * built in space. The built in space is used for optimisations. 1066 * 1067 * %NULL is returned in there is no free memory. Although this function 1068 * allocates memory it can be called from an interrupt. 1069 */ 1070static inline struct sk_buff *dev_alloc_skb(unsigned int length) 1071{ 1072 return __dev_alloc_skb(length, GFP_ATOMIC); 1073} 1074 1075/** 1076 * skb_cow - copy header of skb when it is required 1077 * @skb: buffer to cow 1078 * @headroom: needed headroom 1079 * 1080 * If the skb passed lacks sufficient headroom or its data part 1081 * is shared, data is reallocated. If reallocation fails, an error 1082 * is returned and original skb is not changed. 1083 * 1084 * The result is skb with writable area skb->head...skb->tail 1085 * and at least @headroom of space at head. 1086 */ 1087static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) 1088{ 1089 int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); 1090 1091 if (delta < 0) 1092 delta = 0; 1093 1094 if (delta || skb_cloned(skb)) 1095 return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); 1096 return 0; 1097} 1098 1099/** 1100 * skb_padto - pad an skbuff up to a minimal size 1101 * @skb: buffer to pad 1102 * @len: minimal length 1103 * 1104 * Pads up a buffer to ensure the trailing bytes exist and are 1105 * blanked. If the buffer already contains sufficient data it 1106 * is untouched. Returns the buffer, which may be a replacement 1107 * for the original, or NULL for out of memory - in which case 1108 * the original buffer is still freed. 1109 */ 1110 1111static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) 1112{ 1113 unsigned int size = skb->len; 1114 if (likely(size >= len)) 1115 return skb; 1116 return skb_pad(skb, len-size); 1117} 1118 1119static inline int skb_add_data(struct sk_buff *skb, 1120 char __user *from, int copy) 1121{ 1122 const int off = skb->len; 1123 1124 if (skb->ip_summed == CHECKSUM_NONE) { 1125 int err = 0; 1126 unsigned int csum = csum_and_copy_from_user(from, 1127 skb_put(skb, copy), 1128 copy, 0, &err); 1129 if (!err) { 1130 skb->csum = csum_block_add(skb->csum, csum, off); 1131 return 0; 1132 } 1133 } else if (!copy_from_user(skb_put(skb, copy), from, copy)) 1134 return 0; 1135 1136 __skb_trim(skb, off); 1137 return -EFAULT; 1138} 1139 1140static inline int skb_can_coalesce(struct sk_buff *skb, int i, 1141 struct page *page, int off) 1142{ 1143 if (i) { 1144 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; 1145 1146 return page == frag->page && 1147 off == frag->page_offset + frag->size; 1148 } 1149 return 0; 1150} 1151 1152/** 1153 * skb_linearize - convert paged skb to linear one 1154 * @skb: buffer to linarize 1155 * @gfp: allocation mode 1156 * 1157 * If there is no free memory -ENOMEM is returned, otherwise zero 1158 * is returned and the old skb data released. 1159 */ 1160extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp); 1161static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp) 1162{ 1163 return __skb_linearize(skb, gfp); 1164} 1165 1166/** 1167 * skb_postpull_rcsum - update checksum for received skb after pull 1168 * @skb: buffer to update 1169 * @start: start of data before pull 1170 * @len: length of data pulled 1171 * 1172 * After doing a pull on a received packet, you need to call this to 1173 * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE 1174 * so that it can be recomputed from scratch. 1175 */ 1176 1177static inline void skb_postpull_rcsum(struct sk_buff *skb, 1178 const void *start, int len) 1179{ 1180 if (skb->ip_summed == CHECKSUM_HW) 1181 skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); 1182} 1183 1184/** 1185 * pskb_trim_rcsum - trim received skb and update checksum 1186 * @skb: buffer to trim 1187 * @len: new length 1188 * 1189 * This is exactly the same as pskb_trim except that it ensures the 1190 * checksum of received packets are still valid after the operation. 1191 */ 1192 1193static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) 1194{ 1195 if (likely(len >= skb->len)) 1196 return 0; 1197 if (skb->ip_summed == CHECKSUM_HW) 1198 skb->ip_summed = CHECKSUM_NONE; 1199 return __pskb_trim(skb, len); 1200} 1201 1202static inline void *kmap_skb_frag(const skb_frag_t *frag) 1203{ 1204#ifdef CONFIG_HIGHMEM 1205 BUG_ON(in_irq()); 1206 1207 local_bh_disable(); 1208#endif 1209 return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 1210} 1211 1212static inline void kunmap_skb_frag(void *vaddr) 1213{ 1214 kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); 1215#ifdef CONFIG_HIGHMEM 1216 local_bh_enable(); 1217#endif 1218} 1219 1220#define skb_queue_walk(queue, skb) \ 1221 for (skb = (queue)->next; \ 1222 prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ 1223 skb = skb->next) 1224 1225#define skb_queue_reverse_walk(queue, skb) \ 1226 for (skb = (queue)->prev; \ 1227 prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ 1228 skb = skb->prev) 1229 1230 1231extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, 1232 int noblock, int *err); 1233extern unsigned int datagram_poll(struct file *file, struct socket *sock, 1234 struct poll_table_struct *wait); 1235extern int skb_copy_datagram_iovec(const struct sk_buff *from, 1236 int offset, struct iovec *to, 1237 int size); 1238extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, 1239 int hlen, 1240 struct iovec *iov); 1241extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); 1242extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, 1243 int len, unsigned int csum); 1244extern int skb_copy_bits(const struct sk_buff *skb, int offset, 1245 void *to, int len); 1246extern int skb_store_bits(const struct sk_buff *skb, int offset, 1247 void *from, int len); 1248extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, 1249 int offset, u8 *to, int len, 1250 unsigned int csum); 1251extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 1252extern void skb_split(struct sk_buff *skb, 1253 struct sk_buff *skb1, const u32 len); 1254 1255extern void skb_release_data(struct sk_buff *skb); 1256 1257static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, 1258 int len, void *buffer) 1259{ 1260 int hlen = skb_headlen(skb); 1261 1262 if (hlen - offset >= len) 1263 return skb->data + offset; 1264 1265 if (skb_copy_bits(skb, offset, buffer, len) < 0) 1266 return NULL; 1267 1268 return buffer; 1269} 1270 1271extern void skb_init(void); 1272extern void skb_add_mtu(int mtu); 1273 1274/** 1275 * skb_get_timestamp - get timestamp from a skb 1276 * @skb: skb to get stamp from 1277 * @stamp: pointer to struct timeval to store stamp in 1278 * 1279 * Timestamps are stored in the skb as offsets to a base timestamp. 1280 * This function converts the offset back to a struct timeval and stores 1281 * it in stamp. 1282 */ 1283static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) 1284{ 1285 stamp->tv_sec = skb->tstamp.off_sec; 1286 stamp->tv_usec = skb->tstamp.off_usec; 1287} 1288 1289/** 1290 * skb_set_timestamp - set timestamp of a skb 1291 * @skb: skb to set stamp of 1292 * @stamp: pointer to struct timeval to get stamp from 1293 * 1294 * Timestamps are stored in the skb as offsets to a base timestamp. 1295 * This function converts a struct timeval to an offset and stores 1296 * it in the skb. 1297 */ 1298static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) 1299{ 1300 skb->tstamp.off_sec = stamp->tv_sec; 1301 skb->tstamp.off_usec = stamp->tv_usec; 1302} 1303 1304extern void __net_timestamp(struct sk_buff *skb); 1305 1306extern unsigned int __skb_checksum_complete(struct sk_buff *skb); 1307 1308/** 1309 * skb_checksum_complete - Calculate checksum of an entire packet 1310 * @skb: packet to process 1311 * 1312 * This function calculates the checksum over the entire packet plus 1313 * the value of skb->csum. The latter can be used to supply the 1314 * checksum of a pseudo header as used by TCP/UDP. It returns the 1315 * checksum. 1316 * 1317 * For protocols that contain complete checksums such as ICMP/TCP/UDP, 1318 * this function can be used to verify that checksum on received 1319 * packets. In that case the function should return zero if the 1320 * checksum is correct. In particular, this function will return zero 1321 * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the 1322 * hardware has already verified the correctness of the checksum. 1323 */ 1324static inline unsigned int skb_checksum_complete(struct sk_buff *skb) 1325{ 1326 return skb->ip_summed != CHECKSUM_UNNECESSARY && 1327 __skb_checksum_complete(skb); 1328} 1329 1330#ifdef CONFIG_NETFILTER 1331static inline void nf_conntrack_put(struct nf_conntrack *nfct) 1332{ 1333 if (nfct && atomic_dec_and_test(&nfct->use)) 1334 nfct->destroy(nfct); 1335} 1336static inline void nf_conntrack_get(struct nf_conntrack *nfct) 1337{ 1338 if (nfct) 1339 atomic_inc(&nfct->use); 1340} 1341#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 1342static inline void nf_conntrack_get_reasm(struct sk_buff *skb) 1343{ 1344 if (skb) 1345 atomic_inc(&skb->users); 1346} 1347static inline void nf_conntrack_put_reasm(struct sk_buff *skb) 1348{ 1349 if (skb) 1350 kfree_skb(skb); 1351} 1352#endif 1353static inline void nf_reset(struct sk_buff *skb) 1354{ 1355 nf_conntrack_put(skb->nfct); 1356 skb->nfct = NULL; 1357#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 1358 nf_conntrack_put_reasm(skb->nfct_reasm); 1359 skb->nfct_reasm = NULL; 1360#endif 1361} 1362 1363#ifdef CONFIG_BRIDGE_NETFILTER 1364static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) 1365{ 1366 if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) 1367 kfree(nf_bridge); 1368} 1369static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) 1370{ 1371 if (nf_bridge) 1372 atomic_inc(&nf_bridge->use); 1373} 1374#endif /* CONFIG_BRIDGE_NETFILTER */ 1375#else /* CONFIG_NETFILTER */ 1376static inline void nf_reset(struct sk_buff *skb) {} 1377#endif /* CONFIG_NETFILTER */ 1378 1379#endif /* __KERNEL__ */ 1380#endif /* _LINUX_SKBUFF_H */