// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * inet fragments management
 *
 * Authors:	Pavel Emelyanov <xemul@openvz.org>
 *		Started as consolidation of ipv4/ip_fragment.c,
 *		ipv6/reassembly. and ipv6 nf conntrack reassembly
 */

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/rhashtable.h>

#include <net/sock.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/ipv6.h>

#include "../core/sock_destructor.h"

/* Use skb->cb to track consecutive/adjacent fragments coming at
 * the end of the queue. Nodes in the rb-tree queue will
 * contain "runs" of one or more adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in frag_run_len.
 */
struct ipfrag_skb_cb {
	union {
		struct inet_skb_parm	h4;
		struct inet6_skb_parm	h6;
	};
	struct sk_buff		*next_frag;
	int			frag_run_len;
	int			ip_defrag_offset;
};

#define FRAG_CB(skb)		((struct ipfrag_skb_cb *)((skb)->cb))

static void fragcb_clear(struct sk_buff *skb)
{
	RB_CLEAR_NODE(&skb->rbnode);
	FRAG_CB(skb)->next_frag = NULL;
	FRAG_CB(skb)->frag_run_len = skb->len;
}

/* Append skb to the last "run". */
static void fragrun_append_to_last(struct inet_frag_queue *q,
				   struct sk_buff *skb)
{
	fragcb_clear(skb);

	FRAG_CB(q->last_run_head)->frag_run_len += skb->len;
	FRAG_CB(q->fragments_tail)->next_frag = skb;
	q->fragments_tail = skb;
}

/* Create a new "run" with the skb. */
static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb));
	fragcb_clear(skb);

	if (q->last_run_head)
		rb_link_node(&skb->rbnode, &q->last_run_head->rbnode,
			     &q->last_run_head->rbnode.rb_right);
	else
		rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node);
	rb_insert_color(&skb->rbnode, &q->rb_fragments);

	q->fragments_tail = skb;
	q->last_run_head = skb;
}

/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
 * Value : 0xff if frame should be dropped.
 *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
 */
const u8 ip_frag_ecn_table[16] = {
	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,

	/* invalid combinations : drop frame */
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
};
EXPORT_SYMBOL(ip_frag_ecn_table);

int inet_frags_init(struct inet_frags *f)
{
	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
					    NULL);
	if (!f->frags_cachep)
		return -ENOMEM;

	refcount_set(&f->refcnt, 1);
	init_completion(&f->completion);
	return 0;
}
EXPORT_SYMBOL(inet_frags_init);

void inet_frags_fini(struct inet_frags *f)
{
	if (refcount_dec_and_test(&f->refcnt))
		complete(&f->completion);

	wait_for_completion(&f->completion);

	kmem_cache_destroy(f->frags_cachep);
	f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);

/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
static void inet_frags_free_cb(void *ptr, void *arg)
{
	struct inet_frag_queue *fq = ptr;
	int count;

	count = timer_delete_sync(&fq->timer) ? 1 : 0;

	spin_lock_bh(&fq->lock);
	fq->flags |= INET_FRAG_DROP;
	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		fq->flags |= INET_FRAG_COMPLETE;
		count++;
	} else if (fq->flags & INET_FRAG_HASH_DEAD) {
		count++;
	}
	spin_unlock_bh(&fq->lock);

	inet_frag_putn(fq, count);
}

static LLIST_HEAD(fqdir_free_list);

static void fqdir_free_fn(struct work_struct *work)
{
	struct llist_node *kill_list;
	struct fqdir *fqdir, *tmp;
	struct inet_frags *f;

	/* Atomically snapshot the list of fqdirs to free */
	kill_list = llist_del_all(&fqdir_free_list);

	/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
	 * have completed, since they need to dereference fqdir.
	 * Would it not be nice to have kfree_rcu_barrier() ? :)
	 */
	rcu_barrier();

	llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) {
		f = fqdir->f;
		if (refcount_dec_and_test(&f->refcnt))
			complete(&f->completion);

		kfree(fqdir);
	}
}

static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn);

static void fqdir_work_fn(struct work_struct *work)
{
	struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work);

	rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);

	if (llist_add(&fqdir->free_list, &fqdir_free_list))
		queue_delayed_work(system_percpu_wq, &fqdir_free_work, HZ);
}

int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
{
	struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
	int res;

	if (!fqdir)
		return -ENOMEM;
	fqdir->f = f;
	fqdir->net = net;
	res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
	if (res < 0) {
		kfree(fqdir);
		return res;
	}
	refcount_inc(&f->refcnt);
	*fqdirp = fqdir;
	return 0;
}
EXPORT_SYMBOL(fqdir_init);

static struct workqueue_struct *inet_frag_wq;

static int __init inet_frag_wq_init(void)
{
	inet_frag_wq = create_workqueue("inet_frag_wq");
	if (!inet_frag_wq)
		panic("Could not create inet frag workq");
	return 0;
}

pure_initcall(inet_frag_wq_init);

void fqdir_pre_exit(struct fqdir *fqdir)
{
	struct inet_frag_queue *fq;
	struct rhashtable_iter hti;

	/* Prevent creation of new frags.
	 * Pairs with READ_ONCE() in inet_frag_find().
	 */
	WRITE_ONCE(fqdir->high_thresh, 0);

	/* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire()
	 * and ip6frag_expire_frag_queue().
	 */
	WRITE_ONCE(fqdir->dead, true);

	rhashtable_walk_enter(&fqdir->rhashtable, &hti);
	rhashtable_walk_start(&hti);

	while ((fq = rhashtable_walk_next(&hti))) {
		if (IS_ERR(fq)) {
			if (PTR_ERR(fq) != -EAGAIN)
				break;
			continue;
		}
		spin_lock_bh(&fq->lock);
		if (!(fq->flags & INET_FRAG_COMPLETE))
			inet_frag_queue_flush(fq, 0);
		spin_unlock_bh(&fq->lock);
	}

	rhashtable_walk_stop(&hti);
	rhashtable_walk_exit(&hti);
}
EXPORT_SYMBOL(fqdir_pre_exit);

void fqdir_exit(struct fqdir *fqdir)
{
	INIT_WORK(&fqdir->destroy_work, fqdir_work_fn);
	queue_work(inet_frag_wq, &fqdir->destroy_work);
}
EXPORT_SYMBOL(fqdir_exit);

void inet_frag_kill(struct inet_frag_queue *fq, int *refs)
{
	if (timer_delete(&fq->timer))
		(*refs)++;

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct fqdir *fqdir = fq->fqdir;

		fq->flags |= INET_FRAG_COMPLETE;
		rcu_read_lock();
		/* The RCU read lock provides a memory barrier
		 * guaranteeing that if fqdir->dead is false then
		 * the hash table destruction will not start until
		 * after we unlock. Paired with fqdir_pre_exit().
		 */
		if (!READ_ONCE(fqdir->dead)) {
			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
					       fqdir->f->rhash_params);
			(*refs)++;
		} else {
			fq->flags |= INET_FRAG_HASH_DEAD;
		}
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(inet_frag_kill);

static void inet_frag_destroy_rcu(struct rcu_head *head)
{
	struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
						 rcu);
	struct inet_frags *f = q->fqdir->f;

	if (f->destructor)
		f->destructor(q);
	kmem_cache_free(f->frags_cachep, q);
}

static unsigned int
inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason)
{
	struct rb_node *p = rb_first(root);
	unsigned int sum = 0;

	while (p) {
		struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);

		p = rb_next(p);
		rb_erase(&skb->rbnode, root);
		while (skb) {
			struct sk_buff *next = FRAG_CB(skb)->next_frag;

			sum += skb->truesize;
			kfree_skb_reason(skb, reason);
			skb = next;
		}
	}
	return sum;
}

void inet_frag_queue_flush(struct inet_frag_queue *q,
			   enum skb_drop_reason reason)
{
	unsigned int sum;

	reason = reason ?: SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
	sum = inet_frag_rbtree_purge(&q->rb_fragments, reason);
	sub_frag_mem_limit(q->fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_queue_flush);

void inet_frag_destroy(struct inet_frag_queue *q)
{
	unsigned int sum, sum_truesize = 0;
	enum skb_drop_reason reason;
	struct inet_frags *f;
	struct fqdir *fqdir;

	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
	reason = (q->flags & INET_FRAG_DROP) ?
			SKB_DROP_REASON_FRAG_REASM_TIMEOUT :
			SKB_CONSUMED;
	WARN_ON(timer_delete(&q->timer) != 0);

	/* Release all fragment data. */
	fqdir = q->fqdir;
	f = fqdir->f;
	sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason);
	sum = sum_truesize + f->qsize;

	call_rcu(&q->rcu, inet_frag_destroy_rcu);

	sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);

static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
					       struct inet_frags *f,
					       void *arg)
{
	struct inet_frag_queue *q;

	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
	if (!q)
		return NULL;

	q->fqdir = fqdir;
	f->constructor(q, arg);
	add_frag_mem_limit(fqdir, f->qsize);

	timer_setup(&q->timer, f->frag_expire, 0);
	spin_lock_init(&q->lock);
	/* One reference for the timer, one for the hash table.
	 * We never take any extra references, only decrement this field.
	 */
	refcount_set(&q->refcnt, 2);

	return q;
}

static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
						void *arg,
						struct inet_frag_queue **prev)
{
	struct inet_frags *f = fqdir->f;
	struct inet_frag_queue *q;

	q = inet_frag_alloc(fqdir, f, arg);
	if (!q) {
		*prev = ERR_PTR(-ENOMEM);
		return NULL;
	}
	mod_timer(&q->timer, jiffies + fqdir->timeout);

	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
						 &q->node, f->rhash_params);
	if (*prev) {
		/* We could not insert in the hash table,
		 * we need to cancel what inet_frag_alloc()
		 * anticipated.
		 */
		int refs = 1;

		q->flags |= INET_FRAG_COMPLETE;
		inet_frag_kill(q, &refs);
		inet_frag_putn(q, refs);
		return NULL;
	}
	return q;
}

struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
	long high_thresh = READ_ONCE(fqdir->high_thresh);
	struct inet_frag_queue *fq = NULL, *prev;

	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
		return NULL;

	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
	if (!prev)
		fq = inet_frag_create(fqdir, key, &prev);
	if (!IS_ERR_OR_NULL(prev))
		fq = prev;
	return fq;
}
EXPORT_SYMBOL(inet_frag_find);

int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb,
			   int offset, int end)
{
	struct sk_buff *last = q->fragments_tail;

	/* RFC5722, Section 4, amended by Errata ID : 3089
	 *                          When reassembling an IPv6 datagram, if
	 *   one or more its constituent fragments is determined to be an
	 *   overlapping fragment, the entire datagram (and any constituent
	 *   fragments) MUST be silently discarded.
	 *
	 * Duplicates, however, should be ignored (i.e. skb dropped, but the
	 * queue/fragments kept for later reassembly).
	 */
	if (!last)
		fragrun_create(q, skb);  /* First fragment. */
	else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) {
		/* This is the common case: skb goes to the end. */
		/* Detect and discard overlaps. */
		if (offset < FRAG_CB(last)->ip_defrag_offset + last->len)
			return IPFRAG_OVERLAP;
		if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
			fragrun_append_to_last(q, skb);
		else
			fragrun_create(q, skb);
	} else {
		/* Binary search. Note that skb can become the first fragment,
		 * but not the last (covered above).
		 */
		struct rb_node **rbn, *parent;

		rbn = &q->rb_fragments.rb_node;
		do {
			struct sk_buff *curr;
			int curr_run_end;

			parent = *rbn;
			curr = rb_to_skb(parent);
			curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
				       FRAG_CB(curr)->frag_run_len;
			if (end <= FRAG_CB(curr)->ip_defrag_offset)
				rbn = &parent->rb_left;
			else if (offset >= curr_run_end)
				rbn = &parent->rb_right;
			else if (offset >= FRAG_CB(curr)->ip_defrag_offset &&
				 end <= curr_run_end)
				return IPFRAG_DUP;
			else
				return IPFRAG_OVERLAP;
		} while (*rbn);
		/* Here we have parent properly set, and rbn pointing to
		 * one of its NULL left/right children. Insert skb.
		 */
		fragcb_clear(skb);
		rb_link_node(&skb->rbnode, parent, rbn);
		rb_insert_color(&skb->rbnode, &q->rb_fragments);
	}

	FRAG_CB(skb)->ip_defrag_offset = offset;
	if (offset)
		nf_reset_ct(skb);

	return IPFRAG_OK;
}
EXPORT_SYMBOL(inet_frag_queue_insert);

void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
			      struct sk_buff *parent)
{
	struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments);
	void (*destructor)(struct sk_buff *);
	unsigned int orig_truesize = 0;
	struct sk_buff **nextp = NULL;
	struct sock *sk = skb->sk;
	int delta;

	if (sk && is_skb_wmem(skb)) {
		/* TX: skb->sk might have been passed as argument to
		 * dst->output and must remain valid until tx completes.
		 *
		 * Move sk to reassembled skb and fix up wmem accounting.
		 */
		orig_truesize = skb->truesize;
		destructor = skb->destructor;
	}

	if (head != skb) {
		fp = skb_clone(skb, GFP_ATOMIC);
		if (!fp) {
			head = skb;
			goto out_restore_sk;
		}
		FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag;
		if (RB_EMPTY_NODE(&skb->rbnode))
			FRAG_CB(parent)->next_frag = fp;
		else
			rb_replace_node(&skb->rbnode, &fp->rbnode,
					&q->rb_fragments);
		if (q->fragments_tail == skb)
			q->fragments_tail = fp;

		if (orig_truesize) {
			/* prevent skb_morph from releasing sk */
			skb->sk = NULL;
			skb->destructor = NULL;
		}
		skb_morph(skb, head);
		FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
		rb_replace_node(&head->rbnode, &skb->rbnode,
				&q->rb_fragments);
		consume_skb(head);
		head = skb;
	}
	WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);

	delta = -head->truesize;

	/* Head of list must not be cloned. */
	if (skb_unclone(head, GFP_ATOMIC))
		goto out_restore_sk;

	delta += head->truesize;
	if (delta)
		add_frag_mem_limit(q->fqdir, delta);

	/* If the first fragment is fragmented itself, we split
	 * it to two chunks: the first with data and paged part
	 * and the second, holding only fragments.
	 */
	if (skb_has_frag_list(head)) {
		struct sk_buff *clone;
		int i, plen = 0;

		clone = alloc_skb(0, GFP_ATOMIC);
		if (!clone)
			goto out_restore_sk;
		skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
		skb_frag_list_init(head);
		for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
			plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
		clone->data_len = head->data_len - plen;
		clone->len = clone->data_len;
		head->truesize += clone->truesize;
		clone->csum = 0;
		clone->ip_summed = head->ip_summed;
		add_frag_mem_limit(q->fqdir, clone->truesize);
		skb_shinfo(head)->frag_list = clone;
		nextp = &clone->next;
	} else {
		nextp = &skb_shinfo(head)->frag_list;
	}

out_restore_sk:
	if (orig_truesize) {
		int ts_delta = head->truesize - orig_truesize;

		/* if this reassembled skb is fragmented later,
		 * fraglist skbs will get skb->sk assigned from head->sk,
		 * and each frag skb will be released via sock_wfree.
		 *
		 * Update sk_wmem_alloc.
		 */
		head->sk = sk;
		head->destructor = destructor;
		refcount_add(ts_delta, &sk->sk_wmem_alloc);
	}

	return nextp;
}
EXPORT_SYMBOL(inet_frag_reasm_prepare);

void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
			    void *reasm_data, bool try_coalesce)
{
	struct sock *sk = is_skb_wmem(head) ? head->sk : NULL;
	const unsigned int head_truesize = head->truesize;
	struct sk_buff **nextp = reasm_data;
	struct rb_node *rbn;
	struct sk_buff *fp;
	int sum_truesize;

	skb_push(head, head->data - skb_network_header(head));

	/* Traverse the tree in order, to build frag_list. */
	fp = FRAG_CB(head)->next_frag;
	rbn = rb_next(&head->rbnode);
	rb_erase(&head->rbnode, &q->rb_fragments);

	sum_truesize = head->truesize;
	while (rbn || fp) {
		/* fp points to the next sk_buff in the current run;
		 * rbn points to the next run.
		 */
		/* Go through the current run. */
		while (fp) {
			struct sk_buff *next_frag = FRAG_CB(fp)->next_frag;
			bool stolen;
			int delta;

			sum_truesize += fp->truesize;
			if (head->ip_summed != fp->ip_summed)
				head->ip_summed = CHECKSUM_NONE;
			else if (head->ip_summed == CHECKSUM_COMPLETE)
				head->csum = csum_add(head->csum, fp->csum);

			if (try_coalesce && skb_try_coalesce(head, fp, &stolen,
							     &delta)) {
				kfree_skb_partial(fp, stolen);
			} else {
				fp->prev = NULL;
				memset(&fp->rbnode, 0, sizeof(fp->rbnode));
				fp->sk = NULL;

				head->data_len += fp->len;
				head->len += fp->len;
				head->truesize += fp->truesize;

				*nextp = fp;
				nextp = &fp->next;
			}

			fp = next_frag;
		}
		/* Move to the next run. */
		if (rbn) {
			struct rb_node *rbnext = rb_next(rbn);

			fp = rb_to_skb(rbn);
			rb_erase(rbn, &q->rb_fragments);
			rbn = rbnext;
		}
	}
	sub_frag_mem_limit(q->fqdir, sum_truesize);

	*nextp = NULL;
	skb_mark_not_on_list(head);
	head->prev = NULL;
	head->tstamp = q->stamp;
	head->tstamp_type = q->tstamp_type;

	if (sk)
		refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(inet_frag_reasm_finish);

struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
{
	struct sk_buff *head, *skb;

	head = skb_rb_first(&q->rb_fragments);
	if (!head)
		return NULL;
	skb = FRAG_CB(head)->next_frag;
	if (skb)
		rb_replace_node(&head->rbnode, &skb->rbnode,
				&q->rb_fragments);
	else
		rb_erase(&head->rbnode, &q->rb_fragments);
	memset(&head->rbnode, 0, sizeof(head->rbnode));
	barrier();

	if (head == q->fragments_tail)
		q->fragments_tail = NULL;

	sub_frag_mem_limit(q->fqdir, head->truesize);

	return head;
}
EXPORT_SYMBOL(inet_frag_pull_head);
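
/* Illustrative usage sketch (not upstream code; kept under "#if 0" so it
 * is never compiled): roughly how a reassembly protocol drives the helpers
 * above.  The queue key, the offset/end computation, the error handling and
 * the decision of when a datagram is complete (and thus when to call
 * inet_frag_reasm_prepare()/inet_frag_reasm_finish()) are protocol specific
 * and only hinted at here; see ipv4/ip_fragment.c and ipv6/reassembly.c for
 * real callers.  example_frag_input() and its arguments are hypothetical.
 */
#if 0
static int example_frag_input(struct fqdir *fqdir, void *key,
			      struct sk_buff *skb, int offset, int end)
{
	struct inet_frag_queue *q;
	int refs = 0;
	int ret;

	/* Lookups rely on RCU; a queue created by inet_frag_find() starts
	 * with two references (timer + hash table), none owned by us.
	 */
	rcu_read_lock();
	q = inet_frag_find(fqdir, key);
	if (!q) {
		rcu_read_unlock();
		kfree_skb(skb);
		return -ENOMEM;
	}

	spin_lock_bh(&q->lock);
	switch (inet_frag_queue_insert(q, skb, offset, end)) {
	case IPFRAG_OK:
		/* A real protocol would check here whether all fragments
		 * have arrived and, if so, reassemble the datagram.
		 */
		ret = 0;
		break;
	case IPFRAG_DUP:
		/* Duplicate: drop the skb, keep the queue. */
		kfree_skb(skb);
		ret = 0;
		break;
	case IPFRAG_OVERLAP:
	default:
		/* RFC 5722: an overlap invalidates the whole datagram. */
		inet_frag_kill(q, &refs);
		kfree_skb(skb);
		ret = -EINVAL;
		break;
	}
	spin_unlock_bh(&q->lock);
	rcu_read_unlock();

	/* Release the references inet_frag_kill() transferred to us
	 * (none if the queue was left alive).
	 */
	inet_frag_putn(q, refs);
	return ret;
}
#endif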