Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: dynamically allocate fqdir structures

A following patch will add an RCU grace period before fqdir
rhashtable destruction, so we need to allocate fqdir structures
dynamically to avoid forcing expensive synchronize_rcu() calls
in the netns dismantle path.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller
4907abc6 a39aca67

+78 -63
+14 -3
include/net/inet_frag.h
··· 105 105 int inet_frags_init(struct inet_frags *); 106 106 void inet_frags_fini(struct inet_frags *); 107 107 108 - static inline int fqdir_init(struct fqdir *fqdir, struct inet_frags *f, 108 + static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, 109 109 struct net *net) 110 110 { 111 + struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL); 112 + int res; 113 + 114 + if (!fqdir) 115 + return -ENOMEM; 111 116 fqdir->f = f; 112 117 fqdir->net = net; 113 - atomic_long_set(&fqdir->mem, 0); 114 - return rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); 118 + res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); 119 + if (res < 0) { 120 + kfree(fqdir); 121 + return res; 122 + } 123 + *fqdirp = fqdir; 124 + return 0; 115 125 } 126 + 116 127 void fqdir_exit(struct fqdir *fqdir); 117 128 118 129 void inet_frag_kill(struct inet_frag_queue *q);
+1 -1
include/net/netns/ieee802154_6lowpan.h
··· 16 16 17 17 struct netns_ieee802154_lowpan { 18 18 struct netns_sysctl_lowpan sysctl; 19 - struct fqdir fqdir; 19 + struct fqdir *fqdir; 20 20 }; 21 21 22 22 #endif
+1 -1
include/net/netns/ipv4.h
··· 72 72 73 73 struct inet_peer_base *peers; 74 74 struct sock * __percpu *tcp_sk; 75 - struct fqdir fqdir; 75 + struct fqdir *fqdir; 76 76 #ifdef CONFIG_NETFILTER 77 77 struct xt_table *iptable_filter; 78 78 struct xt_table *iptable_mangle;
+2 -2
include/net/netns/ipv6.h
··· 58 58 struct ipv6_devconf *devconf_all; 59 59 struct ipv6_devconf *devconf_dflt; 60 60 struct inet_peer_base *peers; 61 - struct fqdir fqdir; 61 + struct fqdir *fqdir; 62 62 #ifdef CONFIG_NETFILTER 63 63 struct xt_table *ip6table_filter; 64 64 struct xt_table *ip6table_mangle; ··· 116 116 117 117 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 118 118 struct netns_nf_frag { 119 - struct fqdir fqdir; 119 + struct fqdir *fqdir; 120 120 }; 121 121 #endif 122 122
+13 -11
net/ieee802154/6lowpan/reassembly.c
··· 79 79 key.src = *src; 80 80 key.dst = *dst; 81 81 82 - q = inet_frag_find(&ieee802154_lowpan->fqdir, &key); 82 + q = inet_frag_find(ieee802154_lowpan->fqdir, &key); 83 83 if (!q) 84 84 return NULL; 85 85 ··· 377 377 table[0].procname = NULL; 378 378 } 379 379 380 - table[0].data = &ieee802154_lowpan->fqdir.high_thresh; 381 - table[0].extra1 = &ieee802154_lowpan->fqdir.low_thresh; 382 - table[1].data = &ieee802154_lowpan->fqdir.low_thresh; 383 - table[1].extra2 = &ieee802154_lowpan->fqdir.high_thresh; 384 - table[2].data = &ieee802154_lowpan->fqdir.timeout; 380 + table[0].data = &ieee802154_lowpan->fqdir->high_thresh; 381 + table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh; 382 + table[1].data = &ieee802154_lowpan->fqdir->low_thresh; 383 + table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh; 384 + table[2].data = &ieee802154_lowpan->fqdir->timeout; 385 385 386 386 hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); 387 387 if (hdr == NULL) ··· 449 449 net_ieee802154_lowpan(net); 450 450 int res; 451 451 452 - ieee802154_lowpan->fqdir.high_thresh = IPV6_FRAG_HIGH_THRESH; 453 - ieee802154_lowpan->fqdir.low_thresh = IPV6_FRAG_LOW_THRESH; 454 - ieee802154_lowpan->fqdir.timeout = IPV6_FRAG_TIMEOUT; 455 452 456 453 res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net); 457 454 if (res < 0) 458 455 return res; 456 + 457 + ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; 458 + ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; 459 + ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT; 460 + 459 461 res = lowpan_frags_ns_sysctl_register(net); 460 462 if (res < 0) 461 - fqdir_exit(&ieee802154_lowpan->fqdir); 463 + fqdir_exit(ieee802154_lowpan->fqdir); 462 464 return res; 463 465 } 464 466 ··· 470 468 net_ieee802154_lowpan(net); 471 469 472 470 lowpan_frags_ns_sysctl_unregister(net); 473 - fqdir_exit(&ieee802154_lowpan->fqdir); 471 + fqdir_exit(ieee802154_lowpan->fqdir); 474 472 } 475 473 476 474 static struct 
pernet_operations lowpan_frags_ops = {
+1
net/ipv4/inet_fragment.c
··· 150 150 fqdir->high_thresh = 0; /* prevent creation of new frags */ 151 151 152 152 rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); 153 + kfree(fqdir); 153 154 } 154 155 EXPORT_SYMBOL(fqdir_exit); 155 156
+16 -16
net/ipv4/ip_fragment.c
··· 209 209 }; 210 210 struct inet_frag_queue *q; 211 211 212 - q = inet_frag_find(&net->ipv4.fqdir, &key); 212 + q = inet_frag_find(net->ipv4.fqdir, &key); 213 213 if (!q) 214 214 return NULL; 215 215 ··· 589 589 goto err_alloc; 590 590 591 591 } 592 - table[0].data = &net->ipv4.fqdir.high_thresh; 593 - table[0].extra1 = &net->ipv4.fqdir.low_thresh; 594 - table[1].data = &net->ipv4.fqdir.low_thresh; 595 - table[1].extra2 = &net->ipv4.fqdir.high_thresh; 596 - table[2].data = &net->ipv4.fqdir.timeout; 597 - table[3].data = &net->ipv4.fqdir.max_dist; 592 + table[0].data = &net->ipv4.fqdir->high_thresh; 593 + table[0].extra1 = &net->ipv4.fqdir->low_thresh; 594 + table[1].data = &net->ipv4.fqdir->low_thresh; 595 + table[1].extra2 = &net->ipv4.fqdir->high_thresh; 596 + table[2].data = &net->ipv4.fqdir->timeout; 597 + table[3].data = &net->ipv4.fqdir->max_dist; 598 598 599 599 hdr = register_net_sysctl(net, "net/ipv4", table); 600 600 if (!hdr) ··· 642 642 { 643 643 int res; 644 644 645 + res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); 646 + if (res < 0) 647 + return res; 645 648 /* Fragment cache limits. 646 649 * 647 650 * The fragment memory accounting code, (tries to) account for ··· 659 656 * we will prune down to 3MB, making room for approx 8 big 64K 660 657 * fragments 8x128k. 661 658 */ 662 - net->ipv4.fqdir.high_thresh = 4 * 1024 * 1024; 663 - net->ipv4.fqdir.low_thresh = 3 * 1024 * 1024; 659 + net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024; 660 + net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024; 664 661 /* 665 662 * Important NOTE! Fragment queue must be destroyed before MSL expires. 666 663 * RFC791 is wrong proposing to prolongate timer each fragment arrival 667 664 * by TTL. 
668 665 */ 669 - net->ipv4.fqdir.timeout = IP_FRAG_TIME; 666 + net->ipv4.fqdir->timeout = IP_FRAG_TIME; 670 667 671 - net->ipv4.fqdir.max_dist = 64; 668 + net->ipv4.fqdir->max_dist = 64; 672 669 673 - res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); 674 - if (res < 0) 675 - return res; 676 670 res = ip4_frags_ns_ctl_register(net); 677 671 if (res < 0) 678 - fqdir_exit(&net->ipv4.fqdir); 672 + fqdir_exit(net->ipv4.fqdir); 679 673 return res; 680 674 } 681 675 682 676 static void __net_exit ipv4_frags_exit_net(struct net *net) 683 677 { 684 678 ip4_frags_ns_ctl_unregister(net); 685 - fqdir_exit(&net->ipv4.fqdir); 679 + fqdir_exit(net->ipv4.fqdir); 686 680 } 687 681 688 682 static struct pernet_operations ip4_frags_ops = {
+2 -2
net/ipv4/proc.c
··· 72 72 seq_printf(seq, "RAW: inuse %d\n", 73 73 sock_prot_inuse_get(net, &raw_prot)); 74 74 seq_printf(seq, "FRAG: inuse %u memory %lu\n", 75 - atomic_read(&net->ipv4.fqdir.rhashtable.nelems), 76 - frag_mem_limit(&net->ipv4.fqdir)); 75 + atomic_read(&net->ipv4.fqdir->rhashtable.nelems), 76 + frag_mem_limit(net->ipv4.fqdir)); 77 77 return 0; 78 78 } 79 79
+14 -13
net/ipv6/netfilter/nf_conntrack_reasm.c
··· 90 90 goto err_alloc; 91 91 } 92 92 93 - table[0].data = &net->nf_frag.fqdir.timeout; 94 - table[1].data = &net->nf_frag.fqdir.low_thresh; 95 - table[1].extra2 = &net->nf_frag.fqdir.high_thresh; 96 - table[2].data = &net->nf_frag.fqdir.high_thresh; 97 - table[2].extra1 = &net->nf_frag.fqdir.low_thresh; 98 - table[2].extra2 = &init_net.nf_frag.fqdir.high_thresh; 93 + table[0].data = &net->nf_frag.fqdir->timeout; 94 + table[1].data = &net->nf_frag.fqdir->low_thresh; 95 + table[1].extra2 = &net->nf_frag.fqdir->high_thresh; 96 + table[2].data = &net->nf_frag.fqdir->high_thresh; 97 + table[2].extra1 = &net->nf_frag.fqdir->low_thresh; 98 + table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh; 99 99 100 100 hdr = register_net_sysctl(net, "net/netfilter", table); 101 101 if (hdr == NULL) ··· 162 162 }; 163 163 struct inet_frag_queue *q; 164 164 165 - q = inet_frag_find(&net->nf_frag.fqdir, &key); 165 + q = inet_frag_find(net->nf_frag.fqdir, &key); 166 166 if (!q) 167 167 return NULL; 168 168 ··· 489 489 { 490 490 int res; 491 491 492 - net->nf_frag.fqdir.high_thresh = IPV6_FRAG_HIGH_THRESH; 493 - net->nf_frag.fqdir.low_thresh = IPV6_FRAG_LOW_THRESH; 494 - net->nf_frag.fqdir.timeout = IPV6_FRAG_TIMEOUT; 495 - 496 492 res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net); 497 493 if (res < 0) 498 494 return res; 495 + 496 + net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; 497 + net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; 498 + net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT; 499 + 499 500 res = nf_ct_frag6_sysctl_register(net); 500 501 if (res < 0) 501 - fqdir_exit(&net->nf_frag.fqdir); 502 + fqdir_exit(net->nf_frag.fqdir); 502 503 return res; 503 504 } 504 505 505 506 static void nf_ct_net_exit(struct net *net) 506 507 { 507 508 nf_ct_frags6_sysctl_unregister(net); 508 - fqdir_exit(&net->nf_frag.fqdir); 509 + fqdir_exit(net->nf_frag.fqdir); 509 510 } 510 511 511 512 static struct pernet_operations nf_ct_net_ops = {
+2 -2
net/ipv6/proc.c
··· 48 48 seq_printf(seq, "RAW6: inuse %d\n", 49 49 sock_prot_inuse_get(net, &rawv6_prot)); 50 50 seq_printf(seq, "FRAG6: inuse %u memory %lu\n", 51 - atomic_read(&net->ipv6.fqdir.rhashtable.nelems), 52 - frag_mem_limit(&net->ipv6.fqdir)); 51 + atomic_read(&net->ipv6.fqdir->rhashtable.nelems), 52 + frag_mem_limit(net->ipv6.fqdir)); 53 53 return 0; 54 54 } 55 55
+12 -12
net/ipv6/reassembly.c
··· 98 98 IPV6_ADDR_LINKLOCAL))) 99 99 key.iif = 0; 100 100 101 - q = inet_frag_find(&net->ipv6.fqdir, &key); 101 + q = inet_frag_find(net->ipv6.fqdir, &key); 102 102 if (!q) 103 103 return NULL; 104 104 ··· 443 443 goto err_alloc; 444 444 445 445 } 446 - table[0].data = &net->ipv6.fqdir.high_thresh; 447 - table[0].extra1 = &net->ipv6.fqdir.low_thresh; 448 - table[1].data = &net->ipv6.fqdir.low_thresh; 449 - table[1].extra2 = &net->ipv6.fqdir.high_thresh; 450 - table[2].data = &net->ipv6.fqdir.timeout; 446 + table[0].data = &net->ipv6.fqdir->high_thresh; 447 + table[0].extra1 = &net->ipv6.fqdir->low_thresh; 448 + table[1].data = &net->ipv6.fqdir->low_thresh; 449 + table[1].extra2 = &net->ipv6.fqdir->high_thresh; 450 + table[2].data = &net->ipv6.fqdir->timeout; 451 451 452 452 hdr = register_net_sysctl(net, "net/ipv6", table); 453 453 if (!hdr) ··· 510 510 { 511 511 int res; 512 512 513 - net->ipv6.fqdir.high_thresh = IPV6_FRAG_HIGH_THRESH; 514 - net->ipv6.fqdir.low_thresh = IPV6_FRAG_LOW_THRESH; 515 - net->ipv6.fqdir.timeout = IPV6_FRAG_TIMEOUT; 516 - 517 513 res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net); 518 514 if (res < 0) 519 515 return res; 520 516 517 + net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; 518 + net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; 519 + net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT; 520 + 521 521 res = ip6_frags_ns_sysctl_register(net); 522 522 if (res < 0) 523 - fqdir_exit(&net->ipv6.fqdir); 523 + fqdir_exit(net->ipv6.fqdir); 524 524 return res; 525 525 } 526 526 527 527 static void __net_exit ipv6_frags_exit_net(struct net *net) 528 528 { 529 529 ip6_frags_ns_sysctl_unregister(net); 530 - fqdir_exit(&net->ipv6.fqdir); 530 + fqdir_exit(net->ipv6.fqdir); 531 531 } 532 532 533 533 static struct pernet_operations ip6_frags_ops = {