Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
netfilter updates for net-next

The following patchset contains netfilter updates for net-next, just a
bunch of cleanups and a small enhancement to selectively flush conntracks
in ctnetlink, more specifically the patches are:

1) Raise the default number of buckets in conntrack from 16384 to 65536 in
systems with >= 4GBytes, patch from Marcelo Leitner.

2) Small refactor to save one level of indentation in xt_osf, from
Joe Perches.

3) Remove unnecessary sizeof(char) in nf_log, from Fabian Frederick.

4) Another small cleanup to remove redundant variable in nfnetlink,
from Duan Jiong.

5) Fix compilation warning in nfnetlink_cthelper on parisc, from
Chen Gang.

6) Fix wrong format in debugging for ctseqadj, from Gao feng.

7) Selective conntrack flushing through the mark for ctnetlink, patch
from Kristian Evensen.

8) Remove the nf_conntrack_flush_report() exported symbol now that it is
no longer required after the selective flushing patch, again from
Kristian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+176 -140
+2 -1
Documentation/networking/nf_conntrack-sysctl.txt
··· 11 11 Size of hash table. If not specified as parameter during module 12 12 loading, the default size is calculated by dividing total memory 13 13 by 16384 to determine the number of buckets but the hash table will 14 - never have fewer than 32 or more than 16384 buckets. 14 + never have fewer than 32 and limited to 16384 buckets. For systems 15 + with more than 4GB of memory it will be 65536 buckets. 15 16 16 17 nf_conntrack_checksum - BOOLEAN 17 18 0 - disabled
-2
include/net/netfilter/nf_conntrack.h
··· 191 191 int nf_conntrack_hash_check_insert(struct nf_conn *ct); 192 192 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); 193 193 194 - void nf_conntrack_flush_report(struct net *net, u32 portid, int report); 195 - 196 194 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 197 195 u_int16_t l3num, struct nf_conntrack_tuple *tuple); 198 196 bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
+8 -9
net/netfilter/nf_conntrack_core.c
··· 1424 1424 } 1425 1425 EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 1426 1426 1427 - void nf_conntrack_flush_report(struct net *net, u32 portid, int report) 1428 - { 1429 - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); 1430 - } 1431 - EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); 1432 - 1433 1427 static int untrack_refs(void) 1434 1428 { 1435 1429 int cnt = 0, cpu; ··· 1616 1622 for (i = 0; i < CONNTRACK_LOCKS; i++) 1617 1623 spin_lock_init(&nf_conntrack_locks[i]); 1618 1624 1619 - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB 1620 - * machine has 512 buckets. >= 1GB machines have 16384 buckets. */ 1621 1625 if (!nf_conntrack_htable_size) { 1626 + /* Idea from tcp.c: use 1/16384 of memory. 1627 + * On i386: 32MB machine has 512 buckets. 1628 + * >= 1GB machines have 16384 buckets. 1629 + * >= 4GB machines have 65536 buckets. 1630 + */ 1622 1631 nf_conntrack_htable_size 1623 1632 = (((totalram_pages << PAGE_SHIFT) / 16384) 1624 1633 / sizeof(struct hlist_head)); 1625 - if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1634 + if (totalram_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) 1635 + nf_conntrack_htable_size = 65536; 1636 + else if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) 1626 1637 nf_conntrack_htable_size = 16384; 1627 1638 if (nf_conntrack_htable_size < 32) 1628 1639 nf_conntrack_htable_size = 32;
+64 -25
net/netfilter/nf_conntrack_netlink.c
··· 749 749 return 0; 750 750 } 751 751 752 - struct ctnetlink_dump_filter { 752 + struct ctnetlink_filter { 753 753 struct { 754 754 u_int32_t val; 755 755 u_int32_t mask; 756 756 } mark; 757 757 }; 758 + 759 + static struct ctnetlink_filter * 760 + ctnetlink_alloc_filter(const struct nlattr * const cda[]) 761 + { 762 + #ifdef CONFIG_NF_CONNTRACK_MARK 763 + struct ctnetlink_filter *filter; 764 + 765 + filter = kzalloc(sizeof(*filter), GFP_KERNEL); 766 + if (filter == NULL) 767 + return ERR_PTR(-ENOMEM); 768 + 769 + filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); 770 + filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK])); 771 + 772 + return filter; 773 + #else 774 + return ERR_PTR(-EOPNOTSUPP); 775 + #endif 776 + } 777 + 778 + static int ctnetlink_filter_match(struct nf_conn *ct, void *data) 779 + { 780 + struct ctnetlink_filter *filter = data; 781 + 782 + if (filter == NULL) 783 + return 1; 784 + 785 + #ifdef CONFIG_NF_CONNTRACK_MARK 786 + if ((ct->mark & filter->mark.mask) == filter->mark.val) 787 + return 1; 788 + #endif 789 + 790 + return 0; 791 + } 758 792 759 793 static int 760 794 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) ··· 801 767 u_int8_t l3proto = nfmsg->nfgen_family; 802 768 int res; 803 769 spinlock_t *lockp; 804 - 805 - #ifdef CONFIG_NF_CONNTRACK_MARK 806 - const struct ctnetlink_dump_filter *filter = cb->data; 807 - #endif 808 770 809 771 last = (struct nf_conn *)cb->args[1]; 810 772 ··· 828 798 continue; 829 799 cb->args[1] = 0; 830 800 } 831 - #ifdef CONFIG_NF_CONNTRACK_MARK 832 - if (filter && !((ct->mark & filter->mark.mask) == 833 - filter->mark.val)) { 801 + if (!ctnetlink_filter_match(ct, cb->data)) 834 802 continue; 835 - } 836 - #endif 803 + 837 804 rcu_read_lock(); 838 805 res = 839 806 ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, ··· 1028 1001 .len = NF_CT_LABELS_MAX_SIZE }, 1029 1002 }; 1030 1003 1004 + static int ctnetlink_flush_conntrack(struct net *net, 1005 + const struct nlattr * 
const cda[], 1006 + u32 portid, int report) 1007 + { 1008 + struct ctnetlink_filter *filter = NULL; 1009 + 1010 + if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { 1011 + filter = ctnetlink_alloc_filter(cda); 1012 + if (IS_ERR(filter)) 1013 + return PTR_ERR(filter); 1014 + } 1015 + 1016 + nf_ct_iterate_cleanup(net, ctnetlink_filter_match, filter, 1017 + portid, report); 1018 + kfree(filter); 1019 + 1020 + return 0; 1021 + } 1022 + 1031 1023 static int 1032 1024 ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 1033 1025 const struct nlmsghdr *nlh, ··· 1070 1024 else if (cda[CTA_TUPLE_REPLY]) 1071 1025 err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); 1072 1026 else { 1073 - /* Flush the whole table */ 1074 - nf_conntrack_flush_report(net, 1075 - NETLINK_CB(skb).portid, 1076 - nlmsg_report(nlh)); 1077 - return 0; 1027 + return ctnetlink_flush_conntrack(net, cda, 1028 + NETLINK_CB(skb).portid, 1029 + nlmsg_report(nlh)); 1078 1030 } 1079 1031 1080 1032 if (err < 0) ··· 1120 1076 .dump = ctnetlink_dump_table, 1121 1077 .done = ctnetlink_done, 1122 1078 }; 1123 - #ifdef CONFIG_NF_CONNTRACK_MARK 1079 + 1124 1080 if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) { 1125 - struct ctnetlink_dump_filter *filter; 1081 + struct ctnetlink_filter *filter; 1126 1082 1127 - filter = kzalloc(sizeof(struct ctnetlink_dump_filter), 1128 - GFP_ATOMIC); 1129 - if (filter == NULL) 1130 - return -ENOMEM; 1083 + filter = ctnetlink_alloc_filter(cda); 1084 + if (IS_ERR(filter)) 1085 + return PTR_ERR(filter); 1131 1086 1132 - filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK])); 1133 - filter->mark.mask = 1134 - ntohl(nla_get_be32(cda[CTA_MARK_MASK])); 1135 1087 c.data = filter; 1136 1088 } 1137 - #endif 1138 1089 return netlink_dump_start(ctnl, skb, nlh, &c); 1139 1090 } 1140 1091
+3 -3
net/netfilter/nf_conntrack_seqadj.c
··· 98 98 new_end_seq = htonl(ntohl(sack->end_seq) - 99 99 seq->offset_before); 100 100 101 - pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", 102 - ntohl(sack->start_seq), new_start_seq, 103 - ntohl(sack->end_seq), new_end_seq); 101 + pr_debug("sack_adjust: start_seq: %u->%u, end_seq: %u->%u\n", 102 + ntohl(sack->start_seq), ntohl(new_start_seq), 103 + ntohl(sack->end_seq), ntohl(new_end_seq)); 104 104 105 105 inet_proto_csum_replace4(&tcph->check, skb, 106 106 sack->start_seq, new_start_seq, 0);
+1 -2
net/netfilter/nf_log.c
··· 425 425 nf_log_sysctl_table[i].procname = 426 426 nf_log_sysctl_fnames[i]; 427 427 nf_log_sysctl_table[i].data = NULL; 428 - nf_log_sysctl_table[i].maxlen = 429 - NFLOGGER_NAME_LEN * sizeof(char); 428 + nf_log_sysctl_table[i].maxlen = NFLOGGER_NAME_LEN; 430 429 nf_log_sysctl_table[i].mode = 0644; 431 430 nf_log_sysctl_table[i].proc_handler = 432 431 nf_log_proc_dostring;
+7 -8
net/netfilter/nfnetlink.c
··· 272 272 static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, 273 273 u_int16_t subsys_id) 274 274 { 275 - struct sk_buff *nskb, *oskb = skb; 275 + struct sk_buff *oskb = skb; 276 276 struct net *net = sock_net(skb->sk); 277 277 const struct nfnetlink_subsystem *ss; 278 278 const struct nfnl_callback *nc; ··· 283 283 if (subsys_id >= NFNL_SUBSYS_COUNT) 284 284 return netlink_ack(skb, nlh, -EINVAL); 285 285 replay: 286 - nskb = netlink_skb_clone(oskb, GFP_KERNEL); 287 - if (!nskb) 286 + skb = netlink_skb_clone(oskb, GFP_KERNEL); 287 + if (!skb) 288 288 return netlink_ack(oskb, nlh, -ENOMEM); 289 289 290 - nskb->sk = oskb->sk; 291 - skb = nskb; 290 + skb->sk = oskb->sk; 292 291 293 292 nfnl_lock(subsys_id); 294 293 ss = rcu_dereference_protected(table[subsys_id].subsys, ··· 304 305 { 305 306 nfnl_unlock(subsys_id); 306 307 netlink_ack(skb, nlh, -EOPNOTSUPP); 307 - return kfree_skb(nskb); 308 + return kfree_skb(skb); 308 309 } 309 310 } 310 311 ··· 385 386 nfnl_err_reset(&err_list); 386 387 ss->abort(oskb); 387 388 nfnl_unlock(subsys_id); 388 - kfree_skb(nskb); 389 + kfree_skb(skb); 389 390 goto replay; 390 391 } 391 392 } ··· 426 427 427 428 nfnl_err_deliver(&err_list, oskb); 428 429 nfnl_unlock(subsys_id); 429 - kfree_skb(nskb); 430 + kfree_skb(skb); 430 431 } 431 432 432 433 static void nfnetlink_rcv(struct sk_buff *skb)
+89 -88
net/netfilter/xt_osf.c
··· 225 225 226 226 rcu_read_lock(); 227 227 list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { 228 + int foptsize, optnum; 229 + 228 230 f = &kf->finger; 229 231 230 232 if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) ··· 235 233 optp = _optp; 236 234 fmatch = FMATCH_WRONG; 237 235 238 - if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { 239 - int foptsize, optnum; 236 + if (totlen != f->ss || !xt_osf_ttl(skb, info, f->ttl)) 237 + continue; 240 238 241 - /* 242 - * Should not happen if userspace parser was written correctly. 243 - */ 244 - if (f->wss.wc >= OSF_WSS_MAX) 245 - continue; 239 + /* 240 + * Should not happen if userspace parser was written correctly. 241 + */ 242 + if (f->wss.wc >= OSF_WSS_MAX) 243 + continue; 246 244 247 - /* Check options */ 245 + /* Check options */ 248 246 249 - foptsize = 0; 250 - for (optnum = 0; optnum < f->opt_num; ++optnum) 251 - foptsize += f->opt[optnum].length; 247 + foptsize = 0; 248 + for (optnum = 0; optnum < f->opt_num; ++optnum) 249 + foptsize += f->opt[optnum].length; 252 250 253 - if (foptsize > MAX_IPOPTLEN || 254 - optsize > MAX_IPOPTLEN || 255 - optsize != foptsize) 256 - continue; 251 + if (foptsize > MAX_IPOPTLEN || 252 + optsize > MAX_IPOPTLEN || 253 + optsize != foptsize) 254 + continue; 257 255 258 - check_WSS = f->wss.wc; 256 + check_WSS = f->wss.wc; 259 257 260 - for (optnum = 0; optnum < f->opt_num; ++optnum) { 261 - if (f->opt[optnum].kind == (*optp)) { 262 - __u32 len = f->opt[optnum].length; 263 - const __u8 *optend = optp + len; 264 - int loop_cont = 0; 258 + for (optnum = 0; optnum < f->opt_num; ++optnum) { 259 + if (f->opt[optnum].kind == (*optp)) { 260 + __u32 len = f->opt[optnum].length; 261 + const __u8 *optend = optp + len; 262 + int loop_cont = 0; 265 263 266 - fmatch = FMATCH_OK; 264 + fmatch = FMATCH_OK; 267 265 268 - switch (*optp) { 269 - case OSFOPT_MSS: 270 - mss = optp[3]; 271 - mss <<= 8; 272 - mss |= optp[2]; 266 + switch (*optp) { 267 + case 
OSFOPT_MSS: 268 + mss = optp[3]; 269 + mss <<= 8; 270 + mss |= optp[2]; 273 271 274 - mss = ntohs((__force __be16)mss); 275 - break; 276 - case OSFOPT_TS: 277 - loop_cont = 1; 278 - break; 279 - } 280 - 281 - optp = optend; 282 - } else 283 - fmatch = FMATCH_OPT_WRONG; 284 - 285 - if (fmatch != FMATCH_OK) 272 + mss = ntohs((__force __be16)mss); 286 273 break; 287 - } 288 - 289 - if (fmatch != FMATCH_OPT_WRONG) { 290 - fmatch = FMATCH_WRONG; 291 - 292 - switch (check_WSS) { 293 - case OSF_WSS_PLAIN: 294 - if (f->wss.val == 0 || window == f->wss.val) 295 - fmatch = FMATCH_OK; 296 - break; 297 - case OSF_WSS_MSS: 298 - /* 299 - * Some smart modems decrease mangle MSS to 300 - * SMART_MSS_2, so we check standard, decreased 301 - * and the one provided in the fingerprint MSS 302 - * values. 303 - */ 304 - #define SMART_MSS_1 1460 305 - #define SMART_MSS_2 1448 306 - if (window == f->wss.val * mss || 307 - window == f->wss.val * SMART_MSS_1 || 308 - window == f->wss.val * SMART_MSS_2) 309 - fmatch = FMATCH_OK; 310 - break; 311 - case OSF_WSS_MTU: 312 - if (window == f->wss.val * (mss + 40) || 313 - window == f->wss.val * (SMART_MSS_1 + 40) || 314 - window == f->wss.val * (SMART_MSS_2 + 40)) 315 - fmatch = FMATCH_OK; 316 - break; 317 - case OSF_WSS_MODULO: 318 - if ((window % f->wss.val) == 0) 319 - fmatch = FMATCH_OK; 274 + case OSFOPT_TS: 275 + loop_cont = 1; 320 276 break; 321 277 } 322 - } 278 + 279 + optp = optend; 280 + } else 281 + fmatch = FMATCH_OPT_WRONG; 323 282 324 283 if (fmatch != FMATCH_OK) 325 - continue; 326 - 327 - fcount++; 328 - 329 - if (info->flags & XT_OSF_LOG) 330 - nf_log_packet(net, p->family, p->hooknum, skb, 331 - p->in, p->out, NULL, 332 - "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", 333 - f->genre, f->version, f->subtype, 334 - &ip->saddr, ntohs(tcp->source), 335 - &ip->daddr, ntohs(tcp->dest), 336 - f->ttl - ip->ttl); 337 - 338 - if ((info->flags & XT_OSF_LOG) && 339 - info->loglevel == XT_OSF_LOGLEVEL_FIRST) 340 284 break; 341 285 } 286 + 
287 + if (fmatch != FMATCH_OPT_WRONG) { 288 + fmatch = FMATCH_WRONG; 289 + 290 + switch (check_WSS) { 291 + case OSF_WSS_PLAIN: 292 + if (f->wss.val == 0 || window == f->wss.val) 293 + fmatch = FMATCH_OK; 294 + break; 295 + case OSF_WSS_MSS: 296 + /* 297 + * Some smart modems decrease mangle MSS to 298 + * SMART_MSS_2, so we check standard, decreased 299 + * and the one provided in the fingerprint MSS 300 + * values. 301 + */ 302 + #define SMART_MSS_1 1460 303 + #define SMART_MSS_2 1448 304 + if (window == f->wss.val * mss || 305 + window == f->wss.val * SMART_MSS_1 || 306 + window == f->wss.val * SMART_MSS_2) 307 + fmatch = FMATCH_OK; 308 + break; 309 + case OSF_WSS_MTU: 310 + if (window == f->wss.val * (mss + 40) || 311 + window == f->wss.val * (SMART_MSS_1 + 40) || 312 + window == f->wss.val * (SMART_MSS_2 + 40)) 313 + fmatch = FMATCH_OK; 314 + break; 315 + case OSF_WSS_MODULO: 316 + if ((window % f->wss.val) == 0) 317 + fmatch = FMATCH_OK; 318 + break; 319 + } 320 + } 321 + 322 + if (fmatch != FMATCH_OK) 323 + continue; 324 + 325 + fcount++; 326 + 327 + if (info->flags & XT_OSF_LOG) 328 + nf_log_packet(net, p->family, p->hooknum, skb, 329 + p->in, p->out, NULL, 330 + "%s [%s:%s] : %pI4:%d -> %pI4:%d hops=%d\n", 331 + f->genre, f->version, f->subtype, 332 + &ip->saddr, ntohs(tcp->source), 333 + &ip->daddr, ntohs(tcp->dest), 334 + f->ttl - ip->ttl); 335 + 336 + if ((info->flags & XT_OSF_LOG) && 337 + info->loglevel == XT_OSF_LOGLEVEL_FIRST) 338 + break; 342 339 } 343 340 rcu_read_unlock(); 344 341