Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6

+4 -1

include/linux/netfilter/Kbuild

··· 3 3 header-y += nfnetlink_conntrack.h 4 4 header-y += nfnetlink_log.h 5 5 header-y += nfnetlink_queue.h 6 + header-y += xt_CHECKSUM.h 6 7 header-y += xt_CLASSIFY.h 7 8 header-y += xt_CONNMARK.h 8 9 header-y += xt_CONNSECMARK.h ··· 20 19 header-y += xt_TCPOPTSTRIP.h 21 20 header-y += xt_TEE.h 22 21 header-y += xt_TPROXY.h 22 + header-y += xt_cluster.h 23 23 header-y += xt_comment.h 24 24 header-y += xt_connbytes.h 25 25 header-y += xt_connlimit.h 26 26 header-y += xt_connmark.h 27 27 header-y += xt_conntrack.h 28 - header-y += xt_cluster.h 28 + header-y += xt_cpu.h 29 29 header-y += xt_dccp.h 30 30 header-y += xt_dscp.h 31 31 header-y += xt_esp.h 32 32 header-y += xt_hashlimit.h 33 33 header-y += xt_iprange.h 34 + header-y += xt_ipvs.h 34 35 header-y += xt_helper.h 35 36 header-y += xt_length.h 36 37 header-y += xt_limit.h

+1 -1

include/linux/netfilter/nfnetlink_log.h

··· 89 89 #define NFULNL_COPY_NONE 0x00 90 90 #define NFULNL_COPY_META 0x01 91 91 #define NFULNL_COPY_PACKET 0x02 92 - #define NFULNL_COPY_DISABLED 0x03 92 + /* 0xff is reserved, don't use it for new copy modes. */ 93 93 94 94 #define NFULNL_CFG_F_SEQ 0x0001 95 95 #define NFULNL_CFG_F_SEQ_GLOBAL 0x0002

+20

include/linux/netfilter/xt_CHECKSUM.h

/* Header file for iptables ipt_CHECKSUM target
 *
 * (C) 2002 by Harald Welte <laforge@gnumonks.org>
 * (C) 2010 Red Hat Inc
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * This software is distributed under GNU GPL v2, 1991
 */
#ifndef _XT_CHECKSUM_TARGET_H
#define _XT_CHECKSUM_TARGET_H

#include <linux/types.h>

/* Operation bits that may be set in xt_CHECKSUM_info.operation. */
#define XT_CHECKSUM_OP_FILL	0x01	/* fill in checksum in IP header */

/*
 * Target parameters for the CHECKSUM target.
 *
 * @operation: bitset of XT_CHECKSUM_OP_* values; only XT_CHECKSUM_OP_FILL
 *             is defined in this header.
 */
struct xt_CHECKSUM_info {
	__u8	operation;	/* bitset of operations */
};

#endif /* _XT_CHECKSUM_TARGET_H */

+11

include/linux/netfilter/xt_cpu.h

#ifndef _XT_CPU_H
#define _XT_CPU_H

#include <linux/types.h>

/*
 * Parameters for the xtables "cpu" match.
 *
 * @cpu:    CPU number to compare against (presumably the id of the CPU
 *          handling the packet -- confirm against xt_cpu.c).
 * @invert: NOTE(review): presumably non-zero inverts the match result;
 *          confirm against xt_cpu.c.
 *
 * Both members are fixed-width __u32, so the structure is 8 bytes with
 * no padding.
 */
struct xt_cpu_info {
	__u32	cpu;
	__u32	invert;
};

#endif /* _XT_CPU_H */

+27

include/linux/netfilter/xt_ipvs.h

#ifndef _XT_IPVS_H
#define _XT_IPVS_H

/*
 * Make the header self-contained: __u8/__be16 come from <linux/types.h>
 * and union nf_inet_addr from <linux/netfilter.h>.  Without these the
 * header only compiles when the includer happened to pull them in first.
 */
#include <linux/types.h>
#include <linux/netfilter.h>

/*
 * Option flag bits for the "ipvs" match.
 *
 * NOTE(review): presumably these are the bits stored in
 * xt_ipvs_mtinfo.bitmask and .invert -- confirm against xt_ipvs.c.
 */
enum {
	XT_IPVS_IPVS_PROPERTY =	1 << 0, /* all other options imply this one */
	XT_IPVS_PROTO =		1 << 1,
	XT_IPVS_VADDR =		1 << 2,
	XT_IPVS_VPORT =		1 << 3,
	XT_IPVS_DIR =		1 << 4,
	XT_IPVS_METHOD =	1 << 5,
	XT_IPVS_VPORTCTL =	1 << 6,
	/* All seven flag bits defined above (0x7f). */
	XT_IPVS_MASK =		(1 << 7) - 1,
	/* Every flag except XT_IPVS_IPVS_PROPERTY (0x7e). */
	XT_IPVS_ONCE_MASK =	XT_IPVS_MASK & ~XT_IPVS_IPVS_PROPERTY
};

/*
 * Match parameters for the "ipvs" match.
 *
 * @vaddr, @vmask: address and mask (presumably of the virtual service --
 *                 confirm against xt_ipvs.c).
 * @vport:         port, big-endian per its __be16 type.
 * @l4proto:       layer-4 protocol number (assumed; confirm).
 * @fwd_method:    forwarding method (assumed IPVS forwarding mode; confirm).
 * @vportctl:      control-connection port, big-endian per its __be16 type
 *                 (meaning assumed from the name; confirm).
 * @invert:        NOTE(review): presumably XT_IPVS_* bits whose test is
 *                 negated; confirm.
 * @bitmask:       NOTE(review): presumably XT_IPVS_* bits selecting which
 *                 tests are active; confirm.
 */
struct xt_ipvs_mtinfo {
	union nf_inet_addr	vaddr, vmask;
	__be16			vport;
	__u8			l4proto;
	__u8			fwd_method;
	__be16			vportctl;

	__u8			invert;
	__u8			bitmask;
};

#endif /* _XT_IPVS_H */

+1 -1

include/linux/netfilter/xt_quota.h

··· 11 11 struct xt_quota_info { 12 12 u_int32_t flags; 13 13 u_int32_t pad; 14 + aligned_u64 quota; 14 15 15 16 /* Used internally by the kernel */ 16 - aligned_u64 quota; 17 17 struct xt_quota_priv *master; 18 18 }; 19 19

+12 -2

include/net/ip_vs.h

··· 632 632 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 633 633 const union nf_inet_addr *d_addr, __be16 d_port); 634 634 635 + struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 636 + struct ip_vs_protocol *pp, 637 + const struct ip_vs_iphdr *iph, 638 + unsigned int proto_off, 639 + int inverse); 640 + 635 641 extern struct ip_vs_conn *ip_vs_conn_out_get 636 642 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 637 643 const union nf_inet_addr *d_addr, __be16 d_port); 644 + 645 + struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 646 + struct ip_vs_protocol *pp, 647 + const struct ip_vs_iphdr *iph, 648 + unsigned int proto_off, 649 + int inverse); 638 650 639 651 /* put back the conn without restarting its timer */ 640 652 static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) ··· 748 736 749 737 extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); 750 738 extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); 751 - extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, 752 - char *o_buf, int o_len, char *n_buf, int n_len); 753 739 extern int ip_vs_app_init(void); 754 740 extern void ip_vs_app_cleanup(void); 755 741

+7 -2

include/net/netfilter/nf_conntrack_extend.h

··· 28 28 char data[0]; 29 29 }; 30 30 31 - static inline int nf_ct_ext_exist(const struct nf_conn *ct, u8 id) 31 + static inline bool __nf_ct_ext_exist(const struct nf_ct_ext *ext, u8 id) 32 32 { 33 - return (ct->ext && ct->ext->offset[id]); 33 + return !!ext->offset[id]; 34 + } 35 + 36 + static inline bool nf_ct_ext_exist(const struct nf_conn *ct, u8 id) 37 + { 38 + return (ct->ext && __nf_ct_ext_exist(ct->ext, id)); 34 39 } 35 40 36 41 static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)

+4 -4

include/net/netfilter/nf_nat_protocol.h

··· 27 27 28 28 /* Alter the per-proto part of the tuple (depending on 29 29 maniptype), to give a unique tuple in the given range if 30 - possible; return false if not. Per-protocol part of tuple 31 - is initialized to the incoming packet. */ 32 - bool (*unique_tuple)(struct nf_conntrack_tuple *tuple, 30 + possible. Per-protocol part of tuple is initialized to the 31 + incoming packet. */ 32 + void (*unique_tuple)(struct nf_conntrack_tuple *tuple, 33 33 const struct nf_nat_range *range, 34 34 enum nf_nat_manip_type maniptype, 35 35 const struct nf_conn *ct); ··· 63 63 const union nf_conntrack_man_proto *min, 64 64 const union nf_conntrack_man_proto *max); 65 65 66 - extern bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 66 + extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 67 67 const struct nf_nat_range *range, 68 68 enum nf_nat_manip_type maniptype, 69 69 const struct nf_conn *ct,

+2

include/net/netfilter/nfnetlink_log.h

··· 10 10 const struct nf_loginfo *li_user, 11 11 const char *prefix); 12 12 13 + #define NFULNL_COPY_DISABLED 0xff 14 + 13 15 #endif /* _KER_NFNETLINK_LOG_H */ 14 16

+7 -8

net/ipv4/netfilter/arp_tables.c

··· 283 283 arp = arp_hdr(skb); 284 284 do { 285 285 const struct arpt_entry_target *t; 286 - int hdr_len; 287 286 288 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 289 288 e = arpt_next_entry(e); 290 289 continue; 291 290 } 292 291 293 - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + 294 - (2 * skb->dev->addr_len); 295 - ADD_COUNTER(e->counters, hdr_len, 1); 292 + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); 296 293 297 294 t = arpt_get_target_c(e); 298 295 ··· 710 713 struct arpt_entry *iter; 711 714 unsigned int cpu; 712 715 unsigned int i; 713 - unsigned int curcpu; 716 + unsigned int curcpu = get_cpu(); 714 717 715 718 /* Instead of clearing (by a previous call to memset()) 716 719 * the counters and using adds, we set the counters ··· 720 723 * if new softirq were to run and call ipt_do_table 721 724 */ 722 725 local_bh_disable(); 723 - curcpu = smp_processor_id(); 724 - 725 726 i = 0; 726 727 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 727 728 SET_COUNTER(counters[i], iter->counters.bcnt, 728 729 iter->counters.pcnt); 729 730 ++i; 730 731 } 732 + local_bh_enable(); 733 + /* Processing counters from other cpus, we can let bottom half enabled, 734 + * (preemption is disabled) 735 + */ 731 736 732 737 for_each_possible_cpu(cpu) { 733 738 if (cpu == curcpu) ··· 743 744 } 744 745 xt_info_wrunlock(cpu); 745 746 } 746 - local_bh_enable(); 747 + put_cpu(); 747 748 } 748 749 749 750 static struct xt_counters *alloc_counters(const struct xt_table *table)

+7 -5

net/ipv4/netfilter/ip_tables.c

··· 364 364 goto no_match; 365 365 } 366 366 367 - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); 367 + ADD_COUNTER(e->counters, skb->len, 1); 368 368 369 369 t = ipt_get_target(e); 370 370 IP_NF_ASSERT(t->u.kernel.target); ··· 884 884 struct ipt_entry *iter; 885 885 unsigned int cpu; 886 886 unsigned int i; 887 - unsigned int curcpu; 887 + unsigned int curcpu = get_cpu(); 888 888 889 889 /* Instead of clearing (by a previous call to memset()) 890 890 * the counters and using adds, we set the counters ··· 894 894 * if new softirq were to run and call ipt_do_table 895 895 */ 896 896 local_bh_disable(); 897 - curcpu = smp_processor_id(); 898 - 899 897 i = 0; 900 898 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 901 899 SET_COUNTER(counters[i], iter->counters.bcnt, 902 900 iter->counters.pcnt); 903 901 ++i; 904 902 } 903 + local_bh_enable(); 904 + /* Processing counters from other cpus, we can let bottom half enabled, 905 + * (preemption is disabled) 906 + */ 905 907 906 908 for_each_possible_cpu(cpu) { 907 909 if (cpu == curcpu) ··· 917 915 } 918 916 xt_info_wrunlock(cpu); 919 917 } 920 - local_bh_enable(); 918 + put_cpu(); 921 919 } 922 920 923 921 static struct xt_counters *alloc_counters(const struct xt_table *table)

+5 -5

net/ipv4/netfilter/ipt_REJECT.c

··· 95 95 } 96 96 97 97 tcph->rst = 1; 98 - tcph->check = tcp_v4_check(sizeof(struct tcphdr), 99 - niph->saddr, niph->daddr, 100 - csum_partial(tcph, 101 - sizeof(struct tcphdr), 0)); 98 + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, 99 + niph->daddr, 0); 100 + nskb->ip_summed = CHECKSUM_PARTIAL; 101 + nskb->csum_start = (unsigned char *)tcph - nskb->head; 102 + nskb->csum_offset = offsetof(struct tcphdr, check); 102 103 103 104 addr_type = RTN_UNSPEC; 104 105 if (hook != NF_INET_FORWARD ··· 116 115 goto free_nskb; 117 116 118 117 niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); 119 - nskb->ip_summed = CHECKSUM_NONE; 120 118 121 119 /* "Never happens" */ 122 120 if (nskb->len > dst_mtu(skb_dst(nskb)))

+9 -18

net/ipv4/netfilter/nf_nat_core.c

··· 261 261 rcu_read_lock(); 262 262 proto = __nf_nat_proto_find(orig_tuple->dst.protonum); 263 263 264 - /* Change protocol info to have some randomization */ 265 - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { 266 - proto->unique_tuple(tuple, range, maniptype, ct); 267 - goto out; 268 - } 269 - 270 264 /* Only bother mapping if it's not already in range and unique */ 271 - if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 265 + if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) && 266 + (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || 272 267 proto->in_range(tuple, maniptype, &range->min, &range->max)) && 273 268 !nf_nat_used_tuple(tuple, ct)) 274 269 goto out; ··· 435 440 if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) 436 441 return 0; 437 442 438 - inside = (void *)skb->data + ip_hdrlen(skb); 443 + inside = (void *)skb->data + hdrlen; 439 444 440 445 /* We're actually going to mangle it beyond trivial checksum 441 446 adjustment, so make sure the current checksum is correct. */ ··· 465 470 /* rcu_read_lock()ed by nf_hook_slow */ 466 471 l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); 467 472 468 - if (!nf_ct_get_tuple(skb, 469 - ip_hdrlen(skb) + sizeof(struct icmphdr), 470 - (ip_hdrlen(skb) + 473 + if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), 474 + (hdrlen + 471 475 sizeof(struct icmphdr) + inside->ip.ihl * 4), 472 - (u_int16_t)AF_INET, 473 - inside->ip.protocol, 476 + (u_int16_t)AF_INET, inside->ip.protocol, 474 477 &inner, l3proto, l4proto)) 475 478 return 0; 476 479 ··· 477 484 pass all hooks (locally-generated ICMP). Consider incoming 478 485 packet: PREROUTING (DST manip), routing produces ICMP, goes 479 486 through POSTROUTING (which must correct the DST manip). 
*/ 480 - if (!manip_pkt(inside->ip.protocol, skb, 481 - ip_hdrlen(skb) + sizeof(inside->icmp), 482 - &ct->tuplehash[!dir].tuple, 483 - !manip)) 487 + if (!manip_pkt(inside->ip.protocol, skb, hdrlen + sizeof(inside->icmp), 488 + &ct->tuplehash[!dir].tuple, !manip)) 484 489 return 0; 485 490 486 491 if (skb->ip_summed != CHECKSUM_PARTIAL) { 487 492 /* Reloading "inside" here since manip_pkt inner. */ 488 - inside = (void *)skb->data + ip_hdrlen(skb); 493 + inside = (void *)skb->data + hdrlen; 489 494 inside->icmp.checksum = 0; 490 495 inside->icmp.checksum = 491 496 csum_fold(skb_checksum(skb, hdrlen,

+6 -6

net/ipv4/netfilter/nf_nat_proto_common.c

··· 34 34 } 35 35 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); 36 36 37 - bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 37 + void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, 38 38 const struct nf_nat_range *range, 39 39 enum nf_nat_manip_type maniptype, 40 40 const struct nf_conn *ct, ··· 53 53 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { 54 54 /* If it's dst rewrite, can't change port */ 55 55 if (maniptype == IP_NAT_MANIP_DST) 56 - return false; 56 + return; 57 57 58 58 if (ntohs(*portptr) < 1024) { 59 59 /* Loose convention: >> 512 is credential passing */ ··· 81 81 else 82 82 off = *rover; 83 83 84 - for (i = 0; i < range_size; i++, off++) { 84 + for (i = 0; ; ++off) { 85 85 *portptr = htons(min + off % range_size); 86 - if (nf_nat_used_tuple(tuple, ct)) 86 + if (++i != range_size && nf_nat_used_tuple(tuple, ct)) 87 87 continue; 88 88 if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) 89 89 *rover = off; 90 - return true; 90 + return; 91 91 } 92 - return false; 92 + return; 93 93 } 94 94 EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); 95 95

+3 -3

net/ipv4/netfilter/nf_nat_proto_dccp.c

··· 22 22 23 23 static u_int16_t dccp_port_rover; 24 24 25 - static bool 25 + static void 26 26 dccp_unique_tuple(struct nf_conntrack_tuple *tuple, 27 27 const struct nf_nat_range *range, 28 28 enum nf_nat_manip_type maniptype, 29 29 const struct nf_conn *ct) 30 30 { 31 - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 32 - &dccp_port_rover); 31 + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 32 + &dccp_port_rover); 33 33 } 34 34 35 35 static bool

+6 -6

net/ipv4/netfilter/nf_nat_proto_gre.c

··· 37 37 MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); 38 38 39 39 /* generate unique tuple ... */ 40 - static bool 40 + static void 41 41 gre_unique_tuple(struct nf_conntrack_tuple *tuple, 42 42 const struct nf_nat_range *range, 43 43 enum nf_nat_manip_type maniptype, ··· 50 50 /* If there is no master conntrack we are not PPTP, 51 51 do not change tuples */ 52 52 if (!ct->master) 53 - return false; 53 + return; 54 54 55 55 if (maniptype == IP_NAT_MANIP_SRC) 56 56 keyptr = &tuple->src.u.gre.key; ··· 68 68 69 69 pr_debug("min = %u, range_size = %u\n", min, range_size); 70 70 71 - for (i = 0; i < range_size; i++, key++) { 71 + for (i = 0; ; ++key) { 72 72 *keyptr = htons(min + key % range_size); 73 - if (!nf_nat_used_tuple(tuple, ct)) 74 - return true; 73 + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) 74 + return; 75 75 } 76 76 77 77 pr_debug("%p: no NAT mapping\n", ct); 78 - return false; 78 + return; 79 79 } 80 80 81 81 /* manipulate a GRE packet according to maniptype */

+5 -5

net/ipv4/netfilter/nf_nat_proto_icmp.c

··· 27 27 ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); 28 28 } 29 29 30 - static bool 30 + static void 31 31 icmp_unique_tuple(struct nf_conntrack_tuple *tuple, 32 32 const struct nf_nat_range *range, 33 33 enum nf_nat_manip_type maniptype, ··· 42 42 if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) 43 43 range_size = 0xFFFF; 44 44 45 - for (i = 0; i < range_size; i++, id++) { 45 + for (i = 0; ; ++id) { 46 46 tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + 47 47 (id % range_size)); 48 - if (!nf_nat_used_tuple(tuple, ct)) 49 - return true; 48 + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) 49 + return; 50 50 } 51 - return false; 51 + return; 52 52 } 53 53 54 54 static bool

+3 -3

net/ipv4/netfilter/nf_nat_proto_sctp.c

··· 16 16 17 17 static u_int16_t nf_sctp_port_rover; 18 18 19 - static bool 19 + static void 20 20 sctp_unique_tuple(struct nf_conntrack_tuple *tuple, 21 21 const struct nf_nat_range *range, 22 22 enum nf_nat_manip_type maniptype, 23 23 const struct nf_conn *ct) 24 24 { 25 - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 26 - &nf_sctp_port_rover); 25 + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 26 + &nf_sctp_port_rover); 27 27 } 28 28 29 29 static bool

+2 -3

net/ipv4/netfilter/nf_nat_proto_tcp.c

··· 20 20 21 21 static u_int16_t tcp_port_rover; 22 22 23 - static bool 23 + static void 24 24 tcp_unique_tuple(struct nf_conntrack_tuple *tuple, 25 25 const struct nf_nat_range *range, 26 26 enum nf_nat_manip_type maniptype, 27 27 const struct nf_conn *ct) 28 28 { 29 - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 30 - &tcp_port_rover); 29 + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &tcp_port_rover); 31 30 } 32 31 33 32 static bool

+2 -3

net/ipv4/netfilter/nf_nat_proto_udp.c

··· 19 19 20 20 static u_int16_t udp_port_rover; 21 21 22 - static bool 22 + static void 23 23 udp_unique_tuple(struct nf_conntrack_tuple *tuple, 24 24 const struct nf_nat_range *range, 25 25 enum nf_nat_manip_type maniptype, 26 26 const struct nf_conn *ct) 27 27 { 28 - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 29 - &udp_port_rover); 28 + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, &udp_port_rover); 30 29 } 31 30 32 31 static bool

+3 -3

net/ipv4/netfilter/nf_nat_proto_udplite.c

··· 18 18 19 19 static u_int16_t udplite_port_rover; 20 20 21 - static bool 21 + static void 22 22 udplite_unique_tuple(struct nf_conntrack_tuple *tuple, 23 23 const struct nf_nat_range *range, 24 24 enum nf_nat_manip_type maniptype, 25 25 const struct nf_conn *ct) 26 26 { 27 - return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 28 - &udplite_port_rover); 27 + nf_nat_proto_unique_tuple(tuple, range, maniptype, ct, 28 + &udplite_port_rover); 29 29 } 30 30 31 31 static bool

+2 -2

net/ipv4/netfilter/nf_nat_proto_unknown.c

··· 26 26 return true; 27 27 } 28 28 29 - static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 29 + static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, 30 30 const struct nf_nat_range *range, 31 31 enum nf_nat_manip_type maniptype, 32 32 const struct nf_conn *ct) 33 33 { 34 34 /* Sorry: we can't help you; if it's not unique, we can't frob 35 35 anything. */ 36 - return false; 36 + return; 37 37 } 38 38 39 39 static bool

+7 -7

net/ipv6/netfilter/ip6_tables.c

··· 387 387 goto no_match; 388 388 } 389 389 390 - ADD_COUNTER(e->counters, 391 - ntohs(ipv6_hdr(skb)->payload_len) + 392 - sizeof(struct ipv6hdr), 1); 390 + ADD_COUNTER(e->counters, skb->len, 1); 393 391 394 392 t = ip6t_get_target_c(e); 395 393 IP_NF_ASSERT(t->u.kernel.target); ··· 897 899 struct ip6t_entry *iter; 898 900 unsigned int cpu; 899 901 unsigned int i; 900 - unsigned int curcpu; 902 + unsigned int curcpu = get_cpu(); 901 903 902 904 /* Instead of clearing (by a previous call to memset()) 903 905 * the counters and using adds, we set the counters ··· 907 909 * if new softirq were to run and call ipt_do_table 908 910 */ 909 911 local_bh_disable(); 910 - curcpu = smp_processor_id(); 911 - 912 912 i = 0; 913 913 xt_entry_foreach(iter, t->entries[curcpu], t->size) { 914 914 SET_COUNTER(counters[i], iter->counters.bcnt, 915 915 iter->counters.pcnt); 916 916 ++i; 917 917 } 918 + local_bh_enable(); 919 + /* Processing counters from other cpus, we can let bottom half enabled, 920 + * (preemption is disabled) 921 + */ 918 922 919 923 for_each_possible_cpu(cpu) { 920 924 if (cpu == curcpu) ··· 930 930 } 931 931 xt_info_wrunlock(cpu); 932 932 } 933 - local_bh_enable(); 933 + put_cpu(); 934 934 } 935 935 936 936 static struct xt_counters *alloc_counters(const struct xt_table *table)

+9

net/ipv6/netfilter/nf_conntrack_reasm.c

··· 269 269 * in the chain of fragments so far. We must know where to put 270 270 * this fragment, right? 271 271 */ 272 + prev = fq->q.fragments_tail; 273 + if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) { 274 + next = NULL; 275 + goto found; 276 + } 272 277 prev = NULL; 273 278 for (next = fq->q.fragments; next != NULL; next = next->next) { 274 279 if (NFCT_FRAG6_CB(next)->offset >= offset) ··· 281 276 prev = next; 282 277 } 283 278 279 + found: 284 280 /* We found where to put this one. Check for overlap with 285 281 * preceding fragment, and, if needed, align things so that 286 282 * any overlaps are eliminated. ··· 347 341 348 342 /* Insert this fragment in the chain of fragments. */ 349 343 skb->next = next; 344 + if (!next) 345 + fq->q.fragments_tail = skb; 350 346 if (prev) 351 347 prev->next = skb; 352 348 else ··· 472 464 head->csum); 473 465 474 466 fq->q.fragments = NULL; 467 + fq->q.fragments_tail = NULL; 475 468 476 469 /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ 477 470 fp = skb_shinfo(head)->frag_list;

+35

net/netfilter/Kconfig

··· 326 326 327 327 comment "Xtables targets" 328 328 329 + config NETFILTER_XT_TARGET_CHECKSUM 330 + tristate "CHECKSUM target support" 331 + depends on IP_NF_MANGLE || IP6_NF_MANGLE 332 + depends on NETFILTER_ADVANCED 333 + ---help--- 334 + This option adds a `CHECKSUM' target, which can be used in the iptables mangle 335 + table. 336 + 337 + You can use this target to compute and fill in the checksum in 338 + a packet that lacks a checksum. This is particularly useful, 339 + if you need to work around old applications such as dhcp clients, 340 + that do not work well with checksum offloads, but don't want to disable 341 + checksum offload in your device. 342 + 343 + To compile it as a module, choose M here. If unsure, say N. 344 + 329 345 config NETFILTER_XT_TARGET_CLASSIFY 330 346 tristate '"CLASSIFY" target support' 331 347 depends on NETFILTER_ADVANCED ··· 663 647 664 648 To compile it as a module, choose M here. If unsure, say N. 665 649 650 + config NETFILTER_XT_MATCH_CPU 651 + tristate '"cpu" match support' 652 + depends on NETFILTER_ADVANCED 653 + help 654 + CPU matching allows you to match packets based on the CPU 655 + currently handling the packet. 656 + 657 + To compile it as a module, choose M here. If unsure, say N. 658 + 666 659 config NETFILTER_XT_MATCH_DCCP 667 660 tristate '"dccp" protocol match support' 668 661 depends on NETFILTER_ADVANCED ··· 750 725 with an optional mask.) 751 726 752 727 If unsure, say M. 728 + 729 + config NETFILTER_XT_MATCH_IPVS 730 + tristate '"ipvs" match support' 731 + depends on IP_VS 732 + depends on NETFILTER_ADVANCED 733 + depends on NF_CONNTRACK 734 + help 735 + This option allows you to match against IPVS properties of a packet. 736 + 737 + If unsure, say N. 753 738 754 739 config NETFILTER_XT_MATCH_LENGTH 755 740 tristate '"length" match support'

+3

net/netfilter/Makefile

··· 45 45 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 46 46 47 47 # targets 48 + obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o 48 49 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 49 50 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 50 51 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o ··· 70 69 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 71 70 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o 72 71 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o 72 + obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o 73 73 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o 74 74 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o 75 75 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o ··· 78 76 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o 79 77 obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o 80 78 obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o 79 + obj-$(CONFIG_NETFILTER_XT_MATCH_IPVS) += xt_ipvs.o 81 80 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o 82 81 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o 83 82 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o

+4 -7

net/netfilter/ipvs/Kconfig

··· 3 3 # 4 4 menuconfig IP_VS 5 5 tristate "IP virtual server support" 6 - depends on NET && INET && NETFILTER 6 + depends on NET && INET && NETFILTER && NF_CONNTRACK 7 7 ---help--- 8 8 IP Virtual Server support will let you build a high-performance 9 9 virtual server based on cluster of two or more real servers. This ··· 26 26 27 27 config IP_VS_IPV6 28 28 bool "IPv6 support for IPVS" 29 - depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) 29 + depends on IPV6 = y || IP_VS = IPV6 30 30 ---help--- 31 31 Add IPv6 support to IPVS. This is incomplete and might be dangerous. 32 32 ··· 87 87 protocol. Say Y if unsure. 88 88 89 89 config IP_VS_PROTO_AH_ESP 90 - bool 91 - depends on UNDEFINED 90 + def_bool IP_VS_PROTO_ESP || IP_VS_PROTO_AH 92 91 93 92 config IP_VS_PROTO_ESP 94 93 bool "ESP load balancing support" 95 - select IP_VS_PROTO_AH_ESP 96 94 ---help--- 97 95 This option enables support for load balancing ESP (Encapsulation 98 96 Security Payload) transport protocol. Say Y if unsure. 99 97 100 98 config IP_VS_PROTO_AH 101 99 bool "AH load balancing support" 102 - select IP_VS_PROTO_AH_ESP 103 100 ---help--- 104 101 This option enables support for load balancing AH (Authentication 105 102 Header) transport protocol. Say Y if unsure. ··· 235 238 236 239 config IP_VS_FTP 237 240 tristate "FTP protocol helper" 238 - depends on IP_VS_PROTO_TCP 241 + depends on IP_VS_PROTO_TCP && NF_NAT 239 242 ---help--- 240 243 FTP is a protocol that transfers IP address and/or port number in 241 244 the payload. In the virtual server via Network Address Translation,

-43

net/netfilter/ipvs/ip_vs_app.c

··· 569 569 }; 570 570 #endif 571 571 572 - 573 - /* 574 - * Replace a segment of data with a new segment 575 - */ 576 - int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, 577 - char *o_buf, int o_len, char *n_buf, int n_len) 578 - { 579 - int diff; 580 - int o_offset; 581 - int o_left; 582 - 583 - EnterFunction(9); 584 - 585 - diff = n_len - o_len; 586 - o_offset = o_buf - (char *)skb->data; 587 - /* The length of left data after o_buf+o_len in the skb data */ 588 - o_left = skb->len - (o_offset + o_len); 589 - 590 - if (diff <= 0) { 591 - memmove(o_buf + n_len, o_buf + o_len, o_left); 592 - memcpy(o_buf, n_buf, n_len); 593 - skb_trim(skb, skb->len + diff); 594 - } else if (diff <= skb_tailroom(skb)) { 595 - skb_put(skb, diff); 596 - memmove(o_buf + n_len, o_buf + o_len, o_left); 597 - memcpy(o_buf, n_buf, n_len); 598 - } else { 599 - if (pskb_expand_head(skb, skb_headroom(skb), diff, pri)) 600 - return -ENOMEM; 601 - skb_put(skb, diff); 602 - memmove(skb->data + o_offset + n_len, 603 - skb->data + o_offset + o_len, o_left); 604 - skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len); 605 - } 606 - 607 - /* must update the iph total length here */ 608 - ip_hdr(skb)->tot_len = htons(skb->len); 609 - 610 - LeaveFunction(9); 611 - return 0; 612 - } 613 - 614 - 615 572 int __init ip_vs_app_init(void) 616 573 { 617 574 /* we will replace it with proc_net_ipvs_create() soon */

+45

net/netfilter/ipvs/ip_vs_conn.c

··· 271 271 return cp; 272 272 } 273 273 274 + struct ip_vs_conn * 275 + ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 276 + struct ip_vs_protocol *pp, 277 + const struct ip_vs_iphdr *iph, 278 + unsigned int proto_off, int inverse) 279 + { 280 + __be16 _ports[2], *pptr; 281 + 282 + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 283 + if (pptr == NULL) 284 + return NULL; 285 + 286 + if (likely(!inverse)) 287 + return ip_vs_conn_in_get(af, iph->protocol, 288 + &iph->saddr, pptr[0], 289 + &iph->daddr, pptr[1]); 290 + else 291 + return ip_vs_conn_in_get(af, iph->protocol, 292 + &iph->daddr, pptr[1], 293 + &iph->saddr, pptr[0]); 294 + } 295 + EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); 296 + 274 297 /* Get reference to connection template */ 275 298 struct ip_vs_conn *ip_vs_ct_in_get 276 299 (int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, ··· 379 356 return ret; 380 357 } 381 358 359 + struct ip_vs_conn * 360 + ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 361 + struct ip_vs_protocol *pp, 362 + const struct ip_vs_iphdr *iph, 363 + unsigned int proto_off, int inverse) 364 + { 365 + __be16 _ports[2], *pptr; 366 + 367 + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 368 + if (pptr == NULL) 369 + return NULL; 370 + 371 + if (likely(!inverse)) 372 + return ip_vs_conn_out_get(af, iph->protocol, 373 + &iph->saddr, pptr[0], 374 + &iph->daddr, pptr[1]); 375 + else 376 + return ip_vs_conn_out_get(af, iph->protocol, 377 + &iph->daddr, pptr[1], 378 + &iph->saddr, pptr[0]); 379 + } 380 + EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 382 381 383 382 /* 384 383 * Put back the conn and restart its timer with its timeout

-37

net/netfilter/ipvs/ip_vs_core.c

··· 54 54 55 55 EXPORT_SYMBOL(register_ip_vs_scheduler); 56 56 EXPORT_SYMBOL(unregister_ip_vs_scheduler); 57 - EXPORT_SYMBOL(ip_vs_skb_replace); 58 57 EXPORT_SYMBOL(ip_vs_proto_name); 59 58 EXPORT_SYMBOL(ip_vs_conn_new); 60 59 EXPORT_SYMBOL(ip_vs_conn_in_get); ··· 533 534 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 534 535 535 536 return NF_DROP; 536 - } 537 - 538 - 539 - /* 540 - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING 541 - * chain, and is used for VS/NAT. 542 - * It detects packets for VS/NAT connections and sends the packets 543 - * immediately. This can avoid that iptable_nat mangles the packets 544 - * for VS/NAT. 545 - */ 546 - static unsigned int ip_vs_post_routing(unsigned int hooknum, 547 - struct sk_buff *skb, 548 - const struct net_device *in, 549 - const struct net_device *out, 550 - int (*okfn)(struct sk_buff *)) 551 - { 552 - if (!skb->ipvs_property) 553 - return NF_ACCEPT; 554 - /* The packet was sent from IPVS, exit this chain */ 555 - return NF_STOP; 556 537 } 557 538 558 539 __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) ··· 1478 1499 .hooknum = NF_INET_FORWARD, 1479 1500 .priority = 99, 1480 1501 }, 1481 - /* Before the netfilter connection tracking, exit from POST_ROUTING */ 1482 - { 1483 - .hook = ip_vs_post_routing, 1484 - .owner = THIS_MODULE, 1485 - .pf = PF_INET, 1486 - .hooknum = NF_INET_POST_ROUTING, 1487 - .priority = NF_IP_PRI_NAT_SRC-1, 1488 - }, 1489 1502 #ifdef CONFIG_IP_VS_IPV6 1490 1503 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1491 1504 * or VS/NAT(change destination), so that filtering rules can be ··· 1505 1534 .pf = PF_INET6, 1506 1535 .hooknum = NF_INET_FORWARD, 1507 1536 .priority = 99, 1508 - }, 1509 - /* Before the netfilter connection tracking, exit from POST_ROUTING */ 1510 - { 1511 - .hook = ip_vs_post_routing, 1512 - .owner = THIS_MODULE, 1513 - .pf = PF_INET6, 1514 - .hooknum = NF_INET_POST_ROUTING, 1515 - .priority = 
NF_IP6_PRI_NAT_SRC-1, 1516 1537 }, 1517 1538 #endif 1518 1539 };

+165 -13

net/netfilter/ipvs/ip_vs_ftp.c

··· 20 20 * 21 21 * Author: Wouter Gadeyne 22 22 * 23 + * 24 + * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from 25 + * http://www.ssi.bg/~ja/nfct/: 26 + * 27 + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS 28 + * 29 + * Portions Copyright (C) 2001-2002 30 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. 31 + * 32 + * Portions Copyright (C) 2003-2008 33 + * Julian Anastasov 23 34 */ 24 35 25 36 #define KMSG_COMPONENT "IPVS" ··· 43 32 #include <linux/in.h> 44 33 #include <linux/ip.h> 45 34 #include <linux/netfilter.h> 35 + #include <net/netfilter/nf_conntrack.h> 36 + #include <net/netfilter/nf_conntrack_expect.h> 37 + #include <net/netfilter/nf_nat_helper.h> 46 38 #include <linux/gfp.h> 47 39 #include <net/protocol.h> 48 40 #include <net/tcp.h> ··· 57 43 #define SERVER_STRING "227 Entering Passive Mode (" 58 44 #define CLIENT_STRING "PORT " 59 45 46 + #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" 47 + #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ 48 + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ 49 + (T)->dst.protonum 50 + 51 + #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" 52 + #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ 53 + &((C)->vaddr.ip), ntohs((C)->vport), \ 54 + &((C)->daddr.ip), ntohs((C)->dport), \ 55 + (C)->protocol, (C)->state 60 56 61 57 /* 62 58 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper ··· 147 123 return 1; 148 124 } 149 125 126 + /* 127 + * Called from init_conntrack() as expectfn handler. 128 + */ 129 + static void 130 + ip_vs_expect_callback(struct nf_conn *ct, 131 + struct nf_conntrack_expect *exp) 132 + { 133 + struct nf_conntrack_tuple *orig, new_reply; 134 + struct ip_vs_conn *cp; 135 + 136 + if (exp->tuple.src.l3num != PF_INET) 137 + return; 138 + 139 + /* 140 + * We assume that no NF locks are held before this callback. 
141 + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their 142 + * expectations even if they use wildcard values, now we provide the 143 + * actual values from the newly created original conntrack direction. 144 + * The conntrack is confirmed when packet reaches IPVS hooks. 145 + */ 146 + 147 + /* RS->CLIENT */ 148 + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 149 + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, 150 + &orig->src.u3, orig->src.u.tcp.port, 151 + &orig->dst.u3, orig->dst.u.tcp.port); 152 + if (cp) { 153 + /* Change reply CLIENT->RS to CLIENT->VS */ 154 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 155 + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 156 + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", 157 + __func__, ct, ct->status, 158 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 159 + ARG_CONN(cp)); 160 + new_reply.dst.u3 = cp->vaddr; 161 + new_reply.dst.u.tcp.port = cp->vport; 162 + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE 163 + ", inout cp=" FMT_CONN "\n", 164 + __func__, ct, 165 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 166 + ARG_CONN(cp)); 167 + goto alter; 168 + } 169 + 170 + /* CLIENT->VS */ 171 + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, 172 + &orig->src.u3, orig->src.u.tcp.port, 173 + &orig->dst.u3, orig->dst.u.tcp.port); 174 + if (cp) { 175 + /* Change reply VS->CLIENT to RS->CLIENT */ 176 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 177 + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 178 + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", 179 + __func__, ct, ct->status, 180 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 181 + ARG_CONN(cp)); 182 + new_reply.src.u3 = cp->daddr; 183 + new_reply.src.u.tcp.port = cp->dport; 184 + IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " 185 + FMT_TUPLE ", outin cp=" FMT_CONN "\n", 186 + __func__, ct, 187 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 188 + ARG_CONN(cp)); 189 + goto alter; 
190 + } 191 + 192 + IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE 193 + " - unknown expect\n", 194 + __func__, ct, ct->status, ARG_TUPLE(orig)); 195 + return; 196 + 197 + alter: 198 + /* Never alter conntrack for non-NAT conns */ 199 + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) 200 + nf_conntrack_alter_reply(ct, &new_reply); 201 + ip_vs_conn_put(cp); 202 + return; 203 + } 204 + 205 + /* 206 + * Create NF conntrack expectation with wildcard (optional) source port. 207 + * Then the default callback function will alter the reply and will confirm 208 + * the conntrack entry when the first packet comes. 209 + */ 210 + static void 211 + ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, 212 + struct ip_vs_conn *cp, u_int8_t proto, 213 + const __be16 *port, int from_rs) 214 + { 215 + struct nf_conntrack_expect *exp; 216 + 217 + BUG_ON(!ct || ct == &nf_conntrack_untracked); 218 + 219 + exp = nf_ct_expect_alloc(ct); 220 + if (!exp) 221 + return; 222 + 223 + if (from_rs) 224 + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, 225 + nf_ct_l3num(ct), &cp->daddr, &cp->caddr, 226 + proto, port, &cp->cport); 227 + else 228 + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, 229 + nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, 230 + proto, port, &cp->vport); 231 + 232 + exp->expectfn = ip_vs_expect_callback; 233 + 234 + IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", 235 + __func__, ct, ARG_TUPLE(&exp->tuple)); 236 + nf_ct_expect_related(exp); 237 + nf_ct_expect_put(exp); 238 + } 150 239 151 240 /* 152 241 * Look at outgoing ftp packets to catch the response to a PASV command ··· 286 149 struct ip_vs_conn *n_cp; 287 150 char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ 288 151 unsigned buf_len; 289 - int ret; 152 + int ret = 0; 153 + enum ip_conntrack_info ctinfo; 154 + struct nf_conn *ct; 290 155 291 156 #ifdef CONFIG_IP_VS_IPV6 292 157 /* This application helper doesn't work with IPv6 yet, ··· 358 219 359 220 buf_len = strlen(buf); 360 221 361 - 
/* 362 - * Calculate required delta-offset to keep TCP happy 363 - */ 364 - *diff = buf_len - (end-start); 365 - 366 - if (*diff == 0) { 367 - /* simply replace it with new passive address */ 368 - memcpy(start, buf, buf_len); 369 - ret = 1; 370 - } else { 371 - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, 372 - end-start, buf, buf_len); 222 + ct = nf_ct_get(skb, &ctinfo); 223 + if (ct && !nf_ct_is_untracked(ct)) { 224 + /* If mangling fails this function will return 0 225 + * which will cause the packet to be dropped. 226 + * Mangling can only fail under memory pressure, 227 + * hopefully it will succeed on the retransmitted 228 + * packet. 229 + */ 230 + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 231 + start-data, end-start, 232 + buf, buf_len); 233 + if (ret) 234 + ip_vs_expect_related(skb, ct, n_cp, 235 + IPPROTO_TCP, NULL, 0); 373 236 } 237 + 238 + /* 239 + * Not setting 'diff' is intentional, otherwise the sequence 240 + * would be adjusted twice. 241 + */ 374 242 375 243 cp->app_data = NULL; 376 244 ip_vs_tcp_conn_listen(n_cp); ··· 409 263 union nf_inet_addr to; 410 264 __be16 port; 411 265 struct ip_vs_conn *n_cp; 266 + struct nf_conn *ct; 412 267 413 268 #ifdef CONFIG_IP_VS_IPV6 414 269 /* This application helper doesn't work with IPv6 yet, ··· 495 348 /* add its controller */ 496 349 ip_vs_control_add(n_cp, cp); 497 350 } 351 + 352 + ct = (struct nf_conn *)skb->nfct; 353 + if (ct && ct != &nf_conntrack_untracked) 354 + ip_vs_expect_related(skb, ct, n_cp, 355 + IPPROTO_TCP, &n_cp->dport, 1); 498 356 499 357 /* 500 358 * Move tunnel to listen state

+1

net/netfilter/ipvs/ip_vs_proto.c

··· 98 98 99 99 return NULL; 100 100 } 101 + EXPORT_SYMBOL(ip_vs_proto_get); 101 102 102 103 103 104 /*

+3 -52

net/netfilter/ipvs/ip_vs_proto_sctp.c

··· 8 8 #include <net/sctp/checksum.h> 9 9 #include <net/ip_vs.h> 10 10 11 - 12 - static struct ip_vs_conn * 13 - sctp_conn_in_get(int af, 14 - const struct sk_buff *skb, 15 - struct ip_vs_protocol *pp, 16 - const struct ip_vs_iphdr *iph, 17 - unsigned int proto_off, 18 - int inverse) 19 - { 20 - __be16 _ports[2], *pptr; 21 - 22 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 23 - if (pptr == NULL) 24 - return NULL; 25 - 26 - if (likely(!inverse)) 27 - return ip_vs_conn_in_get(af, iph->protocol, 28 - &iph->saddr, pptr[0], 29 - &iph->daddr, pptr[1]); 30 - else 31 - return ip_vs_conn_in_get(af, iph->protocol, 32 - &iph->daddr, pptr[1], 33 - &iph->saddr, pptr[0]); 34 - } 35 - 36 - static struct ip_vs_conn * 37 - sctp_conn_out_get(int af, 38 - const struct sk_buff *skb, 39 - struct ip_vs_protocol *pp, 40 - const struct ip_vs_iphdr *iph, 41 - unsigned int proto_off, 42 - int inverse) 43 - { 44 - __be16 _ports[2], *pptr; 45 - 46 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 47 - if (pptr == NULL) 48 - return NULL; 49 - 50 - if (likely(!inverse)) 51 - return ip_vs_conn_out_get(af, iph->protocol, 52 - &iph->saddr, pptr[0], 53 - &iph->daddr, pptr[1]); 54 - else 55 - return ip_vs_conn_out_get(af, iph->protocol, 56 - &iph->daddr, pptr[1], 57 - &iph->saddr, pptr[0]); 58 - } 59 - 60 11 static int 61 12 sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 62 13 int *verdict, struct ip_vs_conn **cpp) ··· 124 173 return 0; 125 174 126 175 /* Call application helper if needed */ 127 - if (!ip_vs_app_pkt_out(cp, skb)) 176 + if (!ip_vs_app_pkt_in(cp, skb)) 128 177 return 0; 129 178 } 130 179 ··· 1120 1169 .register_app = sctp_register_app, 1121 1170 .unregister_app = sctp_unregister_app, 1122 1171 .conn_schedule = sctp_conn_schedule, 1123 - .conn_in_get = sctp_conn_in_get, 1124 - .conn_out_get = sctp_conn_out_get, 1172 + .conn_in_get = ip_vs_conn_in_get_proto, 1173 + .conn_out_get = ip_vs_conn_out_get_proto, 1125 1174 
.snat_handler = sctp_snat_handler, 1126 1175 .dnat_handler = sctp_dnat_handler, 1127 1176 .csum_check = sctp_csum_check,

+2 -48

net/netfilter/ipvs/ip_vs_proto_tcp.c

··· 27 27 28 28 #include <net/ip_vs.h> 29 29 30 - 31 - static struct ip_vs_conn * 32 - tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 33 - const struct ip_vs_iphdr *iph, unsigned int proto_off, 34 - int inverse) 35 - { 36 - __be16 _ports[2], *pptr; 37 - 38 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 39 - if (pptr == NULL) 40 - return NULL; 41 - 42 - if (likely(!inverse)) { 43 - return ip_vs_conn_in_get(af, iph->protocol, 44 - &iph->saddr, pptr[0], 45 - &iph->daddr, pptr[1]); 46 - } else { 47 - return ip_vs_conn_in_get(af, iph->protocol, 48 - &iph->daddr, pptr[1], 49 - &iph->saddr, pptr[0]); 50 - } 51 - } 52 - 53 - static struct ip_vs_conn * 54 - tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 55 - const struct ip_vs_iphdr *iph, unsigned int proto_off, 56 - int inverse) 57 - { 58 - __be16 _ports[2], *pptr; 59 - 60 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 61 - if (pptr == NULL) 62 - return NULL; 63 - 64 - if (likely(!inverse)) { 65 - return ip_vs_conn_out_get(af, iph->protocol, 66 - &iph->saddr, pptr[0], 67 - &iph->daddr, pptr[1]); 68 - } else { 69 - return ip_vs_conn_out_get(af, iph->protocol, 70 - &iph->daddr, pptr[1], 71 - &iph->saddr, pptr[0]); 72 - } 73 - } 74 - 75 - 76 30 static int 77 31 tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 78 32 int *verdict, struct ip_vs_conn **cpp) ··· 675 721 .register_app = tcp_register_app, 676 722 .unregister_app = tcp_unregister_app, 677 723 .conn_schedule = tcp_conn_schedule, 678 - .conn_in_get = tcp_conn_in_get, 679 - .conn_out_get = tcp_conn_out_get, 724 + .conn_in_get = ip_vs_conn_in_get_proto, 725 + .conn_out_get = ip_vs_conn_out_get_proto, 680 726 .snat_handler = tcp_snat_handler, 681 727 .dnat_handler = tcp_dnat_handler, 682 728 .csum_check = tcp_csum_check,

+2 -54

net/netfilter/ipvs/ip_vs_proto_udp.c

··· 27 27 #include <net/ip.h> 28 28 #include <net/ip6_checksum.h> 29 29 30 - static struct ip_vs_conn * 31 - udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 32 - const struct ip_vs_iphdr *iph, unsigned int proto_off, 33 - int inverse) 34 - { 35 - struct ip_vs_conn *cp; 36 - __be16 _ports[2], *pptr; 37 - 38 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 39 - if (pptr == NULL) 40 - return NULL; 41 - 42 - if (likely(!inverse)) { 43 - cp = ip_vs_conn_in_get(af, iph->protocol, 44 - &iph->saddr, pptr[0], 45 - &iph->daddr, pptr[1]); 46 - } else { 47 - cp = ip_vs_conn_in_get(af, iph->protocol, 48 - &iph->daddr, pptr[1], 49 - &iph->saddr, pptr[0]); 50 - } 51 - 52 - return cp; 53 - } 54 - 55 - 56 - static struct ip_vs_conn * 57 - udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 58 - const struct ip_vs_iphdr *iph, unsigned int proto_off, 59 - int inverse) 60 - { 61 - struct ip_vs_conn *cp; 62 - __be16 _ports[2], *pptr; 63 - 64 - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 65 - if (pptr == NULL) 66 - return NULL; 67 - 68 - if (likely(!inverse)) { 69 - cp = ip_vs_conn_out_get(af, iph->protocol, 70 - &iph->saddr, pptr[0], 71 - &iph->daddr, pptr[1]); 72 - } else { 73 - cp = ip_vs_conn_out_get(af, iph->protocol, 74 - &iph->daddr, pptr[1], 75 - &iph->saddr, pptr[0]); 76 - } 77 - 78 - return cp; 79 - } 80 - 81 - 82 30 static int 83 31 udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 84 32 int *verdict, struct ip_vs_conn **cpp) ··· 468 520 .init = udp_init, 469 521 .exit = udp_exit, 470 522 .conn_schedule = udp_conn_schedule, 471 - .conn_in_get = udp_conn_in_get, 472 - .conn_out_get = udp_conn_out_get, 523 + .conn_in_get = ip_vs_conn_in_get_proto, 524 + .conn_out_get = ip_vs_conn_out_get_proto, 473 525 .snat_handler = udp_snat_handler, 474 526 .dnat_handler = udp_dnat_handler, 475 527 .csum_check = udp_csum_check,

+29

net/netfilter/ipvs/ip_vs_xmit.c

··· 28 28 #include <net/ip6_route.h> 29 29 #include <linux/icmpv6.h> 30 30 #include <linux/netfilter.h> 31 + #include <net/netfilter/nf_conntrack.h> 31 32 #include <linux/netfilter_ipv4.h> 32 33 33 34 #include <net/ip_vs.h> ··· 349 348 } 350 349 #endif 351 350 351 + static void 352 + ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) 353 + { 354 + struct nf_conn *ct = (struct nf_conn *)skb->nfct; 355 + struct nf_conntrack_tuple new_tuple; 356 + 357 + if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) 358 + return; 359 + 360 + /* 361 + * The connection is not yet in the hashtable, so we update it. 362 + * CIP->VIP will remain the same, so leave the tuple in 363 + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the 364 + * real-server we will see RIP->DIP. 365 + */ 366 + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 367 + new_tuple.src.u3 = cp->daddr; 368 + /* 369 + * This will also take care of UDP and other protocols. 370 + */ 371 + new_tuple.src.u.tcp.port = cp->dport; 372 + nf_conntrack_alter_reply(ct, &new_tuple); 373 + } 374 + 352 375 /* 353 376 * NAT transmitter (only for outside-to-inside nat forwarding) 354 377 * Not used for related ICMP ··· 427 402 ip_send_check(ip_hdr(skb)); 428 403 429 404 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 405 + 406 + ip_vs_update_conntrack(skb, cp); 430 407 431 408 /* FIXME: when application helper enlarges the packet and the length 432 409 is larger than the MTU of outgoing device, there will be still ··· 505 478 ipv6_hdr(skb)->daddr = cp->daddr.in6; 506 479 507 480 IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); 481 + 482 + ip_vs_update_conntrack(skb, cp); 508 483 509 484 /* FIXME: when application helper enlarges the packet and the length 510 485 is larger than the MTU of outgoing device, there will be still

+1 -2

net/netfilter/nf_conntrack_core.c

··· 966 966 if (acct) { 967 967 spin_lock_bh(&ct->lock); 968 968 acct[CTINFO2DIR(ctinfo)].packets++; 969 - acct[CTINFO2DIR(ctinfo)].bytes += 970 - skb->len - skb_network_offset(skb); 969 + acct[CTINFO2DIR(ctinfo)].bytes += skb->len; 971 970 spin_unlock_bh(&ct->lock); 972 971 } 973 972 }

+12 -10

net/netfilter/nf_conntrack_extend.c

··· 23 23 { 24 24 unsigned int i; 25 25 struct nf_ct_ext_type *t; 26 + struct nf_ct_ext *ext = ct->ext; 26 27 27 28 for (i = 0; i < NF_CT_EXT_NUM; i++) { 28 - if (!nf_ct_ext_exist(ct, i)) 29 + if (!__nf_ct_ext_exist(ext, i)) 29 30 continue; 30 31 31 32 rcu_read_lock(); ··· 74 73 75 74 void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) 76 75 { 77 - struct nf_ct_ext *new; 76 + struct nf_ct_ext *old, *new; 78 77 int i, newlen, newoff; 79 78 struct nf_ct_ext_type *t; 80 79 81 80 /* Conntrack must not be confirmed to avoid races on reallocation. */ 82 81 NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); 83 82 84 - if (!ct->ext) 83 + old = ct->ext; 84 + if (!old) 85 85 return nf_ct_ext_create(&ct->ext, id, gfp); 86 86 87 - if (nf_ct_ext_exist(ct, id)) 87 + if (__nf_ct_ext_exist(old, id)) 88 88 return NULL; 89 89 90 90 rcu_read_lock(); 91 91 t = rcu_dereference(nf_ct_ext_types[id]); 92 92 BUG_ON(t == NULL); 93 93 94 - newoff = ALIGN(ct->ext->len, t->align); 94 + newoff = ALIGN(old->len, t->align); 95 95 newlen = newoff + t->len; 96 96 rcu_read_unlock(); 97 97 98 - new = __krealloc(ct->ext, newlen, gfp); 98 + new = __krealloc(old, newlen, gfp); 99 99 if (!new) 100 100 return NULL; 101 101 102 - if (new != ct->ext) { 102 + if (new != old) { 103 103 for (i = 0; i < NF_CT_EXT_NUM; i++) { 104 - if (!nf_ct_ext_exist(ct, i)) 104 + if (!__nf_ct_ext_exist(old, i)) 105 105 continue; 106 106 107 107 rcu_read_lock(); 108 108 t = rcu_dereference(nf_ct_ext_types[i]); 109 109 if (t && t->move) 110 110 t->move((void *)new + new->offset[i], 111 - (void *)ct->ext + ct->ext->offset[i]); 111 + (void *)old + old->offset[i]); 112 112 rcu_read_unlock(); 113 113 } 114 - call_rcu(&ct->ext->rcu, __nf_ct_ext_free_rcu); 114 + call_rcu(&old->rcu, __nf_ct_ext_free_rcu); 115 115 ct->ext = new; 116 116 } 117 117

+9 -1

net/netfilter/nf_conntrack_proto_tcp.c

··· 585 585 * Let's try to use the data from the packet. 586 586 */ 587 587 sender->td_end = end; 588 + win <<= sender->td_scale; 588 589 sender->td_maxwin = (win == 0 ? 1 : win); 589 590 sender->td_maxend = end + sender->td_maxwin; 591 + /* 592 + * We haven't seen traffic in the other direction yet 593 + * but we have to tweak window tracking to pass III 594 + * and IV until that happens. 595 + */ 596 + if (receiver->td_maxwin == 0) 597 + receiver->td_end = receiver->td_maxend = sack; 590 598 } 591 599 } else if (((state->state == TCP_CONNTRACK_SYN_SENT 592 600 && dir == IP_CT_DIR_ORIGINAL) ··· 688 680 /* 689 681 * Update receiver data. 690 682 */ 691 - if (after(end, sender->td_maxend)) 683 + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) 692 684 receiver->td_maxwin += end - sender->td_maxend; 693 685 if (after(sack + win, receiver->td_maxend - 1)) { 694 686 receiver->td_maxend = sack + win;

+70

net/netfilter/xt_CHECKSUM.c

··· 1 + /* iptables module for the packet checksum mangling 2 + * 3 + * (C) 2002 by Harald Welte <laforge@netfilter.org> 4 + * (C) 2010 Red Hat, Inc. 5 + * 6 + * Author: Michael S. Tsirkin <mst@redhat.com> 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of the GNU General Public License version 2 as 10 + * published by the Free Software Foundation. 11 + */ 12 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 + #include <linux/module.h> 14 + #include <linux/skbuff.h> 15 + 16 + #include <linux/netfilter/x_tables.h> 17 + #include <linux/netfilter/xt_CHECKSUM.h> 18 + 19 + MODULE_LICENSE("GPL"); 20 + MODULE_AUTHOR("Michael S. Tsirkin <mst@redhat.com>"); 21 + MODULE_DESCRIPTION("Xtables: checksum modification"); 22 + MODULE_ALIAS("ipt_CHECKSUM"); 23 + MODULE_ALIAS("ip6t_CHECKSUM"); 24 + 25 + static unsigned int 26 + checksum_tg(struct sk_buff *skb, const struct xt_action_param *par) 27 + { 28 + if (skb->ip_summed == CHECKSUM_PARTIAL) 29 + skb_checksum_help(skb); 30 + 31 + return XT_CONTINUE; 32 + } 33 + 34 + static int checksum_tg_check(const struct xt_tgchk_param *par) 35 + { 36 + const struct xt_CHECKSUM_info *einfo = par->targinfo; 37 + 38 + if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { 39 + pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); 40 + return -EINVAL; 41 + } 42 + if (!einfo->operation) { 43 + pr_info("no CHECKSUM operation enabled\n"); 44 + return -EINVAL; 45 + } 46 + return 0; 47 + } 48 + 49 + static struct xt_target checksum_tg_reg __read_mostly = { 50 + .name = "CHECKSUM", 51 + .family = NFPROTO_UNSPEC, 52 + .target = checksum_tg, 53 + .targetsize = sizeof(struct xt_CHECKSUM_info), 54 + .table = "mangle", 55 + .checkentry = checksum_tg_check, 56 + .me = THIS_MODULE, 57 + }; 58 + 59 + static int __init checksum_tg_init(void) 60 + { 61 + return xt_register_target(&checksum_tg_reg); 62 + } 63 + 64 + static void __exit checksum_tg_exit(void) 65 + { 66 + 
xt_unregister_target(&checksum_tg_reg); 67 + } 68 + 69 + module_init(checksum_tg_init); 70 + module_exit(checksum_tg_exit);

+4 -2

net/netfilter/xt_TPROXY.c

··· 37 37 return NF_DROP; 38 38 39 39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 40 - iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, 41 - hp->source, tgi->lport ? tgi->lport : hp->dest, 40 + iph->saddr, 41 + tgi->laddr ? tgi->laddr : iph->daddr, 42 + hp->source, 43 + tgi->lport ? tgi->lport : hp->dest, 42 44 par->in, true); 43 45 44 46 /* NOTE: assign_sock consumes our sk reference */

+63

net/netfilter/xt_cpu.c

··· 1 + /* Kernel module to match running CPU */ 2 + 3 + /* 4 + * Might be used to distribute connections on several daemons, if 5 + * RPS (Receive Packet Steering) is enabled or NIC is multiqueue capable, 6 + * each RX queue IRQ affined to one CPU (1:1 mapping) 7 + * 8 + */ 9 + 10 + /* (C) 2010 Eric Dumazet 11 + * 12 + * This program is free software; you can redistribute it and/or modify 13 + * it under the terms of the GNU General Public License version 2 as 14 + * published by the Free Software Foundation. 15 + */ 16 + 17 + #include <linux/module.h> 18 + #include <linux/skbuff.h> 19 + #include <linux/netfilter/xt_cpu.h> 20 + #include <linux/netfilter/x_tables.h> 21 + 22 + MODULE_LICENSE("GPL"); 23 + MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); 24 + MODULE_DESCRIPTION("Xtables: CPU match"); 25 + 26 + static int cpu_mt_check(const struct xt_mtchk_param *par) 27 + { 28 + const struct xt_cpu_info *info = par->matchinfo; 29 + 30 + if (info->invert & ~1) 31 + return -EINVAL; 32 + return 0; 33 + } 34 + 35 + static bool cpu_mt(const struct sk_buff *skb, struct xt_action_param *par) 36 + { 37 + const struct xt_cpu_info *info = par->matchinfo; 38 + 39 + return (info->cpu == smp_processor_id()) ^ info->invert; 40 + } 41 + 42 + static struct xt_match cpu_mt_reg __read_mostly = { 43 + .name = "cpu", 44 + .revision = 0, 45 + .family = NFPROTO_UNSPEC, 46 + .checkentry = cpu_mt_check, 47 + .match = cpu_mt, 48 + .matchsize = sizeof(struct xt_cpu_info), 49 + .me = THIS_MODULE, 50 + }; 51 + 52 + static int __init cpu_mt_init(void) 53 + { 54 + return xt_register_match(&cpu_mt_reg); 55 + } 56 + 57 + static void __exit cpu_mt_exit(void) 58 + { 59 + xt_unregister_match(&cpu_mt_reg); 60 + } 61 + 62 + module_init(cpu_mt_init); 63 + module_exit(cpu_mt_exit);

+189

net/netfilter/xt_ipvs.c

··· 1 + /* 2 + * xt_ipvs - kernel module to match IPVS connection properties 3 + * 4 + * Author: Hannes Eder <heder@google.com> 5 + */ 6 + 7 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 + 9 + #include <linux/module.h> 10 + #include <linux/moduleparam.h> 11 + #include <linux/spinlock.h> 12 + #include <linux/skbuff.h> 13 + #ifdef CONFIG_IP_VS_IPV6 14 + #include <net/ipv6.h> 15 + #endif 16 + #include <linux/ip_vs.h> 17 + #include <linux/types.h> 18 + #include <linux/netfilter/x_tables.h> 19 + #include <linux/netfilter/x_tables.h> 20 + #include <linux/netfilter/xt_ipvs.h> 21 + #include <net/netfilter/nf_conntrack.h> 22 + 23 + #include <net/ip_vs.h> 24 + 25 + MODULE_AUTHOR("Hannes Eder <heder@google.com>"); 26 + MODULE_DESCRIPTION("Xtables: match IPVS connection properties"); 27 + MODULE_LICENSE("GPL"); 28 + MODULE_ALIAS("ipt_ipvs"); 29 + MODULE_ALIAS("ip6t_ipvs"); 30 + 31 + /* borrowed from xt_conntrack */ 32 + static bool ipvs_mt_addrcmp(const union nf_inet_addr *kaddr, 33 + const union nf_inet_addr *uaddr, 34 + const union nf_inet_addr *umask, 35 + unsigned int l3proto) 36 + { 37 + if (l3proto == NFPROTO_IPV4) 38 + return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; 39 + #ifdef CONFIG_IP_VS_IPV6 40 + else if (l3proto == NFPROTO_IPV6) 41 + return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, 42 + &uaddr->in6) == 0; 43 + #endif 44 + else 45 + return false; 46 + } 47 + 48 + static bool 49 + ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) 50 + { 51 + const struct xt_ipvs_mtinfo *data = par->matchinfo; 52 + /* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. 
*/ 53 + const u_int8_t family = par->family; 54 + struct ip_vs_iphdr iph; 55 + struct ip_vs_protocol *pp; 56 + struct ip_vs_conn *cp; 57 + bool match = true; 58 + 59 + if (data->bitmask == XT_IPVS_IPVS_PROPERTY) { 60 + match = skb->ipvs_property ^ 61 + !!(data->invert & XT_IPVS_IPVS_PROPERTY); 62 + goto out; 63 + } 64 + 65 + /* other flags than XT_IPVS_IPVS_PROPERTY are set */ 66 + if (!skb->ipvs_property) { 67 + match = false; 68 + goto out; 69 + } 70 + 71 + ip_vs_fill_iphdr(family, skb_network_header(skb), &iph); 72 + 73 + if (data->bitmask & XT_IPVS_PROTO) 74 + if ((iph.protocol == data->l4proto) ^ 75 + !(data->invert & XT_IPVS_PROTO)) { 76 + match = false; 77 + goto out; 78 + } 79 + 80 + pp = ip_vs_proto_get(iph.protocol); 81 + if (unlikely(!pp)) { 82 + match = false; 83 + goto out; 84 + } 85 + 86 + /* 87 + * Check if the packet belongs to an existing entry 88 + */ 89 + cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); 90 + if (unlikely(cp == NULL)) { 91 + match = false; 92 + goto out; 93 + } 94 + 95 + /* 96 + * We found a connection, i.e. cp != NULL, make sure to call 97 + * __ip_vs_conn_put before returning. In our case jump to out_put_cp. 
98 + */ 99 + 100 + if (data->bitmask & XT_IPVS_VPORT) 101 + if ((cp->vport == data->vport) ^ 102 + !(data->invert & XT_IPVS_VPORT)) { 103 + match = false; 104 + goto out_put_cp; 105 + } 106 + 107 + if (data->bitmask & XT_IPVS_VPORTCTL) 108 + if ((cp->control != NULL && 109 + cp->control->vport == data->vportctl) ^ 110 + !(data->invert & XT_IPVS_VPORTCTL)) { 111 + match = false; 112 + goto out_put_cp; 113 + } 114 + 115 + if (data->bitmask & XT_IPVS_DIR) { 116 + enum ip_conntrack_info ctinfo; 117 + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); 118 + 119 + if (ct == NULL || nf_ct_is_untracked(ct)) { 120 + match = false; 121 + goto out_put_cp; 122 + } 123 + 124 + if ((ctinfo >= IP_CT_IS_REPLY) ^ 125 + !!(data->invert & XT_IPVS_DIR)) { 126 + match = false; 127 + goto out_put_cp; 128 + } 129 + } 130 + 131 + if (data->bitmask & XT_IPVS_METHOD) 132 + if (((cp->flags & IP_VS_CONN_F_FWD_MASK) == data->fwd_method) ^ 133 + !(data->invert & XT_IPVS_METHOD)) { 134 + match = false; 135 + goto out_put_cp; 136 + } 137 + 138 + if (data->bitmask & XT_IPVS_VADDR) { 139 + if (ipvs_mt_addrcmp(&cp->vaddr, &data->vaddr, 140 + &data->vmask, family) ^ 141 + !(data->invert & XT_IPVS_VADDR)) { 142 + match = false; 143 + goto out_put_cp; 144 + } 145 + } 146 + 147 + out_put_cp: 148 + __ip_vs_conn_put(cp); 149 + out: 150 + pr_debug("match=%d\n", match); 151 + return match; 152 + } 153 + 154 + static int ipvs_mt_check(const struct xt_mtchk_param *par) 155 + { 156 + if (par->family != NFPROTO_IPV4 157 + #ifdef CONFIG_IP_VS_IPV6 158 + && par->family != NFPROTO_IPV6 159 + #endif 160 + ) { 161 + pr_info("protocol family %u not supported\n", par->family); 162 + return -EINVAL; 163 + } 164 + 165 + return 0; 166 + } 167 + 168 + static struct xt_match xt_ipvs_mt_reg __read_mostly = { 169 + .name = "ipvs", 170 + .revision = 0, 171 + .family = NFPROTO_UNSPEC, 172 + .match = ipvs_mt, 173 + .checkentry = ipvs_mt_check, 174 + .matchsize = XT_ALIGN(sizeof(struct xt_ipvs_mtinfo)), 175 + .me = THIS_MODULE, 176 + 
}; 177 + 178 + static int __init ipvs_mt_init(void) 179 + { 180 + return xt_register_match(&xt_ipvs_mt_reg); 181 + } 182 + 183 + static void __exit ipvs_mt_exit(void) 184 + { 185 + xt_unregister_match(&xt_ipvs_mt_reg); 186 + } 187 + 188 + module_init(ipvs_mt_init); 189 + module_exit(ipvs_mt_exit);

+5 -7

net/netfilter/xt_quota.c

··· 11 11 #include <linux/netfilter/xt_quota.h> 12 12 13 13 struct xt_quota_priv { 14 - uint64_t quota; 14 + spinlock_t lock; 15 + uint64_t quota; 15 16 }; 16 17 17 18 MODULE_LICENSE("GPL"); ··· 21 20 MODULE_ALIAS("ipt_quota"); 22 21 MODULE_ALIAS("ip6t_quota"); 23 22 24 - static DEFINE_SPINLOCK(quota_lock); 25 - 26 23 static bool 27 24 quota_mt(const struct sk_buff *skb, struct xt_action_param *par) 28 25 { ··· 28 29 struct xt_quota_priv *priv = q->master; 29 30 bool ret = q->flags & XT_QUOTA_INVERT; 30 31 31 - spin_lock_bh(&quota_lock); 32 + spin_lock_bh(&priv->lock); 32 33 if (priv->quota >= skb->len) { 33 34 priv->quota -= skb->len; 34 35 ret = !ret; ··· 36 37 /* we do not allow even small packets from now on */ 37 38 priv->quota = 0; 38 39 } 39 - /* Copy quota back to matchinfo so that iptables can display it */ 40 - q->quota = priv->quota; 41 - spin_unlock_bh(&quota_lock); 40 + spin_unlock_bh(&priv->lock); 42 41 43 42 return ret; 44 43 } ··· 52 55 if (q->master == NULL) 53 56 return -ENOMEM; 54 57 58 + spin_lock_init(&q->master->lock); 55 59 q->master->quota = q->quota; 56 60 return 0; 57 61 }