Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6

+2

include/linux/audit.h

··· 103 103 #define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */ 104 104 #define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */ 105 105 #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ 106 + #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ 107 + #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ 106 108 107 109 #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ 108 110 #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */

+8

include/linux/ip_vs.h

··· 89 89 #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ 90 90 #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ 91 91 92 + #define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \ 93 + IP_VS_CONN_F_NOOUTPUT | \ 94 + IP_VS_CONN_F_INACTIVE | \ 95 + IP_VS_CONN_F_SEQ_MASK | \ 96 + IP_VS_CONN_F_NO_CPORT | \ 97 + IP_VS_CONN_F_TEMPLATE \ 98 + ) 99 + 92 100 /* Flags that are not sent to backup server start from bit 16 */ 93 101 #define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ 94 102

+19 -8

include/linux/netfilter.h

··· 24 24 #define NF_MAX_VERDICT NF_STOP 25 25 26 26 /* we overload the higher bits for encoding auxiliary data such as the queue 27 - * number. Not nice, but better than additional function arguments. */ 28 - #define NF_VERDICT_MASK 0x0000ffff 29 - #define NF_VERDICT_BITS 16 27 + * number or errno values. Not nice, but better than additional function 28 + * arguments. */ 29 + #define NF_VERDICT_MASK 0x000000ff 30 30 31 + /* extra verdict flags have mask 0x0000ff00 */ 32 + #define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000 33 + 34 + /* queue number (NF_QUEUE) or errno (NF_DROP) */ 31 35 #define NF_VERDICT_QMASK 0xffff0000 32 36 #define NF_VERDICT_QBITS 16 33 37 34 - #define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) 38 + #define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE) 35 39 36 - #define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP) 40 + #define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP) 37 41 38 42 /* only for userspace compatibility */ 39 43 #ifndef __KERNEL__ ··· 45 41 <= 0x2000 is used for protocol-flags. 
*/ 46 42 #define NFC_UNKNOWN 0x4000 47 43 #define NFC_ALTERED 0x8000 44 + 45 + /* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */ 46 + #define NF_VERDICT_BITS 16 48 47 #endif 49 48 50 49 enum nf_inet_hooks { ··· 79 72 80 73 #ifdef __KERNEL__ 81 74 #ifdef CONFIG_NETFILTER 75 + static inline int NF_DROP_GETERR(int verdict) 76 + { 77 + return -(verdict >> NF_VERDICT_QBITS); 78 + } 82 79 83 80 static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, 84 81 const union nf_inet_addr *a2) ··· 278 267 int route_key_size; 279 268 }; 280 269 281 - extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO]; 270 + extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO]; 282 271 static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family) 283 272 { 284 273 return rcu_dereference(nf_afinfo[family]); ··· 368 357 #endif /*CONFIG_NETFILTER*/ 369 358 370 359 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 371 - extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 360 + extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; 372 361 extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); 373 - extern void (*nf_ct_destroy)(struct nf_conntrack *); 362 + extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; 374 363 #else 375 364 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} 376 365 #endif

+2

include/linux/netfilter/Kbuild

··· 9 9 header-y += nfnetlink_log.h 10 10 header-y += nfnetlink_queue.h 11 11 header-y += x_tables.h 12 + header-y += xt_AUDIT.h 12 13 header-y += xt_CHECKSUM.h 13 14 header-y += xt_CLASSIFY.h 14 15 header-y += xt_CONNMARK.h ··· 56 55 header-y += xt_realm.h 57 56 header-y += xt_recent.h 58 57 header-y += xt_sctp.h 58 + header-y += xt_socket.h 59 59 header-y += xt_state.h 60 60 header-y += xt_statistic.h 61 61 header-y += xt_string.h

+9

include/linux/netfilter/nf_conntrack_snmp.h

··· 1 + #ifndef _NF_CONNTRACK_SNMP_H 2 + #define _NF_CONNTRACK_SNMP_H 3 + 4 + extern int (*nf_nat_snmp_hook)(struct sk_buff *skb, 5 + unsigned int protoff, 6 + struct nf_conn *ct, 7 + enum ip_conntrack_info ctinfo); 8 + 9 + #endif /* _NF_CONNTRACK_SNMP_H */

+9

include/linux/netfilter/nfnetlink_conntrack.h

··· 42 42 CTA_SECMARK, /* obsolete */ 43 43 CTA_ZONE, 44 44 CTA_SECCTX, 45 + CTA_TIMESTAMP, 45 46 __CTA_MAX 46 47 }; 47 48 #define CTA_MAX (__CTA_MAX - 1) ··· 127 126 __CTA_COUNTERS_MAX 128 127 }; 129 128 #define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) 129 + 130 + enum ctattr_tstamp { 131 + CTA_TIMESTAMP_UNSPEC, 132 + CTA_TIMESTAMP_START, 133 + CTA_TIMESTAMP_STOP, 134 + __CTA_TIMESTAMP_MAX 135 + }; 136 + #define CTA_TIMESTAMP_MAX (__CTA_TIMESTAMP_MAX - 1) 130 137 131 138 enum ctattr_nat { 132 139 CTA_NAT_UNSPEC,

+2 -1

include/linux/netfilter/x_tables.h

··· 611 611 extern void xt_compat_lock(u_int8_t af); 612 612 extern void xt_compat_unlock(u_int8_t af); 613 613 614 - extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta); 614 + extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta); 615 615 extern void xt_compat_flush_offsets(u_int8_t af); 616 + extern void xt_compat_init_offsets(u_int8_t af, unsigned int number); 616 617 extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset); 617 618 618 619 extern int xt_compat_match_offset(const struct xt_match *match);

+30

include/linux/netfilter/xt_AUDIT.h

··· 1 + /* 2 + * Header file for iptables xt_AUDIT target 3 + * 4 + * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> 5 + * (C) 2010-2011 Red Hat, Inc. 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #ifndef _XT_AUDIT_TARGET_H 13 + #define _XT_AUDIT_TARGET_H 14 + 15 + #include <linux/types.h> 16 + 17 + enum { 18 + XT_AUDIT_TYPE_ACCEPT = 0, 19 + XT_AUDIT_TYPE_DROP, 20 + XT_AUDIT_TYPE_REJECT, 21 + __XT_AUDIT_TYPE_MAX, 22 + }; 23 + 24 + #define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1) 25 + 26 + struct xt_audit_info { 27 + __u8 type; /* XT_AUDIT_TYPE_* */ 28 + }; 29 + 30 + #endif /* _XT_AUDIT_TARGET_H */

+5 -5

include/linux/netfilter/xt_CT.h

··· 4 4 #define XT_CT_NOTRACK 0x1 5 5 6 6 struct xt_ct_target_info { 7 - u_int16_t flags; 8 - u_int16_t zone; 9 - u_int32_t ct_events; 10 - u_int32_t exp_events; 11 - char helper[16]; 7 + __u16 flags; 8 + __u16 zone; 9 + __u32 ct_events; 10 + __u32 exp_events; 11 + char helper[16]; 12 12 13 13 /* Used internally by the kernel */ 14 14 struct nf_conn *ct __attribute__((aligned(8)));

+6

include/linux/netfilter/xt_NFQUEUE.h

··· 20 20 __u16 queues_total; 21 21 }; 22 22 23 + struct xt_NFQ_info_v2 { 24 + __u16 queuenum; 25 + __u16 queues_total; 26 + __u16 bypass; 27 + }; 28 + 23 29 #endif /* _XT_NFQ_TARGET_H */

+1 -1

include/linux/netfilter/xt_TCPOPTSTRIP.h

··· 7 7 (((1U << (idx & 31)) & bmap[(idx) >> 5]) != 0) 8 8 9 9 struct xt_tcpoptstrip_target_info { 10 - u_int32_t strip_bmap[8]; 10 + __u32 strip_bmap[8]; 11 11 }; 12 12 13 13 #endif /* _XT_TCPOPTSTRIP_H */

+4 -4

include/linux/netfilter/xt_TPROXY.h

··· 5 5 * redirection. We can get rid of that whenever we get support for 6 6 * mutliple targets in the same rule. */ 7 7 struct xt_tproxy_target_info { 8 - u_int32_t mark_mask; 9 - u_int32_t mark_value; 8 + __u32 mark_mask; 9 + __u32 mark_value; 10 10 __be32 laddr; 11 11 __be16 lport; 12 12 }; 13 13 14 14 struct xt_tproxy_target_info_v1 { 15 - u_int32_t mark_mask; 16 - u_int32_t mark_value; 15 + __u32 mark_mask; 16 + __u32 mark_value; 17 17 union nf_inet_addr laddr; 18 18 __be16 lport; 19 19 };

+4 -4

include/linux/netfilter/xt_cluster.h

··· 6 6 }; 7 7 8 8 struct xt_cluster_match_info { 9 - u_int32_t total_nodes; 10 - u_int32_t node_mask; 11 - u_int32_t hash_seed; 12 - u_int32_t flags; 9 + __u32 total_nodes; 10 + __u32 node_mask; 11 + __u32 hash_seed; 12 + __u32 flags; 13 13 }; 14 14 15 15 #define XT_CLUSTER_NODES_MAX 32

+1 -1

include/linux/netfilter/xt_comment.h

··· 4 4 #define XT_MAX_COMMENT_LEN 256 5 5 6 6 struct xt_comment_info { 7 - unsigned char comment[XT_MAX_COMMENT_LEN]; 7 + char comment[XT_MAX_COMMENT_LEN]; 8 8 }; 9 9 10 10 #endif /* XT_COMMENT_H */

+15

include/linux/netfilter/xt_conntrack.h

··· 58 58 __u16 state_mask, status_mask; 59 59 }; 60 60 61 + struct xt_conntrack_mtinfo3 { 62 + union nf_inet_addr origsrc_addr, origsrc_mask; 63 + union nf_inet_addr origdst_addr, origdst_mask; 64 + union nf_inet_addr replsrc_addr, replsrc_mask; 65 + union nf_inet_addr repldst_addr, repldst_mask; 66 + __u32 expires_min, expires_max; 67 + __u16 l4proto; 68 + __u16 origsrc_port, origdst_port; 69 + __u16 replsrc_port, repldst_port; 70 + __u16 match_flags, invert_flags; 71 + __u16 state_mask, status_mask; 72 + __u16 origsrc_port_high, origdst_port_high; 73 + __u16 replsrc_port_high, repldst_port_high; 74 + }; 75 + 61 76 #endif /*_XT_CONNTRACK_H*/

+3 -3

include/linux/netfilter/xt_quota.h

··· 9 9 struct xt_quota_priv; 10 10 11 11 struct xt_quota_info { 12 - u_int32_t flags; 13 - u_int32_t pad; 14 - aligned_u64 quota; 12 + __u32 flags; 13 + __u32 pad; 14 + aligned_u64 quota; 15 15 16 16 /* Used internally by the kernel */ 17 17 struct xt_quota_priv *master;

+7 -7

include/linux/netfilter/xt_time.h

··· 2 2 #define _XT_TIME_H 1 3 3 4 4 struct xt_time_info { 5 - u_int32_t date_start; 6 - u_int32_t date_stop; 7 - u_int32_t daytime_start; 8 - u_int32_t daytime_stop; 9 - u_int32_t monthdays_match; 10 - u_int8_t weekdays_match; 11 - u_int8_t flags; 5 + __u32 date_start; 6 + __u32 date_stop; 7 + __u32 daytime_start; 8 + __u32 daytime_stop; 9 + __u32 monthdays_match; 10 + __u8 weekdays_match; 11 + __u8 flags; 12 12 }; 13 13 14 14 enum {

+8 -8

include/linux/netfilter/xt_u32.h

··· 9 9 }; 10 10 11 11 struct xt_u32_location_element { 12 - u_int32_t number; 13 - u_int8_t nextop; 12 + __u32 number; 13 + __u8 nextop; 14 14 }; 15 15 16 16 struct xt_u32_value_element { 17 - u_int32_t min; 18 - u_int32_t max; 17 + __u32 min; 18 + __u32 max; 19 19 }; 20 20 21 21 /* ··· 27 27 struct xt_u32_test { 28 28 struct xt_u32_location_element location[XT_U32_MAXSIZE+1]; 29 29 struct xt_u32_value_element value[XT_U32_MAXSIZE+1]; 30 - u_int8_t nnums; 31 - u_int8_t nvalues; 30 + __u8 nnums; 31 + __u8 nvalues; 32 32 }; 33 33 34 34 struct xt_u32 { 35 35 struct xt_u32_test tests[XT_U32_MAXSIZE+1]; 36 - u_int8_t ntests; 37 - u_int8_t invert; 36 + __u8 ntests; 37 + __u8 invert; 38 38 }; 39 39 40 40 #endif /* _XT_U32_H */

+12 -12

include/linux/netfilter_bridge/ebt_802_3.h

··· 24 24 25 25 /* ui has one byte ctrl, ni has two */ 26 26 struct hdr_ui { 27 - uint8_t dsap; 28 - uint8_t ssap; 29 - uint8_t ctrl; 30 - uint8_t orig[3]; 27 + __u8 dsap; 28 + __u8 ssap; 29 + __u8 ctrl; 30 + __u8 orig[3]; 31 31 __be16 type; 32 32 }; 33 33 34 34 struct hdr_ni { 35 - uint8_t dsap; 36 - uint8_t ssap; 35 + __u8 dsap; 36 + __u8 ssap; 37 37 __be16 ctrl; 38 - uint8_t orig[3]; 38 + __u8 orig[3]; 39 39 __be16 type; 40 40 }; 41 41 42 42 struct ebt_802_3_hdr { 43 - uint8_t daddr[6]; 44 - uint8_t saddr[6]; 43 + __u8 daddr[6]; 44 + __u8 saddr[6]; 45 45 __be16 len; 46 46 union { 47 47 struct hdr_ui ui; ··· 59 59 #endif 60 60 61 61 struct ebt_802_3_info { 62 - uint8_t sap; 62 + __u8 sap; 63 63 __be16 type; 64 - uint8_t bitmask; 65 - uint8_t invflags; 64 + __u8 bitmask; 65 + __u8 invflags; 66 66 }; 67 67 68 68 #endif

+1 -1

include/linux/netfilter_bridge/ebt_among.h

··· 30 30 */ 31 31 32 32 struct ebt_mac_wormhash_tuple { 33 - uint32_t cmp[2]; 33 + __u32 cmp[2]; 34 34 __be32 ip; 35 35 }; 36 36

+2 -2

include/linux/netfilter_bridge/ebt_arp.h

··· 27 27 unsigned char smmsk[ETH_ALEN]; 28 28 unsigned char dmaddr[ETH_ALEN]; 29 29 unsigned char dmmsk[ETH_ALEN]; 30 - uint8_t bitmask; 31 - uint8_t invflags; 30 + __u8 bitmask; 31 + __u8 invflags; 32 32 }; 33 33 34 34 #endif

+6 -6

include/linux/netfilter_bridge/ebt_ip.h

··· 31 31 __be32 daddr; 32 32 __be32 smsk; 33 33 __be32 dmsk; 34 - uint8_t tos; 35 - uint8_t protocol; 36 - uint8_t bitmask; 37 - uint8_t invflags; 38 - uint16_t sport[2]; 39 - uint16_t dport[2]; 34 + __u8 tos; 35 + __u8 protocol; 36 + __u8 bitmask; 37 + __u8 invflags; 38 + __u16 sport[2]; 39 + __u16 dport[2]; 40 40 }; 41 41 42 42 #endif

+16 -7

include/linux/netfilter_bridge/ebt_ip6.h

··· 18 18 #define EBT_IP6_PROTO 0x08 19 19 #define EBT_IP6_SPORT 0x10 20 20 #define EBT_IP6_DPORT 0x20 21 + #define EBT_IP6_ICMP6 0x40 22 + 21 23 #define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ 22 - EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) 24 + EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT | \ 25 + EBT_IP6_ICMP6) 23 26 #define EBT_IP6_MATCH "ip6" 24 27 25 28 /* the same values are used for the invflags */ ··· 31 28 struct in6_addr daddr; 32 29 struct in6_addr smsk; 33 30 struct in6_addr dmsk; 34 - uint8_t tclass; 35 - uint8_t protocol; 36 - uint8_t bitmask; 37 - uint8_t invflags; 38 - uint16_t sport[2]; 39 - uint16_t dport[2]; 31 + __u8 tclass; 32 + __u8 protocol; 33 + __u8 bitmask; 34 + __u8 invflags; 35 + union { 36 + __u16 sport[2]; 37 + __u8 icmpv6_type[2]; 38 + }; 39 + union { 40 + __u16 dport[2]; 41 + __u8 icmpv6_code[2]; 42 + }; 40 43 }; 41 44 42 45 #endif

+4 -4

include/linux/netfilter_bridge/ebt_limit.h

··· 10 10 seconds, or one every 59 hours. */ 11 11 12 12 struct ebt_limit_info { 13 - u_int32_t avg; /* Average secs between packets * scale */ 14 - u_int32_t burst; /* Period multiplier for upper limit. */ 13 + __u32 avg; /* Average secs between packets * scale */ 14 + __u32 burst; /* Period multiplier for upper limit. */ 15 15 16 16 /* Used internally by the kernel */ 17 17 unsigned long prev; 18 - u_int32_t credit; 19 - u_int32_t credit_cap, cost; 18 + __u32 credit; 19 + __u32 credit_cap, cost; 20 20 }; 21 21 22 22 #endif

+3 -3

include/linux/netfilter_bridge/ebt_log.h

··· 10 10 #define EBT_LOG_WATCHER "log" 11 11 12 12 struct ebt_log_info { 13 - uint8_t loglevel; 14 - uint8_t prefix[EBT_LOG_PREFIX_SIZE]; 15 - uint32_t bitmask; 13 + __u8 loglevel; 14 + __u8 prefix[EBT_LOG_PREFIX_SIZE]; 15 + __u32 bitmask; 16 16 }; 17 17 18 18 #endif

+2 -2

include/linux/netfilter_bridge/ebt_mark_m.h

··· 6 6 #define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR) 7 7 struct ebt_mark_m_info { 8 8 unsigned long mark, mask; 9 - uint8_t invert; 10 - uint8_t bitmask; 9 + __u8 invert; 10 + __u8 bitmask; 11 11 }; 12 12 #define EBT_MARK_MATCH "mark_m" 13 13

+5 -5

include/linux/netfilter_bridge/ebt_nflog.h

··· 10 10 #define EBT_NFLOG_DEFAULT_THRESHOLD 1 11 11 12 12 struct ebt_nflog_info { 13 - u_int32_t len; 14 - u_int16_t group; 15 - u_int16_t threshold; 16 - u_int16_t flags; 17 - u_int16_t pad; 13 + __u32 len; 14 + __u16 group; 15 + __u16 threshold; 16 + __u16 flags; 17 + __u16 pad; 18 18 char prefix[EBT_NFLOG_PREFIX_SIZE]; 19 19 }; 20 20

+2 -2

include/linux/netfilter_bridge/ebt_pkttype.h

··· 2 2 #define __LINUX_BRIDGE_EBT_PKTTYPE_H 3 3 4 4 struct ebt_pkttype_info { 5 - uint8_t pkt_type; 6 - uint8_t invert; 5 + __u8 pkt_type; 6 + __u8 invert; 7 7 }; 8 8 #define EBT_PKTTYPE_MATCH "pkttype" 9 9

+12 -12

include/linux/netfilter_bridge/ebt_stp.h

··· 21 21 #define EBT_STP_MATCH "stp" 22 22 23 23 struct ebt_stp_config_info { 24 - uint8_t flags; 25 - uint16_t root_priol, root_priou; 24 + __u8 flags; 25 + __u16 root_priol, root_priou; 26 26 char root_addr[6], root_addrmsk[6]; 27 - uint32_t root_costl, root_costu; 28 - uint16_t sender_priol, sender_priou; 27 + __u32 root_costl, root_costu; 28 + __u16 sender_priol, sender_priou; 29 29 char sender_addr[6], sender_addrmsk[6]; 30 - uint16_t portl, portu; 31 - uint16_t msg_agel, msg_ageu; 32 - uint16_t max_agel, max_ageu; 33 - uint16_t hello_timel, hello_timeu; 34 - uint16_t forward_delayl, forward_delayu; 30 + __u16 portl, portu; 31 + __u16 msg_agel, msg_ageu; 32 + __u16 max_agel, max_ageu; 33 + __u16 hello_timel, hello_timeu; 34 + __u16 forward_delayl, forward_delayu; 35 35 }; 36 36 37 37 struct ebt_stp_info { 38 - uint8_t type; 38 + __u8 type; 39 39 struct ebt_stp_config_info config; 40 - uint16_t bitmask; 41 - uint16_t invflags; 40 + __u16 bitmask; 41 + __u16 invflags; 42 42 }; 43 43 44 44 #endif

+1 -1

include/linux/netfilter_bridge/ebt_ulog.h

··· 10 10 #define EBT_ULOG_VERSION 1 11 11 12 12 struct ebt_ulog_info { 13 - uint32_t nlgroup; 13 + __u32 nlgroup; 14 14 unsigned int cprange; 15 15 unsigned int qthreshold; 16 16 char prefix[EBT_ULOG_PREFIX_LEN];

+4 -4

include/linux/netfilter_bridge/ebt_vlan.h

··· 8 8 #define EBT_VLAN_MATCH "vlan" 9 9 10 10 struct ebt_vlan_info { 11 - uint16_t id; /* VLAN ID {1-4095} */ 12 - uint8_t prio; /* VLAN User Priority {0-7} */ 11 + __u16 id; /* VLAN ID {1-4095} */ 12 + __u8 prio; /* VLAN User Priority {0-7} */ 13 13 __be16 encap; /* VLAN Encapsulated frame code {0-65535} */ 14 - uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg, 14 + __u8 bitmask; /* Args bitmask bit 1=1 - ID arg, 15 15 bit 2=1 User-Priority arg, bit 3=1 encap*/ 16 - uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, 16 + __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg, 17 17 bit 2=1 - inversed Pirority arg */ 18 18 }; 19 19

+7 -7

include/linux/netfilter_ipv4/ipt_CLUSTERIP.h

··· 17 17 18 18 struct ipt_clusterip_tgt_info { 19 19 20 - u_int32_t flags; 20 + __u32 flags; 21 21 22 22 /* only relevant for new ones */ 23 - u_int8_t clustermac[6]; 24 - u_int16_t num_total_nodes; 25 - u_int16_t num_local_nodes; 26 - u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; 27 - u_int32_t hash_mode; 28 - u_int32_t hash_initval; 23 + __u8 clustermac[6]; 24 + __u16 num_total_nodes; 25 + __u16 num_local_nodes; 26 + __u16 local_nodes[CLUSTERIP_MAX_NODES]; 27 + __u32 hash_mode; 28 + __u32 hash_initval; 29 29 30 30 /* Used internally by the kernel */ 31 31 struct clusterip_config *config;

+3 -3

include/linux/netfilter_ipv4/ipt_ECN.h

··· 19 19 #define IPT_ECN_OP_MASK 0xce 20 20 21 21 struct ipt_ECN_info { 22 - u_int8_t operation; /* bitset of operations */ 23 - u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ 22 + __u8 operation; /* bitset of operations */ 23 + __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ 24 24 union { 25 25 struct { 26 - u_int8_t ece:1, cwr:1; /* TCP ECT bits */ 26 + __u8 ece:1, cwr:1; /* TCP ECT bits */ 27 27 } tcp; 28 28 } proto; 29 29 };

+3 -3

include/linux/netfilter_ipv4/ipt_SAME.h

··· 7 7 8 8 struct ipt_same_info { 9 9 unsigned char info; 10 - u_int32_t rangesize; 11 - u_int32_t ipnum; 12 - u_int32_t *iparray; 10 + __u32 rangesize; 11 + __u32 ipnum; 12 + __u32 *iparray; 13 13 14 14 /* hangs off end. */ 15 15 struct nf_nat_range range[IPT_SAME_MAX_RANGE];

+2 -2

include/linux/netfilter_ipv4/ipt_TTL.h

··· 13 13 #define IPT_TTL_MAXMODE IPT_TTL_DEC 14 14 15 15 struct ipt_TTL_info { 16 - u_int8_t mode; 17 - u_int8_t ttl; 16 + __u8 mode; 17 + __u8 ttl; 18 18 }; 19 19 20 20

+7 -7

include/linux/netfilter_ipv4/ipt_addrtype.h

··· 9 9 }; 10 10 11 11 struct ipt_addrtype_info_v1 { 12 - u_int16_t source; /* source-type mask */ 13 - u_int16_t dest; /* dest-type mask */ 14 - u_int32_t flags; 12 + __u16 source; /* source-type mask */ 13 + __u16 dest; /* dest-type mask */ 14 + __u32 flags; 15 15 }; 16 16 17 17 /* revision 0 */ 18 18 struct ipt_addrtype_info { 19 - u_int16_t source; /* source-type mask */ 20 - u_int16_t dest; /* dest-type mask */ 21 - u_int32_t invert_source; 22 - u_int32_t invert_dest; 19 + __u16 source; /* source-type mask */ 20 + __u16 dest; /* dest-type mask */ 21 + __u32 invert_source; 22 + __u32 invert_dest; 23 23 }; 24 24 25 25 #endif

+2 -2

include/linux/netfilter_ipv4/ipt_ah.h

··· 2 2 #define _IPT_AH_H 3 3 4 4 struct ipt_ah { 5 - u_int32_t spis[2]; /* Security Parameter Index */ 6 - u_int8_t invflags; /* Inverse flags */ 5 + __u32 spis[2]; /* Security Parameter Index */ 6 + __u8 invflags; /* Inverse flags */ 7 7 }; 8 8 9 9

+4 -4

include/linux/netfilter_ipv4/ipt_ecn.h

··· 20 20 21 21 /* match info */ 22 22 struct ipt_ecn_info { 23 - u_int8_t operation; 24 - u_int8_t invert; 25 - u_int8_t ip_ect; 23 + __u8 operation; 24 + __u8 invert; 25 + __u8 ip_ect; 26 26 union { 27 27 struct { 28 - u_int8_t ect; 28 + __u8 ect; 29 29 } tcp; 30 30 } proto; 31 31 };

+2 -2

include/linux/netfilter_ipv4/ipt_ttl.h

··· 13 13 14 14 15 15 struct ipt_ttl_info { 16 - u_int8_t mode; 17 - u_int8_t ttl; 16 + __u8 mode; 17 + __u8 ttl; 18 18 }; 19 19 20 20

+2 -2

include/linux/netfilter_ipv6/ip6t_HL.h

··· 14 14 #define IP6T_HL_MAXMODE IP6T_HL_DEC 15 15 16 16 struct ip6t_HL_info { 17 - u_int8_t mode; 18 - u_int8_t hop_limit; 17 + __u8 mode; 18 + __u8 hop_limit; 19 19 }; 20 20 21 21

+1 -1

include/linux/netfilter_ipv6/ip6t_REJECT.h

··· 12 12 }; 13 13 14 14 struct ip6t_reject_info { 15 - u_int32_t with; /* reject type */ 15 + __u32 with; /* reject type */ 16 16 }; 17 17 18 18 #endif /*_IP6T_REJECT_H*/

+4 -4

include/linux/netfilter_ipv6/ip6t_ah.h

··· 2 2 #define _IP6T_AH_H 3 3 4 4 struct ip6t_ah { 5 - u_int32_t spis[2]; /* Security Parameter Index */ 6 - u_int32_t hdrlen; /* Header Length */ 7 - u_int8_t hdrres; /* Test of the Reserved Filed */ 8 - u_int8_t invflags; /* Inverse flags */ 5 + __u32 spis[2]; /* Security Parameter Index */ 6 + __u32 hdrlen; /* Header Length */ 7 + __u8 hdrres; /* Test of the Reserved Filed */ 8 + __u8 invflags; /* Inverse flags */ 9 9 }; 10 10 11 11 #define IP6T_AH_SPI 0x01

+4 -4

include/linux/netfilter_ipv6/ip6t_frag.h

··· 2 2 #define _IP6T_FRAG_H 3 3 4 4 struct ip6t_frag { 5 - u_int32_t ids[2]; /* Security Parameter Index */ 6 - u_int32_t hdrlen; /* Header Length */ 7 - u_int8_t flags; /* */ 8 - u_int8_t invflags; /* Inverse flags */ 5 + __u32 ids[2]; /* Security Parameter Index */ 6 + __u32 hdrlen; /* Header Length */ 7 + __u8 flags; /* */ 8 + __u8 invflags; /* Inverse flags */ 9 9 }; 10 10 11 11 #define IP6T_FRAG_IDS 0x01

+2 -2

include/linux/netfilter_ipv6/ip6t_hl.h

··· 14 14 15 15 16 16 struct ip6t_hl_info { 17 - u_int8_t mode; 18 - u_int8_t hop_limit; 17 + __u8 mode; 18 + __u8 hop_limit; 19 19 }; 20 20 21 21

+3 -3

include/linux/netfilter_ipv6/ip6t_ipv6header.h

··· 9 9 #define __IPV6HEADER_H 10 10 11 11 struct ip6t_ipv6header_info { 12 - u_int8_t matchflags; 13 - u_int8_t invflags; 14 - u_int8_t modeflag; 12 + __u8 matchflags; 13 + __u8 invflags; 14 + __u8 modeflag; 15 15 }; 16 16 17 17 #define MASK_HOPOPTS 128

+2 -2

include/linux/netfilter_ipv6/ip6t_mh.h

··· 3 3 4 4 /* MH matching stuff */ 5 5 struct ip6t_mh { 6 - u_int8_t types[2]; /* MH type range */ 7 - u_int8_t invflags; /* Inverse flags */ 6 + __u8 types[2]; /* MH type range */ 7 + __u8 invflags; /* Inverse flags */ 8 8 }; 9 9 10 10 /* Values for "invflags" field in struct ip6t_mh. */

+5 -5

include/linux/netfilter_ipv6/ip6t_opts.h

··· 4 4 #define IP6T_OPTS_OPTSNR 16 5 5 6 6 struct ip6t_opts { 7 - u_int32_t hdrlen; /* Header Length */ 8 - u_int8_t flags; /* */ 9 - u_int8_t invflags; /* Inverse flags */ 10 - u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */ 11 - u_int8_t optsnr; /* Nr of OPts */ 7 + __u32 hdrlen; /* Header Length */ 8 + __u8 flags; /* */ 9 + __u8 invflags; /* Inverse flags */ 10 + __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */ 11 + __u8 optsnr; /* Nr of OPts */ 12 12 }; 13 13 14 14 #define IP6T_OPTS_LEN 0x01

+6 -6

include/linux/netfilter_ipv6/ip6t_rt.h

··· 6 6 #define IP6T_RT_HOPS 16 7 7 8 8 struct ip6t_rt { 9 - u_int32_t rt_type; /* Routing Type */ 10 - u_int32_t segsleft[2]; /* Segments Left */ 11 - u_int32_t hdrlen; /* Header Length */ 12 - u_int8_t flags; /* */ 13 - u_int8_t invflags; /* Inverse flags */ 9 + __u32 rt_type; /* Routing Type */ 10 + __u32 segsleft[2]; /* Segments Left */ 11 + __u32 hdrlen; /* Header Length */ 12 + __u8 flags; /* */ 13 + __u8 invflags; /* Inverse flags */ 14 14 struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */ 15 - u_int8_t addrnr; /* Nr of Addresses */ 15 + __u8 addrnr; /* Nr of Addresses */ 16 16 }; 17 17 18 18 #define IP6T_RT_TYP 0x01

+1 -1

include/net/dst.h

··· 72 72 73 73 u32 _metrics[RTAX_MAX]; 74 74 75 - #ifdef CONFIG_NET_CLS_ROUTE 75 + #ifdef CONFIG_IP_ROUTE_CLASSID 76 76 __u32 tclassid; 77 77 #else 78 78 __u32 __pad2;

+3 -3

include/net/ip_fib.h

··· 55 55 int nh_weight; 56 56 int nh_power; 57 57 #endif 58 - #ifdef CONFIG_NET_CLS_ROUTE 58 + #ifdef CONFIG_IP_ROUTE_CLASSID 59 59 __u32 nh_tclassid; 60 60 #endif 61 61 int nh_oif; ··· 201 201 extern int __net_init fib4_rules_init(struct net *net); 202 202 extern void __net_exit fib4_rules_exit(struct net *net); 203 203 204 - #ifdef CONFIG_NET_CLS_ROUTE 204 + #ifdef CONFIG_IP_ROUTE_CLASSID 205 205 extern u32 fib_rules_tclass(struct fib_result *res); 206 206 #endif 207 207 ··· 235 235 236 236 static inline void fib_combine_itag(u32 *itag, struct fib_result *res) 237 237 { 238 - #ifdef CONFIG_NET_CLS_ROUTE 238 + #ifdef CONFIG_IP_ROUTE_CLASSID 239 239 #ifdef CONFIG_IP_MULTIPLE_TABLES 240 240 u32 rtag; 241 241 #endif

+228 -69

include/net/ip_vs.h

··· 28 28 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 29 29 #include <net/netfilter/nf_conntrack.h> 30 30 #endif 31 + #include <net/net_namespace.h> /* Netw namespace */ 32 + 33 + /* 34 + * Generic access of ipvs struct 35 + */ 36 + static inline struct netns_ipvs *net_ipvs(struct net* net) 37 + { 38 + return net->ipvs; 39 + } 40 + /* 41 + * Get net ptr from skb in traffic cases 42 + * use skb_sknet when call is from userland (ioctl or netlink) 43 + */ 44 + static inline struct net *skb_net(const struct sk_buff *skb) 45 + { 46 + #ifdef CONFIG_NET_NS 47 + #ifdef CONFIG_IP_VS_DEBUG 48 + /* 49 + * This is used for debug only. 50 + * Start with the most likely hit 51 + * End with BUG 52 + */ 53 + if (likely(skb->dev && skb->dev->nd_net)) 54 + return dev_net(skb->dev); 55 + if (skb_dst(skb)->dev) 56 + return dev_net(skb_dst(skb)->dev); 57 + WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", 58 + __func__, __LINE__); 59 + if (likely(skb->sk && skb->sk->sk_net)) 60 + return sock_net(skb->sk); 61 + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", 62 + __func__, __LINE__); 63 + BUG(); 64 + #else 65 + return dev_net(skb->dev ? 
: skb_dst(skb)->dev); 66 + #endif 67 + #else 68 + return &init_net; 69 + #endif 70 + } 71 + 72 + static inline struct net *skb_sknet(const struct sk_buff *skb) 73 + { 74 + #ifdef CONFIG_NET_NS 75 + #ifdef CONFIG_IP_VS_DEBUG 76 + /* Start with the most likely hit */ 77 + if (likely(skb->sk && skb->sk->sk_net)) 78 + return sock_net(skb->sk); 79 + WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", 80 + __func__, __LINE__); 81 + if (likely(skb->dev && skb->dev->nd_net)) 82 + return dev_net(skb->dev); 83 + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", 84 + __func__, __LINE__); 85 + BUG(); 86 + #else 87 + return sock_net(skb->sk); 88 + #endif 89 + #else 90 + return &init_net; 91 + #endif 92 + } 93 + /* 94 + * This one needed for single_open_net since net is stored directly in 95 + * private not as a struct i.e. seq_file_net cant be used. 96 + */ 97 + static inline struct net *seq_file_single_net(struct seq_file *seq) 98 + { 99 + #ifdef CONFIG_NET_NS 100 + return (struct net *)seq->private; 101 + #else 102 + return &init_net; 103 + #endif 104 + } 31 105 32 106 /* Connections' size value needed by ip_vs_ctl.c */ 33 107 extern int ip_vs_conn_tab_size; ··· 332 258 before last resized pkt */ 333 259 }; 334 260 261 + /* 262 + * counters per cpu 263 + */ 264 + struct ip_vs_counters { 265 + __u32 conns; /* connections scheduled */ 266 + __u32 inpkts; /* incoming packets */ 267 + __u32 outpkts; /* outgoing packets */ 268 + __u64 inbytes; /* incoming bytes */ 269 + __u64 outbytes; /* outgoing bytes */ 270 + }; 271 + /* 272 + * Stats per cpu 273 + */ 274 + struct ip_vs_cpu_stats { 275 + struct ip_vs_counters ustats; 276 + struct u64_stats_sync syncp; 277 + }; 335 278 336 279 /* 337 280 * IPVS statistics objects ··· 370 279 }; 371 280 372 281 struct ip_vs_stats { 373 - struct ip_vs_stats_user ustats; /* statistics */ 282 + struct ip_vs_stats_user ustats; /* statistics */ 374 283 struct ip_vs_estimator est; /* estimator */ 375 - 376 - 
spinlock_t lock; /* spin lock */ 284 + struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ 285 + spinlock_t lock; /* spin lock */ 377 286 }; 287 + 288 + /* 289 + * Helper Macros for per cpu 290 + * ipvs->tot_stats->ustats.count 291 + */ 292 + #define IPVS_STAT_INC(ipvs, count) \ 293 + __this_cpu_inc((ipvs)->ustats->count) 294 + 295 + #define IPVS_STAT_ADD(ipvs, count, value) \ 296 + do {\ 297 + write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \ 298 + raw_smp_processor_id())); \ 299 + __this_cpu_add((ipvs)->ustats->count, value); \ 300 + write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \ 301 + raw_smp_processor_id())); \ 302 + } while (0) 378 303 379 304 struct dst_entry; 380 305 struct iphdr; 381 306 struct ip_vs_conn; 382 307 struct ip_vs_app; 383 308 struct sk_buff; 309 + struct ip_vs_proto_data; 384 310 385 311 struct ip_vs_protocol { 386 312 struct ip_vs_protocol *next; ··· 405 297 u16 protocol; 406 298 u16 num_states; 407 299 int dont_defrag; 408 - atomic_t appcnt; /* counter of proto app incs */ 409 - int *timeout_table; /* protocol timeout table */ 410 300 411 301 void (*init)(struct ip_vs_protocol *pp); 412 302 413 303 void (*exit)(struct ip_vs_protocol *pp); 414 304 305 + void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); 306 + 307 + void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); 308 + 415 309 int (*conn_schedule)(int af, struct sk_buff *skb, 416 - struct ip_vs_protocol *pp, 310 + struct ip_vs_proto_data *pd, 417 311 int *verdict, struct ip_vs_conn **cpp); 418 312 419 313 struct ip_vs_conn * 420 314 (*conn_in_get)(int af, 421 315 const struct sk_buff *skb, 422 - struct ip_vs_protocol *pp, 423 316 const struct ip_vs_iphdr *iph, 424 317 unsigned int proto_off, 425 318 int inverse); ··· 428 319 struct ip_vs_conn * 429 320 (*conn_out_get)(int af, 430 321 const struct sk_buff *skb, 431 - struct ip_vs_protocol *pp, 432 322 const struct ip_vs_iphdr *iph, 433 323 unsigned int proto_off, 434 324 int inverse); ··· 445 337 
446 338 int (*state_transition)(struct ip_vs_conn *cp, int direction, 447 339 const struct sk_buff *skb, 448 - struct ip_vs_protocol *pp); 340 + struct ip_vs_proto_data *pd); 449 341 450 - int (*register_app)(struct ip_vs_app *inc); 342 + int (*register_app)(struct net *net, struct ip_vs_app *inc); 451 343 452 - void (*unregister_app)(struct ip_vs_app *inc); 344 + void (*unregister_app)(struct net *net, struct ip_vs_app *inc); 453 345 454 346 int (*app_conn_bind)(struct ip_vs_conn *cp); 455 347 ··· 458 350 int offset, 459 351 const char *msg); 460 352 461 - void (*timeout_change)(struct ip_vs_protocol *pp, int flags); 462 - 463 - int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); 353 + void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); 464 354 }; 465 355 466 - extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); 356 + /* 357 + * protocol data per netns 358 + */ 359 + struct ip_vs_proto_data { 360 + struct ip_vs_proto_data *next; 361 + struct ip_vs_protocol *pp; 362 + int *timeout_table; /* protocol timeout table */ 363 + atomic_t appcnt; /* counter of proto app incs. 
*/ 364 + struct tcp_states_t *tcp_state_table; 365 + }; 366 + 367 + extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto); 368 + extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, 369 + unsigned short proto); 467 370 468 371 struct ip_vs_conn_param { 372 + struct net *net; 469 373 const union nf_inet_addr *caddr; 470 374 const union nf_inet_addr *vaddr; 471 375 __be16 cport; ··· 495 375 */ 496 376 struct ip_vs_conn { 497 377 struct list_head c_list; /* hashed list heads */ 498 - 378 + #ifdef CONFIG_NET_NS 379 + struct net *net; /* Name space */ 380 + #endif 499 381 /* Protocol, addresses and port numbers */ 500 - u16 af; /* address family */ 501 - union nf_inet_addr caddr; /* client address */ 502 - union nf_inet_addr vaddr; /* virtual address */ 503 - union nf_inet_addr daddr; /* destination address */ 504 - volatile __u32 flags; /* status flags */ 505 - __be16 cport; 506 - __be16 vport; 507 - __be16 dport; 382 + u16 af; /* address family */ 383 + __be16 cport; 384 + __be16 vport; 385 + __be16 dport; 386 + __u32 fwmark; /* Fire wall mark from skb */ 387 + union nf_inet_addr caddr; /* client address */ 388 + union nf_inet_addr vaddr; /* virtual address */ 389 + union nf_inet_addr daddr; /* destination address */ 390 + volatile __u32 flags; /* status flags */ 508 391 __u16 protocol; /* Which protocol (TCP/UDP) */ 509 392 510 393 /* counter and timer */ ··· 545 422 struct ip_vs_seq in_seq; /* incoming seq. struct */ 546 423 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 547 424 425 + const struct ip_vs_pe *pe; 548 426 char *pe_data; 549 427 __u8 pe_data_len; 550 428 }; 551 429 430 + /* 431 + * To save some memory in conn table when name space is disabled. 
432 + */ 433 + static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) 434 + { 435 + #ifdef CONFIG_NET_NS 436 + return cp->net; 437 + #else 438 + return &init_net; 439 + #endif 440 + } 441 + static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) 442 + { 443 + #ifdef CONFIG_NET_NS 444 + cp->net = net; 445 + #endif 446 + } 447 + 448 + static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, 449 + struct net *net) 450 + { 451 + #ifdef CONFIG_NET_NS 452 + return cp->net == net; 453 + #else 454 + return 1; 455 + #endif 456 + } 552 457 553 458 /* 554 459 * Extended internal versions of struct ip_vs_service_user and ··· 636 485 unsigned flags; /* service status flags */ 637 486 unsigned timeout; /* persistent timeout in ticks */ 638 487 __be32 netmask; /* grouping granularity */ 488 + struct net *net; 639 489 640 490 struct list_head destinations; /* real server d-linked list */ 641 491 __u32 num_dests; /* number of servers */ ··· 662 510 struct list_head d_list; /* for table with all the dests */ 663 511 664 512 u16 af; /* address family */ 665 - union nf_inet_addr addr; /* IP address of the server */ 666 513 __be16 port; /* port number of the server */ 514 + union nf_inet_addr addr; /* IP address of the server */ 667 515 volatile unsigned flags; /* dest status flags */ 668 516 atomic_t conn_flags; /* flags to copy to conn */ 669 517 atomic_t weight; /* server weight */ ··· 690 538 /* for virtual service */ 691 539 struct ip_vs_service *svc; /* service it belongs to */ 692 540 __u16 protocol; /* which protocol (TCP/UDP) */ 693 - union nf_inet_addr vaddr; /* virtual IP address */ 694 541 __be16 vport; /* virtual port number */ 542 + union nf_inet_addr vaddr; /* virtual IP address */ 695 543 __u32 vfwmark; /* firewall mark of service */ 696 544 }; 697 545 ··· 826 674 IP_VS_DIR_LAST, 827 675 }; 828 676 829 - static inline void ip_vs_conn_fill_param(int af, int protocol, 677 + static inline void ip_vs_conn_fill_param(struct net 
*net, int af, int protocol, 830 678 const union nf_inet_addr *caddr, 831 679 __be16 cport, 832 680 const union nf_inet_addr *vaddr, 833 681 __be16 vport, 834 682 struct ip_vs_conn_param *p) 835 683 { 684 + p->net = net; 836 685 p->af = af; 837 686 p->protocol = protocol; 838 687 p->caddr = caddr; ··· 848 695 struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); 849 696 850 697 struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 851 - struct ip_vs_protocol *pp, 852 698 const struct ip_vs_iphdr *iph, 853 699 unsigned int proto_off, 854 700 int inverse); ··· 855 703 struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); 856 704 857 705 struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 858 - struct ip_vs_protocol *pp, 859 706 const struct ip_vs_iphdr *iph, 860 707 unsigned int proto_off, 861 708 int inverse); ··· 870 719 struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, 871 720 const union nf_inet_addr *daddr, 872 721 __be16 dport, unsigned flags, 873 - struct ip_vs_dest *dest); 722 + struct ip_vs_dest *dest, __u32 fwmark); 874 723 extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); 875 724 876 725 extern const char * ip_vs_state_name(__u16 proto, int state); 877 726 878 - extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); 727 + extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); 879 728 extern int ip_vs_check_template(struct ip_vs_conn *ct); 880 - extern void ip_vs_random_dropentry(void); 729 + extern void ip_vs_random_dropentry(struct net *net); 881 730 extern int ip_vs_conn_init(void); 882 731 extern void ip_vs_conn_cleanup(void); 883 732 ··· 947 796 * (from ip_vs_app.c) 948 797 */ 949 798 #define IP_VS_APP_MAX_PORTS 8 950 - extern int register_ip_vs_app(struct ip_vs_app *app); 951 - extern void unregister_ip_vs_app(struct ip_vs_app *app); 799 + extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); 800 + 
extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); 952 801 extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); 953 802 extern void ip_vs_unbind_app(struct ip_vs_conn *cp); 954 - extern int 955 - register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); 803 + extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, 804 + __u16 proto, __u16 port); 956 805 extern int ip_vs_app_inc_get(struct ip_vs_app *inc); 957 806 extern void ip_vs_app_inc_put(struct ip_vs_app *inc); 958 807 ··· 965 814 void ip_vs_unbind_pe(struct ip_vs_service *svc); 966 815 int register_ip_vs_pe(struct ip_vs_pe *pe); 967 816 int unregister_ip_vs_pe(struct ip_vs_pe *pe); 968 - extern struct ip_vs_pe *ip_vs_pe_get(const char *name); 969 - extern void ip_vs_pe_put(struct ip_vs_pe *pe); 817 + struct ip_vs_pe *ip_vs_pe_getbyname(const char *name); 818 + struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name); 819 + 820 + static inline void ip_vs_pe_get(const struct ip_vs_pe *pe) 821 + { 822 + if (pe && pe->module) 823 + __module_get(pe->module); 824 + } 825 + 826 + static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) 827 + { 828 + if (pe && pe->module) 829 + module_put(pe->module); 830 + } 970 831 971 832 /* 972 833 * IPVS protocol functions (from ip_vs_proto.c) 973 834 */ 974 835 extern int ip_vs_protocol_init(void); 975 836 extern void ip_vs_protocol_cleanup(void); 976 - extern void ip_vs_protocol_timeout_change(int flags); 837 + extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); 977 838 extern int *ip_vs_create_timeout_table(int *table, int size); 978 839 extern int 979 840 ip_vs_set_state_timeout(int *table, int num, const char *const *names, ··· 1015 852 extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); 1016 853 extern struct ip_vs_conn * 1017 854 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 1018 - struct ip_vs_protocol *pp, int *ignored); 855 + 
struct ip_vs_proto_data *pd, int *ignored); 1019 856 extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 1020 - struct ip_vs_protocol *pp); 857 + struct ip_vs_proto_data *pd); 1021 858 1022 859 1023 860 /* 1024 861 * IPVS control data and functions (from ip_vs_ctl.c) 1025 862 */ 1026 - extern int sysctl_ip_vs_cache_bypass; 1027 - extern int sysctl_ip_vs_expire_nodest_conn; 1028 - extern int sysctl_ip_vs_expire_quiescent_template; 1029 - extern int sysctl_ip_vs_sync_threshold[2]; 1030 - extern int sysctl_ip_vs_nat_icmp_send; 1031 - extern int sysctl_ip_vs_conntrack; 1032 - extern int sysctl_ip_vs_snat_reroute; 1033 863 extern struct ip_vs_stats ip_vs_stats; 1034 864 extern const struct ctl_path net_vs_ctl_path[]; 865 + extern int sysctl_ip_vs_sync_ver; 1035 866 867 + extern void ip_vs_sync_switch_mode(struct net *net, int mode); 1036 868 extern struct ip_vs_service * 1037 - ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 869 + ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, 1038 870 const union nf_inet_addr *vaddr, __be16 vport); 1039 871 1040 872 static inline void ip_vs_service_put(struct ip_vs_service *svc) ··· 1038 880 } 1039 881 1040 882 extern struct ip_vs_dest * 1041 - ip_vs_lookup_real_service(int af, __u16 protocol, 883 + ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, 1042 884 const union nf_inet_addr *daddr, __be16 dport); 1043 885 1044 886 extern int ip_vs_use_count_inc(void); ··· 1046 888 extern int ip_vs_control_init(void); 1047 889 extern void ip_vs_control_cleanup(void); 1048 890 extern struct ip_vs_dest * 1049 - ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, 1050 - const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol); 891 + ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, 892 + __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, 893 + __u16 protocol, __u32 fwmark); 1051 894 extern struct ip_vs_dest 
*ip_vs_try_bind_dest(struct ip_vs_conn *cp); 1052 895 1053 896 ··· 1056 897 * IPVS sync daemon data and function prototypes 1057 898 * (from ip_vs_sync.c) 1058 899 */ 1059 - extern volatile int ip_vs_sync_state; 1060 - extern volatile int ip_vs_master_syncid; 1061 - extern volatile int ip_vs_backup_syncid; 1062 - extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 1063 - extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 1064 - extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); 1065 - extern int stop_sync_thread(int state); 1066 - extern void ip_vs_sync_conn(struct ip_vs_conn *cp); 900 + extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, 901 + __u8 syncid); 902 + extern int stop_sync_thread(struct net *net, int state); 903 + extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); 904 + extern int ip_vs_sync_init(void); 905 + extern void ip_vs_sync_cleanup(void); 1067 906 1068 907 1069 908 /* ··· 1069 912 */ 1070 913 extern int ip_vs_estimator_init(void); 1071 914 extern void ip_vs_estimator_cleanup(void); 1072 - extern void ip_vs_new_estimator(struct ip_vs_stats *stats); 1073 - extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); 915 + extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); 916 + extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); 1074 917 extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); 1075 918 1076 919 /* ··· 1112 955 extern int ip_vs_drop_rate; 1113 956 extern int ip_vs_drop_counter; 1114 957 1115 - static __inline__ int ip_vs_todrop(void) 958 + static inline int ip_vs_todrop(struct netns_ipvs *ipvs) 1116 959 { 1117 - if (!ip_vs_drop_rate) return 0; 1118 - if (--ip_vs_drop_counter > 0) return 0; 1119 - ip_vs_drop_counter = ip_vs_drop_rate; 960 + if (!ipvs->drop_rate) 961 + return 0; 962 + if (--ipvs->drop_counter > 0) 963 + return 0; 964 + ipvs->drop_counter = ipvs->drop_rate; 1120 965 return 1; 1121 966 } 1122 
967 ··· 1206 1047 * Netfilter connection tracking 1207 1048 * (from ip_vs_nfct.c) 1208 1049 */ 1209 - static inline int ip_vs_conntrack_enabled(void) 1050 + static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) 1210 1051 { 1211 - return sysctl_ip_vs_conntrack; 1052 + return ipvs->sysctl_conntrack; 1212 1053 } 1213 1054 1214 1055 extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, ··· 1221 1062 1222 1063 #else 1223 1064 1224 - static inline int ip_vs_conntrack_enabled(void) 1065 + static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) 1225 1066 { 1226 1067 return 0; 1227 1068 }

+2

include/net/net_namespace.h

··· 20 20 #include <net/netns/conntrack.h> 21 21 #endif 22 22 #include <net/netns/xfrm.h> 23 + #include <net/netns/ip_vs.h> 23 24 24 25 struct proc_dir_entry; 25 26 struct net_device; ··· 95 94 #ifdef CONFIG_XFRM 96 95 struct netns_xfrm xfrm; 97 96 #endif 97 + struct netns_ipvs *ipvs; 98 98 }; 99 99 100 100

+18 -5

include/net/netfilter/nf_conntrack.h

··· 50 50 /* per conntrack: application helper private data */ 51 51 union nf_conntrack_help { 52 52 /* insert conntrack helper private data (master) here */ 53 + #if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) 53 54 struct nf_ct_ftp_master ct_ftp_info; 55 + #endif 56 + #if defined(CONFIG_NF_CONNTRACK_PPTP) || \ 57 + defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) 54 58 struct nf_ct_pptp_master ct_pptp_info; 59 + #endif 60 + #if defined(CONFIG_NF_CONNTRACK_H323) || \ 61 + defined(CONFIG_NF_CONNTRACK_H323_MODULE) 55 62 struct nf_ct_h323_master ct_h323_info; 63 + #endif 64 + #if defined(CONFIG_NF_CONNTRACK_SANE) || \ 65 + defined(CONFIG_NF_CONNTRACK_SANE_MODULE) 56 66 struct nf_ct_sane_master ct_sane_info; 67 + #endif 68 + #if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) 57 69 struct nf_ct_sip_master ct_sip_info; 70 + #endif 58 71 }; 59 72 60 73 #include <linux/types.h> ··· 129 116 u_int32_t secmark; 130 117 #endif 131 118 132 - /* Storage reserved for other modules: */ 133 - union nf_conntrack_proto proto; 134 - 135 119 /* Extensions */ 136 120 struct nf_ct_ext *ext; 137 121 #ifdef CONFIG_NET_NS 138 122 struct net *ct_net; 139 123 #endif 124 + 125 + /* Storage reserved for other modules, must be the last member */ 126 + union nf_conntrack_proto proto; 140 127 }; 141 128 142 129 static inline struct nf_conn * ··· 202 189 * Allocate a hashtable of hlist_head (if nulls == 0), 203 190 * or hlist_nulls_head (if nulls == 1) 204 191 */ 205 - extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls); 192 + extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); 206 193 207 - extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size); 194 + extern void nf_ct_free_hashtable(void *hash, unsigned int size); 208 195 209 196 extern struct nf_conntrack_tuple_hash * 210 197 __nf_conntrack_find(struct net *net, u16 zone,

+10 -2

include/net/netfilter/nf_conntrack_ecache.h

··· 23 23 static inline struct nf_conntrack_ecache * 24 24 nf_ct_ecache_find(const struct nf_conn *ct) 25 25 { 26 + #ifdef CONFIG_NF_CONNTRACK_EVENTS 26 27 return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE); 28 + #else 29 + return NULL; 30 + #endif 27 31 } 28 32 29 33 static inline struct nf_conntrack_ecache * 30 34 nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) 31 35 { 36 + #ifdef CONFIG_NF_CONNTRACK_EVENTS 32 37 struct net *net = nf_ct_net(ct); 33 38 struct nf_conntrack_ecache *e; 34 39 ··· 50 45 e->expmask = expmask; 51 46 } 52 47 return e; 48 + #else 49 + return NULL; 50 + #endif 53 51 }; 54 52 55 53 #ifdef CONFIG_NF_CONNTRACK_EVENTS ··· 67 59 int (*fcn)(unsigned int events, struct nf_ct_event *item); 68 60 }; 69 61 70 - extern struct nf_ct_event_notifier *nf_conntrack_event_cb; 62 + extern struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; 71 63 extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb); 72 64 extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb); 73 65 ··· 167 159 int (*fcn)(unsigned int events, struct nf_exp_event *item); 168 160 }; 169 161 170 - extern struct nf_exp_event_notifier *nf_expect_event_cb; 162 + extern struct nf_exp_event_notifier __rcu *nf_expect_event_cb; 171 163 extern int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *nb); 172 164 extern void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *nb); 173 165

+10

include/net/netfilter/nf_conntrack_extend.h

··· 7 7 8 8 enum nf_ct_ext_id { 9 9 NF_CT_EXT_HELPER, 10 + #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) 10 11 NF_CT_EXT_NAT, 12 + #endif 11 13 NF_CT_EXT_ACCT, 14 + #ifdef CONFIG_NF_CONNTRACK_EVENTS 12 15 NF_CT_EXT_ECACHE, 16 + #endif 17 + #ifdef CONFIG_NF_CONNTRACK_ZONES 13 18 NF_CT_EXT_ZONE, 19 + #endif 20 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 21 + NF_CT_EXT_TSTAMP, 22 + #endif 14 23 NF_CT_EXT_NUM, 15 24 }; 16 25 ··· 28 19 #define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter 29 20 #define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache 30 21 #define NF_CT_EXT_ZONE_TYPE struct nf_conntrack_zone 22 + #define NF_CT_EXT_TSTAMP_TYPE struct nf_conn_tstamp 31 23 32 24 /* Extensions: optional stuff which isn't permanently in struct. */ 33 25 struct nf_ct_ext {

+6

include/net/netfilter/nf_conntrack_helper.h

··· 63 63 extern int nf_conntrack_helper_init(void); 64 64 extern void nf_conntrack_helper_fini(void); 65 65 66 + extern int nf_conntrack_broadcast_help(struct sk_buff *skb, 67 + unsigned int protoff, 68 + struct nf_conn *ct, 69 + enum ip_conntrack_info ctinfo, 70 + unsigned int timeout); 71 + 66 72 #endif /*_NF_CONNTRACK_HELPER_H*/

+1 -1

include/net/netfilter/nf_conntrack_l3proto.h

··· 73 73 struct module *me; 74 74 }; 75 75 76 - extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; 76 + extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX]; 77 77 78 78 /* Protocol registration. */ 79 79 extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);

+53

include/net/netfilter/nf_conntrack_timestamp.h

··· 1 + #ifndef _NF_CONNTRACK_TSTAMP_H 2 + #define _NF_CONNTRACK_TSTAMP_H 3 + 4 + #include <net/net_namespace.h> 5 + #include <linux/netfilter/nf_conntrack_common.h> 6 + #include <linux/netfilter/nf_conntrack_tuple_common.h> 7 + #include <net/netfilter/nf_conntrack.h> 8 + #include <net/netfilter/nf_conntrack_extend.h> 9 + 10 + struct nf_conn_tstamp { 11 + u_int64_t start; 12 + u_int64_t stop; 13 + }; 14 + 15 + static inline 16 + struct nf_conn_tstamp *nf_conn_tstamp_find(const struct nf_conn *ct) 17 + { 18 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 19 + return nf_ct_ext_find(ct, NF_CT_EXT_TSTAMP); 20 + #else 21 + return NULL; 22 + #endif 23 + } 24 + 25 + static inline 26 + struct nf_conn_tstamp *nf_ct_tstamp_ext_add(struct nf_conn *ct, gfp_t gfp) 27 + { 28 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 29 + struct net *net = nf_ct_net(ct); 30 + 31 + if (!net->ct.sysctl_tstamp) 32 + return NULL; 33 + 34 + return nf_ct_ext_add(ct, NF_CT_EXT_TSTAMP, gfp); 35 + #else 36 + return NULL; 37 + #endif 38 + }; 39 + 40 + static inline bool nf_ct_tstamp_enabled(struct net *net) 41 + { 42 + return net->ct.sysctl_tstamp != 0; 43 + } 44 + 45 + static inline void nf_ct_set_tstamp(struct net *net, bool enable) 46 + { 47 + net->ct.sysctl_tstamp = enable; 48 + } 49 + 50 + extern int nf_conntrack_tstamp_init(struct net *net); 51 + extern void nf_conntrack_tstamp_fini(struct net *net); 52 + 53 + #endif /* _NF_CONNTRACK_TSTAMP_H */

+6

include/net/netfilter/nf_nat.h

··· 56 56 /* per conntrack: nat application helper private data */ 57 57 union nf_conntrack_nat_help { 58 58 /* insert nat helper private data here */ 59 + #if defined(CONFIG_NF_NAT_PPTP) || defined(CONFIG_NF_NAT_PPTP_MODULE) 59 60 struct nf_nat_pptp nat_pptp_info; 61 + #endif 60 62 }; 61 63 62 64 struct nf_conn; ··· 86 84 87 85 static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) 88 86 { 87 + #if defined(CONFIG_NF_NAT) || defined(CONFIG_NF_NAT_MODULE) 89 88 return nf_ct_ext_find(ct, NF_CT_EXT_NAT); 89 + #else 90 + return NULL; 91 + #endif 90 92 } 91 93 92 94 #else /* !__KERNEL__: iptables wants this to compile. */

+2 -2

include/net/netfilter/nf_nat_core.h

··· 21 21 enum nf_nat_manip_type manip) 22 22 { 23 23 if (manip == IP_NAT_MANIP_SRC) 24 - return test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 24 + return ct->status & IPS_SRC_NAT_DONE; 25 25 else 26 - return test_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 26 + return ct->status & IPS_DST_NAT_DONE; 27 27 } 28 28 29 29 struct nlattr;

+2 -2

include/net/netns/conntrack.h

··· 21 21 int sysctl_events; 22 22 unsigned int sysctl_events_retry_timeout; 23 23 int sysctl_acct; 24 + int sysctl_tstamp; 24 25 int sysctl_checksum; 25 26 unsigned int sysctl_log_invalid; /* Log invalid packets */ 26 27 #ifdef CONFIG_SYSCTL 27 28 struct ctl_table_header *sysctl_header; 28 29 struct ctl_table_header *acct_sysctl_header; 30 + struct ctl_table_header *tstamp_sysctl_header; 29 31 struct ctl_table_header *event_sysctl_header; 30 32 #endif 31 - int hash_vmalloc; 32 - int expect_vmalloc; 33 33 char *slabname; 34 34 }; 35 35 #endif

+143

include/net/netns/ip_vs.h

··· 1 + /* 2 + * IP Virtual Server 3 + * Data structure for network namspace 4 + * 5 + */ 6 + 7 + #ifndef IP_VS_H_ 8 + #define IP_VS_H_ 9 + 10 + #include <linux/list.h> 11 + #include <linux/mutex.h> 12 + #include <linux/list_nulls.h> 13 + #include <linux/ip_vs.h> 14 + #include <asm/atomic.h> 15 + #include <linux/in.h> 16 + 17 + struct ip_vs_stats; 18 + struct ip_vs_sync_buff; 19 + struct ctl_table_header; 20 + 21 + struct netns_ipvs { 22 + int gen; /* Generation */ 23 + /* 24 + * Hash table: for real service lookups 25 + */ 26 + #define IP_VS_RTAB_BITS 4 27 + #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) 28 + #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) 29 + 30 + struct list_head rs_table[IP_VS_RTAB_SIZE]; 31 + /* ip_vs_app */ 32 + struct list_head app_list; 33 + struct mutex app_mutex; 34 + struct lock_class_key app_key; /* mutex debuging */ 35 + 36 + /* ip_vs_proto */ 37 + #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 38 + struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; 39 + /* ip_vs_proto_tcp */ 40 + #ifdef CONFIG_IP_VS_PROTO_TCP 41 + #define TCP_APP_TAB_BITS 4 42 + #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) 43 + #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) 44 + struct list_head tcp_apps[TCP_APP_TAB_SIZE]; 45 + spinlock_t tcp_app_lock; 46 + #endif 47 + /* ip_vs_proto_udp */ 48 + #ifdef CONFIG_IP_VS_PROTO_UDP 49 + #define UDP_APP_TAB_BITS 4 50 + #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) 51 + #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) 52 + struct list_head udp_apps[UDP_APP_TAB_SIZE]; 53 + spinlock_t udp_app_lock; 54 + #endif 55 + /* ip_vs_proto_sctp */ 56 + #ifdef CONFIG_IP_VS_PROTO_SCTP 57 + #define SCTP_APP_TAB_BITS 4 58 + #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) 59 + #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) 60 + /* Hash table for SCTP application incarnations */ 61 + struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; 62 + spinlock_t sctp_app_lock; 63 + #endif 64 + /* ip_vs_conn */ 65 + atomic_t 
conn_count; /* connection counter */ 66 + 67 + /* ip_vs_ctl */ 68 + struct ip_vs_stats *tot_stats; /* Statistics & est. */ 69 + struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ 70 + seqcount_t *ustats_seq; /* u64 read retry */ 71 + 72 + int num_services; /* no of virtual services */ 73 + /* 1/rate drop and drop-entry variables */ 74 + struct delayed_work defense_work; /* Work handler */ 75 + int drop_rate; 76 + int drop_counter; 77 + atomic_t dropentry; 78 + /* locks in ctl.c */ 79 + spinlock_t dropentry_lock; /* drop entry handling */ 80 + spinlock_t droppacket_lock; /* drop packet handling */ 81 + spinlock_t securetcp_lock; /* state and timeout tables */ 82 + rwlock_t rs_lock; /* real services table */ 83 + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 84 + struct lock_class_key ctl_key; /* ctl_mutex debuging */ 85 + /* Trash for destinations */ 86 + struct list_head dest_trash; 87 + /* Service counters */ 88 + atomic_t ftpsvc_counter; 89 + atomic_t nullsvc_counter; 90 + 91 + /* sys-ctl struct */ 92 + struct ctl_table_header *sysctl_hdr; 93 + struct ctl_table *sysctl_tbl; 94 + /* sysctl variables */ 95 + int sysctl_amemthresh; 96 + int sysctl_am_droprate; 97 + int sysctl_drop_entry; 98 + int sysctl_drop_packet; 99 + int sysctl_secure_tcp; 100 + #ifdef CONFIG_IP_VS_NFCT 101 + int sysctl_conntrack; 102 + #endif 103 + int sysctl_snat_reroute; 104 + int sysctl_sync_ver; 105 + int sysctl_cache_bypass; 106 + int sysctl_expire_nodest_conn; 107 + int sysctl_expire_quiescent_template; 108 + int sysctl_sync_threshold[2]; 109 + int sysctl_nat_icmp_send; 110 + 111 + /* ip_vs_lblc */ 112 + int sysctl_lblc_expiration; 113 + struct ctl_table_header *lblc_ctl_header; 114 + struct ctl_table *lblc_ctl_table; 115 + /* ip_vs_lblcr */ 116 + int sysctl_lblcr_expiration; 117 + struct ctl_table_header *lblcr_ctl_header; 118 + struct ctl_table *lblcr_ctl_table; 119 + /* ip_vs_est */ 120 + struct list_head est_list; /* estimator list */ 121 + spinlock_t 
est_lock; 122 + struct timer_list est_timer; /* Estimation timer */ 123 + /* ip_vs_sync */ 124 + struct list_head sync_queue; 125 + spinlock_t sync_lock; 126 + struct ip_vs_sync_buff *sync_buff; 127 + spinlock_t sync_buff_lock; 128 + struct sockaddr_in sync_mcast_addr; 129 + struct task_struct *master_thread; 130 + struct task_struct *backup_thread; 131 + int send_mesg_maxlen; 132 + int recv_mesg_maxlen; 133 + volatile int sync_state; 134 + volatile int master_syncid; 135 + volatile int backup_syncid; 136 + /* multicast interface name */ 137 + char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 138 + char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 139 + /* net name space ptr */ 140 + struct net *net; /* Needed by timer routines */ 141 + }; 142 + 143 + #endif /* IP_VS_H_ */

-1

include/net/netns/ipv4.h

··· 43 43 struct xt_table *nat_table; 44 44 struct hlist_head *nat_bysource; 45 45 unsigned int nat_htable_size; 46 - int nat_vmalloced; 47 46 #endif 48 47 49 48 int sysctl_icmp_echo_ignore_all;

+2

kernel/audit.c

··· 74 74 int audit_enabled; 75 75 int audit_ever_enabled; 76 76 77 + EXPORT_SYMBOL_GPL(audit_enabled); 78 + 77 79 /* Default state when kernel boots without any parameters. */ 78 80 static int audit_default; 79 81

+34 -12

net/bridge/netfilter/ebt_ip6.c

··· 22 22 #include <linux/netfilter_bridge/ebtables.h> 23 23 #include <linux/netfilter_bridge/ebt_ip6.h> 24 24 25 - struct tcpudphdr { 26 - __be16 src; 27 - __be16 dst; 25 + union pkthdr { 26 + struct { 27 + __be16 src; 28 + __be16 dst; 29 + } tcpudphdr; 30 + struct { 31 + u8 type; 32 + u8 code; 33 + } icmphdr; 28 34 }; 29 35 30 36 static bool ··· 39 33 const struct ebt_ip6_info *info = par->matchinfo; 40 34 const struct ipv6hdr *ih6; 41 35 struct ipv6hdr _ip6h; 42 - const struct tcpudphdr *pptr; 43 - struct tcpudphdr _ports; 36 + const union pkthdr *pptr; 37 + union pkthdr _pkthdr; 44 38 45 39 ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); 46 40 if (ih6 == NULL) ··· 62 56 return false; 63 57 if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) 64 58 return false; 65 - if (!(info->bitmask & EBT_IP6_DPORT) && 66 - !(info->bitmask & EBT_IP6_SPORT)) 59 + if (!(info->bitmask & ( EBT_IP6_DPORT | 60 + EBT_IP6_SPORT | EBT_IP6_ICMP6))) 67 61 return true; 68 - pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), 69 - &_ports); 62 + 63 + /* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. 
*/ 64 + pptr = skb_header_pointer(skb, offset_ph, sizeof(_pkthdr), 65 + &_pkthdr); 70 66 if (pptr == NULL) 71 67 return false; 72 68 if (info->bitmask & EBT_IP6_DPORT) { 73 - u32 dst = ntohs(pptr->dst); 69 + u16 dst = ntohs(pptr->tcpudphdr.dst); 74 70 if (FWINV(dst < info->dport[0] || 75 71 dst > info->dport[1], EBT_IP6_DPORT)) 76 72 return false; 77 73 } 78 74 if (info->bitmask & EBT_IP6_SPORT) { 79 - u32 src = ntohs(pptr->src); 75 + u16 src = ntohs(pptr->tcpudphdr.src); 80 76 if (FWINV(src < info->sport[0] || 81 77 src > info->sport[1], EBT_IP6_SPORT)) 82 78 return false; 83 79 } 84 - return true; 80 + if ((info->bitmask & EBT_IP6_ICMP6) && 81 + FWINV(pptr->icmphdr.type < info->icmpv6_type[0] || 82 + pptr->icmphdr.type > info->icmpv6_type[1] || 83 + pptr->icmphdr.code < info->icmpv6_code[0] || 84 + pptr->icmphdr.code > info->icmpv6_code[1], 85 + EBT_IP6_ICMP6)) 86 + return false; 85 87 } 86 88 return true; 87 89 } ··· 117 103 return -EINVAL; 118 104 if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) 119 105 return -EINVAL; 106 + if (info->bitmask & EBT_IP6_ICMP6) { 107 + if ((info->invflags & EBT_IP6_PROTO) || 108 + info->protocol != IPPROTO_ICMPV6) 109 + return -EINVAL; 110 + if (info->icmpv6_type[0] > info->icmpv6_type[1] || 111 + info->icmpv6_code[0] > info->icmpv6_code[1]) 112 + return -EINVAL; 113 + } 120 114 return 0; 121 115 } 122 116

+1

net/bridge/netfilter/ebtables.c

··· 1764 1764 1765 1765 newinfo->entries_size = size; 1766 1766 1767 + xt_compat_init_offsets(AF_INET, info->nentries); 1767 1768 return EBT_ENTRY_ITERATE(entries, size, compat_calc_entry, info, 1768 1769 entries, newinfo); 1769 1770 }

+3 -1

net/ipv4/Kconfig

··· 140 140 handled by the klogd daemon which is responsible for kernel messages 141 141 ("man klogd"). 142 142 143 + config IP_ROUTE_CLASSID 144 + bool 145 + 143 146 config IP_PNP 144 147 bool "IP: kernel level autoconfiguration" 145 148 help ··· 660 657 on the Internet. 661 658 662 659 If unsure, say N. 663 -

+5 -5

net/ipv4/fib_rules.c

··· 41 41 __be32 srcmask; 42 42 __be32 dst; 43 43 __be32 dstmask; 44 - #ifdef CONFIG_NET_CLS_ROUTE 44 + #ifdef CONFIG_IP_ROUTE_CLASSID 45 45 u32 tclassid; 46 46 #endif 47 47 }; 48 48 49 - #ifdef CONFIG_NET_CLS_ROUTE 49 + #ifdef CONFIG_IP_ROUTE_CLASSID 50 50 u32 fib_rules_tclass(struct fib_result *res) 51 51 { 52 52 return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; ··· 165 165 if (frh->dst_len) 166 166 rule4->dst = nla_get_be32(tb[FRA_DST]); 167 167 168 - #ifdef CONFIG_NET_CLS_ROUTE 168 + #ifdef CONFIG_IP_ROUTE_CLASSID 169 169 if (tb[FRA_FLOW]) 170 170 rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); 171 171 #endif ··· 195 195 if (frh->tos && (rule4->tos != frh->tos)) 196 196 return 0; 197 197 198 - #ifdef CONFIG_NET_CLS_ROUTE 198 + #ifdef CONFIG_IP_ROUTE_CLASSID 199 199 if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) 200 200 return 0; 201 201 #endif ··· 224 224 if (rule4->src_len) 225 225 NLA_PUT_BE32(skb, FRA_SRC, rule4->src); 226 226 227 - #ifdef CONFIG_NET_CLS_ROUTE 227 + #ifdef CONFIG_IP_ROUTE_CLASSID 228 228 if (rule4->tclassid) 229 229 NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); 230 230 #endif

+7 -7

net/ipv4/fib_semantics.c

··· 200 200 #ifdef CONFIG_IP_ROUTE_MULTIPATH 201 201 nh->nh_weight != onh->nh_weight || 202 202 #endif 203 - #ifdef CONFIG_NET_CLS_ROUTE 203 + #ifdef CONFIG_IP_ROUTE_CLASSID 204 204 nh->nh_tclassid != onh->nh_tclassid || 205 205 #endif 206 206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) ··· 422 422 423 423 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 424 424 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 425 - #ifdef CONFIG_NET_CLS_ROUTE 425 + #ifdef CONFIG_IP_ROUTE_CLASSID 426 426 nla = nla_find(attrs, attrlen, RTA_FLOW); 427 427 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 428 428 #endif ··· 476 476 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 477 477 if (nla && nla_get_be32(nla) != nh->nh_gw) 478 478 return 1; 479 - #ifdef CONFIG_NET_CLS_ROUTE 479 + #ifdef CONFIG_IP_ROUTE_CLASSID 480 480 nla = nla_find(attrs, attrlen, RTA_FLOW); 481 481 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 482 482 return 1; ··· 779 779 goto err_inval; 780 780 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 781 781 goto err_inval; 782 - #ifdef CONFIG_NET_CLS_ROUTE 782 + #ifdef CONFIG_IP_ROUTE_CLASSID 783 783 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 784 784 goto err_inval; 785 785 #endif ··· 792 792 nh->nh_oif = cfg->fc_oif; 793 793 nh->nh_gw = cfg->fc_gw; 794 794 nh->nh_flags = cfg->fc_flags; 795 - #ifdef CONFIG_NET_CLS_ROUTE 795 + #ifdef CONFIG_IP_ROUTE_CLASSID 796 796 nh->nh_tclassid = cfg->fc_flow; 797 797 #endif 798 798 #ifdef CONFIG_IP_ROUTE_MULTIPATH ··· 1002 1002 1003 1003 if (fi->fib_nh->nh_oif) 1004 1004 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 1005 - #ifdef CONFIG_NET_CLS_ROUTE 1005 + #ifdef CONFIG_IP_ROUTE_CLASSID 1006 1006 if (fi->fib_nh[0].nh_tclassid) 1007 1007 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 1008 1008 #endif ··· 1027 1027 1028 1028 if (nh->nh_gw) 1029 1029 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1030 - #ifdef CONFIG_NET_CLS_ROUTE 1030 + #ifdef CONFIG_IP_ROUTE_CLASSID 1031 1031 if (nh->nh_tclassid) 
1032 1032 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1033 1033 #endif

+1 -1

net/ipv4/ip_input.c

··· 340 340 } 341 341 } 342 342 343 - #ifdef CONFIG_NET_CLS_ROUTE 343 + #ifdef CONFIG_IP_ROUTE_CLASSID 344 344 if (unlikely(skb_dst(skb)->tclassid)) { 345 345 struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); 346 346 u32 idx = skb_dst(skb)->tclassid;

+2 -1

net/ipv4/netfilter/Kconfig

··· 206 206 207 207 config NF_NAT_SNMP_BASIC 208 208 tristate "Basic SNMP-ALG support" 209 - depends on NF_NAT 209 + depends on NF_CONNTRACK_SNMP && NF_NAT 210 210 depends on NETFILTER_ADVANCED 211 + default NF_NAT && NF_CONNTRACK_SNMP 211 212 ---help--- 212 213 213 214 This module implements an Application Layer Gateway (ALG) for

+2

net/ipv4/netfilter/arp_tables.c

··· 866 866 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 867 867 newinfo->initial_entries = 0; 868 868 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 869 + xt_compat_init_offsets(NFPROTO_ARP, info->number); 869 870 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 870 871 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 871 872 if (ret != 0) ··· 1334 1333 duprintf("translate_compat_table: size %u\n", info->size); 1335 1334 j = 0; 1336 1335 xt_compat_lock(NFPROTO_ARP); 1336 + xt_compat_init_offsets(NFPROTO_ARP, number); 1337 1337 /* Walk through entries, checking offsets. */ 1338 1338 xt_entry_foreach(iter0, entry0, total_size) { 1339 1339 ret = check_compat_entry_size_and_hooks(iter0, info, &size,

+2

net/ipv4/netfilter/ip_tables.c

··· 1063 1063 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1064 1064 newinfo->initial_entries = 0; 1065 1065 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1066 + xt_compat_init_offsets(AF_INET, info->number); 1066 1067 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1067 1068 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1068 1069 if (ret != 0) ··· 1665 1664 duprintf("translate_compat_table: size %u\n", info->size); 1666 1665 j = 0; 1667 1666 xt_compat_lock(AF_INET); 1667 + xt_compat_init_offsets(AF_INET, number); 1668 1668 /* Walk through entries, checking offsets. */ 1669 1669 xt_entry_foreach(iter0, entry0, total_size) { 1670 1670 ret = check_compat_entry_size_and_hooks(iter0, info, &size,

+1 -6

net/ipv4/netfilter/ipt_CLUSTERIP.c

··· 300 300 * that the ->target() function isn't called after ->destroy() */ 301 301 302 302 ct = nf_ct_get(skb, &ctinfo); 303 - if (ct == NULL) { 304 - pr_info("no conntrack!\n"); 305 - /* FIXME: need to drop invalid ones, since replies 306 - * to outgoing connections of other nodes will be 307 - * marked as INVALID */ 303 + if (ct == NULL) 308 304 return NF_DROP; 309 - } 310 305 311 306 /* special case: ICMP error handling. conntrack distinguishes between 312 307 * error messages (RELATED) and information requests (see below) */

+1 -2

net/ipv4/netfilter/ipt_LOG.c

··· 442 442 } 443 443 #endif 444 444 445 - /* MAC logging for input path only. */ 446 - if (in && !out) 445 + if (in != NULL) 447 446 dump_mac_header(m, loginfo, skb); 448 447 449 448 dump_packet(m, loginfo, skb, 0);

+11 -6

net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c

··· 20 20 #include <net/netfilter/nf_conntrack_l4proto.h> 21 21 #include <net/netfilter/nf_conntrack_expect.h> 22 22 #include <net/netfilter/nf_conntrack_acct.h> 23 + #include <linux/rculist_nulls.h> 23 24 24 25 struct ct_iter_state { 25 26 struct seq_net_private p; ··· 36 35 for (st->bucket = 0; 37 36 st->bucket < net->ct.htable_size; 38 37 st->bucket++) { 39 - n = rcu_dereference(net->ct.hash[st->bucket].first); 38 + n = rcu_dereference( 39 + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 40 40 if (!is_a_nulls(n)) 41 41 return n; 42 42 } ··· 50 48 struct net *net = seq_file_net(seq); 51 49 struct ct_iter_state *st = seq->private; 52 50 53 - head = rcu_dereference(head->next); 51 + head = rcu_dereference(hlist_nulls_next_rcu(head)); 54 52 while (is_a_nulls(head)) { 55 53 if (likely(get_nulls_value(head) == st->bucket)) { 56 54 if (++st->bucket >= net->ct.htable_size) 57 55 return NULL; 58 56 } 59 - head = rcu_dereference(net->ct.hash[st->bucket].first); 57 + head = rcu_dereference( 58 + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 60 59 } 61 60 return head; 62 61 } ··· 220 217 struct hlist_node *n; 221 218 222 219 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 223 - n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 220 + n = rcu_dereference( 221 + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 224 222 if (n) 225 223 return n; 226 224 } ··· 234 230 struct net *net = seq_file_net(seq); 235 231 struct ct_expect_iter_state *st = seq->private; 236 232 237 - head = rcu_dereference(head->next); 233 + head = rcu_dereference(hlist_next_rcu(head)); 238 234 while (head == NULL) { 239 235 if (++st->bucket >= nf_ct_expect_hsize) 240 236 return NULL; 241 - head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 237 + head = rcu_dereference( 238 + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 242 239 } 243 240 return head; 244 241 }

+4 -4

net/ipv4/netfilter/nf_nat_amanda.c

··· 44 44 45 45 /* Try to get same port: if not, try to change it. */ 46 46 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { 47 - int ret; 47 + int res; 48 48 49 49 exp->tuple.dst.u.tcp.port = htons(port); 50 - ret = nf_ct_expect_related(exp); 51 - if (ret == 0) 50 + res = nf_ct_expect_related(exp); 51 + if (res == 0) 52 52 break; 53 - else if (ret != -EBUSY) { 53 + else if (res != -EBUSY) { 54 54 port = 0; 55 55 break; 56 56 }

+8 -7

net/ipv4/netfilter/nf_nat_core.c

··· 323 323 324 324 /* It's done. */ 325 325 if (maniptype == IP_NAT_MANIP_DST) 326 - set_bit(IPS_DST_NAT_DONE_BIT, &ct->status); 326 + ct->status |= IPS_DST_NAT_DONE; 327 327 else 328 - set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); 328 + ct->status |= IPS_SRC_NAT_DONE; 329 329 330 330 return NF_ACCEPT; 331 331 } ··· 502 502 int ret = 0; 503 503 504 504 spin_lock_bh(&nf_nat_lock); 505 - if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) { 505 + if (rcu_dereference_protected( 506 + nf_nat_protos[proto->protonum], 507 + lockdep_is_held(&nf_nat_lock) 508 + ) != &nf_nat_unknown_protocol) { 506 509 ret = -EBUSY; 507 510 goto out; 508 511 } ··· 682 679 { 683 680 /* Leave them the same for the moment. */ 684 681 net->ipv4.nat_htable_size = net->ct.htable_size; 685 - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 686 - &net->ipv4.nat_vmalloced, 0); 682 + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0); 687 683 if (!net->ipv4.nat_bysource) 688 684 return -ENOMEM; 689 685 return 0; ··· 704 702 { 705 703 nf_ct_iterate_cleanup(net, &clean_nat, NULL); 706 704 synchronize_rcu(); 707 - nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, 708 - net->ipv4.nat_htable_size); 705 + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size); 709 706 } 710 707 711 708 static struct pernet_operations nf_nat_net_ops = {

+5 -4

net/ipv4/netfilter/nf_nat_snmp_basic.c

··· 54 54 #include <net/netfilter/nf_conntrack_expect.h> 55 55 #include <net/netfilter/nf_conntrack_helper.h> 56 56 #include <net/netfilter/nf_nat_helper.h> 57 + #include <linux/netfilter/nf_conntrack_snmp.h> 57 58 58 59 MODULE_LICENSE("GPL"); 59 60 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); ··· 1311 1310 { 1312 1311 int ret = 0; 1313 1312 1314 - ret = nf_conntrack_helper_register(&snmp_helper); 1315 - if (ret < 0) 1316 - return ret; 1313 + BUG_ON(nf_nat_snmp_hook != NULL); 1314 + rcu_assign_pointer(nf_nat_snmp_hook, help); 1315 + 1317 1316 ret = nf_conntrack_helper_register(&snmp_trap_helper); 1318 1317 if (ret < 0) { 1319 1318 nf_conntrack_helper_unregister(&snmp_helper); ··· 1324 1323 1325 1324 static void __exit nf_nat_snmp_basic_fini(void) 1326 1325 { 1327 - nf_conntrack_helper_unregister(&snmp_helper); 1326 + rcu_assign_pointer(nf_nat_snmp_hook, NULL); 1328 1327 nf_conntrack_helper_unregister(&snmp_trap_helper); 1329 1328 } 1330 1329

+13 -13

net/ipv4/route.c

··· 514 514 .release = seq_release, 515 515 }; 516 516 517 - #ifdef CONFIG_NET_CLS_ROUTE 517 + #ifdef CONFIG_IP_ROUTE_CLASSID 518 518 static int rt_acct_proc_show(struct seq_file *m, void *v) 519 519 { 520 520 struct ip_rt_acct *dst, *src; ··· 567 567 if (!pde) 568 568 goto err2; 569 569 570 - #ifdef CONFIG_NET_CLS_ROUTE 570 + #ifdef CONFIG_IP_ROUTE_CLASSID 571 571 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops); 572 572 if (!pde) 573 573 goto err3; 574 574 #endif 575 575 return 0; 576 576 577 - #ifdef CONFIG_NET_CLS_ROUTE 577 + #ifdef CONFIG_IP_ROUTE_CLASSID 578 578 err3: 579 579 remove_proc_entry("rt_cache", net->proc_net_stat); 580 580 #endif ··· 588 588 { 589 589 remove_proc_entry("rt_cache", net->proc_net_stat); 590 590 remove_proc_entry("rt_cache", net->proc_net); 591 - #ifdef CONFIG_NET_CLS_ROUTE 591 + #ifdef CONFIG_IP_ROUTE_CLASSID 592 592 remove_proc_entry("rt_acct", net->proc_net); 593 593 #endif 594 594 } ··· 1775 1775 memcpy(addr, &src, 4); 1776 1776 } 1777 1777 1778 - #ifdef CONFIG_NET_CLS_ROUTE 1778 + #ifdef CONFIG_IP_ROUTE_CLASSID 1779 1779 static void set_class_tag(struct rtable *rt, u32 tag) 1780 1780 { 1781 1781 if (!(rt->dst.tclassid & 0xFFFF)) ··· 1825 1825 FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 1826 1826 rt->rt_gateway = FIB_RES_GW(*res); 1827 1827 dst_import_metrics(dst, fi->fib_metrics); 1828 - #ifdef CONFIG_NET_CLS_ROUTE 1828 + #ifdef CONFIG_IP_ROUTE_CLASSID 1829 1829 dst->tclassid = FIB_RES_NH(*res).nh_tclassid; 1830 1830 #endif 1831 1831 } ··· 1835 1835 if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1836 1836 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1837 1837 1838 - #ifdef CONFIG_NET_CLS_ROUTE 1838 + #ifdef CONFIG_IP_ROUTE_CLASSID 1839 1839 #ifdef CONFIG_IP_MULTIPLE_TABLES 1840 1840 set_class_tag(rt, fib_rules_tclass(res)); 1841 1841 #endif ··· 1891 1891 rth->fl.mark = skb->mark; 1892 1892 rth->fl.fl4_src = saddr; 1893 1893 rth->rt_src = saddr; 1894 - #ifdef CONFIG_NET_CLS_ROUTE 1894 + #ifdef 
CONFIG_IP_ROUTE_CLASSID 1895 1895 rth->dst.tclassid = itag; 1896 1896 #endif 1897 1897 rth->rt_iif = ··· 2208 2208 rth->fl.mark = skb->mark; 2209 2209 rth->fl.fl4_src = saddr; 2210 2210 rth->rt_src = saddr; 2211 - #ifdef CONFIG_NET_CLS_ROUTE 2211 + #ifdef CONFIG_IP_ROUTE_CLASSID 2212 2212 rth->dst.tclassid = itag; 2213 2213 #endif 2214 2214 rth->rt_iif = ··· 2828 2828 } 2829 2829 if (rt->dst.dev) 2830 2830 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex); 2831 - #ifdef CONFIG_NET_CLS_ROUTE 2831 + #ifdef CONFIG_IP_ROUTE_CLASSID 2832 2832 if (rt->dst.tclassid) 2833 2833 NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid); 2834 2834 #endif ··· 3249 3249 }; 3250 3250 3251 3251 3252 - #ifdef CONFIG_NET_CLS_ROUTE 3252 + #ifdef CONFIG_IP_ROUTE_CLASSID 3253 3253 struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; 3254 - #endif /* CONFIG_NET_CLS_ROUTE */ 3254 + #endif /* CONFIG_IP_ROUTE_CLASSID */ 3255 3255 3256 3256 static __initdata unsigned long rhash_entries; 3257 3257 static int __init set_rhash_entries(char *str) ··· 3267 3267 { 3268 3268 int rc = 0; 3269 3269 3270 - #ifdef CONFIG_NET_CLS_ROUTE 3270 + #ifdef CONFIG_IP_ROUTE_CLASSID 3271 3271 ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); 3272 3272 if (!ip_rt_acct) 3273 3273 panic("IP: failed to allocate ip_rt_acct\n");

+2

net/ipv6/netfilter/ip6_tables.c

··· 1076 1076 memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); 1077 1077 newinfo->initial_entries = 0; 1078 1078 loc_cpu_entry = info->entries[raw_smp_processor_id()]; 1079 + xt_compat_init_offsets(AF_INET6, info->number); 1079 1080 xt_entry_foreach(iter, loc_cpu_entry, info->size) { 1080 1081 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); 1081 1082 if (ret != 0) ··· 1680 1679 duprintf("translate_compat_table: size %u\n", info->size); 1681 1680 j = 0; 1682 1681 xt_compat_lock(AF_INET6); 1682 + xt_compat_init_offsets(AF_INET6, number); 1683 1683 /* Walk through entries, checking offsets. */ 1684 1684 xt_entry_foreach(iter0, entry0, total_size) { 1685 1685 ret = check_compat_entry_size_and_hooks(iter0, info, &size,

+1 -2

net/ipv6/netfilter/ip6t_LOG.c

··· 452 452 in ? in->name : "", 453 453 out ? out->name : ""); 454 454 455 - /* MAC logging for input path only. */ 456 - if (in && !out) 455 + if (in != NULL) 457 456 dump_mac_header(m, loginfo, skb); 458 457 459 458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);

+1 -1

net/ipv6/netfilter/nf_conntrack_reasm.c

··· 73 73 static struct netns_frags nf_init_frags; 74 74 75 75 #ifdef CONFIG_SYSCTL 76 - struct ctl_table nf_ct_frag6_sysctl_table[] = { 76 + static struct ctl_table nf_ct_frag6_sysctl_table[] = { 77 77 { 78 78 .procname = "nf_conntrack_frag6_timeout", 79 79 .data = &nf_init_frags.timeout,

+42 -1

net/netfilter/Kconfig

··· 85 85 86 86 If unsure, say `N'. 87 87 88 + config NF_CONNTRACK_TIMESTAMP 89 + bool 'Connection tracking timestamping' 90 + depends on NETFILTER_ADVANCED 91 + help 92 + This option enables support for connection tracking timestamping. 93 + This allows you to store the flow start-time and to obtain 94 + the flow-stop time (once it has been destroyed) via Connection 95 + tracking events. 96 + 97 + If unsure, say `N'. 98 + 88 99 config NF_CT_PROTO_DCCP 89 100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' 90 101 depends on EXPERIMENTAL ··· 196 185 197 186 To compile it as a module, choose M here. If unsure, say N. 198 187 188 + config NF_CONNTRACK_BROADCAST 189 + tristate 190 + 199 191 config NF_CONNTRACK_NETBIOS_NS 200 192 tristate "NetBIOS name service protocol support" 201 193 depends on NETFILTER_ADVANCED 194 + select NF_CONNTRACK_BROADCAST 202 195 help 203 196 NetBIOS name service requests are sent as broadcast messages from an 204 197 unprivileged port and responded to with unicast messages to the ··· 216 201 $ ip -4 address show eth0 217 202 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000 218 203 inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0 204 + 205 + To compile it as a module, choose M here. If unsure, say N. 206 + 207 + config NF_CONNTRACK_SNMP 208 + tristate "SNMP service protocol support" 209 + depends on NETFILTER_ADVANCED 210 + select NF_CONNTRACK_BROADCAST 211 + help 212 + SNMP service requests are sent as broadcast messages from an 213 + unprivileged port and responded to with unicast messages to the 214 + same port. This makes them hard to firewall properly because connection 215 + tracking doesn't deal with broadcasts. This helper tracks locally 216 + originating SNMP service requests and the corresponding 217 + responses. It relies on correct IP address configuration, specifically 218 + netmask and broadcast address. 219 219 220 220 To compile it as a module, choose M here. If unsure, say N. 
221 221 ··· 355 325 # alphabetically ordered list of targets 356 326 357 327 comment "Xtables targets" 328 + 329 + config NETFILTER_XT_TARGET_AUDIT 330 + tristate "AUDIT target support" 331 + depends on AUDIT 332 + depends on NETFILTER_ADVANCED 333 + ---help--- 334 + This option adds an 'AUDIT' target, which can be used to create 335 + audit records for packets dropped/accepted. 336 + 337 + To compile it as a module, choose M here. If unsure, say N. 338 359 339 config NETFILTER_XT_TARGET_CHECKSUM 360 340 tristate "CHECKSUM target support" ··· 517 477 config NETFILTER_XT_TARGET_NFQUEUE 518 478 tristate '"NFQUEUE" target Support' 519 479 depends on NETFILTER_ADVANCED 480 + select NETFILTER_NETLINK_QUEUE 520 481 help 521 482 This target replaced the old obsolete QUEUE target. 522 483 ··· 927 886 config NETFILTER_XT_MATCH_REALM 928 887 tristate '"realm" match support' 929 888 depends on NETFILTER_ADVANCED 930 - select NET_CLS_ROUTE 889 + select IP_ROUTE_CLASSID 931 890 help 932 891 This option adds a `realm' match, which allows you to use the realm 933 892 key from the routing subsystem inside iptables.

+4

net/netfilter/Makefile

··· 1 1 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o 2 2 3 3 nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o 4 + nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 4 5 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o 5 6 6 7 obj-$(CONFIG_NETFILTER) = netfilter.o ··· 29 28 obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o 30 29 obj-$(CONFIG_NF_CONNTRACK_H323) += nf_conntrack_h323.o 31 30 obj-$(CONFIG_NF_CONNTRACK_IRC) += nf_conntrack_irc.o 31 + obj-$(CONFIG_NF_CONNTRACK_BROADCAST) += nf_conntrack_broadcast.o 32 32 obj-$(CONFIG_NF_CONNTRACK_NETBIOS_NS) += nf_conntrack_netbios_ns.o 33 + obj-$(CONFIG_NF_CONNTRACK_SNMP) += nf_conntrack_snmp.o 33 34 obj-$(CONFIG_NF_CONNTRACK_PPTP) += nf_conntrack_pptp.o 34 35 obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o 35 36 obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o ··· 48 45 obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o 49 46 50 47 # targets 48 + obj-$(CONFIG_NETFILTER_XT_TARGET_AUDIT) += xt_AUDIT.o 51 49 obj-$(CONFIG_NETFILTER_XT_TARGET_CHECKSUM) += xt_CHECKSUM.o 52 50 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o 53 51 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o

+14 -6

net/netfilter/core.c

··· 175 175 ret = 1; 176 176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { 177 177 kfree_skb(skb); 178 - ret = -(verdict >> NF_VERDICT_BITS); 178 + ret = NF_DROP_GETERR(verdict); 179 179 if (ret == 0) 180 180 ret = -EPERM; 181 181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { 182 - if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 183 - verdict >> NF_VERDICT_BITS)) 184 - goto next_hook; 182 + ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn, 183 + verdict >> NF_VERDICT_QBITS); 184 + if (ret < 0) { 185 + if (ret == -ECANCELED) 186 + goto next_hook; 187 + if (ret == -ESRCH && 188 + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) 189 + goto next_hook; 190 + kfree_skb(skb); 191 + } 192 + ret = 0; 185 193 } 186 194 rcu_read_unlock(); 187 195 return ret; ··· 222 214 /* This does not belong here, but locally generated errors need it if connection 223 215 tracking in use: without this, connection may not be in hash table, and hence 224 216 manufactured ICMP or RST packets will not be associated with it. */ 225 - void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *); 217 + void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly; 226 218 EXPORT_SYMBOL(ip_ct_attach); 227 219 228 220 void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) ··· 239 231 } 240 232 EXPORT_SYMBOL(nf_ct_attach); 241 233 242 - void (*nf_ct_destroy)(struct nf_conntrack *); 234 + void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly; 243 235 EXPORT_SYMBOL(nf_ct_destroy); 244 236 245 237 void nf_conntrack_destroy(struct nf_conntrack *nfct)

+64 -34

net/netfilter/ipvs/ip_vs_app.c

··· 43 43 EXPORT_SYMBOL(unregister_ip_vs_app); 44 44 EXPORT_SYMBOL(register_ip_vs_app_inc); 45 45 46 - /* ipvs application list head */ 47 - static LIST_HEAD(ip_vs_app_list); 48 - static DEFINE_MUTEX(__ip_vs_app_mutex); 49 - 50 - 51 46 /* 52 47 * Get an ip_vs_app object 53 48 */ ··· 62 67 * Allocate/initialize app incarnation and register it in proto apps. 63 68 */ 64 69 static int 65 - ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) 70 + ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, 71 + __u16 port) 66 72 { 67 73 struct ip_vs_protocol *pp; 68 74 struct ip_vs_app *inc; ··· 94 98 } 95 99 } 96 100 97 - ret = pp->register_app(inc); 101 + ret = pp->register_app(net, inc); 98 102 if (ret) 99 103 goto out; 100 104 ··· 115 119 * Release app incarnation 116 120 */ 117 121 static void 118 - ip_vs_app_inc_release(struct ip_vs_app *inc) 122 + ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) 119 123 { 120 124 struct ip_vs_protocol *pp; 121 125 ··· 123 127 return; 124 128 125 129 if (pp->unregister_app) 126 - pp->unregister_app(inc); 130 + pp->unregister_app(net, inc); 127 131 128 132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 129 133 pp->name, inc->name, ntohs(inc->port)); ··· 164 168 * Register an application incarnation in protocol applications 165 169 */ 166 170 int 167 - register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) 171 + register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, 172 + __u16 port) 168 173 { 174 + struct netns_ipvs *ipvs = net_ipvs(net); 169 175 int result; 170 176 171 - mutex_lock(&__ip_vs_app_mutex); 177 + mutex_lock(&ipvs->app_mutex); 172 178 173 - result = ip_vs_app_inc_new(app, proto, port); 179 + result = ip_vs_app_inc_new(net, app, proto, port); 174 180 175 - mutex_unlock(&__ip_vs_app_mutex); 181 + mutex_unlock(&ipvs->app_mutex); 176 182 177 183 return result; 178 184 } ··· 183 185 /* 184 186 * ip_vs_app registration routine 185 187 */ 186 - int 
register_ip_vs_app(struct ip_vs_app *app) 188 + int register_ip_vs_app(struct net *net, struct ip_vs_app *app) 187 189 { 190 + struct netns_ipvs *ipvs = net_ipvs(net); 188 191 /* increase the module use count */ 189 192 ip_vs_use_count_inc(); 190 193 191 - mutex_lock(&__ip_vs_app_mutex); 194 + mutex_lock(&ipvs->app_mutex); 192 195 193 - list_add(&app->a_list, &ip_vs_app_list); 196 + list_add(&app->a_list, &ipvs->app_list); 194 197 195 - mutex_unlock(&__ip_vs_app_mutex); 198 + mutex_unlock(&ipvs->app_mutex); 196 199 197 200 return 0; 198 201 } ··· 203 204 * ip_vs_app unregistration routine 204 205 * We are sure there are no app incarnations attached to services 205 206 */ 206 - void unregister_ip_vs_app(struct ip_vs_app *app) 207 + void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) 207 208 { 209 + struct netns_ipvs *ipvs = net_ipvs(net); 208 210 struct ip_vs_app *inc, *nxt; 209 211 210 - mutex_lock(&__ip_vs_app_mutex); 212 + mutex_lock(&ipvs->app_mutex); 211 213 212 214 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { 213 - ip_vs_app_inc_release(inc); 215 + ip_vs_app_inc_release(net, inc); 214 216 } 215 217 216 218 list_del(&app->a_list); 217 219 218 - mutex_unlock(&__ip_vs_app_mutex); 220 + mutex_unlock(&ipvs->app_mutex); 219 221 220 222 /* decrease the module use count */ 221 223 ip_vs_use_count_dec(); ··· 226 226 /* 227 227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) 228 228 */ 229 - int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) 229 + int ip_vs_bind_app(struct ip_vs_conn *cp, 230 + struct ip_vs_protocol *pp) 230 231 { 231 232 return pp->app_conn_bind(cp); 232 233 } ··· 482 481 * /proc/net/ip_vs_app entry function 483 482 */ 484 483 485 - static struct ip_vs_app *ip_vs_app_idx(loff_t pos) 484 + static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) 486 485 { 487 486 struct ip_vs_app *app, *inc; 488 487 489 - list_for_each_entry(app, &ip_vs_app_list, a_list) { 488 + 
list_for_each_entry(app, &ipvs->app_list, a_list) { 490 489 list_for_each_entry(inc, &app->incs_list, a_list) { 491 490 if (pos-- == 0) 492 491 return inc; ··· 498 497 499 498 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) 500 499 { 501 - mutex_lock(&__ip_vs_app_mutex); 500 + struct net *net = seq_file_net(seq); 501 + struct netns_ipvs *ipvs = net_ipvs(net); 502 502 503 - return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; 503 + mutex_lock(&ipvs->app_mutex); 504 + 505 + return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; 504 506 } 505 507 506 508 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) 507 509 { 508 510 struct ip_vs_app *inc, *app; 509 511 struct list_head *e; 512 + struct net *net = seq_file_net(seq); 513 + struct netns_ipvs *ipvs = net_ipvs(net); 510 514 511 515 ++*pos; 512 516 if (v == SEQ_START_TOKEN) 513 - return ip_vs_app_idx(0); 517 + return ip_vs_app_idx(ipvs, 0); 514 518 515 519 inc = v; 516 520 app = inc->app; ··· 524 518 return list_entry(e, struct ip_vs_app, a_list); 525 519 526 520 /* go on to next application */ 527 - for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { 521 + for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { 528 522 app = list_entry(e, struct ip_vs_app, a_list); 529 523 list_for_each_entry(inc, &app->incs_list, a_list) { 530 524 return inc; ··· 535 529 536 530 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) 537 531 { 538 - mutex_unlock(&__ip_vs_app_mutex); 532 + struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); 533 + 534 + mutex_unlock(&ipvs->app_mutex); 539 535 } 540 536 541 537 static int ip_vs_app_seq_show(struct seq_file *seq, void *v) ··· 565 557 566 558 static int ip_vs_app_open(struct inode *inode, struct file *file) 567 559 { 568 - return seq_open(file, &ip_vs_app_seq_ops); 560 + return seq_open_net(inode, file, &ip_vs_app_seq_ops, 561 + sizeof(struct seq_net_private)); 569 562 } 570 563 571 564 static const 
struct file_operations ip_vs_app_fops = { ··· 578 569 }; 579 570 #endif 580 571 572 + static int __net_init __ip_vs_app_init(struct net *net) 573 + { 574 + struct netns_ipvs *ipvs = net_ipvs(net); 575 + 576 + INIT_LIST_HEAD(&ipvs->app_list); 577 + __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); 578 + proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); 579 + return 0; 580 + } 581 + 582 + static void __net_exit __ip_vs_app_cleanup(struct net *net) 583 + { 584 + proc_net_remove(net, "ip_vs_app"); 585 + } 586 + 587 + static struct pernet_operations ip_vs_app_ops = { 588 + .init = __ip_vs_app_init, 589 + .exit = __ip_vs_app_cleanup, 590 + }; 591 + 581 592 int __init ip_vs_app_init(void) 582 593 { 583 - /* we will replace it with proc_net_ipvs_create() soon */ 584 - proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); 585 - return 0; 594 + int rv; 595 + 596 + rv = register_pernet_subsys(&ip_vs_app_ops); 597 + return rv; 586 598 } 587 599 588 600 589 601 void ip_vs_app_cleanup(void) 590 602 { 591 - proc_net_remove(&init_net, "ip_vs_app"); 603 + unregister_pernet_subsys(&ip_vs_app_ops); 592 604 }

+121 -74

net/netfilter/ipvs/ip_vs_conn.c

··· 48 48 /* 49 49 * Connection hash size. Default is what was selected at compile time. 50 50 */ 51 - int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; 51 + static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; 52 52 module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); 53 53 MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); 54 54 55 55 /* size and mask values */ 56 - int ip_vs_conn_tab_size; 57 - int ip_vs_conn_tab_mask; 56 + int ip_vs_conn_tab_size __read_mostly; 57 + static int ip_vs_conn_tab_mask __read_mostly; 58 58 59 59 /* 60 60 * Connection hash table: for input and output packets lookups of IPVS 61 61 */ 62 - static struct list_head *ip_vs_conn_tab; 62 + static struct list_head *ip_vs_conn_tab __read_mostly; 63 63 64 64 /* SLAB cache for IPVS connections */ 65 65 static struct kmem_cache *ip_vs_conn_cachep __read_mostly; 66 - 67 - /* counter for current IPVS connections */ 68 - static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); 69 66 70 67 /* counter for no client port connections */ 71 68 static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); 72 69 73 70 /* random value for IPVS connection hash */ 74 - static unsigned int ip_vs_conn_rnd; 71 + static unsigned int ip_vs_conn_rnd __read_mostly; 75 72 76 73 /* 77 74 * Fine locking granularity for big connection hash table 78 75 */ 79 - #define CT_LOCKARRAY_BITS 4 76 + #define CT_LOCKARRAY_BITS 5 80 77 #define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) 81 78 #define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) 82 79 ··· 130 133 /* 131 134 * Returns hash value for IPVS connection entry 132 135 */ 133 - static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, 136 + static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto, 134 137 const union nf_inet_addr *addr, 135 138 __be16 port) 136 139 { 137 140 #ifdef CONFIG_IP_VS_IPV6 138 141 if (af == AF_INET6) 139 - return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 140 - (__force u32)port, proto, ip_vs_conn_rnd) 141 
- & ip_vs_conn_tab_mask; 142 + return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), 143 + (__force u32)port, proto, ip_vs_conn_rnd) ^ 144 + ((size_t)net>>8)) & ip_vs_conn_tab_mask; 142 145 #endif 143 - return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 144 - ip_vs_conn_rnd) 145 - & ip_vs_conn_tab_mask; 146 + return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, 147 + ip_vs_conn_rnd) ^ 148 + ((size_t)net>>8)) & ip_vs_conn_tab_mask; 146 149 } 147 150 148 151 static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, ··· 163 166 port = p->vport; 164 167 } 165 168 166 - return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); 169 + return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); 167 170 } 168 171 169 172 static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) 170 173 { 171 174 struct ip_vs_conn_param p; 172 175 173 - ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, 174 - NULL, 0, &p); 176 + ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, 177 + &cp->caddr, cp->cport, NULL, 0, &p); 175 178 176 - if (cp->dest && cp->dest->svc->pe) { 177 - p.pe = cp->dest->svc->pe; 179 + if (cp->pe) { 180 + p.pe = cp->pe; 178 181 p.pe_data = cp->pe_data; 179 182 p.pe_data_len = cp->pe_data_len; 180 183 } ··· 183 186 } 184 187 185 188 /* 186 - * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 189 + * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. 187 190 * returns bool success. 
188 191 */ 189 192 static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) ··· 266 269 267 270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 268 271 if (cp->af == p->af && 272 + p->cport == cp->cport && p->vport == cp->vport && 269 273 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && 270 274 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && 271 - p->cport == cp->cport && p->vport == cp->vport && 272 275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 273 - p->protocol == cp->protocol) { 276 + p->protocol == cp->protocol && 277 + ip_vs_conn_net_eq(cp, p->net)) { 274 278 /* HIT */ 275 279 atomic_inc(&cp->refcnt); 276 280 ct_read_unlock(hash); ··· 311 313 struct ip_vs_conn_param *p) 312 314 { 313 315 __be16 _ports[2], *pptr; 316 + struct net *net = skb_net(skb); 314 317 315 318 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 316 319 if (pptr == NULL) 317 320 return 1; 318 321 319 322 if (likely(!inverse)) 320 - ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], 321 - &iph->daddr, pptr[1], p); 323 + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, 324 + pptr[0], &iph->daddr, pptr[1], p); 322 325 else 323 - ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], 324 - &iph->saddr, pptr[0], p); 326 + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, 327 + pptr[1], &iph->saddr, pptr[0], p); 325 328 return 0; 326 329 } 327 330 328 331 struct ip_vs_conn * 329 332 ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 330 - struct ip_vs_protocol *pp, 331 333 const struct ip_vs_iphdr *iph, 332 334 unsigned int proto_off, int inverse) 333 335 { ··· 351 353 ct_read_lock(hash); 352 354 353 355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 356 + if (!ip_vs_conn_net_eq(cp, p->net)) 357 + continue; 354 358 if (p->pe_data && p->pe->ct_match) { 355 - if (p->pe->ct_match(p, cp)) 359 + if (p->pe == cp->pe && p->pe->ct_match(p, cp)) 356 360 goto out; 357 361 continue; 358 362 } ··· 404 404 405 
405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 406 406 if (cp->af == p->af && 407 + p->vport == cp->cport && p->cport == cp->dport && 407 408 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && 408 409 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && 409 - p->vport == cp->cport && p->cport == cp->dport && 410 - p->protocol == cp->protocol) { 410 + p->protocol == cp->protocol && 411 + ip_vs_conn_net_eq(cp, p->net)) { 411 412 /* HIT */ 412 413 atomic_inc(&cp->refcnt); 413 414 ret = cp; ··· 429 428 430 429 struct ip_vs_conn * 431 430 ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, 432 - struct ip_vs_protocol *pp, 433 431 const struct ip_vs_iphdr *iph, 434 432 unsigned int proto_off, int inverse) 435 433 { ··· 611 611 struct ip_vs_dest *dest; 612 612 613 613 if ((cp) && (!cp->dest)) { 614 - dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, 615 - &cp->vaddr, cp->vport, 616 - cp->protocol); 614 + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, 615 + cp->dport, &cp->vaddr, cp->vport, 616 + cp->protocol, cp->fwmark); 617 617 ip_vs_bind_dest(cp, dest); 618 618 return dest; 619 619 } else ··· 686 686 int ip_vs_check_template(struct ip_vs_conn *ct) 687 687 { 688 688 struct ip_vs_dest *dest = ct->dest; 689 + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); 689 690 690 691 /* 691 692 * Checking the dest server status. 
692 693 */ 693 694 if ((dest == NULL) || 694 695 !(dest->flags & IP_VS_DEST_F_AVAILABLE) || 695 - (sysctl_ip_vs_expire_quiescent_template && 696 + (ipvs->sysctl_expire_quiescent_template && 696 697 (atomic_read(&dest->weight) == 0))) { 697 698 IP_VS_DBG_BUF(9, "check_template: dest not available for " 698 699 "protocol %s s:%s:%d v:%s:%d " ··· 731 730 static void ip_vs_conn_expire(unsigned long data) 732 731 { 733 732 struct ip_vs_conn *cp = (struct ip_vs_conn *)data; 733 + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 734 734 735 735 cp->timeout = 60*HZ; 736 736 ··· 767 765 if (cp->flags & IP_VS_CONN_F_NFCT) 768 766 ip_vs_conn_drop_conntrack(cp); 769 767 768 + ip_vs_pe_put(cp->pe); 770 769 kfree(cp->pe_data); 771 770 if (unlikely(cp->app != NULL)) 772 771 ip_vs_unbind_app(cp); 773 772 ip_vs_unbind_dest(cp); 774 773 if (cp->flags & IP_VS_CONN_F_NO_CPORT) 775 774 atomic_dec(&ip_vs_conn_no_cport_cnt); 776 - atomic_dec(&ip_vs_conn_count); 775 + atomic_dec(&ipvs->conn_count); 777 776 778 777 kmem_cache_free(ip_vs_conn_cachep, cp); 779 778 return; ··· 805 802 struct ip_vs_conn * 806 803 ip_vs_conn_new(const struct ip_vs_conn_param *p, 807 804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 808 - struct ip_vs_dest *dest) 805 + struct ip_vs_dest *dest, __u32 fwmark) 809 806 { 810 807 struct ip_vs_conn *cp; 811 - struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); 808 + struct netns_ipvs *ipvs = net_ipvs(p->net); 809 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, 810 + p->protocol); 812 811 813 812 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 814 813 if (cp == NULL) { ··· 820 815 821 816 INIT_LIST_HEAD(&cp->c_list); 822 817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 818 + ip_vs_conn_net_set(cp, p->net); 823 819 cp->af = p->af; 824 820 cp->protocol = p->protocol; 825 821 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); ··· 832 826 &cp->daddr, daddr); 833 827 cp->dport = dport; 834 828 cp->flags = flags; 
835 - if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { 829 + cp->fwmark = fwmark; 830 + if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) { 831 + ip_vs_pe_get(p->pe); 832 + cp->pe = p->pe; 836 833 cp->pe_data = p->pe_data; 837 834 cp->pe_data_len = p->pe_data_len; 838 835 } ··· 851 842 atomic_set(&cp->n_control, 0); 852 843 atomic_set(&cp->in_pkts, 0); 853 844 854 - atomic_inc(&ip_vs_conn_count); 845 + atomic_inc(&ipvs->conn_count); 855 846 if (flags & IP_VS_CONN_F_NO_CPORT) 856 847 atomic_inc(&ip_vs_conn_no_cport_cnt); 857 848 ··· 870 861 #endif 871 862 ip_vs_bind_xmit(cp); 872 863 873 - if (unlikely(pp && atomic_read(&pp->appcnt))) 874 - ip_vs_bind_app(cp, pp); 864 + if (unlikely(pd && atomic_read(&pd->appcnt))) 865 + ip_vs_bind_app(cp, pd->pp); 875 866 876 867 /* 877 868 * Allow conntrack to be preserved. By default, conntrack ··· 880 871 * IP_VS_CONN_F_ONE_PACKET too. 881 872 */ 882 873 883 - if (ip_vs_conntrack_enabled()) 874 + if (ip_vs_conntrack_enabled(ipvs)) 884 875 cp->flags |= IP_VS_CONN_F_NFCT; 885 876 886 877 /* Hash it in the ip_vs_conn_tab finally */ ··· 893 884 * /proc/net/ip_vs_conn entries 894 885 */ 895 886 #ifdef CONFIG_PROC_FS 887 + struct ip_vs_iter_state { 888 + struct seq_net_private p; 889 + struct list_head *l; 890 + }; 896 891 897 892 static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) 898 893 { 899 894 int idx; 900 895 struct ip_vs_conn *cp; 896 + struct ip_vs_iter_state *iter = seq->private; 901 897 902 898 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 903 899 ct_read_lock_bh(idx); 904 900 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 905 901 if (pos-- == 0) { 906 - seq->private = &ip_vs_conn_tab[idx]; 902 + iter->l = &ip_vs_conn_tab[idx]; 907 903 return cp; 908 904 } 909 905 } ··· 920 906 921 907 static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) 922 908 { 923 - seq->private = NULL; 909 + struct ip_vs_iter_state *iter = seq->private; 910 + 911 + iter->l = NULL; 924 912 return *pos ? 
ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; 925 913 } 926 914 927 915 static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) 928 916 { 929 917 struct ip_vs_conn *cp = v; 930 - struct list_head *e, *l = seq->private; 918 + struct ip_vs_iter_state *iter = seq->private; 919 + struct list_head *e, *l = iter->l; 931 920 int idx; 932 921 933 922 ++*pos; ··· 947 930 while (++idx < ip_vs_conn_tab_size) { 948 931 ct_read_lock_bh(idx); 949 932 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 950 - seq->private = &ip_vs_conn_tab[idx]; 933 + iter->l = &ip_vs_conn_tab[idx]; 951 934 return cp; 952 935 } 953 936 ct_read_unlock_bh(idx); 954 937 } 955 - seq->private = NULL; 938 + iter->l = NULL; 956 939 return NULL; 957 940 } 958 941 959 942 static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) 960 943 { 961 - struct list_head *l = seq->private; 944 + struct ip_vs_iter_state *iter = seq->private; 945 + struct list_head *l = iter->l; 962 946 963 947 if (l) 964 948 ct_read_unlock_bh(l - ip_vs_conn_tab); ··· 973 955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); 974 956 else { 975 957 const struct ip_vs_conn *cp = v; 958 + struct net *net = seq_file_net(seq); 976 959 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; 977 960 size_t len = 0; 978 961 979 - if (cp->dest && cp->pe_data && 980 - cp->dest->svc->pe->show_pe_data) { 962 + if (!ip_vs_conn_net_eq(cp, net)) 963 + return 0; 964 + if (cp->pe_data) { 981 965 pe_data[0] = ' '; 982 - len = strlen(cp->dest->svc->pe->name); 983 - memcpy(pe_data + 1, cp->dest->svc->pe->name, len); 966 + len = strlen(cp->pe->name); 967 + memcpy(pe_data + 1, cp->pe->name, len); 984 968 pe_data[len + 1] = ' '; 985 969 len += 2; 986 - len += cp->dest->svc->pe->show_pe_data(cp, 987 - pe_data + len); 970 + len += cp->pe->show_pe_data(cp, pe_data + len); 988 971 } 989 972 pe_data[len] = '\0'; 990 973 ··· 1023 1004 1024 1005 static int ip_vs_conn_open(struct inode *inode, struct file 
*file) 1025 1006 { 1026 - return seq_open(file, &ip_vs_conn_seq_ops); 1007 + return seq_open_net(inode, file, &ip_vs_conn_seq_ops, 1008 + sizeof(struct ip_vs_iter_state)); 1027 1009 } 1028 1010 1029 1011 static const struct file_operations ip_vs_conn_fops = { ··· 1051 1031 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); 1052 1032 else { 1053 1033 const struct ip_vs_conn *cp = v; 1034 + struct net *net = seq_file_net(seq); 1035 + 1036 + if (!ip_vs_conn_net_eq(cp, net)) 1037 + return 0; 1054 1038 1055 1039 #ifdef CONFIG_IP_VS_IPV6 1056 1040 if (cp->af == AF_INET6) ··· 1091 1067 1092 1068 static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) 1093 1069 { 1094 - return seq_open(file, &ip_vs_conn_sync_seq_ops); 1070 + return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, 1071 + sizeof(struct ip_vs_iter_state)); 1095 1072 } 1096 1073 1097 1074 static const struct file_operations ip_vs_conn_sync_fops = { ··· 1138 1113 } 1139 1114 1140 1115 /* Called from keventd and must protect itself from softirqs */ 1141 - void ip_vs_random_dropentry(void) 1116 + void ip_vs_random_dropentry(struct net *net) 1142 1117 { 1143 1118 int idx; 1144 1119 struct ip_vs_conn *cp; ··· 1158 1133 if (cp->flags & IP_VS_CONN_F_TEMPLATE) 1159 1134 /* connection template */ 1160 1135 continue; 1161 - 1136 + if (!ip_vs_conn_net_eq(cp, net)) 1137 + continue; 1162 1138 if (cp->protocol == IPPROTO_TCP) { 1163 1139 switch(cp->state) { 1164 1140 case IP_VS_TCP_S_SYN_RECV: ··· 1194 1168 /* 1195 1169 * Flush all the connection entries in the ip_vs_conn_tab 1196 1170 */ 1197 - static void ip_vs_conn_flush(void) 1171 + static void ip_vs_conn_flush(struct net *net) 1198 1172 { 1199 1173 int idx; 1200 1174 struct ip_vs_conn *cp; 1175 + struct netns_ipvs *ipvs = net_ipvs(net); 1201 1176 1202 - flush_again: 1177 + flush_again: 1203 1178 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { 1204 1179 /* 1205 1180 * Lock is actually needed in this loop. 
··· 1208 1181 ct_write_lock_bh(idx); 1209 1182 1210 1183 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 1211 - 1184 + if (!ip_vs_conn_net_eq(cp, net)) 1185 + continue; 1212 1186 IP_VS_DBG(4, "del connection\n"); 1213 1187 ip_vs_conn_expire_now(cp); 1214 1188 if (cp->control) { ··· 1222 1194 1223 1195 /* the counter may be not NULL, because maybe some conn entries 1224 1196 are run by slow timer handler or unhashed but still referred */ 1225 - if (atomic_read(&ip_vs_conn_count) != 0) { 1197 + if (atomic_read(&ipvs->conn_count) != 0) { 1226 1198 schedule(); 1227 1199 goto flush_again; 1228 1200 } 1229 1201 } 1202 + /* 1203 + * per netns init and exit 1204 + */ 1205 + int __net_init __ip_vs_conn_init(struct net *net) 1206 + { 1207 + struct netns_ipvs *ipvs = net_ipvs(net); 1230 1208 1209 + atomic_set(&ipvs->conn_count, 0); 1210 + 1211 + proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); 1212 + proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); 1213 + return 0; 1214 + } 1215 + 1216 + static void __net_exit __ip_vs_conn_cleanup(struct net *net) 1217 + { 1218 + /* flush all the connection entries first */ 1219 + ip_vs_conn_flush(net); 1220 + proc_net_remove(net, "ip_vs_conn"); 1221 + proc_net_remove(net, "ip_vs_conn_sync"); 1222 + } 1223 + static struct pernet_operations ipvs_conn_ops = { 1224 + .init = __ip_vs_conn_init, 1225 + .exit = __ip_vs_conn_cleanup, 1226 + }; 1231 1227 1232 1228 int __init ip_vs_conn_init(void) 1233 1229 { 1234 1230 int idx; 1231 + int retc; 1235 1232 1236 1233 /* Compute size and mask */ 1237 1234 ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; ··· 1294 1241 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); 1295 1242 } 1296 1243 1297 - proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); 1298 - proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); 1244 + retc = register_pernet_subsys(&ipvs_conn_ops); 1299 1245 1300 1246 /* calculate the random value for connection 
hash */ 1301 1247 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); 1302 1248 1303 - return 0; 1249 + return retc; 1304 1250 } 1305 - 1306 1251 1307 1252 void ip_vs_conn_cleanup(void) 1308 1253 { 1309 - /* flush all the connection entries first */ 1310 - ip_vs_conn_flush(); 1311 - 1254 + unregister_pernet_subsys(&ipvs_conn_ops); 1312 1255 /* Release the empty cache */ 1313 1256 kmem_cache_destroy(ip_vs_conn_cachep); 1314 - proc_net_remove(&init_net, "ip_vs_conn"); 1315 - proc_net_remove(&init_net, "ip_vs_conn_sync"); 1316 1257 vfree(ip_vs_conn_tab); 1317 1258 }

+253 -117

net/netfilter/ipvs/ip_vs_core.c

··· 41 41 #include <net/icmp.h> /* for icmp_send */ 42 42 #include <net/route.h> 43 43 #include <net/ip6_checksum.h> 44 + #include <net/netns/generic.h> /* net_generic() */ 44 45 45 46 #include <linux/netfilter.h> 46 47 #include <linux/netfilter_ipv4.h> ··· 69 68 EXPORT_SYMBOL(ip_vs_get_debug_level); 70 69 #endif 71 70 71 + int ip_vs_net_id __read_mostly; 72 + #ifdef IP_VS_GENERIC_NETNS 73 + EXPORT_SYMBOL(ip_vs_net_id); 74 + #endif 75 + /* netns cnt used for uniqueness */ 76 + static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); 72 77 73 78 /* ID used in ICMP lookups */ 74 79 #define icmp_id(icmph) (((icmph)->un).echo.id) ··· 115 108 ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 116 109 { 117 110 struct ip_vs_dest *dest = cp->dest; 111 + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 112 + 118 113 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 119 - spin_lock(&dest->stats.lock); 120 - dest->stats.ustats.inpkts++; 121 - dest->stats.ustats.inbytes += skb->len; 122 - spin_unlock(&dest->stats.lock); 114 + struct ip_vs_cpu_stats *s; 123 115 124 - spin_lock(&dest->svc->stats.lock); 125 - dest->svc->stats.ustats.inpkts++; 126 - dest->svc->stats.ustats.inbytes += skb->len; 127 - spin_unlock(&dest->svc->stats.lock); 116 + s = this_cpu_ptr(dest->stats.cpustats); 117 + s->ustats.inpkts++; 118 + u64_stats_update_begin(&s->syncp); 119 + s->ustats.inbytes += skb->len; 120 + u64_stats_update_end(&s->syncp); 128 121 129 - spin_lock(&ip_vs_stats.lock); 130 - ip_vs_stats.ustats.inpkts++; 131 - ip_vs_stats.ustats.inbytes += skb->len; 132 - spin_unlock(&ip_vs_stats.lock); 122 + s = this_cpu_ptr(dest->svc->stats.cpustats); 123 + s->ustats.inpkts++; 124 + u64_stats_update_begin(&s->syncp); 125 + s->ustats.inbytes += skb->len; 126 + u64_stats_update_end(&s->syncp); 127 + 128 + s = this_cpu_ptr(ipvs->cpustats); 129 + s->ustats.inpkts++; 130 + u64_stats_update_begin(&s->syncp); 131 + s->ustats.inbytes += skb->len; 132 + u64_stats_update_end(&s->syncp); 133 133 } 134 134 
} 135 135 ··· 145 131 ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) 146 132 { 147 133 struct ip_vs_dest *dest = cp->dest; 134 + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); 135 + 148 136 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { 149 - spin_lock(&dest->stats.lock); 150 - dest->stats.ustats.outpkts++; 151 - dest->stats.ustats.outbytes += skb->len; 152 - spin_unlock(&dest->stats.lock); 137 + struct ip_vs_cpu_stats *s; 153 138 154 - spin_lock(&dest->svc->stats.lock); 155 - dest->svc->stats.ustats.outpkts++; 156 - dest->svc->stats.ustats.outbytes += skb->len; 157 - spin_unlock(&dest->svc->stats.lock); 139 + s = this_cpu_ptr(dest->stats.cpustats); 140 + s->ustats.outpkts++; 141 + u64_stats_update_begin(&s->syncp); 142 + s->ustats.outbytes += skb->len; 143 + u64_stats_update_end(&s->syncp); 158 144 159 - spin_lock(&ip_vs_stats.lock); 160 - ip_vs_stats.ustats.outpkts++; 161 - ip_vs_stats.ustats.outbytes += skb->len; 162 - spin_unlock(&ip_vs_stats.lock); 145 + s = this_cpu_ptr(dest->svc->stats.cpustats); 146 + s->ustats.outpkts++; 147 + u64_stats_update_begin(&s->syncp); 148 + s->ustats.outbytes += skb->len; 149 + u64_stats_update_end(&s->syncp); 150 + 151 + s = this_cpu_ptr(ipvs->cpustats); 152 + s->ustats.outpkts++; 153 + u64_stats_update_begin(&s->syncp); 154 + s->ustats.outbytes += skb->len; 155 + u64_stats_update_end(&s->syncp); 163 156 } 164 157 } 165 158 ··· 174 153 static inline void 175 154 ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) 176 155 { 177 - spin_lock(&cp->dest->stats.lock); 178 - cp->dest->stats.ustats.conns++; 179 - spin_unlock(&cp->dest->stats.lock); 156 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 157 + struct ip_vs_cpu_stats *s; 180 158 181 - spin_lock(&svc->stats.lock); 182 - svc->stats.ustats.conns++; 183 - spin_unlock(&svc->stats.lock); 159 + s = this_cpu_ptr(cp->dest->stats.cpustats); 160 + s->ustats.conns++; 184 161 185 - spin_lock(&ip_vs_stats.lock); 186 - ip_vs_stats.ustats.conns++; 187 - 
spin_unlock(&ip_vs_stats.lock); 162 + s = this_cpu_ptr(svc->stats.cpustats); 163 + s->ustats.conns++; 164 + 165 + s = this_cpu_ptr(ipvs->cpustats); 166 + s->ustats.conns++; 188 167 } 189 168 190 169 191 170 static inline int 192 171 ip_vs_set_state(struct ip_vs_conn *cp, int direction, 193 172 const struct sk_buff *skb, 194 - struct ip_vs_protocol *pp) 173 + struct ip_vs_proto_data *pd) 195 174 { 196 - if (unlikely(!pp->state_transition)) 175 + if (unlikely(!pd->pp->state_transition)) 197 176 return 0; 198 - return pp->state_transition(cp, direction, skb, pp); 177 + return pd->pp->state_transition(cp, direction, skb, pd); 199 178 } 200 179 201 - static inline void 180 + static inline int 202 181 ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, 203 182 struct sk_buff *skb, int protocol, 204 183 const union nf_inet_addr *caddr, __be16 cport, 205 184 const union nf_inet_addr *vaddr, __be16 vport, 206 185 struct ip_vs_conn_param *p) 207 186 { 208 - ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); 187 + ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, 188 + vport, p); 209 189 p->pe = svc->pe; 210 190 if (p->pe && p->pe->fill_param) 211 - p->pe->fill_param(p, skb); 191 + return p->pe->fill_param(p, skb); 192 + 193 + return 0; 212 194 } 213 195 214 196 /* ··· 224 200 static struct ip_vs_conn * 225 201 ip_vs_sched_persist(struct ip_vs_service *svc, 226 202 struct sk_buff *skb, 227 - __be16 ports[2]) 203 + __be16 src_port, __be16 dst_port, int *ignored) 228 204 { 229 205 struct ip_vs_conn *cp = NULL; 230 206 struct ip_vs_iphdr iph; ··· 248 224 249 225 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " 250 226 "mnet %s\n", 251 - IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), 252 - IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), 227 + IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port), 228 + IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port), 253 229 IP_VS_DBG_ADDR(svc->af, &snet)); 254 230 255 
231 /* ··· 271 247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; 272 248 __be16 vport = 0; 273 249 274 - if (ports[1] == svc->port) { 250 + if (dst_port == svc->port) { 275 251 /* non-FTP template: 276 252 * <protocol, caddr, 0, vaddr, vport, daddr, dport> 277 253 * FTP template: 278 254 * <protocol, caddr, 0, vaddr, 0, daddr, 0> 279 255 */ 280 256 if (svc->port != FTPPORT) 281 - vport = ports[1]; 257 + vport = dst_port; 282 258 } else { 283 259 /* Note: persistent fwmark-based services and 284 260 * persistent port zero service are handled here. ··· 292 268 vaddr = &fwmark; 293 269 } 294 270 } 295 - ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, 296 - vaddr, vport, &param); 271 + /* return *ignored = -1 so NF_DROP can be used */ 272 + if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, 273 + vaddr, vport, &param) < 0) { 274 + *ignored = -1; 275 + return NULL; 276 + } 297 277 } 298 278 299 279 /* Check if a template already exists */ 300 280 ct = ip_vs_ct_in_get(&param); 301 281 if (!ct || !ip_vs_check_template(ct)) { 302 - /* No template found or the dest of the connection 282 + /* 283 + * No template found or the dest of the connection 303 284 * template is not available. 285 + * return *ignored=0 i.e. 
ICMP and NF_DROP 304 286 */ 305 287 dest = svc->scheduler->schedule(svc, skb); 306 288 if (!dest) { 307 289 IP_VS_DBG(1, "p-schedule: no dest found.\n"); 308 290 kfree(param.pe_data); 291 + *ignored = 0; 309 292 return NULL; 310 293 } 311 294 312 - if (ports[1] == svc->port && svc->port != FTPPORT) 295 + if (dst_port == svc->port && svc->port != FTPPORT) 313 296 dport = dest->port; 314 297 315 298 /* Create a template ··· 324 293 * and thus param.pe_data will be destroyed 325 294 * when the template expires */ 326 295 ct = ip_vs_conn_new(&param, &dest->addr, dport, 327 - IP_VS_CONN_F_TEMPLATE, dest); 296 + IP_VS_CONN_F_TEMPLATE, dest, skb->mark); 328 297 if (ct == NULL) { 329 298 kfree(param.pe_data); 299 + *ignored = -1; 330 300 return NULL; 331 301 } 332 302 ··· 338 306 kfree(param.pe_data); 339 307 } 340 308 341 - dport = ports[1]; 309 + dport = dst_port; 342 310 if (dport == svc->port && dest->port) 343 311 dport = dest->port; 344 312 ··· 349 317 /* 350 318 * Create a new connection according to the template 351 319 */ 352 - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], 353 - &iph.daddr, ports[1], &param); 354 - cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest); 320 + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, 321 + src_port, &iph.daddr, dst_port, &param); 322 + 323 + cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); 355 324 if (cp == NULL) { 356 325 ip_vs_conn_put(ct); 326 + *ignored = -1; 357 327 return NULL; 358 328 } 359 329 ··· 375 341 * It selects a server according to the virtual service, and 376 342 * creates a connection entry. 377 343 * Protocols supported: TCP, UDP 344 + * 345 + * Usage of *ignored 346 + * 347 + * 1 : protocol tried to schedule (eg. on SYN), found svc but the 348 + * svc/scheduler decides that this packet should be accepted with 349 + * NF_ACCEPT because it must not be scheduled. 
350 + * 351 + * 0 : scheduler can not find destination, so try bypass or 352 + * return ICMP and then NF_DROP (ip_vs_leave). 353 + * 354 + * -1 : scheduler tried to schedule but fatal error occurred, eg. 355 + * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param 356 + * failure such as missing Call-ID, ENOMEM on skb_linearize 357 + * or pe_data. In this case we should return NF_DROP without 358 + * any attempts to send ICMP with ip_vs_leave. 378 359 */ 379 360 struct ip_vs_conn * 380 361 ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, 381 - struct ip_vs_protocol *pp, int *ignored) 362 + struct ip_vs_proto_data *pd, int *ignored) 382 363 { 364 + struct ip_vs_protocol *pp = pd->pp; 383 365 struct ip_vs_conn *cp = NULL; 384 366 struct ip_vs_iphdr iph; 385 367 struct ip_vs_dest *dest; ··· 421 371 } 422 372 423 373 /* 424 - * Do not schedule replies from local real server. It is risky 425 - * for fwmark services but mostly for persistent services. 374 + * Do not schedule replies from local real server. 
426 375 */ 427 376 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && 428 - (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && 429 - (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { 377 + (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { 430 378 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, 431 379 "Not scheduling reply for existing connection"); 432 380 __ip_vs_conn_put(cp); ··· 434 386 /* 435 387 * Persistent service 436 388 */ 437 - if (svc->flags & IP_VS_SVC_F_PERSISTENT) { 438 - *ignored = 0; 439 - return ip_vs_sched_persist(svc, skb, pptr); 440 - } 389 + if (svc->flags & IP_VS_SVC_F_PERSISTENT) 390 + return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored); 391 + 392 + *ignored = 0; 441 393 442 394 /* 443 395 * Non-persistent service ··· 449 401 "check your ipvs configuration\n"); 450 402 return NULL; 451 403 } 452 - 453 - *ignored = 0; 454 404 455 405 dest = svc->scheduler->schedule(svc, skb); 456 406 if (dest == NULL) { ··· 465 419 */ 466 420 { 467 421 struct ip_vs_conn_param p; 468 - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, 469 - pptr[0], &iph.daddr, pptr[1], &p); 422 + 423 + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 424 + &iph.saddr, pptr[0], &iph.daddr, pptr[1], 425 + &p); 470 426 cp = ip_vs_conn_new(&p, &dest->addr, 471 427 dest->port ? dest->port : pptr[1], 472 - flags, dest); 473 - if (!cp) 428 + flags, dest, skb->mark); 429 + if (!cp) { 430 + *ignored = -1; 474 431 return NULL; 432 + } 475 433 } 476 434 477 435 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " ··· 497 447 * no destination is available for a new connection. 
498 448 */ 499 449 int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, 500 - struct ip_vs_protocol *pp) 450 + struct ip_vs_proto_data *pd) 501 451 { 452 + struct net *net; 453 + struct netns_ipvs *ipvs; 502 454 __be16 _ports[2], *pptr; 503 455 struct ip_vs_iphdr iph; 504 456 int unicast; 457 + 505 458 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 506 459 507 460 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); ··· 512 459 ip_vs_service_put(svc); 513 460 return NF_DROP; 514 461 } 462 + net = skb_net(skb); 515 463 516 464 #ifdef CONFIG_IP_VS_IPV6 517 465 if (svc->af == AF_INET6) 518 466 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; 519 467 else 520 468 #endif 521 - unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); 469 + unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST); 522 470 523 471 /* if it is fwmark-based service, the cache_bypass sysctl is up 524 472 and the destination is a non-local unicast, then create 525 473 a cache_bypass connection entry */ 526 - if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 474 + ipvs = net_ipvs(net); 475 + if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { 527 476 int ret, cs; 528 477 struct ip_vs_conn *cp; 529 478 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && ··· 539 484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 540 485 { 541 486 struct ip_vs_conn_param p; 542 - ip_vs_conn_fill_param(svc->af, iph.protocol, 487 + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, 543 488 &iph.saddr, pptr[0], 544 489 &iph.daddr, pptr[1], &p); 545 490 cp = ip_vs_conn_new(&p, &daddr, 0, 546 491 IP_VS_CONN_F_BYPASS | flags, 547 - NULL); 492 + NULL, skb->mark); 548 493 if (!cp) 549 494 return NF_DROP; 550 495 } ··· 553 498 ip_vs_in_stats(cp, skb); 554 499 555 500 /* set state */ 556 - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 501 + cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 557 502 558 503 /* 
transmit the first SYN packet */ 559 - ret = cp->packet_xmit(skb, cp, pp); 504 + ret = cp->packet_xmit(skb, cp, pd->pp); 560 505 /* do not touch skb anymore */ 561 506 562 507 atomic_inc(&cp->in_pkts); ··· 737 682 struct ip_vs_protocol *pp, 738 683 unsigned int offset, unsigned int ihl) 739 684 { 685 + struct netns_ipvs *ipvs; 740 686 unsigned int verdict = NF_DROP; 741 687 742 688 if (IP_VS_FWD_METHOD(cp) != 0) { ··· 759 703 if (!skb_make_writable(skb, offset)) 760 704 goto out; 761 705 706 + ipvs = net_ipvs(skb_net(skb)); 707 + 762 708 #ifdef CONFIG_IP_VS_IPV6 763 709 if (af == AF_INET6) 764 710 ip_vs_nat_icmp_v6(skb, pp, cp, 1); ··· 770 712 771 713 #ifdef CONFIG_IP_VS_IPV6 772 714 if (af == AF_INET6) { 773 - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) 715 + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) 774 716 goto out; 775 717 } else 776 718 #endif 777 - if ((sysctl_ip_vs_snat_reroute || 719 + if ((ipvs->sysctl_snat_reroute || 778 720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) && 779 721 ip_route_me_harder(skb, RTN_LOCAL) != 0) 780 722 goto out; ··· 866 808 867 809 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 868 810 /* The embedded headers contain source and dest in reverse order */ 869 - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 811 + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 870 812 if (!cp) 871 813 return NF_ACCEPT; 872 814 ··· 943 885 944 886 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 945 887 /* The embedded headers contain source and dest in reverse order */ 946 - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 888 + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 947 889 if (!cp) 948 890 return NF_ACCEPT; 949 891 ··· 982 924 * Used for NAT and local client. 
983 925 */ 984 926 static unsigned int 985 - handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 927 + handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 986 928 struct ip_vs_conn *cp, int ihl) 987 929 { 930 + struct ip_vs_protocol *pp = pd->pp; 931 + struct netns_ipvs *ipvs; 932 + 988 933 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); 989 934 990 935 if (!skb_make_writable(skb, ihl)) ··· 1022 961 * if it came from this machine itself. So re-compute 1023 962 * the routing information. 1024 963 */ 964 + ipvs = net_ipvs(skb_net(skb)); 965 + 1025 966 #ifdef CONFIG_IP_VS_IPV6 1026 967 if (af == AF_INET6) { 1027 - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) 968 + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) 1028 969 goto drop; 1029 970 } else 1030 971 #endif 1031 - if ((sysctl_ip_vs_snat_reroute || 972 + if ((ipvs->sysctl_snat_reroute || 1032 973 skb_rtable(skb)->rt_flags & RTCF_LOCAL) && 1033 974 ip_route_me_harder(skb, RTN_LOCAL) != 0) 1034 975 goto drop; ··· 1038 975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); 1039 976 1040 977 ip_vs_out_stats(cp, skb); 1041 - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 978 + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); 1042 979 skb->ipvs_property = 1; 1043 980 if (!(cp->flags & IP_VS_CONN_F_NFCT)) 1044 981 ip_vs_notrack(skb); ··· 1062 999 static unsigned int 1063 1000 ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) 1064 1001 { 1002 + struct net *net = NULL; 1065 1003 struct ip_vs_iphdr iph; 1066 1004 struct ip_vs_protocol *pp; 1005 + struct ip_vs_proto_data *pd; 1067 1006 struct ip_vs_conn *cp; 1007 + struct netns_ipvs *ipvs; 1068 1008 1069 1009 EnterFunction(11); 1070 1010 ··· 1088 1022 if (unlikely(!skb_dst(skb))) 1089 1023 return NF_ACCEPT; 1090 1024 1025 + net = skb_net(skb); 1091 1026 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1092 1027 #ifdef CONFIG_IP_VS_IPV6 1093 1028 if (af == AF_INET6) { ··· 1112 1045 
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1113 1046 } 1114 1047 1115 - pp = ip_vs_proto_get(iph.protocol); 1116 - if (unlikely(!pp)) 1048 + pd = ip_vs_proto_data_get(net, iph.protocol); 1049 + if (unlikely(!pd)) 1117 1050 return NF_ACCEPT; 1051 + pp = pd->pp; 1118 1052 1119 1053 /* reassemble IP fragments */ 1120 1054 #ifdef CONFIG_IP_VS_IPV6 ··· 1141 1073 /* 1142 1074 * Check if the packet belongs to an existing entry 1143 1075 */ 1144 - cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1076 + cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); 1077 + ipvs = net_ipvs(net); 1145 1078 1146 1079 if (likely(cp)) 1147 - return handle_response(af, skb, pp, cp, iph.len); 1148 - if (sysctl_ip_vs_nat_icmp_send && 1080 + return handle_response(af, skb, pd, cp, iph.len); 1081 + if (ipvs->sysctl_nat_icmp_send && 1149 1082 (pp->protocol == IPPROTO_TCP || 1150 1083 pp->protocol == IPPROTO_UDP || 1151 1084 pp->protocol == IPPROTO_SCTP)) { ··· 1156 1087 sizeof(_ports), _ports); 1157 1088 if (pptr == NULL) 1158 1089 return NF_ACCEPT; /* Not for me */ 1159 - if (ip_vs_lookup_real_service(af, iph.protocol, 1090 + if (ip_vs_lookup_real_service(net, af, iph.protocol, 1160 1091 &iph.saddr, 1161 1092 pptr[0])) { 1162 1093 /* ··· 1271 1202 static int 1272 1203 ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) 1273 1204 { 1205 + struct net *net = NULL; 1274 1206 struct iphdr *iph; 1275 1207 struct icmphdr _icmph, *ic; 1276 1208 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ 1277 1209 struct ip_vs_iphdr ciph; 1278 1210 struct ip_vs_conn *cp; 1279 1211 struct ip_vs_protocol *pp; 1212 + struct ip_vs_proto_data *pd; 1280 1213 unsigned int offset, ihl, verdict; 1281 1214 union nf_inet_addr snet; 1282 1215 ··· 1320 1249 if (cih == NULL) 1321 1250 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1322 1251 1323 - pp = ip_vs_proto_get(cih->protocol); 1324 - if (!pp) 1252 + net = skb_net(skb); 1253 + pd = ip_vs_proto_data_get(net, 
cih->protocol); 1254 + if (!pd) 1325 1255 return NF_ACCEPT; 1256 + pp = pd->pp; 1326 1257 1327 1258 /* Is the embedded protocol header present? */ 1328 1259 if (unlikely(cih->frag_off & htons(IP_OFFSET) && ··· 1338 1265 1339 1266 ip_vs_fill_iphdr(AF_INET, cih, &ciph); 1340 1267 /* The embedded headers contain source and dest in reverse order */ 1341 - cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); 1268 + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); 1342 1269 if (!cp) { 1343 1270 /* The packet could also belong to a local client */ 1344 - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); 1271 + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); 1345 1272 if (cp) { 1346 1273 snet.ip = iph->saddr; 1347 1274 return handle_response_icmp(AF_INET, skb, &snet, ··· 1385 1312 static int 1386 1313 ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) 1387 1314 { 1315 + struct net *net = NULL; 1388 1316 struct ipv6hdr *iph; 1389 1317 struct icmp6hdr _icmph, *ic; 1390 1318 struct ipv6hdr _ciph, *cih; /* The ip header contained ··· 1393 1319 struct ip_vs_iphdr ciph; 1394 1320 struct ip_vs_conn *cp; 1395 1321 struct ip_vs_protocol *pp; 1322 + struct ip_vs_proto_data *pd; 1396 1323 unsigned int offset, verdict; 1397 1324 union nf_inet_addr snet; 1398 1325 struct rt6_info *rt; ··· 1436 1361 if (cih == NULL) 1437 1362 return NF_ACCEPT; /* The packet looks wrong, ignore */ 1438 1363 1439 - pp = ip_vs_proto_get(cih->nexthdr); 1440 - if (!pp) 1364 + net = skb_net(skb); 1365 + pd = ip_vs_proto_data_get(net, cih->nexthdr); 1366 + if (!pd) 1441 1367 return NF_ACCEPT; 1368 + pp = pd->pp; 1442 1369 1443 1370 /* Is the embedded protocol header present? 
*/ 1444 1371 /* TODO: we don't support fragmentation at the moment anyways */ ··· 1454 1377 1455 1378 ip_vs_fill_iphdr(AF_INET6, cih, &ciph); 1456 1379 /* The embedded headers contain source and dest in reverse order */ 1457 - cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); 1380 + cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); 1458 1381 if (!cp) { 1459 1382 /* The packet could also belong to a local client */ 1460 - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); 1383 + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); 1461 1384 if (cp) { 1462 1385 ipv6_addr_copy(&snet.in6, &iph->saddr); 1463 1386 return handle_response_icmp(AF_INET6, skb, &snet, ··· 1500 1423 static unsigned int 1501 1424 ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) 1502 1425 { 1426 + struct net *net; 1503 1427 struct ip_vs_iphdr iph; 1504 1428 struct ip_vs_protocol *pp; 1429 + struct ip_vs_proto_data *pd; 1505 1430 struct ip_vs_conn *cp; 1506 1431 int ret, restart, pkts; 1432 + struct netns_ipvs *ipvs; 1507 1433 1508 1434 /* Already marked as IPVS request or reply? */ 1509 1435 if (skb->ipvs_property) ··· 1560 1480 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1561 1481 } 1562 1482 1483 + net = skb_net(skb); 1563 1484 /* Protocol supported? 
*/ 1564 - pp = ip_vs_proto_get(iph.protocol); 1565 - if (unlikely(!pp)) 1485 + pd = ip_vs_proto_data_get(net, iph.protocol); 1486 + if (unlikely(!pd)) 1566 1487 return NF_ACCEPT; 1567 - 1488 + pp = pd->pp; 1568 1489 /* 1569 1490 * Check if the packet belongs to an existing connection entry 1570 1491 */ 1571 - cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); 1492 + cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); 1572 1493 1573 1494 if (unlikely(!cp)) { 1574 1495 int v; 1575 1496 1576 - if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1497 + if (!pp->conn_schedule(af, skb, pd, &v, &cp)) 1577 1498 return v; 1578 1499 } 1579 1500 ··· 1586 1505 } 1587 1506 1588 1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); 1589 - 1508 + net = skb_net(skb); 1509 + ipvs = net_ipvs(net); 1590 1510 /* Check the server status */ 1591 1511 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1592 1512 /* the destination server is not available */ 1593 1513 1594 - if (sysctl_ip_vs_expire_nodest_conn) { 1514 + if (ipvs->sysctl_expire_nodest_conn) { 1595 1515 /* try to expire the connection immediately */ 1596 1516 ip_vs_conn_expire_now(cp); 1597 1517 } ··· 1603 1521 } 1604 1522 1605 1523 ip_vs_in_stats(cp, skb); 1606 - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); 1524 + restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); 1607 1525 if (cp->packet_xmit) 1608 1526 ret = cp->packet_xmit(skb, cp, pp); 1609 1527 /* do not touch skb anymore */ ··· 1617 1535 * 1618 1536 * Sync connection if it is about to close to 1619 1537 * encorage the standby servers to update the connections timeout 1538 + * 1539 + * For ONE_PKT let ip_vs_sync_conn() do the filter work. 
1620 1540 */ 1621 - pkts = atomic_add_return(1, &cp->in_pkts); 1622 - if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1541 + 1542 + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 1543 + pkts = ipvs->sysctl_sync_threshold[0]; 1544 + else 1545 + pkts = atomic_add_return(1, &cp->in_pkts); 1546 + 1547 + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1623 1548 cp->protocol == IPPROTO_SCTP) { 1624 1549 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1625 - (pkts % sysctl_ip_vs_sync_threshold[1] 1626 - == sysctl_ip_vs_sync_threshold[0])) || 1550 + (pkts % ipvs->sysctl_sync_threshold[1] 1551 + == ipvs->sysctl_sync_threshold[0])) || 1627 1552 (cp->old_state != cp->state && 1628 1553 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1629 1554 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || 1630 1555 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { 1631 - ip_vs_sync_conn(cp); 1556 + ip_vs_sync_conn(net, cp); 1632 1557 goto out; 1633 1558 } 1634 1559 } 1635 1560 1636 1561 /* Keep this block last: TCP and others with pp->num_states <= 1 */ 1637 - else if (af == AF_INET && 1638 - (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1562 + else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && 1639 1563 (((cp->protocol != IPPROTO_TCP || 1640 1564 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1641 - (pkts % sysctl_ip_vs_sync_threshold[1] 1642 - == sysctl_ip_vs_sync_threshold[0])) || 1565 + (pkts % ipvs->sysctl_sync_threshold[1] 1566 + == ipvs->sysctl_sync_threshold[0])) || 1643 1567 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && 1644 1568 ((cp->state == IP_VS_TCP_S_FIN_WAIT) || 1645 1569 (cp->state == IP_VS_TCP_S_CLOSE) || 1646 1570 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || 1647 1571 (cp->state == IP_VS_TCP_S_TIME_WAIT))))) 1648 - ip_vs_sync_conn(cp); 1572 + ip_vs_sync_conn(net, cp); 1649 1573 out: 1650 1574 cp->old_state = cp->state; 1651 1575 ··· 1870 1782 }, 1871 1783 #endif 1872 1784 }; 1785 + /* 1786 + * Initialize IP Virtual Server netns mem. 
1787 + */ 1788 + static int __net_init __ip_vs_init(struct net *net) 1789 + { 1790 + struct netns_ipvs *ipvs; 1873 1791 1792 + ipvs = net_generic(net, ip_vs_net_id); 1793 + if (ipvs == NULL) { 1794 + pr_err("%s(): no memory.\n", __func__); 1795 + return -ENOMEM; 1796 + } 1797 + ipvs->net = net; 1798 + /* Counters used for creating unique names */ 1799 + ipvs->gen = atomic_read(&ipvs_netns_cnt); 1800 + atomic_inc(&ipvs_netns_cnt); 1801 + net->ipvs = ipvs; 1802 + printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", 1803 + sizeof(struct netns_ipvs), ipvs->gen); 1804 + return 0; 1805 + } 1806 + 1807 + static void __net_exit __ip_vs_cleanup(struct net *net) 1808 + { 1809 + struct netns_ipvs *ipvs = net_ipvs(net); 1810 + 1811 + IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); 1812 + } 1813 + 1814 + static struct pernet_operations ipvs_core_ops = { 1815 + .init = __ip_vs_init, 1816 + .exit = __ip_vs_cleanup, 1817 + .id = &ip_vs_net_id, 1818 + .size = sizeof(struct netns_ipvs), 1819 + }; 1874 1820 1875 1821 /* 1876 1822 * Initialize IP Virtual Server ··· 1913 1791 { 1914 1792 int ret; 1915 1793 1916 - ip_vs_estimator_init(); 1794 + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ 1795 + if (ret < 0) 1796 + return ret; 1917 1797 1798 + ip_vs_estimator_init(); 1918 1799 ret = ip_vs_control_init(); 1919 1800 if (ret < 0) { 1920 1801 pr_err("can't setup control.\n"); ··· 1938 1813 goto cleanup_app; 1939 1814 } 1940 1815 1816 + ret = ip_vs_sync_init(); 1817 + if (ret < 0) { 1818 + pr_err("can't setup sync data.\n"); 1819 + goto cleanup_conn; 1820 + } 1821 + 1941 1822 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1942 1823 if (ret < 0) { 1943 1824 pr_err("can't register hooks.\n"); 1944 - goto cleanup_conn; 1825 + goto cleanup_sync; 1945 1826 } 1946 1827 1947 1828 pr_info("ipvs loaded.\n"); 1948 1829 return ret; 1949 1830 1831 + cleanup_sync: 1832 + ip_vs_sync_cleanup(); 1950 1833 cleanup_conn: 1951 1834 ip_vs_conn_cleanup(); 1952 
1835 cleanup_app: ··· 1964 1831 ip_vs_control_cleanup(); 1965 1832 cleanup_estimator: 1966 1833 ip_vs_estimator_cleanup(); 1834 + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ 1967 1835 return ret; 1968 1836 } 1969 1837 1970 1838 static void __exit ip_vs_cleanup(void) 1971 1839 { 1972 1840 nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 1841 + ip_vs_sync_cleanup(); 1973 1842 ip_vs_conn_cleanup(); 1974 1843 ip_vs_app_cleanup(); 1975 1844 ip_vs_protocol_cleanup(); 1976 1845 ip_vs_control_cleanup(); 1977 1846 ip_vs_estimator_cleanup(); 1847 + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ 1978 1848 pr_info("ipvs unloaded.\n"); 1979 1849 } 1980 1850

+575 -370

net/netfilter/ipvs/ip_vs_ctl.c

··· 38 38 #include <linux/mutex.h> 39 39 40 40 #include <net/net_namespace.h> 41 + #include <linux/nsproxy.h> 41 42 #include <net/ip.h> 42 43 #ifdef CONFIG_IP_VS_IPV6 43 44 #include <net/ipv6.h> ··· 58 57 /* lock for service table */ 59 58 static DEFINE_RWLOCK(__ip_vs_svc_lock); 60 59 61 - /* lock for table with the real services */ 62 - static DEFINE_RWLOCK(__ip_vs_rs_lock); 63 - 64 - /* lock for state and timeout tables */ 65 - static DEFINE_SPINLOCK(ip_vs_securetcp_lock); 66 - 67 - /* lock for drop entry handling */ 68 - static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 69 - 70 - /* lock for drop packet handling */ 71 - static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); 72 - 73 - /* 1/rate drop and drop-entry variables */ 74 - int ip_vs_drop_rate = 0; 75 - int ip_vs_drop_counter = 0; 76 - static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); 77 - 78 - /* number of virtual services */ 79 - static int ip_vs_num_services = 0; 80 - 81 60 /* sysctl variables */ 82 - static int sysctl_ip_vs_drop_entry = 0; 83 - static int sysctl_ip_vs_drop_packet = 0; 84 - static int sysctl_ip_vs_secure_tcp = 0; 85 - static int sysctl_ip_vs_amemthresh = 1024; 86 - static int sysctl_ip_vs_am_droprate = 10; 87 - int sysctl_ip_vs_cache_bypass = 0; 88 - int sysctl_ip_vs_expire_nodest_conn = 0; 89 - int sysctl_ip_vs_expire_quiescent_template = 0; 90 - int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 91 - int sysctl_ip_vs_nat_icmp_send = 0; 92 - #ifdef CONFIG_IP_VS_NFCT 93 - int sysctl_ip_vs_conntrack; 94 - #endif 95 - int sysctl_ip_vs_snat_reroute = 1; 96 - 97 61 98 62 #ifdef CONFIG_IP_VS_DEBUG 99 63 static int sysctl_ip_vs_debug_level = 0; ··· 71 105 72 106 #ifdef CONFIG_IP_VS_IPV6 73 107 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/ 74 - static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) 108 + static int __ip_vs_addr_is_local_v6(struct net *net, 109 + const struct in6_addr *addr) 75 110 { 76 111 struct rt6_info *rt; 77 112 struct flowi fl = { ··· 81 114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, 82 115 }; 83 116 84 - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 117 + rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl); 85 118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) 86 119 return 1; 87 120 ··· 92 125 * update_defense_level is called from keventd and from sysctl, 93 126 * so it needs to protect itself from softirqs 94 127 */ 95 - static void update_defense_level(void) 128 + static void update_defense_level(struct netns_ipvs *ipvs) 96 129 { 97 130 struct sysinfo i; 98 131 static int old_secure_tcp = 0; ··· 108 141 /* si_swapinfo(&i); */ 109 142 /* availmem = availmem - (i.totalswap - i.freeswap); */ 110 143 111 - nomem = (availmem < sysctl_ip_vs_amemthresh); 144 + nomem = (availmem < ipvs->sysctl_amemthresh); 112 145 113 146 local_bh_disable(); 114 147 115 148 /* drop_entry */ 116 - spin_lock(&__ip_vs_dropentry_lock); 117 - switch (sysctl_ip_vs_drop_entry) { 149 + spin_lock(&ipvs->dropentry_lock); 150 + switch (ipvs->sysctl_drop_entry) { 118 151 case 0: 119 - atomic_set(&ip_vs_dropentry, 0); 152 + atomic_set(&ipvs->dropentry, 0); 120 153 break; 121 154 case 1: 122 155 if (nomem) { 123 - atomic_set(&ip_vs_dropentry, 1); 124 - sysctl_ip_vs_drop_entry = 2; 156 + atomic_set(&ipvs->dropentry, 1); 157 + ipvs->sysctl_drop_entry = 2; 125 158 } else { 126 - atomic_set(&ip_vs_dropentry, 0); 159 + atomic_set(&ipvs->dropentry, 0); 127 160 } 128 161 break; 129 162 case 2: 130 163 if (nomem) { 131 - atomic_set(&ip_vs_dropentry, 1); 164 + atomic_set(&ipvs->dropentry, 1); 132 165 } else { 133 - atomic_set(&ip_vs_dropentry, 0); 134 - sysctl_ip_vs_drop_entry = 1; 166 + atomic_set(&ipvs->dropentry, 0); 167 + ipvs->sysctl_drop_entry = 1; 135 168 }; 136 
169 break; 137 170 case 3: 138 - atomic_set(&ip_vs_dropentry, 1); 171 + atomic_set(&ipvs->dropentry, 1); 139 172 break; 140 173 } 141 - spin_unlock(&__ip_vs_dropentry_lock); 174 + spin_unlock(&ipvs->dropentry_lock); 142 175 143 176 /* drop_packet */ 144 - spin_lock(&__ip_vs_droppacket_lock); 145 - switch (sysctl_ip_vs_drop_packet) { 177 + spin_lock(&ipvs->droppacket_lock); 178 + switch (ipvs->sysctl_drop_packet) { 146 179 case 0: 147 - ip_vs_drop_rate = 0; 180 + ipvs->drop_rate = 0; 148 181 break; 149 182 case 1: 150 183 if (nomem) { 151 - ip_vs_drop_rate = ip_vs_drop_counter 152 - = sysctl_ip_vs_amemthresh / 153 - (sysctl_ip_vs_amemthresh-availmem); 154 - sysctl_ip_vs_drop_packet = 2; 184 + ipvs->drop_rate = ipvs->drop_counter 185 + = ipvs->sysctl_amemthresh / 186 + (ipvs->sysctl_amemthresh-availmem); 187 + ipvs->sysctl_drop_packet = 2; 155 188 } else { 156 - ip_vs_drop_rate = 0; 189 + ipvs->drop_rate = 0; 157 190 } 158 191 break; 159 192 case 2: 160 193 if (nomem) { 161 - ip_vs_drop_rate = ip_vs_drop_counter 162 - = sysctl_ip_vs_amemthresh / 163 - (sysctl_ip_vs_amemthresh-availmem); 194 + ipvs->drop_rate = ipvs->drop_counter 195 + = ipvs->sysctl_amemthresh / 196 + (ipvs->sysctl_amemthresh-availmem); 164 197 } else { 165 - ip_vs_drop_rate = 0; 166 - sysctl_ip_vs_drop_packet = 1; 198 + ipvs->drop_rate = 0; 199 + ipvs->sysctl_drop_packet = 1; 167 200 } 168 201 break; 169 202 case 3: 170 - ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 203 + ipvs->drop_rate = ipvs->sysctl_am_droprate; 171 204 break; 172 205 } 173 - spin_unlock(&__ip_vs_droppacket_lock); 206 + spin_unlock(&ipvs->droppacket_lock); 174 207 175 208 /* secure_tcp */ 176 - spin_lock(&ip_vs_securetcp_lock); 177 - switch (sysctl_ip_vs_secure_tcp) { 209 + spin_lock(&ipvs->securetcp_lock); 210 + switch (ipvs->sysctl_secure_tcp) { 178 211 case 0: 179 212 if (old_secure_tcp >= 2) 180 213 to_change = 0; ··· 183 216 if (nomem) { 184 217 if (old_secure_tcp < 2) 185 218 to_change = 1; 186 - sysctl_ip_vs_secure_tcp = 2; 
219 + ipvs->sysctl_secure_tcp = 2; 187 220 } else { 188 221 if (old_secure_tcp >= 2) 189 222 to_change = 0; ··· 196 229 } else { 197 230 if (old_secure_tcp >= 2) 198 231 to_change = 0; 199 - sysctl_ip_vs_secure_tcp = 1; 232 + ipvs->sysctl_secure_tcp = 1; 200 233 } 201 234 break; 202 235 case 3: ··· 204 237 to_change = 1; 205 238 break; 206 239 } 207 - old_secure_tcp = sysctl_ip_vs_secure_tcp; 240 + old_secure_tcp = ipvs->sysctl_secure_tcp; 208 241 if (to_change >= 0) 209 - ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 210 - spin_unlock(&ip_vs_securetcp_lock); 242 + ip_vs_protocol_timeout_change(ipvs, 243 + ipvs->sysctl_secure_tcp > 1); 244 + spin_unlock(&ipvs->securetcp_lock); 211 245 212 246 local_bh_enable(); 213 247 } ··· 218 250 * Timer for checking the defense 219 251 */ 220 252 #define DEFENSE_TIMER_PERIOD 1*HZ 221 - static void defense_work_handler(struct work_struct *work); 222 - static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); 223 253 224 254 static void defense_work_handler(struct work_struct *work) 225 255 { 226 - update_defense_level(); 227 - if (atomic_read(&ip_vs_dropentry)) 228 - ip_vs_random_dropentry(); 256 + struct netns_ipvs *ipvs = 257 + container_of(work, struct netns_ipvs, defense_work.work); 229 258 230 - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 259 + update_defense_level(ipvs); 260 + if (atomic_read(&ipvs->dropentry)) 261 + ip_vs_random_dropentry(ipvs->net); 262 + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 231 263 } 232 264 233 265 int ··· 255 287 /* the service table hashed by fwmark */ 256 288 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 257 289 258 - /* 259 - * Hash table: for real service lookups 260 - */ 261 - #define IP_VS_RTAB_BITS 4 262 - #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) 263 - #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) 264 - 265 - static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; 266 - 267 - /* 268 - * Trash for 
destinations 269 - */ 270 - static LIST_HEAD(ip_vs_dest_trash); 271 - 272 - /* 273 - * FTP & NULL virtual service counters 274 - */ 275 - static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); 276 - static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); 277 - 278 290 279 291 /* 280 292 * Returns hash value for virtual service 281 293 */ 282 - static __inline__ unsigned 283 - ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, 284 - __be16 port) 294 + static inline unsigned 295 + ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, 296 + const union nf_inet_addr *addr, __be16 port) 285 297 { 286 298 register unsigned porth = ntohs(port); 287 299 __be32 addr_fold = addr->ip; ··· 271 323 addr_fold = addr->ip6[0]^addr->ip6[1]^ 272 324 addr->ip6[2]^addr->ip6[3]; 273 325 #endif 326 + addr_fold ^= ((size_t)net>>8); 274 327 275 328 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 276 329 & IP_VS_SVC_TAB_MASK; ··· 280 331 /* 281 332 * Returns hash value of fwmark for virtual service lookup 282 333 */ 283 - static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 334 + static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) 284 335 { 285 - return fwmark & IP_VS_SVC_TAB_MASK; 336 + return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 286 337 } 287 338 288 339 /* 289 - * Hashes a service in the ip_vs_svc_table by <proto,addr,port> 340 + * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 290 341 * or in the ip_vs_svc_fwm_table by fwmark. 291 342 * Should be called with locked tables. 
292 343 */ ··· 302 353 303 354 if (svc->fwmark == 0) { 304 355 /* 305 - * Hash it by <protocol,addr,port> in ip_vs_svc_table 356 + * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 306 357 */ 307 - hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, 308 - svc->port); 358 + hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, 359 + &svc->addr, svc->port); 309 360 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 310 361 } else { 311 362 /* 312 - * Hash it by fwmark in ip_vs_svc_fwm_table 363 + * Hash it by fwmark in svc_fwm_table 313 364 */ 314 - hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 365 + hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); 315 366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 316 367 } 317 368 ··· 323 374 324 375 325 376 /* 326 - * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. 377 + * Unhashes a service from svc_table / svc_fwm_table. 327 378 * Should be called with locked tables. 328 379 */ 329 380 static int ip_vs_svc_unhash(struct ip_vs_service *svc) ··· 335 386 } 336 387 337 388 if (svc->fwmark == 0) { 338 - /* Remove it from the ip_vs_svc_table table */ 389 + /* Remove it from the svc_table table */ 339 390 list_del(&svc->s_list); 340 391 } else { 341 - /* Remove it from the ip_vs_svc_fwm_table table */ 392 + /* Remove it from the svc_fwm_table table */ 342 393 list_del(&svc->f_list); 343 394 } 344 395 ··· 349 400 350 401 351 402 /* 352 - * Get service by {proto,addr,port} in the service table. 403 + * Get service by {netns, proto,addr,port} in the service table. 
353 404 */ 354 405 static inline struct ip_vs_service * 355 - __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, 356 - __be16 vport) 406 + __ip_vs_service_find(struct net *net, int af, __u16 protocol, 407 + const union nf_inet_addr *vaddr, __be16 vport) 357 408 { 358 409 unsigned hash; 359 410 struct ip_vs_service *svc; 360 411 361 412 /* Check for "full" addressed entries */ 362 - hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); 413 + hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); 363 414 364 415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 365 416 if ((svc->af == af) 366 417 && ip_vs_addr_equal(af, &svc->addr, vaddr) 367 418 && (svc->port == vport) 368 - && (svc->protocol == protocol)) { 419 + && (svc->protocol == protocol) 420 + && net_eq(svc->net, net)) { 369 421 /* HIT */ 370 422 return svc; 371 423 } ··· 380 430 * Get service by {fwmark} in the service table. 381 431 */ 382 432 static inline struct ip_vs_service * 383 - __ip_vs_svc_fwm_find(int af, __u32 fwmark) 433 + __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) 384 434 { 385 435 unsigned hash; 386 436 struct ip_vs_service *svc; 387 437 388 438 /* Check for fwmark addressed entries */ 389 - hash = ip_vs_svc_fwm_hashkey(fwmark); 439 + hash = ip_vs_svc_fwm_hashkey(net, fwmark); 390 440 391 441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 392 - if (svc->fwmark == fwmark && svc->af == af) { 442 + if (svc->fwmark == fwmark && svc->af == af 443 + && net_eq(svc->net, net)) { 393 444 /* HIT */ 394 445 return svc; 395 446 } ··· 400 449 } 401 450 402 451 struct ip_vs_service * 403 - ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, 452 + ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, 404 453 const union nf_inet_addr *vaddr, __be16 vport) 405 454 { 406 455 struct ip_vs_service *svc; 456 + struct netns_ipvs *ipvs = net_ipvs(net); 407 457 408 458 read_lock(&__ip_vs_svc_lock); 409 459 410 460 /* 411 461 
* Check the table hashed by fwmark first 412 462 */ 413 - if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) 463 + svc = __ip_vs_svc_fwm_find(net, af, fwmark); 464 + if (fwmark && svc) 414 465 goto out; 415 466 416 467 /* 417 468 * Check the table hashed by <protocol,addr,port> 418 469 * for "full" addressed entries 419 470 */ 420 - svc = __ip_vs_service_find(af, protocol, vaddr, vport); 471 + svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); 421 472 422 473 if (svc == NULL 423 474 && protocol == IPPROTO_TCP 424 - && atomic_read(&ip_vs_ftpsvc_counter) 475 + && atomic_read(&ipvs->ftpsvc_counter) 425 476 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 426 477 /* 427 478 * Check if ftp service entry exists, the packet 428 479 * might belong to FTP data connections. 429 480 */ 430 - svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); 481 + svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); 431 482 } 432 483 433 484 if (svc == NULL 434 - && atomic_read(&ip_vs_nullsvc_counter)) { 485 + && atomic_read(&ipvs->nullsvc_counter)) { 435 486 /* 436 487 * Check if the catch-all port (port zero) exists 437 488 */ 438 - svc = __ip_vs_service_find(af, protocol, vaddr, 0); 489 + svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); 439 490 } 440 491 441 492 out: ··· 472 519 svc->fwmark, 473 520 IP_VS_DBG_ADDR(svc->af, &svc->addr), 474 521 ntohs(svc->port), atomic_read(&svc->usecnt)); 522 + free_percpu(svc->stats.cpustats); 475 523 kfree(svc); 476 524 } 477 525 } ··· 499 545 } 500 546 501 547 /* 502 - * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. 548 + * Hashes ip_vs_dest in rs_table by <proto,addr,port>. 503 549 * should be called with locked tables. 
504 550 */ 505 - static int ip_vs_rs_hash(struct ip_vs_dest *dest) 551 + static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 506 552 { 507 553 unsigned hash; 508 554 ··· 516 562 */ 517 563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 518 564 519 - list_add(&dest->d_list, &ip_vs_rtable[hash]); 565 + list_add(&dest->d_list, &ipvs->rs_table[hash]); 520 566 521 567 return 1; 522 568 } 523 569 524 570 /* 525 - * UNhashes ip_vs_dest from ip_vs_rtable. 571 + * UNhashes ip_vs_dest from rs_table. 526 572 * should be called with locked tables. 527 573 */ 528 574 static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 529 575 { 530 576 /* 531 - * Remove it from the ip_vs_rtable table. 577 + * Remove it from the rs_table table. 532 578 */ 533 579 if (!list_empty(&dest->d_list)) { 534 580 list_del(&dest->d_list); ··· 542 588 * Lookup real service by <proto,addr,port> in the real service table. 543 589 */ 544 590 struct ip_vs_dest * 545 - ip_vs_lookup_real_service(int af, __u16 protocol, 591 + ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, 546 592 const union nf_inet_addr *daddr, 547 593 __be16 dport) 548 594 { 595 + struct netns_ipvs *ipvs = net_ipvs(net); 549 596 unsigned hash; 550 597 struct ip_vs_dest *dest; 551 598 ··· 556 601 */ 557 602 hash = ip_vs_rs_hashkey(af, daddr, dport); 558 603 559 - read_lock(&__ip_vs_rs_lock); 560 - list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { 604 + read_lock(&ipvs->rs_lock); 605 + list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { 561 606 if ((dest->af == af) 562 607 && ip_vs_addr_equal(af, &dest->addr, daddr) 563 608 && (dest->port == dport) 564 609 && ((dest->protocol == protocol) || 565 610 dest->vfwmark)) { 566 611 /* HIT */ 567 - read_unlock(&__ip_vs_rs_lock); 612 + read_unlock(&ipvs->rs_lock); 568 613 return dest; 569 614 } 570 615 } 571 - read_unlock(&__ip_vs_rs_lock); 616 + read_unlock(&ipvs->rs_lock); 572 617 573 618 return NULL; 574 619 } ··· 607 652 * 
ip_vs_lookup_real_service() looked promissing, but 608 653 * seems not working as expected. 609 654 */ 610 - struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, 655 + struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, 656 + const union nf_inet_addr *daddr, 611 657 __be16 dport, 612 658 const union nf_inet_addr *vaddr, 613 - __be16 vport, __u16 protocol) 659 + __be16 vport, __u16 protocol, __u32 fwmark) 614 660 { 615 661 struct ip_vs_dest *dest; 616 662 struct ip_vs_service *svc; 617 663 618 - svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); 664 + svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); 619 665 if (!svc) 620 666 return NULL; 621 667 dest = ip_vs_lookup_dest(svc, daddr, dport); ··· 641 685 __be16 dport) 642 686 { 643 687 struct ip_vs_dest *dest, *nxt; 688 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 644 689 645 690 /* 646 691 * Find the destination in trash 647 692 */ 648 - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 693 + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 649 694 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 650 695 "dest->refcnt=%d\n", 651 696 dest->vfwmark, ··· 677 720 list_del(&dest->n_list); 678 721 ip_vs_dst_reset(dest); 679 722 __ip_vs_unbind_svc(dest); 723 + free_percpu(dest->stats.cpustats); 680 724 kfree(dest); 681 725 } 682 726 } ··· 695 737 * are expired, and the refcnt of each destination in the trash must 696 738 * be 1, so we simply release them here. 
697 739 */ 698 - static void ip_vs_trash_cleanup(void) 740 + static void ip_vs_trash_cleanup(struct net *net) 699 741 { 700 742 struct ip_vs_dest *dest, *nxt; 743 + struct netns_ipvs *ipvs = net_ipvs(net); 701 744 702 - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 745 + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { 703 746 list_del(&dest->n_list); 704 747 ip_vs_dst_reset(dest); 705 748 __ip_vs_unbind_svc(dest); 749 + free_percpu(dest->stats.cpustats); 706 750 kfree(dest); 707 751 } 708 752 } ··· 728 768 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 729 769 struct ip_vs_dest_user_kern *udest, int add) 730 770 { 771 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 731 772 int conn_flags; 732 773 733 774 /* set the weight and the flags */ ··· 741 780 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 742 781 } else { 743 782 /* 744 - * Put the real service in ip_vs_rtable if not present. 783 + * Put the real service in rs_table if not present. 745 784 * For now only for NAT! 
746 785 */ 747 - write_lock_bh(&__ip_vs_rs_lock); 748 - ip_vs_rs_hash(dest); 749 - write_unlock_bh(&__ip_vs_rs_lock); 786 + write_lock_bh(&ipvs->rs_lock); 787 + ip_vs_rs_hash(ipvs, dest); 788 + write_unlock_bh(&ipvs->rs_lock); 750 789 } 751 790 atomic_set(&dest->conn_flags, conn_flags); 752 791 ··· 774 813 spin_unlock(&dest->dst_lock); 775 814 776 815 if (add) 777 - ip_vs_new_estimator(&dest->stats); 816 + ip_vs_new_estimator(svc->net, &dest->stats); 778 817 779 818 write_lock_bh(&__ip_vs_svc_lock); 780 819 ··· 811 850 atype = ipv6_addr_type(&udest->addr.in6); 812 851 if ((!(atype & IPV6_ADDR_UNICAST) || 813 852 atype & IPV6_ADDR_LINKLOCAL) && 814 - !__ip_vs_addr_is_local_v6(&udest->addr.in6)) 853 + !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6)) 815 854 return -EINVAL; 816 855 } else 817 856 #endif 818 857 { 819 - atype = inet_addr_type(&init_net, udest->addr.ip); 858 + atype = inet_addr_type(svc->net, udest->addr.ip); 820 859 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 821 860 return -EINVAL; 822 861 } ··· 825 864 if (dest == NULL) { 826 865 pr_err("%s(): no memory.\n", __func__); 827 866 return -ENOMEM; 867 + } 868 + dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 869 + if (!dest->stats.cpustats) { 870 + pr_err("%s() alloc_percpu failed\n", __func__); 871 + goto err_alloc; 828 872 } 829 873 830 874 dest->af = svc->af; ··· 854 888 855 889 LeaveFunction(2); 856 890 return 0; 891 + 892 + err_alloc: 893 + kfree(dest); 894 + return -ENOMEM; 857 895 } 858 896 859 897 ··· 976 1006 /* 977 1007 * Delete a destination (must be already unlinked from the service) 978 1008 */ 979 - static void __ip_vs_del_dest(struct ip_vs_dest *dest) 1009 + static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) 980 1010 { 981 - ip_vs_kill_estimator(&dest->stats); 1011 + struct netns_ipvs *ipvs = net_ipvs(net); 1012 + 1013 + ip_vs_kill_estimator(net, &dest->stats); 982 1014 983 1015 /* 984 1016 * Remove it from the d-linked list with the real services. 
985 1017 */ 986 - write_lock_bh(&__ip_vs_rs_lock); 1018 + write_lock_bh(&ipvs->rs_lock); 987 1019 ip_vs_rs_unhash(dest); 988 - write_unlock_bh(&__ip_vs_rs_lock); 1020 + write_unlock_bh(&ipvs->rs_lock); 989 1021 990 1022 /* 991 1023 * Decrease the refcnt of the dest, and free the dest ··· 1006 1034 and only one user context can update virtual service at a 1007 1035 time, so the operation here is OK */ 1008 1036 atomic_dec(&dest->svc->refcnt); 1037 + free_percpu(dest->stats.cpustats); 1009 1038 kfree(dest); 1010 1039 } else { 1011 1040 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " ··· 1014 1041 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1015 1042 ntohs(dest->port), 1016 1043 atomic_read(&dest->refcnt)); 1017 - list_add(&dest->n_list, &ip_vs_dest_trash); 1044 + list_add(&dest->n_list, &ipvs->dest_trash); 1018 1045 atomic_inc(&dest->refcnt); 1019 1046 } 1020 1047 } ··· 1078 1105 /* 1079 1106 * Delete the destination 1080 1107 */ 1081 - __ip_vs_del_dest(dest); 1108 + __ip_vs_del_dest(svc->net, dest); 1082 1109 1083 1110 LeaveFunction(2); 1084 1111 ··· 1090 1117 * Add a service into the service hash table 1091 1118 */ 1092 1119 static int 1093 - ip_vs_add_service(struct ip_vs_service_user_kern *u, 1120 + ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, 1094 1121 struct ip_vs_service **svc_p) 1095 1122 { 1096 1123 int ret = 0; 1097 1124 struct ip_vs_scheduler *sched = NULL; 1098 1125 struct ip_vs_pe *pe = NULL; 1099 1126 struct ip_vs_service *svc = NULL; 1127 + struct netns_ipvs *ipvs = net_ipvs(net); 1100 1128 1101 1129 /* increase the module use count */ 1102 1130 ip_vs_use_count_inc(); ··· 1111 1137 } 1112 1138 1113 1139 if (u->pe_name && *u->pe_name) { 1114 - pe = ip_vs_pe_get(u->pe_name); 1140 + pe = ip_vs_pe_getbyname(u->pe_name); 1115 1141 if (pe == NULL) { 1116 1142 pr_info("persistence engine module ip_vs_pe_%s " 1117 1143 "not found\n", u->pe_name); ··· 1133 1159 ret = -ENOMEM; 1134 1160 goto out_err; 1135 1161 } 1162 + svc->stats.cpustats = 
alloc_percpu(struct ip_vs_cpu_stats); 1163 + if (!svc->stats.cpustats) { 1164 + pr_err("%s() alloc_percpu failed\n", __func__); 1165 + goto out_err; 1166 + } 1136 1167 1137 1168 /* I'm the first user of the service */ 1138 1169 atomic_set(&svc->usecnt, 0); ··· 1151 1172 svc->flags = u->flags; 1152 1173 svc->timeout = u->timeout * HZ; 1153 1174 svc->netmask = u->netmask; 1175 + svc->net = net; 1154 1176 1155 1177 INIT_LIST_HEAD(&svc->destinations); 1156 1178 rwlock_init(&svc->sched_lock); ··· 1169 1189 1170 1190 /* Update the virtual service counters */ 1171 1191 if (svc->port == FTPPORT) 1172 - atomic_inc(&ip_vs_ftpsvc_counter); 1192 + atomic_inc(&ipvs->ftpsvc_counter); 1173 1193 else if (svc->port == 0) 1174 - atomic_inc(&ip_vs_nullsvc_counter); 1194 + atomic_inc(&ipvs->nullsvc_counter); 1175 1195 1176 - ip_vs_new_estimator(&svc->stats); 1196 + ip_vs_new_estimator(net, &svc->stats); 1177 1197 1178 1198 /* Count only IPv4 services for old get/setsockopt interface */ 1179 1199 if (svc->af == AF_INET) 1180 - ip_vs_num_services++; 1200 + ipvs->num_services++; 1181 1201 1182 1202 /* Hash the service into the service table */ 1183 1203 write_lock_bh(&__ip_vs_svc_lock); ··· 1187 1207 *svc_p = svc; 1188 1208 return 0; 1189 1209 1210 + 1190 1211 out_err: 1191 1212 if (svc != NULL) { 1192 1213 ip_vs_unbind_scheduler(svc); ··· 1196 1215 ip_vs_app_inc_put(svc->inc); 1197 1216 local_bh_enable(); 1198 1217 } 1218 + if (svc->stats.cpustats) 1219 + free_percpu(svc->stats.cpustats); 1199 1220 kfree(svc); 1200 1221 } 1201 1222 ip_vs_scheduler_put(sched); ··· 1231 1248 old_sched = sched; 1232 1249 1233 1250 if (u->pe_name && *u->pe_name) { 1234 - pe = ip_vs_pe_get(u->pe_name); 1251 + pe = ip_vs_pe_getbyname(u->pe_name); 1235 1252 if (pe == NULL) { 1236 1253 pr_info("persistence engine module ip_vs_pe_%s " 1237 1254 "not found\n", u->pe_name); ··· 1317 1334 struct ip_vs_dest *dest, *nxt; 1318 1335 struct ip_vs_scheduler *old_sched; 1319 1336 struct ip_vs_pe *old_pe; 1337 + struct 
netns_ipvs *ipvs = net_ipvs(svc->net); 1320 1338 1321 1339 pr_info("%s: enter\n", __func__); 1322 1340 1323 1341 /* Count only IPv4 services for old get/setsockopt interface */ 1324 1342 if (svc->af == AF_INET) 1325 - ip_vs_num_services--; 1343 + ipvs->num_services--; 1326 1344 1327 - ip_vs_kill_estimator(&svc->stats); 1345 + ip_vs_kill_estimator(svc->net, &svc->stats); 1328 1346 1329 1347 /* Unbind scheduler */ 1330 1348 old_sched = svc->scheduler; ··· 1348 1364 */ 1349 1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1350 1366 __ip_vs_unlink_dest(svc, dest, 0); 1351 - __ip_vs_del_dest(dest); 1367 + __ip_vs_del_dest(svc->net, dest); 1352 1368 } 1353 1369 1354 1370 /* 1355 1371 * Update the virtual service counters 1356 1372 */ 1357 1373 if (svc->port == FTPPORT) 1358 - atomic_dec(&ip_vs_ftpsvc_counter); 1374 + atomic_dec(&ipvs->ftpsvc_counter); 1359 1375 else if (svc->port == 0) 1360 - atomic_dec(&ip_vs_nullsvc_counter); 1376 + atomic_dec(&ipvs->nullsvc_counter); 1361 1377 1362 1378 /* 1363 1379 * Free the service if nobody refers to it ··· 1367 1383 svc->fwmark, 1368 1384 IP_VS_DBG_ADDR(svc->af, &svc->addr), 1369 1385 ntohs(svc->port), atomic_read(&svc->usecnt)); 1386 + free_percpu(svc->stats.cpustats); 1370 1387 kfree(svc); 1371 1388 } 1372 1389 ··· 1413 1428 /* 1414 1429 * Flush all the virtual services 1415 1430 */ 1416 - static int ip_vs_flush(void) 1431 + static int ip_vs_flush(struct net *net) 1417 1432 { 1418 1433 int idx; 1419 1434 struct ip_vs_service *svc, *nxt; 1420 1435 1421 1436 /* 1422 - * Flush the service table hashed by <protocol,addr,port> 1437 + * Flush the service table hashed by <netns,protocol,addr,port> 1423 1438 */ 1424 1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1425 - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1426 - ip_vs_unlink_service(svc); 1440 + list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], 1441 + s_list) { 1442 + if (net_eq(svc->net, net)) 1443 + 
ip_vs_unlink_service(svc); 1427 1444 } 1428 1445 } 1429 1446 ··· 1435 1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1436 1449 list_for_each_entry_safe(svc, nxt, 1437 1450 &ip_vs_svc_fwm_table[idx], f_list) { 1438 - ip_vs_unlink_service(svc); 1451 + if (net_eq(svc->net, net)) 1452 + ip_vs_unlink_service(svc); 1439 1453 } 1440 1454 } 1441 1455 ··· 1460 1472 return 0; 1461 1473 } 1462 1474 1463 - static int ip_vs_zero_all(void) 1475 + static int ip_vs_zero_all(struct net *net) 1464 1476 { 1465 1477 int idx; 1466 1478 struct ip_vs_service *svc; 1467 1479 1468 1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1469 1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1470 - ip_vs_zero_service(svc); 1482 + if (net_eq(svc->net, net)) 1483 + ip_vs_zero_service(svc); 1471 1484 } 1472 1485 } 1473 1486 1474 1487 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1475 1488 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1476 - ip_vs_zero_service(svc); 1489 + if (net_eq(svc->net, net)) 1490 + ip_vs_zero_service(svc); 1477 1491 } 1478 1492 } 1479 1493 1480 - ip_vs_zero_stats(&ip_vs_stats); 1494 + ip_vs_zero_stats(net_ipvs(net)->tot_stats); 1481 1495 return 0; 1482 1496 } 1483 1497 ··· 1488 1498 proc_do_defense_mode(ctl_table *table, int write, 1489 1499 void __user *buffer, size_t *lenp, loff_t *ppos) 1490 1500 { 1501 + struct net *net = current->nsproxy->net_ns; 1491 1502 int *valp = table->data; 1492 1503 int val = *valp; 1493 1504 int rc; ··· 1499 1508 /* Restore the correct value */ 1500 1509 *valp = val; 1501 1510 } else { 1502 - update_defense_level(); 1511 + update_defense_level(net_ipvs(net)); 1503 1512 } 1504 1513 } 1505 1514 return rc; ··· 1525 1534 return rc; 1526 1535 } 1527 1536 1537 + static int 1538 + proc_do_sync_mode(ctl_table *table, int write, 1539 + void __user *buffer, size_t *lenp, loff_t *ppos) 1540 + { 1541 + int *valp = table->data; 1542 + int val = *valp; 1543 + int rc; 1544 + 1545 + rc = proc_dointvec(table, write, buffer, 
lenp, ppos); 1546 + if (write && (*valp != val)) { 1547 + if ((*valp < 0) || (*valp > 1)) { 1548 + /* Restore the correct value */ 1549 + *valp = val; 1550 + } else { 1551 + struct net *net = current->nsproxy->net_ns; 1552 + ip_vs_sync_switch_mode(net, val); 1553 + } 1554 + } 1555 + return rc; 1556 + } 1528 1557 1529 1558 /* 1530 1559 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1560 + * Do not change order or insert new entries without 1561 + * align with netns init in __ip_vs_control_init() 1531 1562 */ 1532 1563 1533 1564 static struct ctl_table vs_vars[] = { 1534 1565 { 1535 1566 .procname = "amemthresh", 1536 - .data = &sysctl_ip_vs_amemthresh, 1567 + .maxlen = sizeof(int), 1568 + .mode = 0644, 1569 + .proc_handler = proc_dointvec, 1570 + }, 1571 + { 1572 + .procname = "am_droprate", 1573 + .maxlen = sizeof(int), 1574 + .mode = 0644, 1575 + .proc_handler = proc_dointvec, 1576 + }, 1577 + { 1578 + .procname = "drop_entry", 1579 + .maxlen = sizeof(int), 1580 + .mode = 0644, 1581 + .proc_handler = proc_do_defense_mode, 1582 + }, 1583 + { 1584 + .procname = "drop_packet", 1585 + .maxlen = sizeof(int), 1586 + .mode = 0644, 1587 + .proc_handler = proc_do_defense_mode, 1588 + }, 1589 + #ifdef CONFIG_IP_VS_NFCT 1590 + { 1591 + .procname = "conntrack", 1592 + .maxlen = sizeof(int), 1593 + .mode = 0644, 1594 + .proc_handler = &proc_dointvec, 1595 + }, 1596 + #endif 1597 + { 1598 + .procname = "secure_tcp", 1599 + .maxlen = sizeof(int), 1600 + .mode = 0644, 1601 + .proc_handler = proc_do_defense_mode, 1602 + }, 1603 + { 1604 + .procname = "snat_reroute", 1605 + .maxlen = sizeof(int), 1606 + .mode = 0644, 1607 + .proc_handler = &proc_dointvec, 1608 + }, 1609 + { 1610 + .procname = "sync_version", 1611 + .maxlen = sizeof(int), 1612 + .mode = 0644, 1613 + .proc_handler = &proc_do_sync_mode, 1614 + }, 1615 + { 1616 + .procname = "cache_bypass", 1617 + .maxlen = sizeof(int), 1618 + .mode = 0644, 1619 + .proc_handler = proc_dointvec, 1620 + }, 1621 + { 1622 + 
.procname = "expire_nodest_conn", 1623 + .maxlen = sizeof(int), 1624 + .mode = 0644, 1625 + .proc_handler = proc_dointvec, 1626 + }, 1627 + { 1628 + .procname = "expire_quiescent_template", 1629 + .maxlen = sizeof(int), 1630 + .mode = 0644, 1631 + .proc_handler = proc_dointvec, 1632 + }, 1633 + { 1634 + .procname = "sync_threshold", 1635 + .maxlen = 1636 + sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), 1637 + .mode = 0644, 1638 + .proc_handler = proc_do_sync_threshold, 1639 + }, 1640 + { 1641 + .procname = "nat_icmp_send", 1537 1642 .maxlen = sizeof(int), 1538 1643 .mode = 0644, 1539 1644 .proc_handler = proc_dointvec, ··· 1643 1556 .proc_handler = proc_dointvec, 1644 1557 }, 1645 1558 #endif 1646 - { 1647 - .procname = "am_droprate", 1648 - .data = &sysctl_ip_vs_am_droprate, 1649 - .maxlen = sizeof(int), 1650 - .mode = 0644, 1651 - .proc_handler = proc_dointvec, 1652 - }, 1653 - { 1654 - .procname = "drop_entry", 1655 - .data = &sysctl_ip_vs_drop_entry, 1656 - .maxlen = sizeof(int), 1657 - .mode = 0644, 1658 - .proc_handler = proc_do_defense_mode, 1659 - }, 1660 - { 1661 - .procname = "drop_packet", 1662 - .data = &sysctl_ip_vs_drop_packet, 1663 - .maxlen = sizeof(int), 1664 - .mode = 0644, 1665 - .proc_handler = proc_do_defense_mode, 1666 - }, 1667 - #ifdef CONFIG_IP_VS_NFCT 1668 - { 1669 - .procname = "conntrack", 1670 - .data = &sysctl_ip_vs_conntrack, 1671 - .maxlen = sizeof(int), 1672 - .mode = 0644, 1673 - .proc_handler = &proc_dointvec, 1674 - }, 1675 - #endif 1676 - { 1677 - .procname = "secure_tcp", 1678 - .data = &sysctl_ip_vs_secure_tcp, 1679 - .maxlen = sizeof(int), 1680 - .mode = 0644, 1681 - .proc_handler = proc_do_defense_mode, 1682 - }, 1683 - { 1684 - .procname = "snat_reroute", 1685 - .data = &sysctl_ip_vs_snat_reroute, 1686 - .maxlen = sizeof(int), 1687 - .mode = 0644, 1688 - .proc_handler = &proc_dointvec, 1689 - }, 1690 1559 #if 0 1691 1560 { 1692 1561 .procname = "timeout_established", ··· 1729 1686 .proc_handler = 
proc_dointvec_jiffies, 1730 1687 }, 1731 1688 #endif 1732 - { 1733 - .procname = "cache_bypass", 1734 - .data = &sysctl_ip_vs_cache_bypass, 1735 - .maxlen = sizeof(int), 1736 - .mode = 0644, 1737 - .proc_handler = proc_dointvec, 1738 - }, 1739 - { 1740 - .procname = "expire_nodest_conn", 1741 - .data = &sysctl_ip_vs_expire_nodest_conn, 1742 - .maxlen = sizeof(int), 1743 - .mode = 0644, 1744 - .proc_handler = proc_dointvec, 1745 - }, 1746 - { 1747 - .procname = "expire_quiescent_template", 1748 - .data = &sysctl_ip_vs_expire_quiescent_template, 1749 - .maxlen = sizeof(int), 1750 - .mode = 0644, 1751 - .proc_handler = proc_dointvec, 1752 - }, 1753 - { 1754 - .procname = "sync_threshold", 1755 - .data = &sysctl_ip_vs_sync_threshold, 1756 - .maxlen = sizeof(sysctl_ip_vs_sync_threshold), 1757 - .mode = 0644, 1758 - .proc_handler = proc_do_sync_threshold, 1759 - }, 1760 - { 1761 - .procname = "nat_icmp_send", 1762 - .data = &sysctl_ip_vs_nat_icmp_send, 1763 - .maxlen = sizeof(int), 1764 - .mode = 0644, 1765 - .proc_handler = proc_dointvec, 1766 - }, 1767 1689 { } 1768 1690 }; 1769 1691 ··· 1740 1732 }; 1741 1733 EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1742 1734 1743 - static struct ctl_table_header * sysctl_header; 1744 - 1745 1735 #ifdef CONFIG_PROC_FS 1746 1736 1747 1737 struct ip_vs_iter { 1738 + struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1748 1739 struct list_head *table; 1749 1740 int bucket; 1750 1741 }; ··· 1770 1763 /* Get the Nth entry in the two lists */ 1771 1764 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1772 1765 { 1766 + struct net *net = seq_file_net(seq); 1773 1767 struct ip_vs_iter *iter = seq->private; 1774 1768 int idx; 1775 1769 struct ip_vs_service *svc; ··· 1778 1770 /* look in hash by protocol */ 1779 1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1780 1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1781 - if (pos-- == 0){ 1773 + if (net_eq(svc->net, net) && pos-- == 
0) { 1782 1774 iter->table = ip_vs_svc_table; 1783 1775 iter->bucket = idx; 1784 1776 return svc; ··· 1789 1781 /* keep looking in fwmark */ 1790 1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1791 1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1792 - if (pos-- == 0) { 1784 + if (net_eq(svc->net, net) && pos-- == 0) { 1793 1785 iter->table = ip_vs_svc_fwm_table; 1794 1786 iter->bucket = idx; 1795 1787 return svc; ··· 1943 1935 1944 1936 static int ip_vs_info_open(struct inode *inode, struct file *file) 1945 1937 { 1946 - return seq_open_private(file, &ip_vs_info_seq_ops, 1938 + return seq_open_net(inode, file, &ip_vs_info_seq_ops, 1947 1939 sizeof(struct ip_vs_iter)); 1948 1940 } 1949 1941 ··· 1957 1949 1958 1950 #endif 1959 1951 1960 - struct ip_vs_stats ip_vs_stats = { 1961 - .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), 1962 - }; 1963 - 1964 1952 #ifdef CONFIG_PROC_FS 1965 1953 static int ip_vs_stats_show(struct seq_file *seq, void *v) 1966 1954 { 1955 + struct net *net = seq_file_single_net(seq); 1956 + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; 1967 1957 1968 1958 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1969 1959 seq_puts(seq, ··· 1969 1963 seq_printf(seq, 1970 1964 " Conns Packets Packets Bytes Bytes\n"); 1971 1965 1972 - spin_lock_bh(&ip_vs_stats.lock); 1973 - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, 1974 - ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, 1975 - (unsigned long long) ip_vs_stats.ustats.inbytes, 1976 - (unsigned long long) ip_vs_stats.ustats.outbytes); 1966 + spin_lock_bh(&tot_stats->lock); 1967 + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, 1968 + tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, 1969 + (unsigned long long) tot_stats->ustats.inbytes, 1970 + (unsigned long long) tot_stats->ustats.outbytes); 1977 1971 1978 1972 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1979 1973 
seq_puts(seq, 1980 1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 1981 1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 1982 - ip_vs_stats.ustats.cps, 1983 - ip_vs_stats.ustats.inpps, 1984 - ip_vs_stats.ustats.outpps, 1985 - ip_vs_stats.ustats.inbps, 1986 - ip_vs_stats.ustats.outbps); 1987 - spin_unlock_bh(&ip_vs_stats.lock); 1976 + tot_stats->ustats.cps, 1977 + tot_stats->ustats.inpps, 1978 + tot_stats->ustats.outpps, 1979 + tot_stats->ustats.inbps, 1980 + tot_stats->ustats.outbps); 1981 + spin_unlock_bh(&tot_stats->lock); 1988 1982 1989 1983 return 0; 1990 1984 } 1991 1985 1992 1986 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 1993 1987 { 1994 - return single_open(file, ip_vs_stats_show, NULL); 1988 + return single_open_net(inode, file, ip_vs_stats_show); 1995 1989 } 1996 1990 1997 1991 static const struct file_operations ip_vs_stats_fops = { ··· 2002 1996 .release = single_release, 2003 1997 }; 2004 1998 1999 + static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2000 + { 2001 + struct net *net = seq_file_single_net(seq); 2002 + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; 2003 + int i; 2004 + 2005 + /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2006 + seq_puts(seq, 2007 + " Total Incoming Outgoing Incoming Outgoing\n"); 2008 + seq_printf(seq, 2009 + "CPU Conns Packets Packets Bytes Bytes\n"); 2010 + 2011 + for_each_possible_cpu(i) { 2012 + struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); 2013 + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", 2014 + i, u->ustats.conns, u->ustats.inpkts, 2015 + u->ustats.outpkts, (__u64)u->ustats.inbytes, 2016 + (__u64)u->ustats.outbytes); 2017 + } 2018 + 2019 + spin_lock_bh(&tot_stats->lock); 2020 + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", 2021 + tot_stats->ustats.conns, tot_stats->ustats.inpkts, 2022 + tot_stats->ustats.outpkts, 2023 + (unsigned long long) tot_stats->ustats.inbytes, 2024 + (unsigned long long) 
tot_stats->ustats.outbytes); 2025 + 2026 + /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2027 + seq_puts(seq, 2028 + " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2029 + seq_printf(seq, " %8X %8X %8X %16X %16X\n", 2030 + tot_stats->ustats.cps, 2031 + tot_stats->ustats.inpps, 2032 + tot_stats->ustats.outpps, 2033 + tot_stats->ustats.inbps, 2034 + tot_stats->ustats.outbps); 2035 + spin_unlock_bh(&tot_stats->lock); 2036 + 2037 + return 0; 2038 + } 2039 + 2040 + static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) 2041 + { 2042 + return single_open_net(inode, file, ip_vs_stats_percpu_show); 2043 + } 2044 + 2045 + static const struct file_operations ip_vs_stats_percpu_fops = { 2046 + .owner = THIS_MODULE, 2047 + .open = ip_vs_stats_percpu_seq_open, 2048 + .read = seq_read, 2049 + .llseek = seq_lseek, 2050 + .release = single_release, 2051 + }; 2005 2052 #endif 2006 2053 2007 2054 /* 2008 2055 * Set timeout values for tcp tcpfin udp in the timeout_table. 
2009 2056 */ 2010 - static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) 2057 + static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) 2011 2058 { 2059 + struct ip_vs_proto_data *pd; 2060 + 2012 2061 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2013 2062 u->tcp_timeout, 2014 2063 u->tcp_fin_timeout, ··· 2071 2010 2072 2011 #ifdef CONFIG_IP_VS_PROTO_TCP 2073 2012 if (u->tcp_timeout) { 2074 - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] 2013 + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2014 + pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2075 2015 = u->tcp_timeout * HZ; 2076 2016 } 2077 2017 2078 2018 if (u->tcp_fin_timeout) { 2079 - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] 2019 + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2020 + pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2080 2021 = u->tcp_fin_timeout * HZ; 2081 2022 } 2082 2023 #endif 2083 2024 2084 2025 #ifdef CONFIG_IP_VS_PROTO_UDP 2085 2026 if (u->udp_timeout) { 2086 - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] 2027 + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2028 + pd->timeout_table[IP_VS_UDP_S_NORMAL] 2087 2029 = u->udp_timeout * HZ; 2088 2030 } 2089 2031 #endif ··· 2151 2087 static int 2152 2088 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2153 2089 { 2090 + struct net *net = sock_net(sk); 2154 2091 int ret; 2155 2092 unsigned char arg[MAX_ARG_LEN]; 2156 2093 struct ip_vs_service_user *usvc_compat; ··· 2186 2121 2187 2122 if (cmd == IP_VS_SO_SET_FLUSH) { 2188 2123 /* Flush the virtual service */ 2189 - ret = ip_vs_flush(); 2124 + ret = ip_vs_flush(net); 2190 2125 goto out_unlock; 2191 2126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2192 2127 /* Set timeout values for (tcp tcpfin udp) */ 2193 - ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); 2128 + ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg); 2194 2129 goto out_unlock; 2195 2130 } else if (cmd == 
IP_VS_SO_SET_STARTDAEMON) { 2196 2131 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2197 - ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); 2132 + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, 2133 + dm->syncid); 2198 2134 goto out_unlock; 2199 2135 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 2200 2136 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2201 - ret = stop_sync_thread(dm->state); 2137 + ret = stop_sync_thread(net, dm->state); 2202 2138 goto out_unlock; 2203 2139 } 2204 2140 ··· 2214 2148 if (cmd == IP_VS_SO_SET_ZERO) { 2215 2149 /* if no service address is set, zero counters in all */ 2216 2150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2217 - ret = ip_vs_zero_all(); 2151 + ret = ip_vs_zero_all(net); 2218 2152 goto out_unlock; 2219 2153 } 2220 2154 } ··· 2231 2165 2232 2166 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2233 2167 if (usvc.fwmark == 0) 2234 - svc = __ip_vs_service_find(usvc.af, usvc.protocol, 2168 + svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, 2235 2169 &usvc.addr, usvc.port); 2236 2170 else 2237 - svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); 2171 + svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); 2238 2172 2239 2173 if (cmd != IP_VS_SO_SET_ADD 2240 2174 && (svc == NULL || svc->protocol != usvc.protocol)) { ··· 2247 2181 if (svc != NULL) 2248 2182 ret = -EEXIST; 2249 2183 else 2250 - ret = ip_vs_add_service(&usvc, &svc); 2184 + ret = ip_vs_add_service(net, &usvc, &svc); 2251 2185 break; 2252 2186 case IP_VS_SO_SET_EDIT: 2253 2187 ret = ip_vs_edit_service(svc, &usvc); ··· 2307 2241 } 2308 2242 2309 2243 static inline int 2310 - __ip_vs_get_service_entries(const struct ip_vs_get_services *get, 2244 + __ip_vs_get_service_entries(struct net *net, 2245 + const struct ip_vs_get_services *get, 2311 2246 struct ip_vs_get_services __user *uptr) 2312 2247 { 2313 2248 int idx, count=0; ··· 2319 2252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; 
idx++) { 2320 2253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2321 2254 /* Only expose IPv4 entries to old interface */ 2322 - if (svc->af != AF_INET) 2255 + if (svc->af != AF_INET || !net_eq(svc->net, net)) 2323 2256 continue; 2324 2257 2325 2258 if (count >= get->num_services) ··· 2338 2271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2339 2272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2340 2273 /* Only expose IPv4 entries to old interface */ 2341 - if (svc->af != AF_INET) 2274 + if (svc->af != AF_INET || !net_eq(svc->net, net)) 2342 2275 continue; 2343 2276 2344 2277 if (count >= get->num_services) ··· 2358 2291 } 2359 2292 2360 2293 static inline int 2361 - __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 2294 + __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, 2362 2295 struct ip_vs_get_dests __user *uptr) 2363 2296 { 2364 2297 struct ip_vs_service *svc; ··· 2366 2299 int ret = 0; 2367 2300 2368 2301 if (get->fwmark) 2369 - svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); 2302 + svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); 2370 2303 else 2371 - svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, 2304 + svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, 2372 2305 get->port); 2373 2306 2374 2307 if (svc) { ··· 2403 2336 } 2404 2337 2405 2338 static inline void 2406 - __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 2339 + __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) 2407 2340 { 2341 + struct ip_vs_proto_data *pd; 2342 + 2408 2343 #ifdef CONFIG_IP_VS_PROTO_TCP 2409 - u->tcp_timeout = 2410 - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2411 - u->tcp_fin_timeout = 2412 - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2344 + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 2345 + u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2346 + u->tcp_fin_timeout = 
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2413 2347 #endif 2414 2348 #ifdef CONFIG_IP_VS_PROTO_UDP 2349 + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 2415 2350 u->udp_timeout = 2416 - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2351 + pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2417 2352 #endif 2418 2353 } 2419 2354 ··· 2444 2375 unsigned char arg[128]; 2445 2376 int ret = 0; 2446 2377 unsigned int copylen; 2378 + struct net *net = sock_net(sk); 2379 + struct netns_ipvs *ipvs = net_ipvs(net); 2447 2380 2381 + BUG_ON(!net); 2448 2382 if (!capable(CAP_NET_ADMIN)) 2449 2383 return -EPERM; 2450 2384 ··· 2490 2418 struct ip_vs_getinfo info; 2491 2419 info.version = IP_VS_VERSION_CODE; 2492 2420 info.size = ip_vs_conn_tab_size; 2493 - info.num_services = ip_vs_num_services; 2421 + info.num_services = ipvs->num_services; 2494 2422 if (copy_to_user(user, &info, sizeof(info)) != 0) 2495 2423 ret = -EFAULT; 2496 2424 } ··· 2509 2437 ret = -EINVAL; 2510 2438 goto out; 2511 2439 } 2512 - ret = __ip_vs_get_service_entries(get, user); 2440 + ret = __ip_vs_get_service_entries(net, get, user); 2513 2441 } 2514 2442 break; 2515 2443 ··· 2522 2450 entry = (struct ip_vs_service_entry *)arg; 2523 2451 addr.ip = entry->addr; 2524 2452 if (entry->fwmark) 2525 - svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); 2453 + svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); 2526 2454 else 2527 - svc = __ip_vs_service_find(AF_INET, entry->protocol, 2528 - &addr, entry->port); 2455 + svc = __ip_vs_service_find(net, AF_INET, 2456 + entry->protocol, &addr, 2457 + entry->port); 2529 2458 if (svc) { 2530 2459 ip_vs_copy_service(entry, svc); 2531 2460 if (copy_to_user(user, entry, sizeof(*entry)) != 0) ··· 2549 2476 ret = -EINVAL; 2550 2477 goto out; 2551 2478 } 2552 - ret = __ip_vs_get_dest_entries(get, user); 2479 + ret = __ip_vs_get_dest_entries(net, get, user); 2553 2480 } 2554 2481 break; 2555 2482 ··· 2557 2484 { 2558 2485 struct ip_vs_timeout_user t; 2559 2486 2560 
- __ip_vs_get_timeouts(&t); 2487 + __ip_vs_get_timeouts(net, &t); 2561 2488 if (copy_to_user(user, &t, sizeof(t)) != 0) 2562 2489 ret = -EFAULT; 2563 2490 } ··· 2568 2495 struct ip_vs_daemon_user d[2]; 2569 2496 2570 2497 memset(&d, 0, sizeof(d)); 2571 - if (ip_vs_sync_state & IP_VS_STATE_MASTER) { 2498 + if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2572 2499 d[0].state = IP_VS_STATE_MASTER; 2573 - strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); 2574 - d[0].syncid = ip_vs_master_syncid; 2500 + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, 2501 + sizeof(d[0].mcast_ifn)); 2502 + d[0].syncid = ipvs->master_syncid; 2575 2503 } 2576 - if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { 2504 + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2577 2505 d[1].state = IP_VS_STATE_BACKUP; 2578 - strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); 2579 - d[1].syncid = ip_vs_backup_syncid; 2506 + strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, 2507 + sizeof(d[1].mcast_ifn)); 2508 + d[1].syncid = ipvs->backup_syncid; 2580 2509 } 2581 2510 if (copy_to_user(user, &d, sizeof(d)) != 0) 2582 2511 ret = -EFAULT; ··· 2617 2542 .name = IPVS_GENL_NAME, 2618 2543 .version = IPVS_GENL_VERSION, 2619 2544 .maxattr = IPVS_CMD_MAX, 2545 + .netnsok = true, /* Make ipvsadm to work on netns */ 2620 2546 }; 2621 2547 2622 2548 /* Policy used for first-level command attributes */ ··· 2772 2696 int idx = 0, i; 2773 2697 int start = cb->args[0]; 2774 2698 struct ip_vs_service *svc; 2699 + struct net *net = skb_sknet(skb); 2775 2700 2776 2701 mutex_lock(&__ip_vs_mutex); 2777 2702 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2778 2703 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2779 - if (++idx <= start) 2704 + if (++idx <= start || !net_eq(svc->net, net)) 2780 2705 continue; 2781 2706 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2782 2707 idx--; ··· 2788 2711 2789 2712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2790 2713 list_for_each_entry(svc, 
&ip_vs_svc_fwm_table[i], f_list) { 2791 - if (++idx <= start) 2714 + if (++idx <= start || !net_eq(svc->net, net)) 2792 2715 continue; 2793 2716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2794 2717 idx--; ··· 2804 2727 return skb->len; 2805 2728 } 2806 2729 2807 - static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2730 + static int ip_vs_genl_parse_service(struct net *net, 2731 + struct ip_vs_service_user_kern *usvc, 2808 2732 struct nlattr *nla, int full_entry, 2809 2733 struct ip_vs_service **ret_svc) 2810 2734 { ··· 2848 2770 } 2849 2771 2850 2772 if (usvc->fwmark) 2851 - svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); 2773 + svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); 2852 2774 else 2853 - svc = __ip_vs_service_find(usvc->af, usvc->protocol, 2775 + svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, 2854 2776 &usvc->addr, usvc->port); 2855 2777 *ret_svc = svc; 2856 2778 ··· 2887 2809 return 0; 2888 2810 } 2889 2811 2890 - static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2812 + static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, 2813 + struct nlattr *nla) 2891 2814 { 2892 2815 struct ip_vs_service_user_kern usvc; 2893 2816 struct ip_vs_service *svc; 2894 2817 int ret; 2895 2818 2896 - ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); 2819 + ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); 2897 2820 return ret ? 
ERR_PTR(ret) : svc; 2898 2821 } 2899 2822 ··· 2962 2883 struct ip_vs_service *svc; 2963 2884 struct ip_vs_dest *dest; 2964 2885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 2886 + struct net *net = skb_sknet(skb); 2965 2887 2966 2888 mutex_lock(&__ip_vs_mutex); 2967 2889 ··· 2971 2891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 2972 2892 goto out_err; 2973 2893 2974 - svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); 2894 + 2895 + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); 2975 2896 if (IS_ERR(svc) || svc == NULL) 2976 2897 goto out_err; 2977 2898 ··· 3086 3005 static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3087 3006 struct netlink_callback *cb) 3088 3007 { 3008 + struct net *net = skb_net(skb); 3009 + struct netns_ipvs *ipvs = net_ipvs(net); 3010 + 3089 3011 mutex_lock(&__ip_vs_mutex); 3090 - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3012 + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3091 3013 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3092 - ip_vs_master_mcast_ifn, 3093 - ip_vs_master_syncid, cb) < 0) 3014 + ipvs->master_mcast_ifn, 3015 + ipvs->master_syncid, cb) < 0) 3094 3016 goto nla_put_failure; 3095 3017 3096 3018 cb->args[0] = 1; 3097 3019 } 3098 3020 3099 - if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3021 + if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3100 3022 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3101 - ip_vs_backup_mcast_ifn, 3102 - ip_vs_backup_syncid, cb) < 0) 3023 + ipvs->backup_mcast_ifn, 3024 + ipvs->backup_syncid, cb) < 0) 3103 3025 goto nla_put_failure; 3104 3026 3105 3027 cb->args[1] = 1; ··· 3114 3030 return skb->len; 3115 3031 } 3116 3032 3117 - static int ip_vs_genl_new_daemon(struct nlattr **attrs) 3033 + static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) 3118 3034 { 3119 3035 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3120 3036 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3121 3037 
attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3122 3038 return -EINVAL; 3123 3039 3124 - return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3040 + return start_sync_thread(net, 3041 + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 3125 3042 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3126 3043 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 3127 3044 } 3128 3045 3129 - static int ip_vs_genl_del_daemon(struct nlattr **attrs) 3046 + static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) 3130 3047 { 3131 3048 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3132 3049 return -EINVAL; 3133 3050 3134 - return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3051 + return stop_sync_thread(net, 3052 + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3135 3053 } 3136 3054 3137 - static int ip_vs_genl_set_config(struct nlattr **attrs) 3055 + static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) 3138 3056 { 3139 3057 struct ip_vs_timeout_user t; 3140 3058 3141 - __ip_vs_get_timeouts(&t); 3059 + __ip_vs_get_timeouts(net, &t); 3142 3060 3143 3061 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3144 3062 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); ··· 3152 3066 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3153 3067 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3154 3068 3155 - return ip_vs_set_timeout(&t); 3069 + return ip_vs_set_timeout(net, &t); 3156 3070 } 3157 3071 3158 3072 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ··· 3162 3076 struct ip_vs_dest_user_kern udest; 3163 3077 int ret = 0, cmd; 3164 3078 int need_full_svc = 0, need_full_dest = 0; 3079 + struct net *net; 3080 + struct netns_ipvs *ipvs; 3165 3081 3082 + net = skb_sknet(skb); 3083 + ipvs = net_ipvs(net); 3166 3084 cmd = info->genlhdr->cmd; 3167 3085 3168 3086 mutex_lock(&__ip_vs_mutex); 3169 3087 3170 3088 if (cmd == IPVS_CMD_FLUSH) { 3171 - ret = ip_vs_flush(); 3089 + ret = ip_vs_flush(net); 3172 3090 goto out; 3173 3091 } 
else if (cmd == IPVS_CMD_SET_CONFIG) { 3174 - ret = ip_vs_genl_set_config(info->attrs); 3092 + ret = ip_vs_genl_set_config(net, info->attrs); 3175 3093 goto out; 3176 3094 } else if (cmd == IPVS_CMD_NEW_DAEMON || 3177 3095 cmd == IPVS_CMD_DEL_DAEMON) { ··· 3191 3101 } 3192 3102 3193 3103 if (cmd == IPVS_CMD_NEW_DAEMON) 3194 - ret = ip_vs_genl_new_daemon(daemon_attrs); 3104 + ret = ip_vs_genl_new_daemon(net, daemon_attrs); 3195 3105 else 3196 - ret = ip_vs_genl_del_daemon(daemon_attrs); 3106 + ret = ip_vs_genl_del_daemon(net, daemon_attrs); 3197 3107 goto out; 3198 3108 } else if (cmd == IPVS_CMD_ZERO && 3199 3109 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3200 - ret = ip_vs_zero_all(); 3110 + ret = ip_vs_zero_all(net); 3201 3111 goto out; 3202 3112 } 3203 3113 ··· 3207 3117 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3208 3118 need_full_svc = 1; 3209 3119 3210 - ret = ip_vs_genl_parse_service(&usvc, 3120 + ret = ip_vs_genl_parse_service(net, &usvc, 3211 3121 info->attrs[IPVS_CMD_ATTR_SERVICE], 3212 3122 need_full_svc, &svc); 3213 3123 if (ret) ··· 3237 3147 switch (cmd) { 3238 3148 case IPVS_CMD_NEW_SERVICE: 3239 3149 if (svc == NULL) 3240 - ret = ip_vs_add_service(&usvc, &svc); 3150 + ret = ip_vs_add_service(net, &usvc, &svc); 3241 3151 else 3242 3152 ret = -EEXIST; 3243 3153 break; ··· 3275 3185 struct sk_buff *msg; 3276 3186 void *reply; 3277 3187 int ret, cmd, reply_cmd; 3188 + struct net *net; 3189 + struct netns_ipvs *ipvs; 3278 3190 3191 + net = skb_sknet(skb); 3192 + ipvs = net_ipvs(net); 3279 3193 cmd = info->genlhdr->cmd; 3280 3194 3281 3195 if (cmd == IPVS_CMD_GET_SERVICE) ··· 3308 3214 { 3309 3215 struct ip_vs_service *svc; 3310 3216 3311 - svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); 3217 + svc = ip_vs_genl_find_service(net, 3218 + info->attrs[IPVS_CMD_ATTR_SERVICE]); 3312 3219 if (IS_ERR(svc)) { 3313 3220 ret = PTR_ERR(svc); 3314 3221 goto out_err; ··· 3329 3234 { 3330 3235 struct ip_vs_timeout_user t; 3331 3236 
3332 - __ip_vs_get_timeouts(&t); 3237 + __ip_vs_get_timeouts(net, &t); 3333 3238 #ifdef CONFIG_IP_VS_PROTO_TCP 3334 3239 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); 3335 3240 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, ··· 3475 3380 3476 3381 /* End of Generic Netlink interface definitions */ 3477 3382 3383 + /* 3384 + * per netns init/exit func. 3385 + */ 3386 + int __net_init __ip_vs_control_init(struct net *net) 3387 + { 3388 + int idx; 3389 + struct netns_ipvs *ipvs = net_ipvs(net); 3390 + struct ctl_table *tbl; 3391 + 3392 + atomic_set(&ipvs->dropentry, 0); 3393 + spin_lock_init(&ipvs->dropentry_lock); 3394 + spin_lock_init(&ipvs->droppacket_lock); 3395 + spin_lock_init(&ipvs->securetcp_lock); 3396 + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); 3397 + 3398 + /* Initialize rs_table */ 3399 + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3400 + INIT_LIST_HEAD(&ipvs->rs_table[idx]); 3401 + 3402 + INIT_LIST_HEAD(&ipvs->dest_trash); 3403 + atomic_set(&ipvs->ftpsvc_counter, 0); 3404 + atomic_set(&ipvs->nullsvc_counter, 0); 3405 + 3406 + /* procfs stats */ 3407 + ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); 3408 + if (ipvs->tot_stats == NULL) { 3409 + pr_err("%s(): no memory.\n", __func__); 3410 + return -ENOMEM; 3411 + } 3412 + ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); 3413 + if (!ipvs->cpustats) { 3414 + pr_err("%s() alloc_percpu failed\n", __func__); 3415 + goto err_alloc; 3416 + } 3417 + spin_lock_init(&ipvs->tot_stats->lock); 3418 + 3419 + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 3420 + INIT_LIST_HEAD(&ipvs->rs_table[idx]); 3421 + 3422 + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); 3423 + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); 3424 + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, 3425 + &ip_vs_stats_percpu_fops); 3426 + 3427 + if (!net_eq(net, &init_net)) { 3428 + tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 3429 + if (tbl == NULL) 3430 + goto err_dup;
3431 + } else 3432 + tbl = vs_vars; 3433 + /* Initialize sysctl defaults */ 3434 + idx = 0; 3435 + ipvs->sysctl_amemthresh = 1024; 3436 + tbl[idx++].data = &ipvs->sysctl_amemthresh; 3437 + ipvs->sysctl_am_droprate = 10; 3438 + tbl[idx++].data = &ipvs->sysctl_am_droprate; 3439 + tbl[idx++].data = &ipvs->sysctl_drop_entry; 3440 + tbl[idx++].data = &ipvs->sysctl_drop_packet; 3441 + #ifdef CONFIG_IP_VS_NFCT 3442 + tbl[idx++].data = &ipvs->sysctl_conntrack; 3443 + #endif 3444 + tbl[idx++].data = &ipvs->sysctl_secure_tcp; 3445 + ipvs->sysctl_snat_reroute = 1; 3446 + tbl[idx++].data = &ipvs->sysctl_snat_reroute; 3447 + ipvs->sysctl_sync_ver = 1; 3448 + tbl[idx++].data = &ipvs->sysctl_sync_ver; 3449 + tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3450 + tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3451 + tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3452 + ipvs->sysctl_sync_threshold[0] = 3; 3453 + ipvs->sysctl_sync_threshold[1] = 50; 3454 + tbl[idx].data = &ipvs->sysctl_sync_threshold; 3455 + tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3456 + tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3457 + 3458 + 3459 + ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, 3460 + vs_vars); 3461 + if (ipvs->sysctl_hdr == NULL) 3462 + goto err_reg; 3463 + ip_vs_new_estimator(net, ipvs->tot_stats); 3464 + ipvs->sysctl_tbl = tbl; 3465 + /* Schedule defense work */ 3466 + INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 3467 + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 3468 + return 0; 3469 + 3470 + err_reg: 3471 + if (!net_eq(net, &init_net)) 3472 + kfree(tbl); 3473 + err_dup: 3474 + free_percpu(ipvs->cpustats); 3475 + err_alloc: 3476 + kfree(ipvs->tot_stats); 3477 + return -ENOMEM; 3478 + } 3479 + 3480 + static void __net_exit __ip_vs_control_cleanup(struct net *net) 3481 + { 3482 + struct netns_ipvs *ipvs = net_ipvs(net); 3483 + 3484 + ip_vs_trash_cleanup(net); 3485 + ip_vs_kill_estimator(net, 
ipvs->tot_stats); 3486 + cancel_delayed_work_sync(&ipvs->defense_work); 3487 + cancel_work_sync(&ipvs->defense_work.work); 3488 + unregister_net_sysctl_table(ipvs->sysctl_hdr); 3489 + proc_net_remove(net, "ip_vs_stats_percpu"); 3490 + proc_net_remove(net, "ip_vs_stats"); 3491 + proc_net_remove(net, "ip_vs"); 3492 + free_percpu(ipvs->cpustats); 3493 + kfree(ipvs->tot_stats); 3494 + } 3495 + 3496 + static struct pernet_operations ipvs_control_ops = { 3497 + .init = __ip_vs_control_init, 3498 + .exit = __ip_vs_control_cleanup, 3499 + }; 3478 3500 3479 3501 int __init ip_vs_control_init(void) 3480 3502 { 3481 - int ret; 3482 3503 int idx; 3504 + int ret; 3483 3505 3484 3506 EnterFunction(2); 3485 3507 3486 - /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 3508 + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ 3487 3509 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3488 3510 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3489 3511 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3490 3512 } 3491 - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 3492 - INIT_LIST_HEAD(&ip_vs_rtable[idx]); 3513 + 3514 + ret = register_pernet_subsys(&ipvs_control_ops); 3515 + if (ret) { 3516 + pr_err("cannot register namespace.\n"); 3517 + goto err; 3493 3518 } 3494 - smp_wmb(); 3519 + 3520 + smp_wmb(); /* Do we really need it now ? 
*/ 3495 3521 3496 3522 ret = nf_register_sockopt(&ip_vs_sockopts); 3497 3523 if (ret) { 3498 3524 pr_err("cannot register sockopt.\n"); 3499 - return ret; 3525 + goto err_net; 3500 3526 } 3501 3527 3502 3528 ret = ip_vs_genl_register(); 3503 3529 if (ret) { 3504 3530 pr_err("cannot register Generic Netlink interface.\n"); 3505 3531 nf_unregister_sockopt(&ip_vs_sockopts); 3506 - return ret; 3532 + goto err_net; 3507 3533 } 3508 - 3509 - proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); 3510 - proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); 3511 - 3512 - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 3513 - 3514 - ip_vs_new_estimator(&ip_vs_stats); 3515 - 3516 - /* Hook the defense timer */ 3517 - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 3518 3534 3519 3535 LeaveFunction(2); 3520 3536 return 0; 3537 + 3538 + err_net: 3539 + unregister_pernet_subsys(&ipvs_control_ops); 3540 + err: 3541 + return ret; 3521 3542 } 3522 3543 3523 3544 3524 3545 void ip_vs_control_cleanup(void) 3525 3546 { 3526 3547 EnterFunction(2); 3527 - ip_vs_trash_cleanup(); 3528 - cancel_delayed_work_sync(&defense_work); 3529 - cancel_work_sync(&defense_work.work); 3530 - ip_vs_kill_estimator(&ip_vs_stats); 3531 - unregister_sysctl_table(sysctl_header); 3532 - proc_net_remove(&init_net, "ip_vs_stats"); 3533 - proc_net_remove(&init_net, "ip_vs"); 3548 + unregister_pernet_subsys(&ipvs_control_ops); 3534 3549 ip_vs_genl_unregister(); 3535 3550 nf_unregister_sockopt(&ip_vs_sockopts); 3536 3551 LeaveFunction(2);

+99 -35

net/netfilter/ipvs/ip_vs_est.c

··· 8 8 * as published by the Free Software Foundation; either version 9 9 * 2 of the License, or (at your option) any later version. 10 10 * 11 - * Changes: 12 - * 11 + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 12 + * Network name space (netns) aware. 13 + * Global data moved to netns i.e. struct netns_ipvs 14 + * Affected data: est_list and est_lock. 15 + * estimation_timer() runs with timer per netns. 16 + * get_stats() does the per cpu summing. 13 17 */ 14 18 15 19 #define KMSG_COMPONENT "IPVS" ··· 52 48 */ 53 49 54 50 55 - static void estimation_timer(unsigned long arg); 51 + /* 52 + * Make a summary from each cpu 53 + */ 54 + static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, 55 + struct ip_vs_cpu_stats *stats) 56 + { 57 + int i; 56 58 57 - static LIST_HEAD(est_list); 58 - static DEFINE_SPINLOCK(est_lock); 59 - static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); 59 + for_each_possible_cpu(i) { 60 + struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); 61 + unsigned int start; 62 + __u64 inbytes, outbytes; 63 + if (i) { 64 + sum->conns += s->ustats.conns; 65 + sum->inpkts += s->ustats.inpkts; 66 + sum->outpkts += s->ustats.outpkts; 67 + do { 68 + start = u64_stats_fetch_begin_bh(&s->syncp); 69 + inbytes = s->ustats.inbytes; 70 + outbytes = s->ustats.outbytes; 71 + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); 72 + sum->inbytes += inbytes; 73 + sum->outbytes += outbytes; 74 + } else { 75 + sum->conns = s->ustats.conns; 76 + sum->inpkts = s->ustats.inpkts; 77 + sum->outpkts = s->ustats.outpkts; 78 + do { 79 + start = u64_stats_fetch_begin_bh(&s->syncp); 80 + sum->inbytes = s->ustats.inbytes; 81 + sum->outbytes = s->ustats.outbytes; 82 + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); 83 + } 84 + } 85 + } 86 + 60 87 61 88 static void estimation_timer(unsigned long arg) 62 89 { ··· 97 62 u32 n_inpkts, n_outpkts; 98 63 u64 n_inbytes, n_outbytes; 99 64 u32 rate; 65 + struct net *net = (struct net *)arg; 66 + struct
netns_ipvs *ipvs; 100 67 101 - spin_lock(&est_lock); 102 - list_for_each_entry(e, &est_list, list) { 68 + ipvs = net_ipvs(net); 69 + ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); 70 + spin_lock(&ipvs->est_lock); 71 + list_for_each_entry(e, &ipvs->est_list, list) { 103 72 s = container_of(e, struct ip_vs_stats, est); 104 73 74 + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); 105 75 spin_lock(&s->lock); 106 76 n_conns = s->ustats.conns; 107 77 n_inpkts = s->ustats.inpkts; ··· 115 75 n_outbytes = s->ustats.outbytes; 116 76 117 77 /* scaled by 2^10, but divided 2 seconds */ 118 - rate = (n_conns - e->last_conns)<<9; 78 + rate = (n_conns - e->last_conns) << 9; 119 79 e->last_conns = n_conns; 120 - e->cps += ((long)rate - (long)e->cps)>>2; 121 - s->ustats.cps = (e->cps+0x1FF)>>10; 80 + e->cps += ((long)rate - (long)e->cps) >> 2; 81 + s->ustats.cps = (e->cps + 0x1FF) >> 10; 122 82 123 - rate = (n_inpkts - e->last_inpkts)<<9; 83 + rate = (n_inpkts - e->last_inpkts) << 9; 124 84 e->last_inpkts = n_inpkts; 125 - e->inpps += ((long)rate - (long)e->inpps)>>2; 126 - s->ustats.inpps = (e->inpps+0x1FF)>>10; 85 + e->inpps += ((long)rate - (long)e->inpps) >> 2; 86 + s->ustats.inpps = (e->inpps + 0x1FF) >> 10; 127 87 128 - rate = (n_outpkts - e->last_outpkts)<<9; 88 + rate = (n_outpkts - e->last_outpkts) << 9; 129 89 e->last_outpkts = n_outpkts; 130 - e->outpps += ((long)rate - (long)e->outpps)>>2; 131 - s->ustats.outpps = (e->outpps+0x1FF)>>10; 90 + e->outpps += ((long)rate - (long)e->outpps) >> 2; 91 + s->ustats.outpps = (e->outpps + 0x1FF) >> 10; 132 92 133 - rate = (n_inbytes - e->last_inbytes)<<4; 93 + rate = (n_inbytes - e->last_inbytes) << 4; 134 94 e->last_inbytes = n_inbytes; 135 - e->inbps += ((long)rate - (long)e->inbps)>>2; 136 - s->ustats.inbps = (e->inbps+0xF)>>5; 95 + e->inbps += ((long)rate - (long)e->inbps) >> 2; 96 + s->ustats.inbps = (e->inbps + 0xF) >> 5; 137 97 138 - rate = (n_outbytes - e->last_outbytes)<<4; 98 + rate = (n_outbytes - 
e->last_outbytes) << 4; 139 99 e->last_outbytes = n_outbytes; 140 - e->outbps += ((long)rate - (long)e->outbps)>>2; 141 - s->ustats.outbps = (e->outbps+0xF)>>5; 100 + e->outbps += ((long)rate - (long)e->outbps) >> 2; 101 + s->ustats.outbps = (e->outbps + 0xF) >> 5; 142 102 spin_unlock(&s->lock); 143 103 } 144 - spin_unlock(&est_lock); 145 - mod_timer(&est_timer, jiffies + 2*HZ); 104 + spin_unlock(&ipvs->est_lock); 105 + mod_timer(&ipvs->est_timer, jiffies + 2*HZ); 146 106 } 147 107 148 - void ip_vs_new_estimator(struct ip_vs_stats *stats) 108 + void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) 149 109 { 110 + struct netns_ipvs *ipvs = net_ipvs(net); 150 111 struct ip_vs_estimator *est = &stats->est; 151 112 152 113 INIT_LIST_HEAD(&est->list); ··· 167 126 est->last_outbytes = stats->ustats.outbytes; 168 127 est->outbps = stats->ustats.outbps<<5; 169 128 170 - spin_lock_bh(&est_lock); 171 - list_add(&est->list, &est_list); 172 - spin_unlock_bh(&est_lock); 129 + spin_lock_bh(&ipvs->est_lock); 130 + list_add(&est->list, &ipvs->est_list); 131 + spin_unlock_bh(&ipvs->est_lock); 173 132 } 174 133 175 - void ip_vs_kill_estimator(struct ip_vs_stats *stats) 134 + void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) 176 135 { 136 + struct netns_ipvs *ipvs = net_ipvs(net); 177 137 struct ip_vs_estimator *est = &stats->est; 178 138 179 - spin_lock_bh(&est_lock); 139 + spin_lock_bh(&ipvs->est_lock); 180 140 list_del(&est->list); 181 - spin_unlock_bh(&est_lock); 141 + spin_unlock_bh(&ipvs->est_lock); 182 142 } 183 143 184 144 void ip_vs_zero_estimator(struct ip_vs_stats *stats) ··· 199 157 est->outbps = 0; 200 158 } 201 159 160 + static int __net_init __ip_vs_estimator_init(struct net *net) 161 + { 162 + struct netns_ipvs *ipvs = net_ipvs(net); 163 + 164 + INIT_LIST_HEAD(&ipvs->est_list); 165 + spin_lock_init(&ipvs->est_lock); 166 + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); 167 + mod_timer(&ipvs->est_timer, jiffies + 
2 * HZ); 168 + return 0; 169 + } 170 + 171 + static void __net_exit __ip_vs_estimator_exit(struct net *net) 172 + { 173 + del_timer_sync(&net_ipvs(net)->est_timer); 174 + } 175 + static struct pernet_operations ip_vs_app_ops = { 176 + .init = __ip_vs_estimator_init, 177 + .exit = __ip_vs_estimator_exit, 178 + }; 179 + 202 180 int __init ip_vs_estimator_init(void) 203 181 { 204 - mod_timer(&est_timer, jiffies + 2 * HZ); 205 - return 0; 182 + int rv; 183 + 184 + rv = register_pernet_subsys(&ip_vs_app_ops); 185 + return rv; 206 186 } 207 187 208 188 void ip_vs_estimator_cleanup(void) 209 189 { 210 - del_timer_sync(&est_timer); 190 + unregister_pernet_subsys(&ip_vs_app_ops); 211 191 }

+44 -17

net/netfilter/ipvs/ip_vs_ftp.c

··· 157 157 int ret = 0; 158 158 enum ip_conntrack_info ctinfo; 159 159 struct nf_conn *ct; 160 + struct net *net; 160 161 161 162 #ifdef CONFIG_IP_VS_IPV6 162 163 /* This application helper doesn't work with IPv6 yet, ··· 198 197 */ 199 198 { 200 199 struct ip_vs_conn_param p; 201 - ip_vs_conn_fill_param(AF_INET, iph->protocol, 202 - &from, port, &cp->caddr, 0, &p); 200 + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, 201 + iph->protocol, &from, port, 202 + &cp->caddr, 0, &p); 203 203 n_cp = ip_vs_conn_out_get(&p); 204 204 } 205 205 if (!n_cp) { 206 206 struct ip_vs_conn_param p; 207 - ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, 207 + ip_vs_conn_fill_param(ip_vs_conn_net(cp), 208 + AF_INET, IPPROTO_TCP, &cp->caddr, 208 209 0, &cp->vaddr, port, &p); 209 210 n_cp = ip_vs_conn_new(&p, &from, port, 210 211 IP_VS_CONN_F_NO_CPORT | 211 212 IP_VS_CONN_F_NFCT, 212 - cp->dest); 213 + cp->dest, skb->mark); 213 214 if (!n_cp) 214 215 return 0; 215 216 ··· 260 257 * would be adjusted twice. 
261 258 */ 262 259 260 + net = skb_net(skb); 263 261 cp->app_data = NULL; 264 - ip_vs_tcp_conn_listen(n_cp); 262 + ip_vs_tcp_conn_listen(net, n_cp); 265 263 ip_vs_conn_put(n_cp); 266 264 return ret; 267 265 } ··· 291 287 union nf_inet_addr to; 292 288 __be16 port; 293 289 struct ip_vs_conn *n_cp; 290 + struct net *net; 294 291 295 292 #ifdef CONFIG_IP_VS_IPV6 296 293 /* This application helper doesn't work with IPv6 yet, ··· 363 358 364 359 { 365 360 struct ip_vs_conn_param p; 366 - ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, 367 - &cp->vaddr, htons(ntohs(cp->vport)-1), 368 - &p); 361 + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, 362 + iph->protocol, &to, port, &cp->vaddr, 363 + htons(ntohs(cp->vport)-1), &p); 369 364 n_cp = ip_vs_conn_in_get(&p); 370 365 if (!n_cp) { 371 366 n_cp = ip_vs_conn_new(&p, &cp->daddr, 372 367 htons(ntohs(cp->dport)-1), 373 - IP_VS_CONN_F_NFCT, cp->dest); 368 + IP_VS_CONN_F_NFCT, cp->dest, 369 + skb->mark); 374 370 if (!n_cp) 375 371 return 0; 376 372 ··· 383 377 /* 384 378 * Move tunnel to listen state 385 379 */ 386 - ip_vs_tcp_conn_listen(n_cp); 380 + net = skb_net(skb); 381 + ip_vs_tcp_conn_listen(net, n_cp); 387 382 ip_vs_conn_put(n_cp); 388 383 389 384 return 1; ··· 405 398 .pkt_in = ip_vs_ftp_in, 406 399 }; 407 400 408 - 409 401 /* 410 - * ip_vs_ftp initialization 402 + * per netns ip_vs_ftp initialization 411 403 */ 412 - static int __init ip_vs_ftp_init(void) 404 + static int __net_init __ip_vs_ftp_init(struct net *net) 413 405 { 414 406 int i, ret; 415 407 struct ip_vs_app *app = &ip_vs_ftp; 416 408 417 - ret = register_ip_vs_app(app); 409 + ret = register_ip_vs_app(net, app); 418 410 if (ret) 419 411 return ret; 420 412 421 413 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { 422 414 if (!ports[i]) 423 415 continue; 424 - ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); 416 + ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); 425 417 if (ret) 426 418 break; 427 419 pr_info("%s: loaded 
support on port[%d] = %d\n", ··· 428 422 } 429 423 430 424 if (ret) 431 - unregister_ip_vs_app(app); 425 + unregister_ip_vs_app(net, app); 432 426 433 427 return ret; 434 428 } 429 + /* 430 + * netns exit 431 + */ 432 + static void __ip_vs_ftp_exit(struct net *net) 433 + { 434 + struct ip_vs_app *app = &ip_vs_ftp; 435 435 436 + unregister_ip_vs_app(net, app); 437 + } 438 + 439 + static struct pernet_operations ip_vs_ftp_ops = { 440 + .init = __ip_vs_ftp_init, 441 + .exit = __ip_vs_ftp_exit, 442 + }; 443 + 444 + int __init ip_vs_ftp_init(void) 445 + { 446 + int rv; 447 + 448 + rv = register_pernet_subsys(&ip_vs_ftp_ops); 449 + return rv; 450 + } 436 451 437 452 /* 438 453 * ip_vs_ftp finish. 439 454 */ 440 455 static void __exit ip_vs_ftp_exit(void) 441 456 { 442 - unregister_ip_vs_app(&ip_vs_ftp); 457 + unregister_pernet_subsys(&ip_vs_ftp_ops); 443 458 } 444 459 445 460

+58 -9

net/netfilter/ipvs/ip_vs_lblc.c

··· 70 70 * entries that haven't been touched for a day. 71 71 */ 72 72 #define COUNT_FOR_FULL_EXPIRATION 30 73 - static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; 74 73 75 74 76 75 /* ··· 116 117 static ctl_table vs_vars_table[] = { 117 118 { 118 119 .procname = "lblc_expiration", 119 - .data = &sysctl_ip_vs_lblc_expiration, 120 + .data = NULL, 120 121 .maxlen = sizeof(int), 121 122 .mode = 0644, 122 123 .proc_handler = proc_dointvec_jiffies, 123 124 }, 124 125 { } 125 126 }; 126 - 127 - static struct ctl_table_header * sysctl_header; 128 127 129 128 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) 130 129 { ··· 245 248 struct ip_vs_lblc_entry *en, *nxt; 246 249 unsigned long now = jiffies; 247 250 int i, j; 251 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 248 252 249 253 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { 250 254 j = (j + 1) & IP_VS_LBLC_TAB_MASK; ··· 253 255 write_lock(&svc->sched_lock); 254 256 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 255 257 if (time_before(now, 256 - en->lastuse + sysctl_ip_vs_lblc_expiration)) 258 + en->lastuse + 259 + ipvs->sysctl_lblc_expiration)) 257 260 continue; 258 261 259 262 ip_vs_lblc_free(en); ··· 542 543 .schedule = ip_vs_lblc_schedule, 543 544 }; 544 545 546 + /* 547 + * per netns init. 
548 + */ 549 + static int __net_init __ip_vs_lblc_init(struct net *net) 550 + { 551 + struct netns_ipvs *ipvs = net_ipvs(net); 552 + 553 + if (!net_eq(net, &init_net)) { 554 + ipvs->lblc_ctl_table = kmemdup(vs_vars_table, 555 + sizeof(vs_vars_table), 556 + GFP_KERNEL); 557 + if (ipvs->lblc_ctl_table == NULL) 558 + goto err_dup; 559 + } else 560 + ipvs->lblc_ctl_table = vs_vars_table; 561 + ipvs->sysctl_lblc_expiration = 24*60*60*HZ; 562 + ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; 563 + 564 + ipvs->lblc_ctl_header = 565 + register_net_sysctl_table(net, net_vs_ctl_path, 566 + ipvs->lblc_ctl_table); 567 + if (!ipvs->lblc_ctl_header) 568 + goto err_reg; 569 + 570 + return 0; 571 + 572 + err_reg: 573 + if (!net_eq(net, &init_net)) 574 + kfree(ipvs->lblc_ctl_table); 575 + 576 + err_dup: 577 + return -ENOMEM; 578 + } 579 + 580 + static void __net_exit __ip_vs_lblc_exit(struct net *net) 581 + { 582 + struct netns_ipvs *ipvs = net_ipvs(net); 583 + 584 + unregister_net_sysctl_table(ipvs->lblc_ctl_header); 585 + 586 + if (!net_eq(net, &init_net)) 587 + kfree(ipvs->lblc_ctl_table); 588 + } 589 + 590 + static struct pernet_operations ip_vs_lblc_ops = { 591 + .init = __ip_vs_lblc_init, 592 + .exit = __ip_vs_lblc_exit, 593 + }; 545 594 546 595 static int __init ip_vs_lblc_init(void) 547 596 { 548 597 int ret; 549 598 550 - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 599 + ret = register_pernet_subsys(&ip_vs_lblc_ops); 600 + if (ret) 601 + return ret; 602 + 551 603 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); 552 604 if (ret) 553 - unregister_sysctl_table(sysctl_header); 605 + unregister_pernet_subsys(&ip_vs_lblc_ops); 554 606 return ret; 555 607 } 556 608 557 - 558 609 static void __exit ip_vs_lblc_cleanup(void) 559 610 { 560 - unregister_sysctl_table(sysctl_header); 561 611 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); 612 + unregister_pernet_subsys(&ip_vs_lblc_ops); 562 613 } 563 614 564 615

+60 -12

net/netfilter/ipvs/ip_vs_lblcr.c

··· 70 70 * entries that haven't been touched for a day. 71 71 */ 72 72 #define COUNT_FOR_FULL_EXPIRATION 30 73 - static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; 74 - 75 73 76 74 /* 77 75 * for IPVS lblcr entry hash table ··· 294 296 static ctl_table vs_vars_table[] = { 295 297 { 296 298 .procname = "lblcr_expiration", 297 - .data = &sysctl_ip_vs_lblcr_expiration, 299 + .data = NULL, 298 300 .maxlen = sizeof(int), 299 301 .mode = 0644, 300 302 .proc_handler = proc_dointvec_jiffies, 301 303 }, 302 304 { } 303 305 }; 304 - 305 - static struct ctl_table_header * sysctl_header; 306 306 307 307 static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) 308 308 { ··· 421 425 unsigned long now = jiffies; 422 426 int i, j; 423 427 struct ip_vs_lblcr_entry *en, *nxt; 428 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 424 429 425 430 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { 426 431 j = (j + 1) & IP_VS_LBLCR_TAB_MASK; 427 432 428 433 write_lock(&svc->sched_lock); 429 434 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { 430 - if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, 431 - now)) 435 + if (time_after(en->lastuse 436 + + ipvs->sysctl_lblcr_expiration, now)) 432 437 continue; 433 438 434 439 ip_vs_lblcr_free(en); ··· 661 664 read_lock(&svc->sched_lock); 662 665 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); 663 666 if (en) { 667 + struct netns_ipvs *ipvs = net_ipvs(svc->net); 664 668 /* We only hold a read lock, but this is atomic */ 665 669 en->lastuse = jiffies; 666 670 ··· 673 675 /* More than one destination + enough time passed by, cleanup */ 674 676 if (atomic_read(&en->set.size) > 1 && 675 677 time_after(jiffies, en->set.lastmod + 676 - sysctl_ip_vs_lblcr_expiration)) { 678 + ipvs->sysctl_lblcr_expiration)) { 677 679 struct ip_vs_dest *m; 678 680 679 681 write_lock(&en->set.lock); ··· 742 744 .schedule = ip_vs_lblcr_schedule, 743 745 }; 744 746 747 + /* 748 + * per netns init. 
749 + */ 750 + static int __net_init __ip_vs_lblcr_init(struct net *net) 751 + { 752 + struct netns_ipvs *ipvs = net_ipvs(net); 753 + 754 + if (!net_eq(net, &init_net)) { 755 + ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, 756 + sizeof(vs_vars_table), 757 + GFP_KERNEL); 758 + if (ipvs->lblcr_ctl_table == NULL) 759 + goto err_dup; 760 + } else 761 + ipvs->lblcr_ctl_table = vs_vars_table; 762 + ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; 763 + ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; 764 + 765 + ipvs->lblcr_ctl_header = 766 + register_net_sysctl_table(net, net_vs_ctl_path, 767 + ipvs->lblcr_ctl_table); 768 + if (!ipvs->lblcr_ctl_header) 769 + goto err_reg; 770 + 771 + return 0; 772 + 773 + err_reg: 774 + if (!net_eq(net, &init_net)) 775 + kfree(ipvs->lblcr_ctl_table); 776 + 777 + err_dup: 778 + return -ENOMEM; 779 + } 780 + 781 + static void __net_exit __ip_vs_lblcr_exit(struct net *net) 782 + { 783 + struct netns_ipvs *ipvs = net_ipvs(net); 784 + 785 + unregister_net_sysctl_table(ipvs->lblcr_ctl_header); 786 + 787 + if (!net_eq(net, &init_net)) 788 + kfree(ipvs->lblcr_ctl_table); 789 + } 790 + 791 + static struct pernet_operations ip_vs_lblcr_ops = { 792 + .init = __ip_vs_lblcr_init, 793 + .exit = __ip_vs_lblcr_exit, 794 + }; 745 795 746 796 static int __init ip_vs_lblcr_init(void) 747 797 { 748 798 int ret; 749 799 750 - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); 800 + ret = register_pernet_subsys(&ip_vs_lblcr_ops); 801 + if (ret) 802 + return ret; 803 + 751 804 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 752 805 if (ret) 753 - unregister_sysctl_table(sysctl_header); 806 + unregister_pernet_subsys(&ip_vs_lblcr_ops); 754 807 return ret; 755 808 } 756 809 757 - 758 810 static void __exit ip_vs_lblcr_cleanup(void) 759 811 { 760 - unregister_sysctl_table(sysctl_header); 761 812 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 813 + unregister_pernet_subsys(&ip_vs_lblcr_ops); 762 814 } 763 815 764 816

+4 -2

net/netfilter/ipvs/ip_vs_nfct.c

··· 141 141 struct nf_conntrack_tuple *orig, new_reply; 142 142 struct ip_vs_conn *cp; 143 143 struct ip_vs_conn_param p; 144 + struct net *net = nf_ct_net(ct); 144 145 145 146 if (exp->tuple.src.l3num != PF_INET) 146 147 return; ··· 156 155 157 156 /* RS->CLIENT */ 158 157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 159 - ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, 158 + ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, 160 159 &orig->src.u3, orig->src.u.tcp.port, 161 160 &orig->dst.u3, orig->dst.u.tcp.port, &p); 162 161 cp = ip_vs_conn_out_get(&p); ··· 269 268 " for conn " FMT_CONN "\n", 270 269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 271 270 272 - h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); 271 + h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, 272 + &tuple); 273 273 if (h) { 274 274 ct = nf_ct_tuplehash_to_ctrack(h); 275 275 /* Show what happens instead of calling nf_ct_kill() */

+5 -12

net/netfilter/ipvs/ip_vs_pe.c

··· 29 29 } 30 30 31 31 /* Get pe in the pe list by name */ 32 - static struct ip_vs_pe * 33 - ip_vs_pe_getbyname(const char *pe_name) 32 + struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name) 34 33 { 35 34 struct ip_vs_pe *pe; 36 35 37 - IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, 36 + IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__, 38 37 pe_name); 39 38 40 39 spin_lock_bh(&ip_vs_pe_lock); ··· 59 60 } 60 61 61 62 /* Lookup pe and try to load it if it doesn't exist */ 62 - struct ip_vs_pe *ip_vs_pe_get(const char *name) 63 + struct ip_vs_pe *ip_vs_pe_getbyname(const char *name) 63 64 { 64 65 struct ip_vs_pe *pe; 65 66 66 67 /* Search for the pe by name */ 67 - pe = ip_vs_pe_getbyname(name); 68 + pe = __ip_vs_pe_getbyname(name); 68 69 69 70 /* If pe not found, load the module and search again */ 70 71 if (!pe) { 71 72 request_module("ip_vs_pe_%s", name); 72 - pe = ip_vs_pe_getbyname(name); 73 + pe = __ip_vs_pe_getbyname(name); 73 74 } 74 75 75 76 return pe; 76 - } 77 - 78 - void ip_vs_pe_put(struct ip_vs_pe *pe) 79 - { 80 - if (pe && pe->module) 81 - module_put(pe->module); 82 77 } 83 78 84 79 /* Register a pe in the pe list */

+3

net/netfilter/ipvs/ip_vs_pe_sip.c

··· 71 71 struct ip_vs_iphdr iph; 72 72 unsigned int dataoff, datalen, matchoff, matchlen; 73 73 const char *dptr; 74 + int retc; 74 75 75 76 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph); 76 77 ··· 84 83 if (dataoff >= skb->len) 85 84 return -EINVAL; 86 85 86 + if ((retc=skb_linearize(skb)) < 0) 87 + return retc; 87 88 dptr = skb->data + dataoff; 88 89 datalen = skb->len - dataoff; 89 90

+120 -5

net/netfilter/ipvs/ip_vs_proto.c

··· 60 60 return 0; 61 61 } 62 62 63 + /* 64 + * register an ipvs protocols netns related data 65 + */ 66 + static int 67 + register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) 68 + { 69 + struct netns_ipvs *ipvs = net_ipvs(net); 70 + unsigned hash = IP_VS_PROTO_HASH(pp->protocol); 71 + struct ip_vs_proto_data *pd = 72 + kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); 73 + 74 + if (!pd) { 75 + pr_err("%s(): no memory.\n", __func__); 76 + return -ENOMEM; 77 + } 78 + pd->pp = pp; /* For speed issues */ 79 + pd->next = ipvs->proto_data_table[hash]; 80 + ipvs->proto_data_table[hash] = pd; 81 + atomic_set(&pd->appcnt, 0); /* Init app counter */ 82 + 83 + if (pp->init_netns != NULL) 84 + pp->init_netns(net, pd); 85 + 86 + return 0; 87 + } 63 88 64 89 /* 65 90 * unregister an ipvs protocol ··· 107 82 return -ESRCH; 108 83 } 109 84 85 + /* 86 + * unregister an ipvs protocols netns data 87 + */ 88 + static int 89 + unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) 90 + { 91 + struct netns_ipvs *ipvs = net_ipvs(net); 92 + struct ip_vs_proto_data **pd_p; 93 + unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); 94 + 95 + pd_p = &ipvs->proto_data_table[hash]; 96 + for (; *pd_p; pd_p = &(*pd_p)->next) { 97 + if (*pd_p == pd) { 98 + *pd_p = pd->next; 99 + if (pd->pp->exit_netns != NULL) 100 + pd->pp->exit_netns(net, pd); 101 + kfree(pd); 102 + return 0; 103 + } 104 + } 105 + 106 + return -ESRCH; 107 + } 110 108 111 109 /* 112 110 * get ip_vs_protocol object by its proto. 
··· 148 100 } 149 101 EXPORT_SYMBOL(ip_vs_proto_get); 150 102 103 + /* 104 + * get ip_vs_protocol object data by netns and proto 105 + */ 106 + struct ip_vs_proto_data * 107 + __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 108 + { 109 + struct ip_vs_proto_data *pd; 110 + unsigned hash = IP_VS_PROTO_HASH(proto); 111 + 112 + for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 113 + if (pd->pp->protocol == proto) 114 + return pd; 115 + } 116 + 117 + return NULL; 118 + } 119 + 120 + struct ip_vs_proto_data * 121 + ip_vs_proto_data_get(struct net *net, unsigned short proto) 122 + { 123 + struct netns_ipvs *ipvs = net_ipvs(net); 124 + 125 + return __ipvs_proto_data_get(ipvs, proto); 126 + } 127 + EXPORT_SYMBOL(ip_vs_proto_data_get); 151 128 152 129 /* 153 130 * Propagate event for state change to all protocols 154 131 */ 155 - void ip_vs_protocol_timeout_change(int flags) 132 + void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 156 133 { 157 - struct ip_vs_protocol *pp; 134 + struct ip_vs_proto_data *pd; 158 135 int i; 159 136 160 137 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 161 - for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { 162 - if (pp->timeout_change) 163 - pp->timeout_change(pp, flags); 138 + for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 139 + if (pd->pp->timeout_change) 140 + pd->pp->timeout_change(pd, flags); 164 141 } 165 142 } 166 143 } ··· 309 236 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 310 237 } 311 238 239 + /* 240 + * per network name-space init 241 + */ 242 + static int __net_init __ip_vs_protocol_init(struct net *net) 243 + { 244 + #ifdef CONFIG_IP_VS_PROTO_TCP 245 + register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); 246 + #endif 247 + #ifdef CONFIG_IP_VS_PROTO_UDP 248 + register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); 249 + #endif 250 + #ifdef CONFIG_IP_VS_PROTO_SCTP 251 + register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); 252 + #endif 253 + #ifdef 
CONFIG_IP_VS_PROTO_AH 254 + register_ip_vs_proto_netns(net, &ip_vs_protocol_ah); 255 + #endif 256 + #ifdef CONFIG_IP_VS_PROTO_ESP 257 + register_ip_vs_proto_netns(net, &ip_vs_protocol_esp); 258 + #endif 259 + return 0; 260 + } 261 + 262 + static void __net_exit __ip_vs_protocol_cleanup(struct net *net) 263 + { 264 + struct netns_ipvs *ipvs = net_ipvs(net); 265 + struct ip_vs_proto_data *pd; 266 + int i; 267 + 268 + /* unregister all the ipvs proto data for this netns */ 269 + for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 270 + while ((pd = ipvs->proto_data_table[i]) != NULL) 271 + unregister_ip_vs_proto_netns(net, pd); 272 + } 273 + } 274 + 275 + static struct pernet_operations ipvs_proto_ops = { 276 + .init = __ip_vs_protocol_init, 277 + .exit = __ip_vs_protocol_cleanup, 278 + }; 312 279 313 280 int __init ip_vs_protocol_init(void) 314 281 { ··· 378 265 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 379 266 #endif 380 267 pr_info("Registered protocols (%s)\n", &protocols[2]); 268 + return register_pernet_subsys(&ipvs_proto_ops); 381 269 382 270 return 0; 383 271 } ··· 389 275 struct ip_vs_protocol *pp; 390 276 int i; 391 277 278 + unregister_pernet_subsys(&ipvs_proto_ops); 392 279 /* unregister all the ipvs protocols */ 393 280 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 394 281 while ((pp = ip_vs_proto_table[i]) != NULL)

+17 -28

net/netfilter/ipvs/ip_vs_proto_ah_esp.c

··· 41 41 #define PORT_ISAKMP 500 42 42 43 43 static void 44 - ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, 45 - int inverse, struct ip_vs_conn_param *p) 44 + ah_esp_conn_fill_param_proto(struct net *net, int af, 45 + const struct ip_vs_iphdr *iph, int inverse, 46 + struct ip_vs_conn_param *p) 46 47 { 47 48 if (likely(!inverse)) 48 - ip_vs_conn_fill_param(af, IPPROTO_UDP, 49 + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, 49 50 &iph->saddr, htons(PORT_ISAKMP), 50 51 &iph->daddr, htons(PORT_ISAKMP), p); 51 52 else 52 - ip_vs_conn_fill_param(af, IPPROTO_UDP, 53 + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, 53 54 &iph->daddr, htons(PORT_ISAKMP), 54 55 &iph->saddr, htons(PORT_ISAKMP), p); 55 56 } 56 57 57 58 static struct ip_vs_conn * 58 - ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 59 + ah_esp_conn_in_get(int af, const struct sk_buff *skb, 59 60 const struct ip_vs_iphdr *iph, unsigned int proto_off, 60 61 int inverse) 61 62 { 62 63 struct ip_vs_conn *cp; 63 64 struct ip_vs_conn_param p; 65 + struct net *net = skb_net(skb); 64 66 65 - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 67 + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); 66 68 cp = ip_vs_conn_in_get(&p); 67 69 if (!cp) { 68 70 /* ··· 74 72 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " 75 73 "%s%s %s->%s\n", 76 74 inverse ? 
"ICMP+" : "", 77 - pp->name, 75 + ip_vs_proto_get(iph->protocol)->name, 78 76 IP_VS_DBG_ADDR(af, &iph->saddr), 79 77 IP_VS_DBG_ADDR(af, &iph->daddr)); 80 78 } ··· 85 83 86 84 static struct ip_vs_conn * 87 85 ah_esp_conn_out_get(int af, const struct sk_buff *skb, 88 - struct ip_vs_protocol *pp, 89 86 const struct ip_vs_iphdr *iph, 90 87 unsigned int proto_off, 91 88 int inverse) 92 89 { 93 90 struct ip_vs_conn *cp; 94 91 struct ip_vs_conn_param p; 92 + struct net *net = skb_net(skb); 95 93 96 - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); 94 + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); 97 95 cp = ip_vs_conn_out_get(&p); 98 96 if (!cp) { 99 97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 100 98 "%s%s %s->%s\n", 101 99 inverse ? "ICMP+" : "", 102 - pp->name, 100 + ip_vs_proto_get(iph->protocol)->name, 103 101 IP_VS_DBG_ADDR(af, &iph->saddr), 104 102 IP_VS_DBG_ADDR(af, &iph->daddr)); 105 103 } ··· 109 107 110 108 111 109 static int 112 - ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 110 + ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 113 111 int *verdict, struct ip_vs_conn **cpp) 114 112 { 115 113 /* ··· 119 117 return 0; 120 118 } 121 119 122 - static void ah_esp_init(struct ip_vs_protocol *pp) 123 - { 124 - /* nothing to do now */ 125 - } 126 - 127 - 128 - static void ah_esp_exit(struct ip_vs_protocol *pp) 129 - { 130 - /* nothing to do now */ 131 - } 132 - 133 - 134 120 #ifdef CONFIG_IP_VS_PROTO_AH 135 121 struct ip_vs_protocol ip_vs_protocol_ah = { 136 122 .name = "AH", 137 123 .protocol = IPPROTO_AH, 138 124 .num_states = 1, 139 125 .dont_defrag = 1, 140 - .init = ah_esp_init, 141 - .exit = ah_esp_exit, 126 + .init = NULL, 127 + .exit = NULL, 142 128 .conn_schedule = ah_esp_conn_schedule, 143 129 .conn_in_get = ah_esp_conn_in_get, 144 130 .conn_out_get = ah_esp_conn_out_get, ··· 139 149 .app_conn_bind = NULL, 140 150 .debug_packet = ip_vs_tcpudp_debug_packet, 141 151 
.timeout_change = NULL, /* ISAKMP */ 142 - .set_state_timeout = NULL, 143 152 }; 144 153 #endif 145 154 ··· 148 159 .protocol = IPPROTO_ESP, 149 160 .num_states = 1, 150 161 .dont_defrag = 1, 151 - .init = ah_esp_init, 152 - .exit = ah_esp_exit, 162 + .init = NULL, 163 + .exit = NULL, 153 164 .conn_schedule = ah_esp_conn_schedule, 154 165 .conn_in_get = ah_esp_conn_in_get, 155 166 .conn_out_get = ah_esp_conn_out_get,

+76 -77

net/netfilter/ipvs/ip_vs_proto_sctp.c

··· 9 9 #include <net/ip_vs.h> 10 10 11 11 static int 12 - sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 12 + sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 13 13 int *verdict, struct ip_vs_conn **cpp) 14 14 { 15 + struct net *net; 15 16 struct ip_vs_service *svc; 16 17 sctp_chunkhdr_t _schunkh, *sch; 17 18 sctp_sctphdr_t *sh, _sctph; ··· 28 27 sizeof(_schunkh), &_schunkh); 29 28 if (sch == NULL) 30 29 return 0; 31 - 30 + net = skb_net(skb); 32 31 if ((sch->type == SCTP_CID_INIT) && 33 - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 32 + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 34 33 &iph.daddr, sh->dest))) { 35 34 int ignored; 36 35 37 - if (ip_vs_todrop()) { 36 + if (ip_vs_todrop(net_ipvs(net))) { 38 37 /* 39 38 * It seems that we are very loaded. 40 39 * We have to drop this packet :( ··· 47 46 * Let the virtual server select a real server for the 48 47 * incoming connection, and create a connection entry. 
49 48 */ 50 - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 51 - if (!*cpp && !ignored) { 52 - *verdict = ip_vs_leave(svc, skb, pp); 49 + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 50 + if (!*cpp && ignored <= 0) { 51 + if (!ignored) 52 + *verdict = ip_vs_leave(svc, skb, pd); 53 + else { 54 + ip_vs_service_put(svc); 55 + *verdict = NF_DROP; 56 + } 53 57 return 0; 54 58 } 55 59 ip_vs_service_put(svc); 56 60 } 57 - 61 + /* NF_ACCEPT */ 58 62 return 1; 59 63 } 60 64 ··· 862 856 /* 863 857 * Timeout table[state] 864 858 */ 865 - static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 859 + static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { 866 860 [IP_VS_SCTP_S_NONE] = 2 * HZ, 867 861 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, 868 862 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, ··· 906 900 return "?"; 907 901 } 908 902 909 - static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) 910 - { 911 - } 912 - 913 - static int 914 - sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) 915 - { 916 - 917 - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, 918 - sctp_state_name_table, sname, to); 919 - } 920 - 921 903 static inline int 922 - set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 904 + set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, 923 905 int direction, const struct sk_buff *skb) 924 906 { 925 907 sctp_chunkhdr_t _sctpch, *sch; ··· 965 971 966 972 IP_VS_DBG_BUF(8, "%s %s %s:%d->" 967 973 "%s:%d state: %s->%s conn->refcnt:%d\n", 968 - pp->name, 974 + pd->pp->name, 969 975 ((direction == IP_VS_DIR_OUTPUT) ? 970 976 "output " : "input "), 971 977 IP_VS_DBG_ADDR(cp->af, &cp->daddr), ··· 989 995 } 990 996 } 991 997 } 998 + if (likely(pd)) 999 + cp->timeout = pd->timeout_table[cp->state = next_state]; 1000 + else /* What to do ? 
*/ 1001 + cp->timeout = sctp_timeouts[cp->state = next_state]; 992 1002 993 - cp->timeout = pp->timeout_table[cp->state = next_state]; 994 - 995 - return 1; 1003 + return 1; 996 1004 } 997 1005 998 1006 static int 999 1007 sctp_state_transition(struct ip_vs_conn *cp, int direction, 1000 - const struct sk_buff *skb, struct ip_vs_protocol *pp) 1008 + const struct sk_buff *skb, struct ip_vs_proto_data *pd) 1001 1009 { 1002 1010 int ret = 0; 1003 1011 1004 1012 spin_lock(&cp->lock); 1005 - ret = set_sctp_state(pp, cp, direction, skb); 1013 + ret = set_sctp_state(pd, cp, direction, skb); 1006 1014 spin_unlock(&cp->lock); 1007 1015 1008 1016 return ret; 1009 1017 } 1010 - 1011 - /* 1012 - * Hash table for SCTP application incarnations 1013 - */ 1014 - #define SCTP_APP_TAB_BITS 4 1015 - #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) 1016 - #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) 1017 - 1018 - static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; 1019 - static DEFINE_SPINLOCK(sctp_app_lock); 1020 1018 1021 1019 static inline __u16 sctp_app_hashkey(__be16 port) 1022 1020 { ··· 1016 1030 & SCTP_APP_TAB_MASK; 1017 1031 } 1018 1032 1019 - static int sctp_register_app(struct ip_vs_app *inc) 1033 + static int sctp_register_app(struct net *net, struct ip_vs_app *inc) 1020 1034 { 1021 1035 struct ip_vs_app *i; 1022 1036 __u16 hash; 1023 1037 __be16 port = inc->port; 1024 1038 int ret = 0; 1039 + struct netns_ipvs *ipvs = net_ipvs(net); 1040 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 1025 1041 1026 1042 hash = sctp_app_hashkey(port); 1027 1043 1028 - spin_lock_bh(&sctp_app_lock); 1029 - list_for_each_entry(i, &sctp_apps[hash], p_list) { 1044 + spin_lock_bh(&ipvs->sctp_app_lock); 1045 + list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { 1030 1046 if (i->port == port) { 1031 1047 ret = -EEXIST; 1032 1048 goto out; 1033 1049 } 1034 1050 } 1035 - list_add(&inc->p_list, &sctp_apps[hash]); 1036 - atomic_inc(&ip_vs_protocol_sctp.appcnt); 
1051 + list_add(&inc->p_list, &ipvs->sctp_apps[hash]); 1052 + atomic_inc(&pd->appcnt); 1037 1053 out: 1038 - spin_unlock_bh(&sctp_app_lock); 1054 + spin_unlock_bh(&ipvs->sctp_app_lock); 1039 1055 1040 1056 return ret; 1041 1057 } 1042 1058 1043 - static void sctp_unregister_app(struct ip_vs_app *inc) 1059 + static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) 1044 1060 { 1045 - spin_lock_bh(&sctp_app_lock); 1046 - atomic_dec(&ip_vs_protocol_sctp.appcnt); 1061 + struct netns_ipvs *ipvs = net_ipvs(net); 1062 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); 1063 + 1064 + spin_lock_bh(&ipvs->sctp_app_lock); 1065 + atomic_dec(&pd->appcnt); 1047 1066 list_del(&inc->p_list); 1048 - spin_unlock_bh(&sctp_app_lock); 1067 + spin_unlock_bh(&ipvs->sctp_app_lock); 1049 1068 } 1050 1069 1051 1070 static int sctp_app_conn_bind(struct ip_vs_conn *cp) 1052 1071 { 1072 + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 1053 1073 int hash; 1054 1074 struct ip_vs_app *inc; 1055 1075 int result = 0; ··· 1066 1074 /* Lookup application incarnations and bind the right one */ 1067 1075 hash = sctp_app_hashkey(cp->vport); 1068 1076 1069 - spin_lock(&sctp_app_lock); 1070 - list_for_each_entry(inc, &sctp_apps[hash], p_list) { 1077 + spin_lock(&ipvs->sctp_app_lock); 1078 + list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { 1071 1079 if (inc->port == cp->vport) { 1072 1080 if (unlikely(!ip_vs_app_inc_get(inc))) 1073 1081 break; 1074 - spin_unlock(&sctp_app_lock); 1082 + spin_unlock(&ipvs->sctp_app_lock); 1075 1083 1076 1084 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" 1077 1085 "%s:%u to app %s on port %u\n", ··· 1087 1095 goto out; 1088 1096 } 1089 1097 } 1090 - spin_unlock(&sctp_app_lock); 1098 + spin_unlock(&ipvs->sctp_app_lock); 1091 1099 out: 1092 1100 return result; 1093 1101 } 1094 1102 1095 - static void ip_vs_sctp_init(struct ip_vs_protocol *pp) 1103 + /* --------------------------------------------- 1104 + * timeouts is netns 
related now. 1105 + * --------------------------------------------- 1106 + */ 1107 + static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) 1096 1108 { 1097 - IP_VS_INIT_HASH_TABLE(sctp_apps); 1098 - pp->timeout_table = sctp_timeouts; 1109 + struct netns_ipvs *ipvs = net_ipvs(net); 1110 + 1111 + ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); 1112 + spin_lock_init(&ipvs->tcp_app_lock); 1113 + pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, 1114 + sizeof(sctp_timeouts)); 1099 1115 } 1100 1116 1101 - 1102 - static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) 1117 + static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) 1103 1118 { 1104 - 1119 + kfree(pd->timeout_table); 1105 1120 } 1106 1121 1107 1122 struct ip_vs_protocol ip_vs_protocol_sctp = { 1108 - .name = "SCTP", 1109 - .protocol = IPPROTO_SCTP, 1110 - .num_states = IP_VS_SCTP_S_LAST, 1111 - .dont_defrag = 0, 1112 - .appcnt = ATOMIC_INIT(0), 1113 - .init = ip_vs_sctp_init, 1114 - .exit = ip_vs_sctp_exit, 1115 - .register_app = sctp_register_app, 1123 + .name = "SCTP", 1124 + .protocol = IPPROTO_SCTP, 1125 + .num_states = IP_VS_SCTP_S_LAST, 1126 + .dont_defrag = 0, 1127 + .init = NULL, 1128 + .exit = NULL, 1129 + .init_netns = __ip_vs_sctp_init, 1130 + .exit_netns = __ip_vs_sctp_exit, 1131 + .register_app = sctp_register_app, 1116 1132 .unregister_app = sctp_unregister_app, 1117 - .conn_schedule = sctp_conn_schedule, 1118 - .conn_in_get = ip_vs_conn_in_get_proto, 1119 - .conn_out_get = ip_vs_conn_out_get_proto, 1120 - .snat_handler = sctp_snat_handler, 1121 - .dnat_handler = sctp_dnat_handler, 1122 - .csum_check = sctp_csum_check, 1123 - .state_name = sctp_state_name, 1133 + .conn_schedule = sctp_conn_schedule, 1134 + .conn_in_get = ip_vs_conn_in_get_proto, 1135 + .conn_out_get = ip_vs_conn_out_get_proto, 1136 + .snat_handler = sctp_snat_handler, 1137 + .dnat_handler = sctp_dnat_handler, 1138 + .csum_check = sctp_csum_check, 1139 + 
.state_name = sctp_state_name, 1124 1140 .state_transition = sctp_state_transition, 1125 - .app_conn_bind = sctp_app_conn_bind, 1126 - .debug_packet = ip_vs_tcpudp_debug_packet, 1127 - .timeout_change = sctp_timeout_change, 1128 - .set_state_timeout = sctp_set_state_timeout, 1141 + .app_conn_bind = sctp_app_conn_bind, 1142 + .debug_packet = ip_vs_tcpudp_debug_packet, 1143 + .timeout_change = NULL, 1129 1144 };

+76 -66

net/netfilter/ipvs/ip_vs_proto_tcp.c

··· 9 9 * as published by the Free Software Foundation; either version 10 10 * 2 of the License, or (at your option) any later version. 11 11 * 12 - * Changes: 12 + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 13 13 * 14 + * Network name space (netns) aware. 15 + * Global data moved to netns i.e struct netns_ipvs 16 + * tcp_timeouts table has copy per netns in a hash table per 17 + * protocol ip_vs_proto_data and is handled by netns 14 18 */ 15 19 16 20 #define KMSG_COMPONENT "IPVS" ··· 32 28 #include <net/ip_vs.h> 33 29 34 30 static int 35 - tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 31 + tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 36 32 int *verdict, struct ip_vs_conn **cpp) 37 33 { 34 + struct net *net; 38 35 struct ip_vs_service *svc; 39 36 struct tcphdr _tcph, *th; 40 37 struct ip_vs_iphdr iph; ··· 47 42 *verdict = NF_DROP; 48 43 return 0; 49 44 } 50 - 45 + net = skb_net(skb); 51 46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ 52 47 if (th->syn && 53 - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 54 - th->dest))) { 48 + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 49 + &iph.daddr, th->dest))) { 55 50 int ignored; 56 51 57 - if (ip_vs_todrop()) { 52 + if (ip_vs_todrop(net_ipvs(net))) { 58 53 /* 59 54 * It seems that we are very loaded. 60 55 * We have to drop this packet :( ··· 68 63 * Let the virtual server select a real server for the 69 64 * incoming connection, and create a connection entry. 
70 65 */ 71 - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 72 - if (!*cpp && !ignored) { 73 - *verdict = ip_vs_leave(svc, skb, pp); 66 + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 67 + if (!*cpp && ignored <= 0) { 68 + if (!ignored) 69 + *verdict = ip_vs_leave(svc, skb, pd); 70 + else { 71 + ip_vs_service_put(svc); 72 + *verdict = NF_DROP; 73 + } 74 74 return 0; 75 75 } 76 76 ip_vs_service_put(svc); 77 77 } 78 + /* NF_ACCEPT */ 78 79 return 1; 79 80 } 80 81 ··· 349 338 /* 350 339 * Timeout table[state] 351 340 */ 352 - static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { 341 + static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { 353 342 [IP_VS_TCP_S_NONE] = 2*HZ, 354 343 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, 355 344 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, ··· 448 437 /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, 449 438 }; 450 439 451 - static struct tcp_states_t *tcp_state_table = tcp_states; 452 - 453 - 454 - static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) 440 + static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) 455 441 { 456 442 int on = (flags & 1); /* secure_tcp */ 457 443 ··· 458 450 ** for most if not for all of the applications. Something 459 451 ** like "capabilities" (flags) for each object. 460 452 */ 461 - tcp_state_table = (on? tcp_states_dos : tcp_states); 462 - } 463 - 464 - static int 465 - tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) 466 - { 467 - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, 468 - tcp_state_name_table, sname, to); 453 + pd->tcp_state_table = (on ? 
tcp_states_dos : tcp_states); 469 454 } 470 455 471 456 static inline int tcp_state_idx(struct tcphdr *th) ··· 475 474 } 476 475 477 476 static inline void 478 - set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, 477 + set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, 479 478 int direction, struct tcphdr *th) 480 479 { 481 480 int state_idx; ··· 498 497 goto tcp_state_out; 499 498 } 500 499 501 - new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; 500 + new_state = 501 + pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; 502 502 503 503 tcp_state_out: 504 504 if (new_state != cp->state) { ··· 507 505 508 506 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" 509 507 "%s:%d state: %s->%s conn->refcnt:%d\n", 510 - pp->name, 508 + pd->pp->name, 511 509 ((state_off == TCP_DIR_OUTPUT) ? 512 510 "output " : "input "), 513 511 th->syn ? 'S' : '.', ··· 537 535 } 538 536 } 539 537 540 - cp->timeout = pp->timeout_table[cp->state = new_state]; 538 + if (likely(pd)) 539 + cp->timeout = pd->timeout_table[cp->state = new_state]; 540 + else /* What to do ? 
*/ 541 + cp->timeout = tcp_timeouts[cp->state = new_state]; 541 542 } 542 - 543 543 544 544 /* 545 545 * Handle state transitions ··· 549 545 static int 550 546 tcp_state_transition(struct ip_vs_conn *cp, int direction, 551 547 const struct sk_buff *skb, 552 - struct ip_vs_protocol *pp) 548 + struct ip_vs_proto_data *pd) 553 549 { 554 550 struct tcphdr _tcph, *th; 555 551 ··· 564 560 return 0; 565 561 566 562 spin_lock(&cp->lock); 567 - set_tcp_state(pp, cp, direction, th); 563 + set_tcp_state(pd, cp, direction, th); 568 564 spin_unlock(&cp->lock); 569 565 570 566 return 1; 571 567 } 572 - 573 - 574 - /* 575 - * Hash table for TCP application incarnations 576 - */ 577 - #define TCP_APP_TAB_BITS 4 578 - #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) 579 - #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) 580 - 581 - static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; 582 - static DEFINE_SPINLOCK(tcp_app_lock); 583 568 584 569 static inline __u16 tcp_app_hashkey(__be16 port) 585 570 { ··· 577 584 } 578 585 579 586 580 - static int tcp_register_app(struct ip_vs_app *inc) 587 + static int tcp_register_app(struct net *net, struct ip_vs_app *inc) 581 588 { 582 589 struct ip_vs_app *i; 583 590 __u16 hash; 584 591 __be16 port = inc->port; 585 592 int ret = 0; 593 + struct netns_ipvs *ipvs = net_ipvs(net); 594 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 586 595 587 596 hash = tcp_app_hashkey(port); 588 597 589 - spin_lock_bh(&tcp_app_lock); 590 - list_for_each_entry(i, &tcp_apps[hash], p_list) { 598 + spin_lock_bh(&ipvs->tcp_app_lock); 599 + list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { 591 600 if (i->port == port) { 592 601 ret = -EEXIST; 593 602 goto out; 594 603 } 595 604 } 596 - list_add(&inc->p_list, &tcp_apps[hash]); 597 - atomic_inc(&ip_vs_protocol_tcp.appcnt); 605 + list_add(&inc->p_list, &ipvs->tcp_apps[hash]); 606 + atomic_inc(&pd->appcnt); 598 607 599 608 out: 600 - spin_unlock_bh(&tcp_app_lock); 609 + 
spin_unlock_bh(&ipvs->tcp_app_lock); 601 610 return ret; 602 611 } 603 612 604 613 605 614 static void 606 - tcp_unregister_app(struct ip_vs_app *inc) 615 + tcp_unregister_app(struct net *net, struct ip_vs_app *inc) 607 616 { 608 - spin_lock_bh(&tcp_app_lock); 609 - atomic_dec(&ip_vs_protocol_tcp.appcnt); 617 + struct netns_ipvs *ipvs = net_ipvs(net); 618 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 619 + 620 + spin_lock_bh(&ipvs->tcp_app_lock); 621 + atomic_dec(&pd->appcnt); 610 622 list_del(&inc->p_list); 611 - spin_unlock_bh(&tcp_app_lock); 623 + spin_unlock_bh(&ipvs->tcp_app_lock); 612 624 } 613 625 614 626 615 627 static int 616 628 tcp_app_conn_bind(struct ip_vs_conn *cp) 617 629 { 630 + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 618 631 int hash; 619 632 struct ip_vs_app *inc; 620 633 int result = 0; ··· 632 633 /* Lookup application incarnations and bind the right one */ 633 634 hash = tcp_app_hashkey(cp->vport); 634 635 635 - spin_lock(&tcp_app_lock); 636 - list_for_each_entry(inc, &tcp_apps[hash], p_list) { 636 + spin_lock(&ipvs->tcp_app_lock); 637 + list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { 637 638 if (inc->port == cp->vport) { 638 639 if (unlikely(!ip_vs_app_inc_get(inc))) 639 640 break; 640 - spin_unlock(&tcp_app_lock); 641 + spin_unlock(&ipvs->tcp_app_lock); 641 642 642 643 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 643 644 "%s:%u to app %s on port %u\n", ··· 654 655 goto out; 655 656 } 656 657 } 657 - spin_unlock(&tcp_app_lock); 658 + spin_unlock(&ipvs->tcp_app_lock); 658 659 659 660 out: 660 661 return result; ··· 664 665 /* 665 666 * Set LISTEN timeout. 
(ip_vs_conn_put will setup timer) 666 667 */ 667 - void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) 668 + void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) 668 669 { 670 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); 671 + 669 672 spin_lock(&cp->lock); 670 673 cp->state = IP_VS_TCP_S_LISTEN; 671 - cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; 674 + cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] 675 + : tcp_timeouts[IP_VS_TCP_S_LISTEN]); 672 676 spin_unlock(&cp->lock); 673 677 } 674 678 675 - 676 - static void ip_vs_tcp_init(struct ip_vs_protocol *pp) 679 + /* --------------------------------------------- 680 + * timeouts is netns related now. 681 + * --------------------------------------------- 682 + */ 683 + static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) 677 684 { 678 - IP_VS_INIT_HASH_TABLE(tcp_apps); 679 - pp->timeout_table = tcp_timeouts; 685 + struct netns_ipvs *ipvs = net_ipvs(net); 686 + 687 + ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); 688 + spin_lock_init(&ipvs->tcp_app_lock); 689 + pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, 690 + sizeof(tcp_timeouts)); 691 + pd->tcp_state_table = tcp_states; 680 692 } 681 693 682 - 683 - static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) 694 + static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) 684 695 { 696 + kfree(pd->timeout_table); 685 697 } 686 698 687 699 ··· 701 691 .protocol = IPPROTO_TCP, 702 692 .num_states = IP_VS_TCP_S_LAST, 703 693 .dont_defrag = 0, 704 - .appcnt = ATOMIC_INIT(0), 705 - .init = ip_vs_tcp_init, 706 - .exit = ip_vs_tcp_exit, 694 + .init = NULL, 695 + .exit = NULL, 696 + .init_netns = __ip_vs_tcp_init, 697 + .exit_netns = __ip_vs_tcp_exit, 707 698 .register_app = tcp_register_app, 708 699 .unregister_app = tcp_unregister_app, 709 700 .conn_schedule = tcp_conn_schedule, ··· 718 707 .app_conn_bind = tcp_app_conn_bind, 719 708 
.debug_packet = ip_vs_tcpudp_debug_packet, 720 709 .timeout_change = tcp_timeout_change, 721 - .set_state_timeout = tcp_set_state_timeout, 722 710 };

+57 -53

net/netfilter/ipvs/ip_vs_proto_udp.c

··· 9 9 * as published by the Free Software Foundation; either version 10 10 * 2 of the License, or (at your option) any later version. 11 11 * 12 - * Changes: 12 + * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com> 13 + * Network name space (netns) aware. 13 14 * 14 15 */ 15 16 ··· 29 28 #include <net/ip6_checksum.h> 30 29 31 30 static int 32 - udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 31 + udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, 33 32 int *verdict, struct ip_vs_conn **cpp) 34 33 { 34 + struct net *net; 35 35 struct ip_vs_service *svc; 36 36 struct udphdr _udph, *uh; 37 37 struct ip_vs_iphdr iph; ··· 44 42 *verdict = NF_DROP; 45 43 return 0; 46 44 } 47 - 48 - svc = ip_vs_service_get(af, skb->mark, iph.protocol, 45 + net = skb_net(skb); 46 + svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, 49 47 &iph.daddr, uh->dest); 50 48 if (svc) { 51 49 int ignored; 52 50 53 - if (ip_vs_todrop()) { 51 + if (ip_vs_todrop(net_ipvs(net))) { 54 52 /* 55 53 * It seems that we are very loaded. 56 54 * We have to drop this packet :( ··· 64 62 * Let the virtual server select a real server for the 65 63 * incoming connection, and create a connection entry. 66 64 */ 67 - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); 68 - if (!*cpp && !ignored) { 69 - *verdict = ip_vs_leave(svc, skb, pp); 65 + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); 66 + if (!*cpp && ignored <= 0) { 67 + if (!ignored) 68 + *verdict = ip_vs_leave(svc, skb, pd); 69 + else { 70 + ip_vs_service_put(svc); 71 + *verdict = NF_DROP; 72 + } 70 73 return 0; 71 74 } 72 75 ip_vs_service_put(svc); 73 76 } 77 + /* NF_ACCEPT */ 74 78 return 1; 75 79 } 76 80 ··· 346 338 return 1; 347 339 } 348 340 349 - 350 - /* 351 - * Note: the caller guarantees that only one of register_app, 352 - * unregister_app or app_conn_bind is called each time. 
353 - */ 354 - 355 - #define UDP_APP_TAB_BITS 4 356 - #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) 357 - #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) 358 - 359 - static struct list_head udp_apps[UDP_APP_TAB_SIZE]; 360 - static DEFINE_SPINLOCK(udp_app_lock); 361 - 362 341 static inline __u16 udp_app_hashkey(__be16 port) 363 342 { 364 343 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) ··· 353 358 } 354 359 355 360 356 - static int udp_register_app(struct ip_vs_app *inc) 361 + static int udp_register_app(struct net *net, struct ip_vs_app *inc) 357 362 { 358 363 struct ip_vs_app *i; 359 364 __u16 hash; 360 365 __be16 port = inc->port; 361 366 int ret = 0; 367 + struct netns_ipvs *ipvs = net_ipvs(net); 368 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 362 369 363 370 hash = udp_app_hashkey(port); 364 371 365 372 366 - spin_lock_bh(&udp_app_lock); 367 - list_for_each_entry(i, &udp_apps[hash], p_list) { 373 + spin_lock_bh(&ipvs->udp_app_lock); 374 + list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { 368 375 if (i->port == port) { 369 376 ret = -EEXIST; 370 377 goto out; 371 378 } 372 379 } 373 - list_add(&inc->p_list, &udp_apps[hash]); 374 - atomic_inc(&ip_vs_protocol_udp.appcnt); 380 + list_add(&inc->p_list, &ipvs->udp_apps[hash]); 381 + atomic_inc(&pd->appcnt); 375 382 376 383 out: 377 - spin_unlock_bh(&udp_app_lock); 384 + spin_unlock_bh(&ipvs->udp_app_lock); 378 385 return ret; 379 386 } 380 387 381 388 382 389 static void 383 - udp_unregister_app(struct ip_vs_app *inc) 390 + udp_unregister_app(struct net *net, struct ip_vs_app *inc) 384 391 { 385 - spin_lock_bh(&udp_app_lock); 386 - atomic_dec(&ip_vs_protocol_udp.appcnt); 392 + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); 393 + struct netns_ipvs *ipvs = net_ipvs(net); 394 + 395 + spin_lock_bh(&ipvs->udp_app_lock); 396 + atomic_dec(&pd->appcnt); 387 397 list_del(&inc->p_list); 388 - spin_unlock_bh(&udp_app_lock); 398 + 
spin_unlock_bh(&ipvs->udp_app_lock); 389 399 } 390 400 391 401 392 402 static int udp_app_conn_bind(struct ip_vs_conn *cp) 393 403 { 404 + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); 394 405 int hash; 395 406 struct ip_vs_app *inc; 396 407 int result = 0; ··· 408 407 /* Lookup application incarnations and bind the right one */ 409 408 hash = udp_app_hashkey(cp->vport); 410 409 411 - spin_lock(&udp_app_lock); 412 - list_for_each_entry(inc, &udp_apps[hash], p_list) { 410 + spin_lock(&ipvs->udp_app_lock); 411 + list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { 413 412 if (inc->port == cp->vport) { 414 413 if (unlikely(!ip_vs_app_inc_get(inc))) 415 414 break; 416 - spin_unlock(&udp_app_lock); 415 + spin_unlock(&ipvs->udp_app_lock); 417 416 418 417 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" 419 418 "%s:%u to app %s on port %u\n", ··· 430 429 goto out; 431 430 } 432 431 } 433 - spin_unlock(&udp_app_lock); 432 + spin_unlock(&ipvs->udp_app_lock); 434 433 435 434 out: 436 435 return result; 437 436 } 438 437 439 438 440 - static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 439 + static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 441 440 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 442 441 [IP_VS_UDP_S_LAST] = 2*HZ, 443 442 }; ··· 446 445 [IP_VS_UDP_S_NORMAL] = "UDP", 447 446 [IP_VS_UDP_S_LAST] = "BUG!", 448 447 }; 449 - 450 - 451 - static int 452 - udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) 453 - { 454 - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, 455 - udp_state_name_table, sname, to); 456 - } 457 448 458 449 static const char * udp_state_name(int state) 459 450 { ··· 457 464 static int 458 465 udp_state_transition(struct ip_vs_conn *cp, int direction, 459 466 const struct sk_buff *skb, 460 - struct ip_vs_protocol *pp) 467 + struct ip_vs_proto_data *pd) 461 468 { 462 - cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 469 + if (unlikely(!pd)) { 470 + pr_err("UDP no ns data\n"); 471 + return 0; 472 + } 473 + 474 
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; 463 475 return 1; 464 476 } 465 477 466 - static void udp_init(struct ip_vs_protocol *pp) 478 + static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) 467 479 { 468 - IP_VS_INIT_HASH_TABLE(udp_apps); 469 - pp->timeout_table = udp_timeouts; 480 + struct netns_ipvs *ipvs = net_ipvs(net); 481 + 482 + ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); 483 + spin_lock_init(&ipvs->udp_app_lock); 484 + pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, 485 + sizeof(udp_timeouts)); 470 486 } 471 487 472 - static void udp_exit(struct ip_vs_protocol *pp) 488 + static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) 473 489 { 490 + kfree(pd->timeout_table); 474 491 } 475 492 476 493 ··· 489 486 .protocol = IPPROTO_UDP, 490 487 .num_states = IP_VS_UDP_S_LAST, 491 488 .dont_defrag = 0, 492 - .init = udp_init, 493 - .exit = udp_exit, 489 + .init = NULL, 490 + .exit = NULL, 491 + .init_netns = __udp_init, 492 + .exit_netns = __udp_exit, 494 493 .conn_schedule = udp_conn_schedule, 495 494 .conn_in_get = ip_vs_conn_in_get_proto, 496 495 .conn_out_get = ip_vs_conn_out_get_proto, ··· 506 501 .app_conn_bind = udp_app_conn_bind, 507 502 .debug_packet = ip_vs_tcpudp_debug_packet, 508 503 .timeout_change = NULL, 509 - .set_state_timeout = udp_set_state_timeout, 510 504 };

+1000 -275

net/netfilter/ipvs/ip_vs_sync.c

··· 5 5 * high-performance and highly available server based on a 6 6 * cluster of servers. 7 7 * 8 + * Version 1, is capable of handling both version 0 and 1 messages. 9 + * Version 0 is the plain old format. 10 + * Note Version 0 receivers will just drop Ver 1 messages. 11 + * Version 1 is capable of handle IPv6, Persistence data, 12 + * time-outs, and firewall marks. 13 + * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order. 14 + * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0 15 + * 16 + * Definitions Message: is a complete datagram 17 + * Sync_conn: is a part of a Message 18 + * Param Data is an option to a Sync_conn. 19 + * 8 20 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 9 21 * 10 22 * ip_vs_sync: sync connection info from master load balancer to backups ··· 27 15 * Alexandre Cassen : Added SyncID support for incoming sync 28 16 * messages filtering. 29 17 * Justin Ossevoort : Fix endian problem on sync message size. 18 + * Hans Schillstrom : Added Version 1: i.e. IPv6, 19 + * Persistence support, fwmark and time-out. 30 20 */ 31 21 32 22 #define KMSG_COMPONENT "IPVS" ··· 49 35 #include <linux/wait.h> 50 36 #include <linux/kernel.h> 51 37 38 + #include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */ 39 + 52 40 #include <net/ip.h> 53 41 #include <net/sock.h> 54 42 ··· 59 43 #define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 60 44 #define IP_VS_SYNC_PORT 8848 /* multicast port */ 61 45 46 + #define SYNC_PROTO_VER 1 /* Protocol version in header */ 62 47 63 48 /* 64 49 * IPVS sync connection entry 50 + * Version 0, i.e. original version. 65 51 */ 66 - struct ip_vs_sync_conn { 52 + struct ip_vs_sync_conn_v0 { 67 53 __u8 reserved; 68 54 69 55 /* Protocol, addresses and port numbers */ ··· 89 71 struct ip_vs_seq out_seq; /* outgoing seq. 
struct */ 90 72 }; 91 73 92 - struct ip_vs_sync_thread_data { 93 - struct socket *sock; 94 - char *buf; 95 - }; 96 - 97 - #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 98 - #define FULL_CONN_SIZE \ 99 - (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 100 - 101 - 102 74 /* 103 - The master mulitcasts messages to the backup load balancers in the 104 - following format. 75 + Sync Connection format (sync_conn) 105 76 106 77 0 1 2 3 107 78 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 108 79 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 109 - | Count Conns | SyncID | Size | 80 + | Type | Protocol | Ver. | Size | 81 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 82 + | Flags | 83 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 84 + | State | cport | 85 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 86 + | vport | dport | 87 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 88 + | fwmark | 89 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 90 + | timeout (in sec.) | 91 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 92 + | ... | 93 + | IP-Addresses (v4 or v6) | 94 + | ... | 95 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 96 + Optional Parameters. 97 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 98 + | Param. Type | Param. Length | Param. data | 99 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | 100 + | ... | 101 + | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 102 + | | Param Type | Param. 
Length | 103 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 104 + | Param data | 105 + | Last Param data should be padded for 32 bit alignment | 106 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 107 + */ 108 + 109 + /* 110 + * Type 0, IPv4 sync connection format 111 + */ 112 + struct ip_vs_sync_v4 { 113 + __u8 type; 114 + __u8 protocol; /* Which protocol (TCP/UDP) */ 115 + __be16 ver_size; /* Version msb 4 bits */ 116 + /* Flags and state transition */ 117 + __be32 flags; /* status flags */ 118 + __be16 state; /* state info */ 119 + /* Protocol, addresses and port numbers */ 120 + __be16 cport; 121 + __be16 vport; 122 + __be16 dport; 123 + __be32 fwmark; /* Firewall mark from skb */ 124 + __be32 timeout; /* cp timeout */ 125 + __be32 caddr; /* client address */ 126 + __be32 vaddr; /* virtual address */ 127 + __be32 daddr; /* destination address */ 128 + /* The sequence options start here */ 129 + /* PE data padded to 32bit alignment after seq. options */ 130 + }; 131 + /* 132 + * Type 2 messages IPv6 133 + */ 134 + struct ip_vs_sync_v6 { 135 + __u8 type; 136 + __u8 protocol; /* Which protocol (TCP/UDP) */ 137 + __be16 ver_size; /* Version msb 4 bits */ 138 + /* Flags and state transition */ 139 + __be32 flags; /* status flags */ 140 + __be16 state; /* state info */ 141 + /* Protocol, addresses and port numbers */ 142 + __be16 cport; 143 + __be16 vport; 144 + __be16 dport; 145 + __be32 fwmark; /* Firewall mark from skb */ 146 + __be32 timeout; /* cp timeout */ 147 + struct in6_addr caddr; /* client address */ 148 + struct in6_addr vaddr; /* virtual address */ 149 + struct in6_addr daddr; /* destination address */ 150 + /* The sequence options start here */ 151 + /* PE data padded to 32bit alignment after seq. 
options */ 152 + }; 153 + 154 + union ip_vs_sync_conn { 155 + struct ip_vs_sync_v4 v4; 156 + struct ip_vs_sync_v6 v6; 157 + }; 158 + 159 + /* Bits in Type field in above */ 160 + #define STYPE_INET6 0 161 + #define STYPE_F_INET6 (1 << STYPE_INET6) 162 + 163 + #define SVER_SHIFT 12 /* Shift to get version */ 164 + #define SVER_MASK 0x0fff /* Mask to strip version */ 165 + 166 + #define IPVS_OPT_SEQ_DATA 1 167 + #define IPVS_OPT_PE_DATA 2 168 + #define IPVS_OPT_PE_NAME 3 169 + #define IPVS_OPT_PARAM 7 170 + 171 + #define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1)) 172 + #define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1)) 173 + #define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1)) 174 + #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) 175 + 176 + struct ip_vs_sync_thread_data { 177 + struct net *net; 178 + struct socket *sock; 179 + char *buf; 180 + }; 181 + 182 + /* Version 0 definition of packet sizes */ 183 + #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0)) 184 + #define FULL_CONN_SIZE \ 185 + (sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options)) 186 + 187 + 188 + /* 189 + The master mulitcasts messages (Datagrams) to the backup load balancers 190 + in the following format. 191 + 192 + Version 1: 193 + Note, first byte should be Zero, so ver 0 receivers will drop the packet. 194 + 195 + 0 1 2 3 196 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 197 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 198 + | 0 | SyncID | Size | 199 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 200 + | Count Conns | Version | Reserved, set to Zero | 110 201 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 111 202 | | 112 203 | IPVS Sync Connection (1) | 113 204 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 114 205 | . | 115 - | . | 206 + ~ . ~ 116 207 | . 
| 117 208 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 118 209 | | 119 210 | IPVS Sync Connection (n) | 120 211 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 212 + 213 + Version 0 Header 214 + 0 1 2 3 215 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 216 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 217 + | Count Conns | SyncID | Size | 218 + +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 219 + | IPVS Sync Connection (1) | 121 220 */ 122 221 123 222 #define SYNC_MESG_HEADER_LEN 4 124 223 #define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ 125 224 126 - struct ip_vs_sync_mesg { 225 + /* Version 0 header */ 226 + struct ip_vs_sync_mesg_v0 { 127 227 __u8 nr_conns; 128 228 __u8 syncid; 129 229 __u16 size; ··· 249 113 /* ip_vs_sync_conn entries start here */ 250 114 }; 251 115 252 - /* the maximum length of sync (sending/receiving) message */ 253 - static int sync_send_mesg_maxlen; 254 - static int sync_recv_mesg_maxlen; 116 + /* Version 1 header */ 117 + struct ip_vs_sync_mesg { 118 + __u8 reserved; /* must be zero */ 119 + __u8 syncid; 120 + __u16 size; 121 + __u8 nr_conns; 122 + __s8 version; /* SYNC_PROTO_VER */ 123 + __u16 spare; 124 + /* ip_vs_sync_conn entries start here */ 125 + }; 255 126 256 127 struct ip_vs_sync_buff { 257 128 struct list_head list; ··· 270 127 unsigned char *end; 271 128 }; 272 129 273 - 274 - /* the sync_buff list head and the lock */ 275 - static LIST_HEAD(ip_vs_sync_queue); 276 - static DEFINE_SPINLOCK(ip_vs_sync_lock); 277 - 278 - /* current sync_buff for accepting new conn entries */ 279 - static struct ip_vs_sync_buff *curr_sb = NULL; 280 - static DEFINE_SPINLOCK(curr_sb_lock); 281 - 282 - /* ipvs sync daemon state */ 283 - volatile int ip_vs_sync_state = IP_VS_STATE_NONE; 284 - volatile int ip_vs_master_syncid = 0; 285 - volatile int ip_vs_backup_syncid = 0; 286 - 287 - /* multicast interface name */ 288 - 
char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 289 - char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 290 - 291 - /* sync daemon tasks */ 292 - static struct task_struct *sync_master_thread; 293 - static struct task_struct *sync_backup_thread; 294 - 295 130 /* multicast addr */ 296 131 static struct sockaddr_in mcast_addr = { 297 132 .sin_family = AF_INET, ··· 277 156 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), 278 157 }; 279 158 159 + /* 160 + * Copy of struct ip_vs_seq 161 + * From unaligned network order to aligned host order 162 + */ 163 + static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho) 164 + { 165 + ho->init_seq = get_unaligned_be32(&no->init_seq); 166 + ho->delta = get_unaligned_be32(&no->delta); 167 + ho->previous_delta = get_unaligned_be32(&no->previous_delta); 168 + } 280 169 281 - static inline struct ip_vs_sync_buff *sb_dequeue(void) 170 + /* 171 + * Copy of struct ip_vs_seq 172 + * From Aligned host order to unaligned network order 173 + */ 174 + static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) 175 + { 176 + put_unaligned_be32(ho->init_seq, &no->init_seq); 177 + put_unaligned_be32(ho->delta, &no->delta); 178 + put_unaligned_be32(ho->previous_delta, &no->previous_delta); 179 + } 180 + 181 + static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) 282 182 { 283 183 struct ip_vs_sync_buff *sb; 284 184 285 - spin_lock_bh(&ip_vs_sync_lock); 286 - if (list_empty(&ip_vs_sync_queue)) { 185 + spin_lock_bh(&ipvs->sync_lock); 186 + if (list_empty(&ipvs->sync_queue)) { 287 187 sb = NULL; 288 188 } else { 289 - sb = list_entry(ip_vs_sync_queue.next, 189 + sb = list_entry(ipvs->sync_queue.next, 290 190 struct ip_vs_sync_buff, 291 191 list); 292 192 list_del(&sb->list); 293 193 } 294 - spin_unlock_bh(&ip_vs_sync_lock); 194 + spin_unlock_bh(&ipvs->sync_lock); 295 195 296 196 return sb; 297 197 } 298 198 299 - static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 199 + /* 200 + * Create a new sync 
buffer for Version 1 proto. 201 + */ 202 + static inline struct ip_vs_sync_buff * 203 + ip_vs_sync_buff_create(struct netns_ipvs *ipvs) 300 204 { 301 205 struct ip_vs_sync_buff *sb; 302 206 303 207 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 304 208 return NULL; 305 209 306 - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { 210 + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); 211 + if (!sb->mesg) { 307 212 kfree(sb); 308 213 return NULL; 309 214 } 215 + sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */ 216 + sb->mesg->version = SYNC_PROTO_VER; 217 + sb->mesg->syncid = ipvs->master_syncid; 218 + sb->mesg->size = sizeof(struct ip_vs_sync_mesg); 310 219 sb->mesg->nr_conns = 0; 311 - sb->mesg->syncid = ip_vs_master_syncid; 312 - sb->mesg->size = 4; 313 - sb->head = (unsigned char *)sb->mesg + 4; 314 - sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 220 + sb->mesg->spare = 0; 221 + sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); 222 + sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; 223 + 315 224 sb->firstuse = jiffies; 316 225 return sb; 317 226 } ··· 352 201 kfree(sb); 353 202 } 354 203 355 - static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 204 + static inline void sb_queue_tail(struct netns_ipvs *ipvs) 356 205 { 357 - spin_lock(&ip_vs_sync_lock); 358 - if (ip_vs_sync_state & IP_VS_STATE_MASTER) 359 - list_add_tail(&sb->list, &ip_vs_sync_queue); 206 + struct ip_vs_sync_buff *sb = ipvs->sync_buff; 207 + 208 + spin_lock(&ipvs->sync_lock); 209 + if (ipvs->sync_state & IP_VS_STATE_MASTER) 210 + list_add_tail(&sb->list, &ipvs->sync_queue); 360 211 else 361 212 ip_vs_sync_buff_release(sb); 362 - spin_unlock(&ip_vs_sync_lock); 213 + spin_unlock(&ipvs->sync_lock); 363 214 } 364 215 365 216 /* ··· 369 216 * than the specified time or the specified time is zero. 
370 217 */ 371 218 static inline struct ip_vs_sync_buff * 372 - get_curr_sync_buff(unsigned long time) 219 + get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) 373 220 { 374 221 struct ip_vs_sync_buff *sb; 375 222 376 - spin_lock_bh(&curr_sb_lock); 377 - if (curr_sb && (time == 0 || 378 - time_before(jiffies - curr_sb->firstuse, time))) { 379 - sb = curr_sb; 380 - curr_sb = NULL; 223 + spin_lock_bh(&ipvs->sync_buff_lock); 224 + if (ipvs->sync_buff && (time == 0 || 225 + time_before(jiffies - ipvs->sync_buff->firstuse, time))) { 226 + sb = ipvs->sync_buff; 227 + ipvs->sync_buff = NULL; 381 228 } else 382 229 sb = NULL; 383 - spin_unlock_bh(&curr_sb_lock); 230 + spin_unlock_bh(&ipvs->sync_buff_lock); 384 231 return sb; 385 232 } 386 233 234 + /* 235 + * Switch mode from sending version 0 or 1 236 + * - must handle sync_buf 237 + */ 238 + void ip_vs_sync_switch_mode(struct net *net, int mode) 239 + { 240 + struct netns_ipvs *ipvs = net_ipvs(net); 241 + 242 + if (!ipvs->sync_state & IP_VS_STATE_MASTER) 243 + return; 244 + if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) 245 + return; 246 + 247 + spin_lock_bh(&ipvs->sync_buff_lock); 248 + /* Buffer empty ? then let buf_create do the job */ 249 + if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { 250 + kfree(ipvs->sync_buff); 251 + ipvs->sync_buff = NULL; 252 + } else { 253 + spin_lock_bh(&ipvs->sync_lock); 254 + if (ipvs->sync_state & IP_VS_STATE_MASTER) 255 + list_add_tail(&ipvs->sync_buff->list, 256 + &ipvs->sync_queue); 257 + else 258 + ip_vs_sync_buff_release(ipvs->sync_buff); 259 + spin_unlock_bh(&ipvs->sync_lock); 260 + } 261 + spin_unlock_bh(&ipvs->sync_buff_lock); 262 + } 387 263 388 264 /* 389 - * Add an ip_vs_conn information into the current sync_buff. 390 - * Called by ip_vs_in. 265 + * Create a new sync buffer for Version 0 proto. 
391 266 */ 392 - void ip_vs_sync_conn(struct ip_vs_conn *cp) 267 + static inline struct ip_vs_sync_buff * 268 + ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) 393 269 { 394 - struct ip_vs_sync_mesg *m; 395 - struct ip_vs_sync_conn *s; 270 + struct ip_vs_sync_buff *sb; 271 + struct ip_vs_sync_mesg_v0 *mesg; 272 + 273 + if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 274 + return NULL; 275 + 276 + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); 277 + if (!sb->mesg) { 278 + kfree(sb); 279 + return NULL; 280 + } 281 + mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; 282 + mesg->nr_conns = 0; 283 + mesg->syncid = ipvs->master_syncid; 284 + mesg->size = sizeof(struct ip_vs_sync_mesg_v0); 285 + sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); 286 + sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; 287 + sb->firstuse = jiffies; 288 + return sb; 289 + } 290 + 291 + /* 292 + * Version 0 , could be switched in by sys_ctl. 293 + * Add an ip_vs_conn information into the current sync_buff. 294 + */ 295 + void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) 296 + { 297 + struct netns_ipvs *ipvs = net_ipvs(net); 298 + struct ip_vs_sync_mesg_v0 *m; 299 + struct ip_vs_sync_conn_v0 *s; 396 300 int len; 397 301 398 - spin_lock(&curr_sb_lock); 399 - if (!curr_sb) { 400 - if (!(curr_sb=ip_vs_sync_buff_create())) { 401 - spin_unlock(&curr_sb_lock); 302 + if (unlikely(cp->af != AF_INET)) 303 + return; 304 + /* Do not sync ONE PACKET */ 305 + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 306 + return; 307 + 308 + spin_lock(&ipvs->sync_buff_lock); 309 + if (!ipvs->sync_buff) { 310 + ipvs->sync_buff = 311 + ip_vs_sync_buff_create_v0(ipvs); 312 + if (!ipvs->sync_buff) { 313 + spin_unlock(&ipvs->sync_buff_lock); 402 314 pr_err("ip_vs_sync_buff_create failed.\n"); 403 315 return; 404 316 } ··· 471 253 472 254 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? 
FULL_CONN_SIZE : 473 255 SIMPLE_CONN_SIZE; 474 - m = curr_sb->mesg; 475 - s = (struct ip_vs_sync_conn *)curr_sb->head; 256 + m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; 257 + s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; 476 258 477 259 /* copy members */ 260 + s->reserved = 0; 478 261 s->protocol = cp->protocol; 479 262 s->cport = cp->cport; 480 263 s->vport = cp->vport; ··· 493 274 494 275 m->nr_conns++; 495 276 m->size += len; 496 - curr_sb->head += len; 277 + ipvs->sync_buff->head += len; 497 278 498 279 /* check if there is a space for next one */ 499 - if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 500 - sb_queue_tail(curr_sb); 501 - curr_sb = NULL; 280 + if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { 281 + sb_queue_tail(ipvs); 282 + ipvs->sync_buff = NULL; 502 283 } 503 - spin_unlock(&curr_sb_lock); 284 + spin_unlock(&ipvs->sync_buff_lock); 504 285 505 286 /* synchronize its controller if it has */ 506 287 if (cp->control) 507 - ip_vs_sync_conn(cp->control); 288 + ip_vs_sync_conn(net, cp->control); 508 289 } 509 290 510 - static inline int 511 - ip_vs_conn_fill_param_sync(int af, int protocol, 512 - const union nf_inet_addr *caddr, __be16 cport, 513 - const union nf_inet_addr *vaddr, __be16 vport, 514 - struct ip_vs_conn_param *p) 291 + /* 292 + * Add an ip_vs_conn information into the current sync_buff. 293 + * Called by ip_vs_in. 
294 + * Sending Version 1 messages 295 + */ 296 + void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) 515 297 { 516 - /* XXX: Need to take into account persistence engine */ 517 - ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); 298 + struct netns_ipvs *ipvs = net_ipvs(net); 299 + struct ip_vs_sync_mesg *m; 300 + union ip_vs_sync_conn *s; 301 + __u8 *p; 302 + unsigned int len, pe_name_len, pad; 303 + 304 + /* Handle old version of the protocol */ 305 + if (ipvs->sysctl_sync_ver == 0) { 306 + ip_vs_sync_conn_v0(net, cp); 307 + return; 308 + } 309 + /* Do not sync ONE PACKET */ 310 + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) 311 + goto control; 312 + sloop: 313 + /* Sanity checks */ 314 + pe_name_len = 0; 315 + if (cp->pe_data_len) { 316 + if (!cp->pe_data || !cp->dest) { 317 + IP_VS_ERR_RL("SYNC, connection pe_data invalid\n"); 318 + return; 319 + } 320 + pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); 321 + } 322 + 323 + spin_lock(&ipvs->sync_buff_lock); 324 + 325 + #ifdef CONFIG_IP_VS_IPV6 326 + if (cp->af == AF_INET6) 327 + len = sizeof(struct ip_vs_sync_v6); 328 + else 329 + #endif 330 + len = sizeof(struct ip_vs_sync_v4); 331 + 332 + if (cp->flags & IP_VS_CONN_F_SEQ_MASK) 333 + len += sizeof(struct ip_vs_sync_conn_options) + 2; 334 + 335 + if (cp->pe_data_len) 336 + len += cp->pe_data_len + 2; /* + Param hdr field */ 337 + if (pe_name_len) 338 + len += pe_name_len + 2; 339 + 340 + /* check if there is a space for this one */ 341 + pad = 0; 342 + if (ipvs->sync_buff) { 343 + pad = (4 - (size_t)ipvs->sync_buff->head) & 3; 344 + if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { 345 + sb_queue_tail(ipvs); 346 + ipvs->sync_buff = NULL; 347 + pad = 0; 348 + } 349 + } 350 + 351 + if (!ipvs->sync_buff) { 352 + ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); 353 + if (!ipvs->sync_buff) { 354 + spin_unlock(&ipvs->sync_buff_lock); 355 + pr_err("ip_vs_sync_buff_create failed.\n"); 356 + return; 357 + } 358 + } 359 + 360 + 
m = ipvs->sync_buff->mesg; 361 + p = ipvs->sync_buff->head; 362 + ipvs->sync_buff->head += pad + len; 363 + m->size += pad + len; 364 + /* Add ev. padding from prev. sync_conn */ 365 + while (pad--) 366 + *(p++) = 0; 367 + 368 + s = (union ip_vs_sync_conn *)p; 369 + 370 + /* Set message type & copy members */ 371 + s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0); 372 + s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */ 373 + s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED); 374 + s->v4.state = htons(cp->state); 375 + s->v4.protocol = cp->protocol; 376 + s->v4.cport = cp->cport; 377 + s->v4.vport = cp->vport; 378 + s->v4.dport = cp->dport; 379 + s->v4.fwmark = htonl(cp->fwmark); 380 + s->v4.timeout = htonl(cp->timeout / HZ); 381 + m->nr_conns++; 382 + 383 + #ifdef CONFIG_IP_VS_IPV6 384 + if (cp->af == AF_INET6) { 385 + p += sizeof(struct ip_vs_sync_v6); 386 + ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); 387 + ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); 388 + ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); 389 + } else 390 + #endif 391 + { 392 + p += sizeof(struct ip_vs_sync_v4); /* options ptr */ 393 + s->v4.caddr = cp->caddr.ip; 394 + s->v4.vaddr = cp->vaddr.ip; 395 + s->v4.daddr = cp->daddr.ip; 396 + } 397 + if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { 398 + *(p++) = IPVS_OPT_SEQ_DATA; 399 + *(p++) = sizeof(struct ip_vs_sync_conn_options); 400 + hton_seq((struct ip_vs_seq *)p, &cp->in_seq); 401 + p += sizeof(struct ip_vs_seq); 402 + hton_seq((struct ip_vs_seq *)p, &cp->out_seq); 403 + p += sizeof(struct ip_vs_seq); 404 + } 405 + /* Handle pe data */ 406 + if (cp->pe_data_len && cp->pe_data) { 407 + *(p++) = IPVS_OPT_PE_DATA; 408 + *(p++) = cp->pe_data_len; 409 + memcpy(p, cp->pe_data, cp->pe_data_len); 410 + p += cp->pe_data_len; 411 + if (pe_name_len) { 412 + /* Add PE_NAME */ 413 + *(p++) = IPVS_OPT_PE_NAME; 414 + *(p++) = pe_name_len; 415 + memcpy(p, cp->pe->name, pe_name_len); 416 + p += pe_name_len; 417 + } 418 + } 419 + 420 + 
spin_unlock(&ipvs->sync_buff_lock); 421 + 422 + control: 423 + /* synchronize its controller if it has */ 424 + cp = cp->control; 425 + if (!cp) 426 + return; 427 + /* 428 + * Reduce sync rate for templates 429 + * i.e only increment in_pkts for Templates. 430 + */ 431 + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 432 + int pkts = atomic_add_return(1, &cp->in_pkts); 433 + 434 + if (pkts % ipvs->sysctl_sync_threshold[1] != 1) 435 + return; 436 + } 437 + goto sloop; 438 + } 439 + 440 + /* 441 + * fill_param used by version 1 442 + */ 443 + static inline int 444 + ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, 445 + struct ip_vs_conn_param *p, 446 + __u8 *pe_data, unsigned int pe_data_len, 447 + __u8 *pe_name, unsigned int pe_name_len) 448 + { 449 + #ifdef CONFIG_IP_VS_IPV6 450 + if (af == AF_INET6) 451 + ip_vs_conn_fill_param(net, af, sc->v6.protocol, 452 + (const union nf_inet_addr *)&sc->v6.caddr, 453 + sc->v6.cport, 454 + (const union nf_inet_addr *)&sc->v6.vaddr, 455 + sc->v6.vport, p); 456 + else 457 + #endif 458 + ip_vs_conn_fill_param(net, af, sc->v4.protocol, 459 + (const union nf_inet_addr *)&sc->v4.caddr, 460 + sc->v4.cport, 461 + (const union nf_inet_addr *)&sc->v4.vaddr, 462 + sc->v4.vport, p); 463 + /* Handle pe data */ 464 + if (pe_data_len) { 465 + if (pe_name_len) { 466 + char buff[IP_VS_PENAME_MAXLEN+1]; 467 + 468 + memcpy(buff, pe_name, pe_name_len); 469 + buff[pe_name_len]=0; 470 + p->pe = __ip_vs_pe_getbyname(buff); 471 + if (!p->pe) { 472 + IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", 473 + buff); 474 + return 1; 475 + } 476 + } else { 477 + IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n"); 478 + return 1; 479 + } 480 + 481 + p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC); 482 + if (!p->pe_data) { 483 + if (p->pe->module) 484 + module_put(p->pe->module); 485 + return -ENOMEM; 486 + } 487 + memcpy(p->pe_data, pe_data, pe_data_len); 488 + p->pe_data_len = pe_data_len; 489 + } 518 490 return 0; 519 491 } 520 492 521 
493 /* 522 - * Process received multicast message and create the corresponding 523 - * ip_vs_conn entries. 494 + * Connection Add / Update. 495 + * Common for version 0 and 1 reception of backup sync_conns. 496 + * Param: ... 497 + * timeout is in sec. 524 498 */ 525 - static void ip_vs_process_message(const char *buffer, const size_t buflen) 499 + static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, 500 + unsigned int flags, unsigned int state, 501 + unsigned int protocol, unsigned int type, 502 + const union nf_inet_addr *daddr, __be16 dport, 503 + unsigned long timeout, __u32 fwmark, 504 + struct ip_vs_sync_conn_options *opt) 526 505 { 527 - struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; 528 - struct ip_vs_sync_conn *s; 529 - struct ip_vs_sync_conn_options *opt; 530 - struct ip_vs_conn *cp; 531 - struct ip_vs_protocol *pp; 532 506 struct ip_vs_dest *dest; 507 + struct ip_vs_conn *cp; 508 + struct netns_ipvs *ipvs = net_ipvs(net); 509 + 510 + if (!(flags & IP_VS_CONN_F_TEMPLATE)) 511 + cp = ip_vs_conn_in_get(param); 512 + else 513 + cp = ip_vs_ct_in_get(param); 514 + 515 + if (cp && param->pe_data) /* Free pe_data */ 516 + kfree(param->pe_data); 517 + if (!cp) { 518 + /* 519 + * Find the appropriate destination for the connection. 520 + * If it is not found the connection will remain unbound 521 + * but still handled. 
522 + */ 523 + dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, 524 + param->vport, protocol, fwmark); 525 + 526 + /* Set the approprite ativity flag */ 527 + if (protocol == IPPROTO_TCP) { 528 + if (state != IP_VS_TCP_S_ESTABLISHED) 529 + flags |= IP_VS_CONN_F_INACTIVE; 530 + else 531 + flags &= ~IP_VS_CONN_F_INACTIVE; 532 + } else if (protocol == IPPROTO_SCTP) { 533 + if (state != IP_VS_SCTP_S_ESTABLISHED) 534 + flags |= IP_VS_CONN_F_INACTIVE; 535 + else 536 + flags &= ~IP_VS_CONN_F_INACTIVE; 537 + } 538 + cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark); 539 + if (dest) 540 + atomic_dec(&dest->refcnt); 541 + if (!cp) { 542 + if (param->pe_data) 543 + kfree(param->pe_data); 544 + IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); 545 + return; 546 + } 547 + } else if (!cp->dest) { 548 + dest = ip_vs_try_bind_dest(cp); 549 + if (dest) 550 + atomic_dec(&dest->refcnt); 551 + } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && 552 + (cp->state != state)) { 553 + /* update active/inactive flag for the connection */ 554 + dest = cp->dest; 555 + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 556 + (state != IP_VS_TCP_S_ESTABLISHED)) { 557 + atomic_dec(&dest->activeconns); 558 + atomic_inc(&dest->inactconns); 559 + cp->flags |= IP_VS_CONN_F_INACTIVE; 560 + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && 561 + (state == IP_VS_TCP_S_ESTABLISHED)) { 562 + atomic_inc(&dest->activeconns); 563 + atomic_dec(&dest->inactconns); 564 + cp->flags &= ~IP_VS_CONN_F_INACTIVE; 565 + } 566 + } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && 567 + (cp->state != state)) { 568 + dest = cp->dest; 569 + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 570 + (state != IP_VS_SCTP_S_ESTABLISHED)) { 571 + atomic_dec(&dest->activeconns); 572 + atomic_inc(&dest->inactconns); 573 + cp->flags &= ~IP_VS_CONN_F_INACTIVE; 574 + } 575 + } 576 + 577 + if (opt) 578 + memcpy(&cp->in_seq, opt, sizeof(*opt)); 579 + atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); 580 
+ cp->state = state; 581 + cp->old_state = cp->state; 582 + /* 583 + * For Ver 0 messages style 584 + * - Not possible to recover the right timeout for templates 585 + * - can not find the right fwmark 586 + * virtual service. If needed, we can do it for 587 + * non-fwmark persistent services. 588 + * Ver 1 messages style. 589 + * - No problem. 590 + */ 591 + if (timeout) { 592 + if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) 593 + timeout = MAX_SCHEDULE_TIMEOUT / HZ; 594 + cp->timeout = timeout*HZ; 595 + } else { 596 + struct ip_vs_proto_data *pd; 597 + 598 + pd = ip_vs_proto_data_get(net, protocol); 599 + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) 600 + cp->timeout = pd->timeout_table[state]; 601 + else 602 + cp->timeout = (3*60*HZ); 603 + } 604 + ip_vs_conn_put(cp); 605 + } 606 + 607 + /* 608 + * Process received multicast message for Version 0 609 + */ 610 + static void ip_vs_process_message_v0(struct net *net, const char *buffer, 611 + const size_t buflen) 612 + { 613 + struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; 614 + struct ip_vs_sync_conn_v0 *s; 615 + struct ip_vs_sync_conn_options *opt; 616 + struct ip_vs_protocol *pp; 533 617 struct ip_vs_conn_param param; 534 618 char *p; 535 619 int i; 536 620 537 - if (buflen < sizeof(struct ip_vs_sync_mesg)) { 538 - IP_VS_ERR_RL("sync message header too short\n"); 539 - return; 540 - } 541 - 542 - /* Convert size back to host byte order */ 543 - m->size = ntohs(m->size); 544 - 545 - if (buflen != m->size) { 546 - IP_VS_ERR_RL("bogus sync message size\n"); 547 - return; 548 - } 549 - 550 - /* SyncID sanity check */ 551 - if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { 552 - IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", 553 - m->syncid); 554 - return; 555 - } 556 - 557 - p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 621 + p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0); 558 622 for (i=0; i<m->nr_conns; i++) { 559 623 unsigned flags, 
state; 560 624 561 625 if (p + SIMPLE_CONN_SIZE > buffer+buflen) { 562 - IP_VS_ERR_RL("bogus conn in sync message\n"); 626 + IP_VS_ERR_RL("BACKUP v0, bogus conn\n"); 563 627 return; 564 628 } 565 - s = (struct ip_vs_sync_conn *) p; 629 + s = (struct ip_vs_sync_conn_v0 *) p; 566 630 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC; 567 631 flags &= ~IP_VS_CONN_F_HASHED; 568 632 if (flags & IP_VS_CONN_F_SEQ_MASK) { 569 633 opt = (struct ip_vs_sync_conn_options *)&s[1]; 570 634 p += FULL_CONN_SIZE; 571 635 if (p > buffer+buflen) { 572 - IP_VS_ERR_RL("bogus conn options in sync message\n"); 636 + IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n"); 573 637 return; 574 638 } 575 639 } else { ··· 864 362 if (!(flags & IP_VS_CONN_F_TEMPLATE)) { 865 363 pp = ip_vs_proto_get(s->protocol); 866 364 if (!pp) { 867 - IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n", 365 + IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n", 868 366 s->protocol); 869 367 continue; 870 368 } 871 369 if (state >= pp->num_states) { 872 - IP_VS_DBG(2, "Invalid %s state %u in sync msg\n", 370 + IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n", 873 371 pp->name, state); 874 372 continue; 875 373 } 876 374 } else { 877 375 /* protocol in templates is not used for state/timeout */ 878 - pp = NULL; 879 376 if (state > 0) { 880 - IP_VS_DBG(2, "Invalid template state %u in sync msg\n", 377 + IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", 881 378 state); 882 379 state = 0; 883 380 } 884 381 } 885 382 886 - { 887 - if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, 888 - (union nf_inet_addr *)&s->caddr, 889 - s->cport, 890 - (union nf_inet_addr *)&s->vaddr, 891 - s->vport, &param)) { 892 - pr_err("ip_vs_conn_fill_param_sync failed"); 893 - return; 894 - } 895 - if (!(flags & IP_VS_CONN_F_TEMPLATE)) 896 - cp = ip_vs_conn_in_get(&param); 897 - else 898 - cp = ip_vs_ct_in_get(&param); 899 - } 900 - if (!cp) { 901 - /* 902 - * Find the appropriate destination for the connection. 
903 - * If it is not found the connection will remain unbound 904 - * but still handled. 905 - */ 906 - dest = ip_vs_find_dest(AF_INET, 907 - (union nf_inet_addr *)&s->daddr, 908 - s->dport, 909 - (union nf_inet_addr *)&s->vaddr, 910 - s->vport, 911 - s->protocol); 912 - /* Set the approprite ativity flag */ 913 - if (s->protocol == IPPROTO_TCP) { 914 - if (state != IP_VS_TCP_S_ESTABLISHED) 915 - flags |= IP_VS_CONN_F_INACTIVE; 916 - else 917 - flags &= ~IP_VS_CONN_F_INACTIVE; 918 - } else if (s->protocol == IPPROTO_SCTP) { 919 - if (state != IP_VS_SCTP_S_ESTABLISHED) 920 - flags |= IP_VS_CONN_F_INACTIVE; 921 - else 922 - flags &= ~IP_VS_CONN_F_INACTIVE; 923 - } 924 - cp = ip_vs_conn_new(&param, 925 - (union nf_inet_addr *)&s->daddr, 926 - s->dport, flags, dest); 927 - if (dest) 928 - atomic_dec(&dest->refcnt); 929 - if (!cp) { 930 - pr_err("ip_vs_conn_new failed\n"); 931 - return; 932 - } 933 - } else if (!cp->dest) { 934 - dest = ip_vs_try_bind_dest(cp); 935 - if (dest) 936 - atomic_dec(&dest->refcnt); 937 - } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) && 938 - (cp->state != state)) { 939 - /* update active/inactive flag for the connection */ 940 - dest = cp->dest; 941 - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 942 - (state != IP_VS_TCP_S_ESTABLISHED)) { 943 - atomic_dec(&dest->activeconns); 944 - atomic_inc(&dest->inactconns); 945 - cp->flags |= IP_VS_CONN_F_INACTIVE; 946 - } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && 947 - (state == IP_VS_TCP_S_ESTABLISHED)) { 948 - atomic_inc(&dest->activeconns); 949 - atomic_dec(&dest->inactconns); 950 - cp->flags &= ~IP_VS_CONN_F_INACTIVE; 951 - } 952 - } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) && 953 - (cp->state != state)) { 954 - dest = cp->dest; 955 - if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && 956 - (state != IP_VS_SCTP_S_ESTABLISHED)) { 957 - atomic_dec(&dest->activeconns); 958 - atomic_inc(&dest->inactconns); 959 - cp->flags &= ~IP_VS_CONN_F_INACTIVE; 960 - } 961 - } 383 + 
ip_vs_conn_fill_param(net, AF_INET, s->protocol, 384 + (const union nf_inet_addr *)&s->caddr, 385 + s->cport, 386 + (const union nf_inet_addr *)&s->vaddr, 387 + s->vport, &param); 962 388 963 - if (opt) 964 - memcpy(&cp->in_seq, opt, sizeof(*opt)); 965 - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 966 - cp->state = state; 967 - cp->old_state = cp->state; 968 - /* 969 - * We can not recover the right timeout for templates 970 - * in all cases, we can not find the right fwmark 971 - * virtual service. If needed, we can do it for 972 - * non-fwmark persistent services. 973 - */ 974 - if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) 975 - cp->timeout = pp->timeout_table[state]; 976 - else 977 - cp->timeout = (3*60*HZ); 978 - ip_vs_conn_put(cp); 389 + /* Send timeout as Zero */ 390 + ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET, 391 + (union nf_inet_addr *)&s->daddr, s->dport, 392 + 0, 0, opt); 393 + } 394 + } 395 + 396 + /* 397 + * Handle options 398 + */ 399 + static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen, 400 + __u32 *opt_flags, 401 + struct ip_vs_sync_conn_options *opt) 402 + { 403 + struct ip_vs_sync_conn_options *topt; 404 + 405 + topt = (struct ip_vs_sync_conn_options *)p; 406 + 407 + if (plen != sizeof(struct ip_vs_sync_conn_options)) { 408 + IP_VS_DBG(2, "BACKUP, bogus conn options length\n"); 409 + return -EINVAL; 410 + } 411 + if (*opt_flags & IPVS_OPT_F_SEQ_DATA) { 412 + IP_VS_DBG(2, "BACKUP, conn options found twice\n"); 413 + return -EINVAL; 414 + } 415 + ntoh_seq(&topt->in_seq, &opt->in_seq); 416 + ntoh_seq(&topt->out_seq, &opt->out_seq); 417 + *opt_flags |= IPVS_OPT_F_SEQ_DATA; 418 + return 0; 419 + } 420 + 421 + static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, 422 + __u8 **data, unsigned int maxlen, 423 + __u32 *opt_flags, __u32 flag) 424 + { 425 + if (plen > maxlen) { 426 + IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen); 427 + return -EINVAL; 428 + } 429 
+ if (*opt_flags & flag) { 430 + IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag); 431 + return -EINVAL; 432 + } 433 + *data_len = plen; 434 + *data = p; 435 + *opt_flags |= flag; 436 + return 0; 437 + } 438 + /* 439 + * Process a Version 1 sync. connection 440 + */ 441 + static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) 442 + { 443 + struct ip_vs_sync_conn_options opt; 444 + union ip_vs_sync_conn *s; 445 + struct ip_vs_protocol *pp; 446 + struct ip_vs_conn_param param; 447 + __u32 flags; 448 + unsigned int af, state, pe_data_len=0, pe_name_len=0; 449 + __u8 *pe_data=NULL, *pe_name=NULL; 450 + __u32 opt_flags=0; 451 + int retc=0; 452 + 453 + s = (union ip_vs_sync_conn *) p; 454 + 455 + if (s->v6.type & STYPE_F_INET6) { 456 + #ifdef CONFIG_IP_VS_IPV6 457 + af = AF_INET6; 458 + p += sizeof(struct ip_vs_sync_v6); 459 + #else 460 + IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n"); 461 + retc = 10; 462 + goto out; 463 + #endif 464 + } else if (!s->v4.type) { 465 + af = AF_INET; 466 + p += sizeof(struct ip_vs_sync_v4); 467 + } else { 468 + return -10; 469 + } 470 + if (p > msg_end) 471 + return -20; 472 + 473 + /* Process optional params check Type & Len. 
*/ 474 + while (p < msg_end) { 475 + int ptype; 476 + int plen; 477 + 478 + if (p+2 > msg_end) 479 + return -30; 480 + ptype = *(p++); 481 + plen = *(p++); 482 + 483 + if (!plen || ((p + plen) > msg_end)) 484 + return -40; 485 + /* Handle seq option p = param data */ 486 + switch (ptype & ~IPVS_OPT_F_PARAM) { 487 + case IPVS_OPT_SEQ_DATA: 488 + if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt)) 489 + return -50; 490 + break; 491 + 492 + case IPVS_OPT_PE_DATA: 493 + if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data, 494 + IP_VS_PEDATA_MAXLEN, &opt_flags, 495 + IPVS_OPT_F_PE_DATA)) 496 + return -60; 497 + break; 498 + 499 + case IPVS_OPT_PE_NAME: 500 + if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name, 501 + IP_VS_PENAME_MAXLEN, &opt_flags, 502 + IPVS_OPT_F_PE_NAME)) 503 + return -70; 504 + break; 505 + 506 + default: 507 + /* Param data mandatory ? */ 508 + if (!(ptype & IPVS_OPT_F_PARAM)) { 509 + IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n", 510 + ptype & ~IPVS_OPT_F_PARAM); 511 + retc = 20; 512 + goto out; 513 + } 514 + } 515 + p += plen; /* Next option */ 516 + } 517 + 518 + /* Get flags and Mask off unsupported */ 519 + flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK; 520 + flags |= IP_VS_CONN_F_SYNC; 521 + state = ntohs(s->v4.state); 522 + 523 + if (!(flags & IP_VS_CONN_F_TEMPLATE)) { 524 + pp = ip_vs_proto_get(s->v4.protocol); 525 + if (!pp) { 526 + IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n", 527 + s->v4.protocol); 528 + retc = 30; 529 + goto out; 530 + } 531 + if (state >= pp->num_states) { 532 + IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n", 533 + pp->name, state); 534 + retc = 40; 535 + goto out; 536 + } 537 + } else { 538 + /* protocol in templates is not used for state/timeout */ 539 + if (state > 0) { 540 + IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", 541 + state); 542 + state = 0; 543 + } 544 + } 545 + if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data, 546 + pe_data_len, pe_name, pe_name_len)) { 547 + retc = 50; 
548 + goto out; 549 + } 550 + /* If only IPv4, just silent skip IPv6 */ 551 + if (af == AF_INET) 552 + ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af, 553 + (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, 554 + ntohl(s->v4.timeout), ntohl(s->v4.fwmark), 555 + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) 556 + ); 557 + #ifdef CONFIG_IP_VS_IPV6 558 + else 559 + ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af, 560 + (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, 561 + ntohl(s->v6.timeout), ntohl(s->v6.fwmark), 562 + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) 563 + ); 564 + #endif 565 + return 0; 566 + /* Error exit */ 567 + out: 568 + IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc); 569 + return retc; 570 + 571 + } 572 + /* 573 + * Process received multicast message and create the corresponding 574 + * ip_vs_conn entries. 575 + * Handles Version 0 & 1 576 + */ 577 + static void ip_vs_process_message(struct net *net, __u8 *buffer, 578 + const size_t buflen) 579 + { 580 + struct netns_ipvs *ipvs = net_ipvs(net); 581 + struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; 582 + __u8 *p, *msg_end; 583 + int i, nr_conns; 584 + 585 + if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) { 586 + IP_VS_DBG(2, "BACKUP, message header too short\n"); 587 + return; 588 + } 589 + /* Convert size back to host byte order */ 590 + m2->size = ntohs(m2->size); 591 + 592 + if (buflen != m2->size) { 593 + IP_VS_DBG(2, "BACKUP, bogus message size\n"); 594 + return; 595 + } 596 + /* SyncID sanity check */ 597 + if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { 598 + IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); 599 + return; 600 + } 601 + /* Handle version 1 message */ 602 + if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0) 603 + && (m2->spare == 0)) { 604 + 605 + msg_end = buffer + sizeof(struct ip_vs_sync_mesg); 606 + nr_conns = m2->nr_conns; 607 + 608 + for (i=0; i<nr_conns; i++) { 609 + union 
ip_vs_sync_conn *s; 610 + unsigned size; 611 + int retc; 612 + 613 + p = msg_end; 614 + if (p + sizeof(s->v4) > buffer+buflen) { 615 + IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n"); 616 + return; 617 + } 618 + s = (union ip_vs_sync_conn *)p; 619 + size = ntohs(s->v4.ver_size) & SVER_MASK; 620 + msg_end = p + size; 621 + /* Basic sanity checks */ 622 + if (msg_end > buffer+buflen) { 623 + IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n"); 624 + return; 625 + } 626 + if (ntohs(s->v4.ver_size) >> SVER_SHIFT) { 627 + IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n", 628 + ntohs(s->v4.ver_size) >> SVER_SHIFT); 629 + return; 630 + } 631 + /* Process a single sync_conn */ 632 + retc = ip_vs_proc_sync_conn(net, p, msg_end); 633 + if (retc < 0) { 634 + IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", 635 + retc); 636 + return; 637 + } 638 + /* Make sure we have 32 bit alignment */ 639 + msg_end = p + ((size + 3) & ~3); 640 + } 641 + } else { 642 + /* Old type of message */ 643 + ip_vs_process_message_v0(net, buffer, buflen); 644 + return; 979 645 } 980 646 } 981 647 ··· 1181 511 { 1182 512 struct net_device *dev; 1183 513 struct inet_sock *inet = inet_sk(sk); 514 + struct net *net = sock_net(sk); 1184 515 1185 - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 516 + dev = __dev_get_by_name(net, ifname); 517 + if (!dev) 1186 518 return -ENODEV; 1187 519 1188 520 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) ··· 1203 531 * Set the maximum length of sync message according to the 1204 532 * specified interface's MTU. 
1205 533 */ 1206 - static int set_sync_mesg_maxlen(int sync_state) 534 + static int set_sync_mesg_maxlen(struct net *net, int sync_state) 1207 535 { 536 + struct netns_ipvs *ipvs = net_ipvs(net); 1208 537 struct net_device *dev; 1209 538 int num; 1210 539 1211 540 if (sync_state == IP_VS_STATE_MASTER) { 1212 - if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) 541 + dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); 542 + if (!dev) 1213 543 return -ENODEV; 1214 544 1215 545 num = (dev->mtu - sizeof(struct iphdr) - 1216 546 sizeof(struct udphdr) - 1217 547 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 1218 - sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 548 + ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + 1219 549 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); 1220 550 IP_VS_DBG(7, "setting the maximum length of sync sending " 1221 - "message %d.\n", sync_send_mesg_maxlen); 551 + "message %d.\n", ipvs->send_mesg_maxlen); 1222 552 } else if (sync_state == IP_VS_STATE_BACKUP) { 1223 - if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) 553 + dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); 554 + if (!dev) 1224 555 return -ENODEV; 1225 556 1226 - sync_recv_mesg_maxlen = dev->mtu - 557 + ipvs->recv_mesg_maxlen = dev->mtu - 1227 558 sizeof(struct iphdr) - sizeof(struct udphdr); 1228 559 IP_VS_DBG(7, "setting the maximum length of sync receiving " 1229 - "message %d.\n", sync_recv_mesg_maxlen); 560 + "message %d.\n", ipvs->recv_mesg_maxlen); 1230 561 } 1231 562 1232 563 return 0; ··· 1244 569 static int 1245 570 join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 1246 571 { 572 + struct net *net = sock_net(sk); 1247 573 struct ip_mreqn mreq; 1248 574 struct net_device *dev; 1249 575 int ret; ··· 1252 576 memset(&mreq, 0, sizeof(mreq)); 1253 577 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 1254 578 1255 - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 579 + dev = 
__dev_get_by_name(net, ifname); 580 + if (!dev) 1256 581 return -ENODEV; 1257 582 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 1258 583 return -EINVAL; ··· 1270 593 1271 594 static int bind_mcastif_addr(struct socket *sock, char *ifname) 1272 595 { 596 + struct net *net = sock_net(sock->sk); 1273 597 struct net_device *dev; 1274 598 __be32 addr; 1275 599 struct sockaddr_in sin; 1276 600 1277 - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) 601 + dev = __dev_get_by_name(net, ifname); 602 + if (!dev) 1278 603 return -ENODEV; 1279 604 1280 605 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); ··· 1298 619 /* 1299 620 * Set up sending multicast socket over UDP 1300 621 */ 1301 - static struct socket * make_send_sock(void) 622 + static struct socket *make_send_sock(struct net *net) 1302 623 { 624 + struct netns_ipvs *ipvs = net_ipvs(net); 1303 625 struct socket *sock; 1304 626 int result; 1305 627 ··· 1311 631 return ERR_PTR(result); 1312 632 } 1313 633 1314 - result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); 634 + result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); 1315 635 if (result < 0) { 1316 636 pr_err("Error setting outbound mcast interface\n"); 1317 637 goto error; ··· 1320 640 set_mcast_loop(sock->sk, 0); 1321 641 set_mcast_ttl(sock->sk, 1); 1322 642 1323 - result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); 643 + result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); 1324 644 if (result < 0) { 1325 645 pr_err("Error binding address of the mcast interface\n"); 1326 646 goto error; ··· 1344 664 /* 1345 665 * Set up receiving multicast socket over UDP 1346 666 */ 1347 - static struct socket * make_receive_sock(void) 667 + static struct socket *make_receive_sock(struct net *net) 1348 668 { 669 + struct netns_ipvs *ipvs = net_ipvs(net); 1349 670 struct socket *sock; 1350 671 int result; 1351 672 ··· 1370 689 /* join the multicast group */ 1371 690 result = join_mcast_group(sock->sk, 1372 691 (struct in_addr *) 
&mcast_addr.sin_addr, 1373 - ip_vs_backup_mcast_ifn); 692 + ipvs->backup_mcast_ifn); 1374 693 if (result < 0) { 1375 694 pr_err("Error joining to the multicast group\n"); 1376 695 goto error; ··· 1441 760 static int sync_thread_master(void *data) 1442 761 { 1443 762 struct ip_vs_sync_thread_data *tinfo = data; 763 + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); 1444 764 struct ip_vs_sync_buff *sb; 1445 765 1446 766 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " 1447 767 "syncid = %d\n", 1448 - ip_vs_master_mcast_ifn, ip_vs_master_syncid); 768 + ipvs->master_mcast_ifn, ipvs->master_syncid); 1449 769 1450 770 while (!kthread_should_stop()) { 1451 - while ((sb = sb_dequeue())) { 771 + while ((sb = sb_dequeue(ipvs))) { 1452 772 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1453 773 ip_vs_sync_buff_release(sb); 1454 774 } 1455 775 1456 - /* check if entries stay in curr_sb for 2 seconds */ 1457 - sb = get_curr_sync_buff(2 * HZ); 776 + /* check if entries stay in ipvs->sync_buff for 2 seconds */ 777 + sb = get_curr_sync_buff(ipvs, 2 * HZ); 1458 778 if (sb) { 1459 779 ip_vs_send_sync_msg(tinfo->sock, sb->mesg); 1460 780 ip_vs_sync_buff_release(sb); ··· 1465 783 } 1466 784 1467 785 /* clean up the sync_buff queue */ 1468 - while ((sb=sb_dequeue())) { 786 + while ((sb = sb_dequeue(ipvs))) 1469 787 ip_vs_sync_buff_release(sb); 1470 - } 1471 788 1472 789 /* clean up the current sync_buff */ 1473 - if ((sb = get_curr_sync_buff(0))) { 790 + sb = get_curr_sync_buff(ipvs, 0); 791 + if (sb) 1474 792 ip_vs_sync_buff_release(sb); 1475 - } 1476 793 1477 794 /* release the sending multicast socket */ 1478 795 sock_release(tinfo->sock); ··· 1484 803 static int sync_thread_backup(void *data) 1485 804 { 1486 805 struct ip_vs_sync_thread_data *tinfo = data; 806 + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); 1487 807 int len; 1488 808 1489 809 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " 1490 810 "syncid = %d\n", 1491 - ip_vs_backup_mcast_ifn, 
ip_vs_backup_syncid); 811 + ipvs->backup_mcast_ifn, ipvs->backup_syncid); 1492 812 1493 813 while (!kthread_should_stop()) { 1494 814 wait_event_interruptible(*sk_sleep(tinfo->sock->sk), ··· 1499 817 /* do we have data now? */ 1500 818 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { 1501 819 len = ip_vs_receive(tinfo->sock, tinfo->buf, 1502 - sync_recv_mesg_maxlen); 820 + ipvs->recv_mesg_maxlen); 1503 821 if (len <= 0) { 1504 822 pr_err("receiving message error\n"); 1505 823 break; ··· 1508 826 /* disable bottom half, because it accesses the data 1509 827 shared by softirq while getting/creating conns */ 1510 828 local_bh_disable(); 1511 - ip_vs_process_message(tinfo->buf, len); 829 + ip_vs_process_message(tinfo->net, tinfo->buf, len); 1512 830 local_bh_enable(); 1513 831 } 1514 832 } ··· 1522 840 } 1523 841 1524 842 1525 - int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 843 + int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) 1526 844 { 1527 845 struct ip_vs_sync_thread_data *tinfo; 1528 846 struct task_struct **realtask, *task; 1529 847 struct socket *sock; 848 + struct netns_ipvs *ipvs = net_ipvs(net); 1530 849 char *name, *buf = NULL; 1531 850 int (*threadfn)(void *data); 1532 851 int result = -ENOMEM; 1533 852 1534 853 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); 1535 854 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", 1536 - sizeof(struct ip_vs_sync_conn)); 855 + sizeof(struct ip_vs_sync_conn_v0)); 1537 856 1538 857 if (state == IP_VS_STATE_MASTER) { 1539 - if (sync_master_thread) 858 + if (ipvs->master_thread) 1540 859 return -EEXIST; 1541 860 1542 - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, 1543 - sizeof(ip_vs_master_mcast_ifn)); 1544 - ip_vs_master_syncid = syncid; 1545 - realtask = &sync_master_thread; 1546 - name = "ipvs_syncmaster"; 861 + strlcpy(ipvs->master_mcast_ifn, mcast_ifn, 862 + sizeof(ipvs->master_mcast_ifn)); 863 + ipvs->master_syncid = syncid; 864 + 
realtask = &ipvs->master_thread; 865 + name = "ipvs_master:%d"; 1547 866 threadfn = sync_thread_master; 1548 - sock = make_send_sock(); 867 + sock = make_send_sock(net); 1549 868 } else if (state == IP_VS_STATE_BACKUP) { 1550 - if (sync_backup_thread) 869 + if (ipvs->backup_thread) 1551 870 return -EEXIST; 1552 871 1553 - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, 1554 - sizeof(ip_vs_backup_mcast_ifn)); 1555 - ip_vs_backup_syncid = syncid; 1556 - realtask = &sync_backup_thread; 1557 - name = "ipvs_syncbackup"; 872 + strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, 873 + sizeof(ipvs->backup_mcast_ifn)); 874 + ipvs->backup_syncid = syncid; 875 + realtask = &ipvs->backup_thread; 876 + name = "ipvs_backup:%d"; 1558 877 threadfn = sync_thread_backup; 1559 - sock = make_receive_sock(); 878 + sock = make_receive_sock(net); 1560 879 } else { 1561 880 return -EINVAL; 1562 881 } ··· 1567 884 goto out; 1568 885 } 1569 886 1570 - set_sync_mesg_maxlen(state); 887 + set_sync_mesg_maxlen(net, state); 1571 888 if (state == IP_VS_STATE_BACKUP) { 1572 - buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); 889 + buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); 1573 890 if (!buf) 1574 891 goto outsocket; 1575 892 } ··· 1578 895 if (!tinfo) 1579 896 goto outbuf; 1580 897 898 + tinfo->net = net; 1581 899 tinfo->sock = sock; 1582 900 tinfo->buf = buf; 1583 901 1584 - task = kthread_run(threadfn, tinfo, name); 902 + task = kthread_run(threadfn, tinfo, name, ipvs->gen); 1585 903 if (IS_ERR(task)) { 1586 904 result = PTR_ERR(task); 1587 905 goto outtinfo; ··· 1590 906 1591 907 /* mark as active */ 1592 908 *realtask = task; 1593 - ip_vs_sync_state |= state; 909 + ipvs->sync_state |= state; 1594 910 1595 911 /* increase the module use count */ 1596 912 ip_vs_use_count_inc(); ··· 1608 924 } 1609 925 1610 926 1611 - int stop_sync_thread(int state) 927 + int stop_sync_thread(struct net *net, int state) 1612 928 { 929 + struct netns_ipvs *ipvs = net_ipvs(net); 930 + 1613 931 IP_VS_DBG(7, "%s(): pid 
%d\n", __func__, task_pid_nr(current)); 1614 932 1615 933 if (state == IP_VS_STATE_MASTER) { 1616 - if (!sync_master_thread) 934 + if (!ipvs->master_thread) 1617 935 return -ESRCH; 1618 936 1619 937 pr_info("stopping master sync thread %d ...\n", 1620 - task_pid_nr(sync_master_thread)); 938 + task_pid_nr(ipvs->master_thread)); 1621 939 1622 940 /* 1623 941 * The lock synchronizes with sb_queue_tail(), so that we don't ··· 1627 941 * progress of stopping the master sync daemon. 1628 942 */ 1629 943 1630 - spin_lock_bh(&ip_vs_sync_lock); 1631 - ip_vs_sync_state &= ~IP_VS_STATE_MASTER; 1632 - spin_unlock_bh(&ip_vs_sync_lock); 1633 - kthread_stop(sync_master_thread); 1634 - sync_master_thread = NULL; 944 + spin_lock_bh(&ipvs->sync_lock); 945 + ipvs->sync_state &= ~IP_VS_STATE_MASTER; 946 + spin_unlock_bh(&ipvs->sync_lock); 947 + kthread_stop(ipvs->master_thread); 948 + ipvs->master_thread = NULL; 1635 949 } else if (state == IP_VS_STATE_BACKUP) { 1636 - if (!sync_backup_thread) 950 + if (!ipvs->backup_thread) 1637 951 return -ESRCH; 1638 952 1639 953 pr_info("stopping backup sync thread %d ...\n", 1640 - task_pid_nr(sync_backup_thread)); 954 + task_pid_nr(ipvs->backup_thread)); 1641 955 1642 - ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; 1643 - kthread_stop(sync_backup_thread); 1644 - sync_backup_thread = NULL; 956 + ipvs->sync_state &= ~IP_VS_STATE_BACKUP; 957 + kthread_stop(ipvs->backup_thread); 958 + ipvs->backup_thread = NULL; 1645 959 } else { 1646 960 return -EINVAL; 1647 961 } ··· 1650 964 ip_vs_use_count_dec(); 1651 965 1652 966 return 0; 967 + } 968 + 969 + /* 970 + * Initialize data struct for each netns 971 + */ 972 + static int __net_init __ip_vs_sync_init(struct net *net) 973 + { 974 + struct netns_ipvs *ipvs = net_ipvs(net); 975 + 976 + INIT_LIST_HEAD(&ipvs->sync_queue); 977 + spin_lock_init(&ipvs->sync_lock); 978 + spin_lock_init(&ipvs->sync_buff_lock); 979 + 980 + ipvs->sync_mcast_addr.sin_family = AF_INET; 981 + ipvs->sync_mcast_addr.sin_port = 
cpu_to_be16(IP_VS_SYNC_PORT); 982 + ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); 983 + return 0; 984 + } 985 + 986 + static void __ip_vs_sync_cleanup(struct net *net) 987 + { 988 + stop_sync_thread(net, IP_VS_STATE_MASTER); 989 + stop_sync_thread(net, IP_VS_STATE_BACKUP); 990 + } 991 + 992 + static struct pernet_operations ipvs_sync_ops = { 993 + .init = __ip_vs_sync_init, 994 + .exit = __ip_vs_sync_cleanup, 995 + }; 996 + 997 + 998 + int __init ip_vs_sync_init(void) 999 + { 1000 + return register_pernet_subsys(&ipvs_sync_ops); 1001 + } 1002 + 1003 + void __exit ip_vs_sync_cleanup(void) 1004 + { 1005 + unregister_pernet_subsys(&ipvs_sync_ops); 1653 1006 }

+15 -11

net/netfilter/ipvs/ip_vs_xmit.c

··· 175 175 .fl4_tos = RT_TOS(iph->tos), 176 176 .mark = skb->mark, 177 177 }; 178 - struct rtable *rt; 179 178 180 179 if (ip_route_output_key(net, &rt, &fl)) 181 180 return 0; ··· 389 390 390 391 /* MTU checking */ 391 392 mtu = dst_mtu(&rt->dst); 392 - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 393 + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 394 + !skb_is_gso(skb)) { 393 395 ip_rt_put(rt); 394 396 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 395 397 IP_VS_DBG_RL("%s(): frag needed\n", __func__); ··· 443 443 444 444 /* MTU checking */ 445 445 mtu = dst_mtu(&rt->dst); 446 - if (skb->len > mtu) { 446 + if (skb->len > mtu && !skb_is_gso(skb)) { 447 447 if (!skb->dev) { 448 448 struct net *net = dev_net(skb_dst(skb)->dev); 449 449 ··· 543 543 544 544 /* MTU checking */ 545 545 mtu = dst_mtu(&rt->dst); 546 - if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { 546 + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) && 547 + !skb_is_gso(skb)) { 547 548 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 548 549 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, 549 550 "ip_vs_nat_xmit(): frag needed for"); ··· 659 658 660 659 /* MTU checking */ 661 660 mtu = dst_mtu(&rt->dst); 662 - if (skb->len > mtu) { 661 + if (skb->len > mtu && !skb_is_gso(skb)) { 663 662 if (!skb->dev) { 664 663 struct net *net = dev_net(skb_dst(skb)->dev); 665 664 ··· 774 773 775 774 df |= (old_iph->frag_off & htons(IP_DF)); 776 775 777 - if ((old_iph->frag_off & htons(IP_DF)) 778 - && mtu < ntohs(old_iph->tot_len)) { 776 + if ((old_iph->frag_off & htons(IP_DF) && 777 + mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) { 779 778 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 780 779 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 781 780 goto tx_error_put; ··· 887 886 if (skb_dst(skb)) 888 887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); 889 888 890 - if (mtu < ntohs(old_iph->payload_len) + sizeof(struct 
ipv6hdr)) { 889 + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr) && 890 + !skb_is_gso(skb)) { 891 891 if (!skb->dev) { 892 892 struct net *net = dev_net(skb_dst(skb)->dev); 893 893 ··· 993 991 994 992 /* MTU checking */ 995 993 mtu = dst_mtu(&rt->dst); 996 - if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { 994 + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu && 995 + !skb_is_gso(skb)) { 997 996 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); 998 997 ip_rt_put(rt); 999 998 IP_VS_DBG_RL("%s(): frag needed\n", __func__); ··· 1161 1158 1162 1159 /* MTU checking */ 1163 1160 mtu = dst_mtu(&rt->dst); 1164 - if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { 1161 + if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) && 1162 + !skb_is_gso(skb)) { 1165 1163 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1166 1164 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1167 1165 goto tx_error_put; ··· 1276 1272 1277 1273 /* MTU checking */ 1278 1274 mtu = dst_mtu(&rt->dst); 1279 - if (skb->len > mtu) { 1275 + if (skb->len > mtu && !skb_is_gso(skb)) { 1280 1276 if (!skb->dev) { 1281 1277 struct net *net = dev_net(skb_dst(skb)->dev); 1282 1278

+82

net/netfilter/nf_conntrack_broadcast.c

··· 1 + /* 2 + * broadcast connection tracking helper 3 + * 4 + * (c) 2005 Patrick McHardy <kaber@trash.net> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + */ 11 + 12 + #include <linux/module.h> 13 + #include <linux/ip.h> 14 + #include <net/route.h> 15 + #include <linux/inetdevice.h> 16 + #include <linux/skbuff.h> 17 + 18 + #include <net/netfilter/nf_conntrack.h> 19 + #include <net/netfilter/nf_conntrack_helper.h> 20 + #include <net/netfilter/nf_conntrack_expect.h> 21 + 22 + int nf_conntrack_broadcast_help(struct sk_buff *skb, 23 + unsigned int protoff, 24 + struct nf_conn *ct, 25 + enum ip_conntrack_info ctinfo, 26 + unsigned int timeout) 27 + { 28 + struct nf_conntrack_expect *exp; 29 + struct iphdr *iph = ip_hdr(skb); 30 + struct rtable *rt = skb_rtable(skb); 31 + struct in_device *in_dev; 32 + struct nf_conn_help *help = nfct_help(ct); 33 + __be32 mask = 0; 34 + 35 + /* we're only interested in locally generated packets */ 36 + if (skb->sk == NULL) 37 + goto out; 38 + if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) 39 + goto out; 40 + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 41 + goto out; 42 + 43 + rcu_read_lock(); 44 + in_dev = __in_dev_get_rcu(rt->dst.dev); 45 + if (in_dev != NULL) { 46 + for_primary_ifa(in_dev) { 47 + if (ifa->ifa_broadcast == iph->daddr) { 48 + mask = ifa->ifa_mask; 49 + break; 50 + } 51 + } endfor_ifa(in_dev); 52 + } 53 + rcu_read_unlock(); 54 + 55 + if (mask == 0) 56 + goto out; 57 + 58 + exp = nf_ct_expect_alloc(ct); 59 + if (exp == NULL) 60 + goto out; 61 + 62 + exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 63 + exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port; 64 + 65 + exp->mask.src.u3.ip = mask; 66 + exp->mask.src.u.udp.port = htons(0xFFFF); 67 + 68 + exp->expectfn = NULL; 69 + 
exp->flags = NF_CT_EXPECT_PERMANENT; 70 + exp->class = NF_CT_EXPECT_CLASS_DEFAULT; 71 + exp->helper = NULL; 72 + 73 + nf_ct_expect_related(exp); 74 + nf_ct_expect_put(exp); 75 + 76 + nf_ct_refresh(ct, skb, timeout * HZ); 77 + out: 78 + return NF_ACCEPT; 79 + } 80 + EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help); 81 + 82 + MODULE_LICENSE("GPL");

+38 -19

net/netfilter/nf_conntrack_core.c

··· 43 43 #include <net/netfilter/nf_conntrack_acct.h> 44 44 #include <net/netfilter/nf_conntrack_ecache.h> 45 45 #include <net/netfilter/nf_conntrack_zones.h> 46 + #include <net/netfilter/nf_conntrack_timestamp.h> 46 47 #include <net/netfilter/nf_nat.h> 47 48 #include <net/netfilter/nf_nat_core.h> 48 49 ··· 283 282 static void death_by_timeout(unsigned long ul_conntrack) 284 283 { 285 284 struct nf_conn *ct = (void *)ul_conntrack; 285 + struct nf_conn_tstamp *tstamp; 286 + 287 + tstamp = nf_conn_tstamp_find(ct); 288 + if (tstamp && tstamp->stop == 0) 289 + tstamp->stop = ktime_to_ns(ktime_get_real()); 286 290 287 291 if (!test_bit(IPS_DYING_BIT, &ct->status) && 288 292 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { ··· 425 419 struct nf_conntrack_tuple_hash *h; 426 420 struct nf_conn *ct; 427 421 struct nf_conn_help *help; 422 + struct nf_conn_tstamp *tstamp; 428 423 struct hlist_nulls_node *n; 429 424 enum ip_conntrack_info ctinfo; 430 425 struct net *net; ··· 493 486 ct->timeout.expires += jiffies; 494 487 add_timer(&ct->timeout); 495 488 atomic_inc(&ct->ct_general.use); 496 - set_bit(IPS_CONFIRMED_BIT, &ct->status); 489 + ct->status |= IPS_CONFIRMED; 497 490 491 + /* set conntrack timestamp, if enabled. */ 492 + tstamp = nf_conn_tstamp_find(ct); 493 + if (tstamp) { 494 + if (skb->tstamp.tv64 == 0) 495 + __net_timestamp((struct sk_buff *)skb); 496 + 497 + tstamp->start = ktime_to_ns(skb->tstamp); 498 + } 498 499 /* Since the lookup is lockless, hash insertion must be done after 499 500 * starting the timer and setting the CONFIRMED bit. The RCU barriers 500 501 * guarantee that no other CPU can find the conntrack before the above ··· 670 655 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged. 
671 656 */ 672 657 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0, 673 - sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 658 + offsetof(struct nf_conn, proto) - 659 + offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX])); 674 660 spin_lock_init(&ct->lock); 675 661 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 676 662 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; ··· 761 745 } 762 746 763 747 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 748 + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 764 749 765 750 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL; 766 751 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0, ··· 1202 1185 static int kill_report(struct nf_conn *i, void *data) 1203 1186 { 1204 1187 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; 1188 + struct nf_conn_tstamp *tstamp; 1189 + 1190 + tstamp = nf_conn_tstamp_find(i); 1191 + if (tstamp && tstamp->stop == 0) 1192 + tstamp->stop = ktime_to_ns(ktime_get_real()); 1205 1193 1206 1194 /* If we fail to deliver the event, death_by_timeout() will retry */ 1207 1195 if (nf_conntrack_event_report(IPCT_DESTROY, i, ··· 1223 1201 return 1; 1224 1202 } 1225 1203 1226 - void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) 1204 + void nf_ct_free_hashtable(void *hash, unsigned int size) 1227 1205 { 1228 - if (vmalloced) 1206 + if (is_vmalloc_addr(hash)) 1229 1207 vfree(hash); 1230 1208 else 1231 1209 free_pages((unsigned long)hash, ··· 1292 1270 goto i_see_dead_people; 1293 1271 } 1294 1272 1295 - nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1296 - net->ct.htable_size); 1273 + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1297 1274 nf_conntrack_ecache_fini(net); 1298 1275 nf_conntrack_acct_fini(net); 1299 1276 nf_conntrack_expect_fini(net); ··· 1321 1300 } 1322 1301 } 1323 1302 1324 - void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) 1303 + void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls) 1325 1304 { 1326 1305 struct 
hlist_nulls_head *hash; 1327 1306 unsigned int nr_slots, i; 1328 1307 size_t sz; 1329 - 1330 - *vmalloced = 0; 1331 1308 1332 1309 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 1333 1310 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); ··· 1333 1314 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 1334 1315 get_order(sz)); 1335 1316 if (!hash) { 1336 - *vmalloced = 1; 1337 1317 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1338 1318 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, 1339 1319 PAGE_KERNEL); ··· 1348 1330 1349 1331 int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) 1350 1332 { 1351 - int i, bucket, vmalloced, old_vmalloced; 1333 + int i, bucket; 1352 1334 unsigned int hashsize, old_size; 1353 1335 struct hlist_nulls_head *hash, *old_hash; 1354 1336 struct nf_conntrack_tuple_hash *h; ··· 1365 1347 if (!hashsize) 1366 1348 return -EINVAL; 1367 1349 1368 - hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); 1350 + hash = nf_ct_alloc_hashtable(&hashsize, 1); 1369 1351 if (!hash) 1370 1352 return -ENOMEM; 1371 1353 ··· 1387 1369 } 1388 1370 } 1389 1371 old_size = init_net.ct.htable_size; 1390 - old_vmalloced = init_net.ct.hash_vmalloc; 1391 1372 old_hash = init_net.ct.hash; 1392 1373 1393 1374 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1394 - init_net.ct.hash_vmalloc = vmalloced; 1395 1375 init_net.ct.hash = hash; 1396 1376 spin_unlock_bh(&nf_conntrack_lock); 1397 1377 1398 - nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); 1378 + nf_ct_free_hashtable(old_hash, old_size); 1399 1379 return 0; 1400 1380 } 1401 1381 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); ··· 1506 1490 } 1507 1491 1508 1492 net->ct.htable_size = nf_conntrack_htable_size; 1509 - net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1510 - &net->ct.hash_vmalloc, 1); 1493 + net->ct.hash = 
nf_ct_alloc_hashtable(&net->ct.htable_size, 1); 1511 1494 if (!net->ct.hash) { 1512 1495 ret = -ENOMEM; 1513 1496 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); ··· 1518 1503 ret = nf_conntrack_acct_init(net); 1519 1504 if (ret < 0) 1520 1505 goto err_acct; 1506 + ret = nf_conntrack_tstamp_init(net); 1507 + if (ret < 0) 1508 + goto err_tstamp; 1521 1509 ret = nf_conntrack_ecache_init(net); 1522 1510 if (ret < 0) 1523 1511 goto err_ecache; ··· 1528 1510 return 0; 1529 1511 1530 1512 err_ecache: 1513 + nf_conntrack_tstamp_fini(net); 1514 + err_tstamp: 1531 1515 nf_conntrack_acct_fini(net); 1532 1516 err_acct: 1533 1517 nf_conntrack_expect_fini(net); 1534 1518 err_expect: 1535 - nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, 1536 - net->ct.htable_size); 1519 + nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1537 1520 err_hash: 1538 1521 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1539 1522 err_cache:

+20 -14

net/netfilter/nf_conntrack_expect.c

··· 319 319 const struct nf_conntrack_expect_policy *p; 320 320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); 321 321 322 - atomic_inc(&exp->use); 322 + /* two references : one for hash insert, one for the timer */ 323 + atomic_add(2, &exp->use); 323 324 324 325 if (master_help) { 325 326 hlist_add_head(&exp->lnode, &master_help->expectations); ··· 334 333 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 335 334 (unsigned long)exp); 336 335 if (master_help) { 337 - p = &master_help->helper->expect_policy[exp->class]; 336 + p = &rcu_dereference_protected( 337 + master_help->helper, 338 + lockdep_is_held(&nf_conntrack_lock) 339 + )->expect_policy[exp->class]; 338 340 exp->timeout.expires = jiffies + p->timeout * HZ; 339 341 } 340 342 add_timer(&exp->timeout); 341 343 342 - atomic_inc(&exp->use); 343 344 NF_CT_STAT_INC(net, expect_create); 344 345 } 345 346 ··· 372 369 if (!del_timer(&i->timeout)) 373 370 return 0; 374 371 375 - p = &master_help->helper->expect_policy[i->class]; 372 + p = &rcu_dereference_protected( 373 + master_help->helper, 374 + lockdep_is_held(&nf_conntrack_lock) 375 + )->expect_policy[i->class]; 376 376 i->timeout.expires = jiffies + p->timeout * HZ; 377 377 add_timer(&i->timeout); 378 378 return 1; ··· 413 407 } 414 408 /* Will be over limit? 
*/ 415 409 if (master_help) { 416 - p = &master_help->helper->expect_policy[expect->class]; 410 + p = &rcu_dereference_protected( 411 + master_help->helper, 412 + lockdep_is_held(&nf_conntrack_lock) 413 + )->expect_policy[expect->class]; 417 414 if (p->max_expected && 418 415 master_help->expecting[expect->class] >= p->max_expected) { 419 416 evict_oldest_expect(master, expect); ··· 487 478 struct hlist_node *n; 488 479 489 480 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { 490 - n = rcu_dereference(net->ct.expect_hash[st->bucket].first); 481 + n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 491 482 if (n) 492 483 return n; 493 484 } ··· 500 491 struct net *net = seq_file_net(seq); 501 492 struct ct_expect_iter_state *st = seq->private; 502 493 503 - head = rcu_dereference(head->next); 494 + head = rcu_dereference(hlist_next_rcu(head)); 504 495 while (head == NULL) { 505 496 if (++st->bucket >= nf_ct_expect_hsize) 506 497 return NULL; 507 - head = rcu_dereference(net->ct.expect_hash[st->bucket].first); 498 + head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket])); 508 499 } 509 500 return head; 510 501 } ··· 639 630 } 640 631 641 632 net->ct.expect_count = 0; 642 - net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 643 - &net->ct.expect_vmalloc, 0); 633 + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0); 644 634 if (net->ct.expect_hash == NULL) 645 635 goto err1; 646 636 ··· 661 653 if (net_eq(net, &init_net)) 662 654 kmem_cache_destroy(nf_ct_expect_cachep); 663 655 err2: 664 - nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, 665 - nf_ct_expect_hsize); 656 + nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); 666 657 err1: 667 658 return err; 668 659 } ··· 673 666 rcu_barrier(); /* Wait for call_rcu() before destroy */ 674 667 kmem_cache_destroy(nf_ct_expect_cachep); 675 668 } 676 - nf_ct_free_hashtable(net->ct.expect_hash, 
net->ct.expect_vmalloc, 677 - nf_ct_expect_hsize); 669 + nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize); 678 670 }

+6 -5

net/netfilter/nf_conntrack_extend.c

··· 140 140 /* This assumes that extended areas in conntrack for the types 141 141 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */ 142 142 for (i = min; i <= max; i++) { 143 - t1 = nf_ct_ext_types[i]; 143 + t1 = rcu_dereference_protected(nf_ct_ext_types[i], 144 + lockdep_is_held(&nf_ct_ext_type_mutex)); 144 145 if (!t1) 145 146 continue; 146 147 147 - t1->alloc_size = sizeof(struct nf_ct_ext) 148 - + ALIGN(sizeof(struct nf_ct_ext), t1->align) 149 - + t1->len; 148 + t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) + 149 + t1->len; 150 150 for (j = 0; j < NF_CT_EXT_NUM; j++) { 151 - t2 = nf_ct_ext_types[j]; 151 + t2 = rcu_dereference_protected(nf_ct_ext_types[j], 152 + lockdep_is_held(&nf_ct_ext_type_mutex)); 152 153 if (t2 == NULL || t2 == t1 || 153 154 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0) 154 155 continue;

+11 -9

net/netfilter/nf_conntrack_helper.c

··· 33 33 static struct hlist_head *nf_ct_helper_hash __read_mostly; 34 34 static unsigned int nf_ct_helper_hsize __read_mostly; 35 35 static unsigned int nf_ct_helper_count __read_mostly; 36 - static int nf_ct_helper_vmalloc; 37 36 38 37 39 38 /* Stupid hash, but collision free for the default registrations of the ··· 157 158 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); 158 159 struct nf_conn_help *help = nfct_help(ct); 159 160 160 - if (help && help->helper == me) { 161 + if (help && rcu_dereference_protected( 162 + help->helper, 163 + lockdep_is_held(&nf_conntrack_lock) 164 + ) == me) { 161 165 nf_conntrack_event(IPCT_HELPER, ct); 162 166 rcu_assign_pointer(help->helper, NULL); 163 167 } ··· 212 210 hlist_for_each_entry_safe(exp, n, next, 213 211 &net->ct.expect_hash[i], hnode) { 214 212 struct nf_conn_help *help = nfct_help(exp->master); 215 - if ((help->helper == me || exp->helper == me) && 213 + if ((rcu_dereference_protected( 214 + help->helper, 215 + lockdep_is_held(&nf_conntrack_lock) 216 + ) == me || exp->helper == me) && 216 217 del_timer(&exp->timeout)) { 217 218 nf_ct_unlink_expect(exp); 218 219 nf_ct_expect_put(exp); ··· 266 261 int err; 267 262 268 263 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 269 - nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 270 - &nf_ct_helper_vmalloc, 0); 264 + nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); 271 265 if (!nf_ct_helper_hash) 272 266 return -ENOMEM; 273 267 ··· 277 273 return 0; 278 274 279 275 err1: 280 - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 281 - nf_ct_helper_hsize); 276 + nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); 282 277 return err; 283 278 } 284 279 285 280 void nf_conntrack_helper_fini(void) 286 281 { 287 282 nf_ct_extend_unregister(&helper_extend); 288 - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc, 289 - nf_ct_helper_hsize); 283 + nf_ct_free_hashtable(nf_ct_helper_hash, 
nf_ct_helper_hsize); 290 284 }

+9 -65

net/netfilter/nf_conntrack_netbios_ns.c

··· 18 18 #include <linux/kernel.h> 19 19 #include <linux/module.h> 20 20 #include <linux/init.h> 21 - #include <linux/skbuff.h> 22 - #include <linux/netdevice.h> 23 - #include <linux/inetdevice.h> 24 - #include <linux/if_addr.h> 25 21 #include <linux/in.h> 26 - #include <linux/ip.h> 27 - #include <linux/netfilter.h> 28 - #include <net/route.h> 29 22 30 23 #include <net/netfilter/nf_conntrack.h> 31 24 #include <net/netfilter/nf_conntrack_helper.h> ··· 33 40 MODULE_ALIAS_NFCT_HELPER("netbios_ns"); 34 41 35 42 static unsigned int timeout __read_mostly = 3; 36 - module_param(timeout, uint, 0400); 43 + module_param(timeout, uint, S_IRUSR); 37 44 MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 38 - 39 - static int help(struct sk_buff *skb, unsigned int protoff, 40 - struct nf_conn *ct, enum ip_conntrack_info ctinfo) 41 - { 42 - struct nf_conntrack_expect *exp; 43 - struct iphdr *iph = ip_hdr(skb); 44 - struct rtable *rt = skb_rtable(skb); 45 - struct in_device *in_dev; 46 - __be32 mask = 0; 47 - 48 - /* we're only interested in locally generated packets */ 49 - if (skb->sk == NULL) 50 - goto out; 51 - if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) 52 - goto out; 53 - if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) 54 - goto out; 55 - 56 - rcu_read_lock(); 57 - in_dev = __in_dev_get_rcu(rt->dst.dev); 58 - if (in_dev != NULL) { 59 - for_primary_ifa(in_dev) { 60 - if (ifa->ifa_broadcast == iph->daddr) { 61 - mask = ifa->ifa_mask; 62 - break; 63 - } 64 - } endfor_ifa(in_dev); 65 - } 66 - rcu_read_unlock(); 67 - 68 - if (mask == 0) 69 - goto out; 70 - 71 - exp = nf_ct_expect_alloc(ct); 72 - if (exp == NULL) 73 - goto out; 74 - 75 - exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 76 - exp->tuple.src.u.udp.port = htons(NMBD_PORT); 77 - 78 - exp->mask.src.u3.ip = mask; 79 - exp->mask.src.u.udp.port = htons(0xFFFF); 80 - 81 - exp->expectfn = NULL; 82 - exp->flags = NF_CT_EXPECT_PERMANENT; 83 - exp->class = NF_CT_EXPECT_CLASS_DEFAULT; 84 - 
exp->helper = NULL; 85 - 86 - nf_ct_expect_related(exp); 87 - nf_ct_expect_put(exp); 88 - 89 - nf_ct_refresh(ct, skb, timeout * HZ); 90 - out: 91 - return NF_ACCEPT; 92 - } 93 45 94 46 static struct nf_conntrack_expect_policy exp_policy = { 95 47 .max_expected = 1, 96 48 }; 97 49 50 + static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, 51 + struct nf_conn *ct, enum ip_conntrack_info ctinfo) 52 + { 53 + return nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); 54 + } 55 + 98 56 static struct nf_conntrack_helper helper __read_mostly = { 99 57 .name = "netbios-ns", 100 - .tuple.src.l3num = AF_INET, 58 + .tuple.src.l3num = NFPROTO_IPV4, 101 59 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), 102 60 .tuple.dst.protonum = IPPROTO_UDP, 103 61 .me = THIS_MODULE, 104 - .help = help, 62 + .help = netbios_ns_help, 105 63 .expect_policy = &exp_policy, 106 64 }; 107 65

+46 -1

net/netfilter/nf_conntrack_netlink.c

··· 42 42 #include <net/netfilter/nf_conntrack_tuple.h> 43 43 #include <net/netfilter/nf_conntrack_acct.h> 44 44 #include <net/netfilter/nf_conntrack_zones.h> 45 + #include <net/netfilter/nf_conntrack_timestamp.h> 45 46 #ifdef CONFIG_NF_NAT_NEEDED 46 47 #include <net/netfilter/nf_nat_core.h> 47 48 #include <net/netfilter/nf_nat_protocol.h> ··· 231 230 return -1; 232 231 } 233 232 233 + static int 234 + ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) 235 + { 236 + struct nlattr *nest_count; 237 + const struct nf_conn_tstamp *tstamp; 238 + 239 + tstamp = nf_conn_tstamp_find(ct); 240 + if (!tstamp) 241 + return 0; 242 + 243 + nest_count = nla_nest_start(skb, CTA_TIMESTAMP | NLA_F_NESTED); 244 + if (!nest_count) 245 + goto nla_put_failure; 246 + 247 + NLA_PUT_BE64(skb, CTA_TIMESTAMP_START, cpu_to_be64(tstamp->start)); 248 + if (tstamp->stop != 0) { 249 + NLA_PUT_BE64(skb, CTA_TIMESTAMP_STOP, 250 + cpu_to_be64(tstamp->stop)); 251 + } 252 + nla_nest_end(skb, nest_count); 253 + 254 + return 0; 255 + 256 + nla_put_failure: 257 + return -1; 258 + } 259 + 234 260 #ifdef CONFIG_NF_CONNTRACK_MARK 235 261 static inline int 236 262 ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) ··· 432 404 ctnetlink_dump_timeout(skb, ct) < 0 || 433 405 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 434 406 ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 407 + ctnetlink_dump_timestamp(skb, ct) < 0 || 435 408 ctnetlink_dump_protoinfo(skb, ct) < 0 || 436 409 ctnetlink_dump_helpinfo(skb, ct) < 0 || 437 410 ctnetlink_dump_mark(skb, ct) < 0 || ··· 500 471 } 501 472 502 473 static inline size_t 474 + ctnetlink_timestamp_size(const struct nf_conn *ct) 475 + { 476 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 477 + if (!nf_ct_ext_exist(ct, NF_CT_EXT_TSTAMP)) 478 + return 0; 479 + return nla_total_size(0) + 2 * nla_total_size(sizeof(uint64_t)); 480 + #else 481 + return 0; 482 + #endif 483 + } 484 + 485 + static inline size_t 503 486 
ctnetlink_nlmsg_size(const struct nf_conn *ct) 504 487 { 505 488 return NLMSG_ALIGN(sizeof(struct nfgenmsg)) ··· 522 481 + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ 523 482 + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ 524 483 + ctnetlink_counters_size(ct) 484 + + ctnetlink_timestamp_size(ct) 525 485 + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ 526 486 + nla_total_size(0) /* CTA_PROTOINFO */ 527 487 + nla_total_size(0) /* CTA_HELP */ ··· 613 571 614 572 if (events & (1 << IPCT_DESTROY)) { 615 573 if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || 616 - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) 574 + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || 575 + ctnetlink_dump_timestamp(skb, ct) < 0) 617 576 goto nla_put_failure; 618 577 } else { 619 578 if (ctnetlink_dump_timeout(skb, ct) < 0) ··· 1400 1357 } 1401 1358 1402 1359 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1360 + nf_ct_tstamp_ext_add(ct, GFP_ATOMIC); 1403 1361 nf_ct_ecache_ext_add(ct, 0, 0, GFP_ATOMIC); 1404 1362 /* we must add conntrack extensions before confirmation. */ 1405 1363 ct->status |= IPS_CONFIRMED; ··· 1419 1375 } 1420 1376 #endif 1421 1377 1378 + memset(&ct->proto, 0, sizeof(ct->proto)); 1422 1379 if (cda[CTA_PROTOINFO]) { 1423 1380 err = ctnetlink_change_protoinfo(ct, cda); 1424 1381 if (err < 0)

+17 -7

net/netfilter/nf_conntrack_proto.c

··· 166 166 int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) 167 167 { 168 168 int ret = 0; 169 + struct nf_conntrack_l3proto *old; 169 170 170 171 if (proto->l3proto >= AF_MAX) 171 172 return -EBUSY; ··· 175 174 return -EINVAL; 176 175 177 176 mutex_lock(&nf_ct_proto_mutex); 178 - if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 177 + old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 178 + lockdep_is_held(&nf_ct_proto_mutex)); 179 + if (old != &nf_conntrack_l3proto_generic) { 179 180 ret = -EBUSY; 180 181 goto out_unlock; 181 182 } ··· 204 201 BUG_ON(proto->l3proto >= AF_MAX); 205 202 206 203 mutex_lock(&nf_ct_proto_mutex); 207 - BUG_ON(nf_ct_l3protos[proto->l3proto] != proto); 204 + BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 205 + lockdep_is_held(&nf_ct_proto_mutex) 206 + ) != proto); 208 207 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 209 208 &nf_conntrack_l3proto_generic); 210 209 nf_ct_l3proto_unregister_sysctl(proto); ··· 284 279 mutex_lock(&nf_ct_proto_mutex); 285 280 if (!nf_ct_protos[l4proto->l3proto]) { 286 281 /* l3proto may be loaded latter. */ 287 - struct nf_conntrack_l4proto **proto_array; 282 + struct nf_conntrack_l4proto __rcu **proto_array; 288 283 int i; 289 284 290 285 proto_array = kmalloc(MAX_NF_CT_PROTO * ··· 296 291 } 297 292 298 293 for (i = 0; i < MAX_NF_CT_PROTO; i++) 299 - proto_array[i] = &nf_conntrack_l4proto_generic; 294 + RCU_INIT_POINTER(proto_array[i], &nf_conntrack_l4proto_generic); 300 295 301 296 /* Before making proto_array visible to lockless readers, 302 297 * we must make sure its content is committed to memory. 
··· 304 299 smp_wmb(); 305 300 306 301 nf_ct_protos[l4proto->l3proto] = proto_array; 307 - } else if (nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != 308 - &nf_conntrack_l4proto_generic) { 302 + } else if (rcu_dereference_protected( 303 + nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 304 + lockdep_is_held(&nf_ct_proto_mutex) 305 + ) != &nf_conntrack_l4proto_generic) { 309 306 ret = -EBUSY; 310 307 goto out_unlock; 311 308 } ··· 338 331 BUG_ON(l4proto->l3proto >= PF_MAX); 339 332 340 333 mutex_lock(&nf_ct_proto_mutex); 341 - BUG_ON(nf_ct_protos[l4proto->l3proto][l4proto->l4proto] != l4proto); 334 + BUG_ON(rcu_dereference_protected( 335 + nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 336 + lockdep_is_held(&nf_ct_proto_mutex) 337 + ) != l4proto); 342 338 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 343 339 &nf_conntrack_l4proto_generic); 344 340 nf_ct_l4proto_unregister_sysctl(l4proto);

+3

net/netfilter/nf_conntrack_proto_dccp.c

··· 452 452 ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; 453 453 ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_SERVER; 454 454 ct->proto.dccp.state = CT_DCCP_NONE; 455 + ct->proto.dccp.last_pkt = DCCP_PKT_REQUEST; 456 + ct->proto.dccp.last_dir = IP_CT_DIR_ORIGINAL; 457 + ct->proto.dccp.handshake_seq = 0; 455 458 return true; 456 459 457 460 out_invalid:

+1

net/netfilter/nf_conntrack_proto_sctp.c

··· 413 413 test_bit(SCTP_CID_COOKIE_ACK, map)) 414 414 return false; 415 415 416 + memset(&ct->proto.sctp, 0, sizeof(ct->proto.sctp)); 416 417 new_state = SCTP_CONNTRACK_MAX; 417 418 for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { 418 419 /* Don't need lock here: this conntrack not in circulation yet */

+3 -11

net/netfilter/nf_conntrack_proto_tcp.c

··· 1066 1066 BUG_ON(th == NULL); 1067 1067 1068 1068 /* Don't need lock here: this conntrack not in circulation yet */ 1069 - new_state 1070 - = tcp_conntracks[0][get_conntrack_index(th)] 1071 - [TCP_CONNTRACK_NONE]; 1069 + new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE]; 1072 1070 1073 1071 /* Invalid: delete conntrack */ 1074 1072 if (new_state >= TCP_CONNTRACK_MAX) { ··· 1075 1077 } 1076 1078 1077 1079 if (new_state == TCP_CONNTRACK_SYN_SENT) { 1080 + memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); 1078 1081 /* SYN packet */ 1079 1082 ct->proto.tcp.seen[0].td_end = 1080 1083 segment_seq_plus_len(ntohl(th->seq), skb->len, ··· 1087 1088 ct->proto.tcp.seen[0].td_end; 1088 1089 1089 1090 tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]); 1090 - ct->proto.tcp.seen[1].flags = 0; 1091 1091 } else if (nf_ct_tcp_loose == 0) { 1092 1092 /* Don't try to pick up connections. */ 1093 1093 return false; 1094 1094 } else { 1095 + memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp)); 1095 1096 /* 1096 1097 * We are in the middle of a connection, 1097 1098 * its history is lost for us. ··· 1106 1107 ct->proto.tcp.seen[0].td_maxend = 1107 1108 ct->proto.tcp.seen[0].td_end + 1108 1109 ct->proto.tcp.seen[0].td_maxwin; 1109 - ct->proto.tcp.seen[0].td_scale = 0; 1110 1110 1111 1111 /* We assume SACK and liberal window checking to handle 1112 1112 * window scaling */ ··· 1114 1116 IP_CT_TCP_FLAG_BE_LIBERAL; 1115 1117 } 1116 1118 1117 - ct->proto.tcp.seen[1].td_end = 0; 1118 - ct->proto.tcp.seen[1].td_maxend = 0; 1119 - ct->proto.tcp.seen[1].td_maxwin = 0; 1120 - ct->proto.tcp.seen[1].td_scale = 0; 1121 - 1122 1119 /* tcp_packet will set them */ 1123 - ct->proto.tcp.state = TCP_CONNTRACK_NONE; 1124 1120 ct->proto.tcp.last_index = TCP_NONE_SET; 1125 1121 1126 1122 pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "

+77

net/netfilter/nf_conntrack_snmp.c

··· 1 + /* 2 + * SNMP service broadcast connection tracking helper 3 + * 4 + * (c) 2011 Jiri Olsa <jolsa@redhat.com> 5 + * 6 + * This program is free software; you can redistribute it and/or 7 + * modify it under the terms of the GNU General Public License 8 + * as published by the Free Software Foundation; either version 9 + * 2 of the License, or (at your option) any later version. 10 + */ 11 + #include <linux/kernel.h> 12 + #include <linux/module.h> 13 + #include <linux/init.h> 14 + #include <linux/in.h> 15 + 16 + #include <net/netfilter/nf_conntrack.h> 17 + #include <net/netfilter/nf_conntrack_helper.h> 18 + #include <net/netfilter/nf_conntrack_expect.h> 19 + 20 + #define SNMP_PORT 161 21 + 22 + MODULE_AUTHOR("Jiri Olsa <jolsa@redhat.com>"); 23 + MODULE_DESCRIPTION("SNMP service broadcast connection tracking helper"); 24 + MODULE_LICENSE("GPL"); 25 + MODULE_ALIAS_NFCT_HELPER("snmp"); 26 + 27 + static unsigned int timeout __read_mostly = 30; 28 + module_param(timeout, uint, S_IRUSR); 29 + MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); 30 + 31 + int (*nf_nat_snmp_hook)(struct sk_buff *skb, 32 + unsigned int protoff, 33 + struct nf_conn *ct, 34 + enum ip_conntrack_info ctinfo); 35 + EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); 36 + 37 + static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, 38 + struct nf_conn *ct, enum ip_conntrack_info ctinfo) 39 + { 40 + typeof(nf_nat_snmp_hook) nf_nat_snmp; 41 + 42 + nf_conntrack_broadcast_help(skb, protoff, ct, ctinfo, timeout); 43 + 44 + nf_nat_snmp = rcu_dereference(nf_nat_snmp_hook); 45 + if (nf_nat_snmp && ct->status & IPS_NAT_MASK) 46 + return nf_nat_snmp(skb, protoff, ct, ctinfo); 47 + 48 + return NF_ACCEPT; 49 + } 50 + 51 + static struct nf_conntrack_expect_policy exp_policy = { 52 + .max_expected = 1, 53 + }; 54 + 55 + static struct nf_conntrack_helper helper __read_mostly = { 56 + .name = "snmp", 57 + .tuple.src.l3num = NFPROTO_IPV4, 58 + .tuple.src.u.udp.port = 
cpu_to_be16(SNMP_PORT), 59 + .tuple.dst.protonum = IPPROTO_UDP, 60 + .me = THIS_MODULE, 61 + .help = snmp_conntrack_help, 62 + .expect_policy = &exp_policy, 63 + }; 64 + 65 + static int __init nf_conntrack_snmp_init(void) 66 + { 67 + exp_policy.timeout = timeout; 68 + return nf_conntrack_helper_register(&helper); 69 + } 70 + 71 + static void __exit nf_conntrack_snmp_fini(void) 72 + { 73 + nf_conntrack_helper_unregister(&helper); 74 + } 75 + 76 + module_init(nf_conntrack_snmp_init); 77 + module_exit(nf_conntrack_snmp_fini);

+42 -3

net/netfilter/nf_conntrack_standalone.c

··· 29 29 #include <net/netfilter/nf_conntrack_helper.h> 30 30 #include <net/netfilter/nf_conntrack_acct.h> 31 31 #include <net/netfilter/nf_conntrack_zones.h> 32 + #include <net/netfilter/nf_conntrack_timestamp.h> 33 + #include <linux/rculist_nulls.h> 32 34 33 35 MODULE_LICENSE("GPL"); 34 36 ··· 47 45 struct ct_iter_state { 48 46 struct seq_net_private p; 49 47 unsigned int bucket; 48 + u_int64_t time_now; 50 49 }; 51 50 52 51 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) ··· 59 56 for (st->bucket = 0; 60 57 st->bucket < net->ct.htable_size; 61 58 st->bucket++) { 62 - n = rcu_dereference(net->ct.hash[st->bucket].first); 59 + n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 63 60 if (!is_a_nulls(n)) 64 61 return n; 65 62 } ··· 72 69 struct net *net = seq_file_net(seq); 73 70 struct ct_iter_state *st = seq->private; 74 71 75 - head = rcu_dereference(head->next); 72 + head = rcu_dereference(hlist_nulls_next_rcu(head)); 76 73 while (is_a_nulls(head)) { 77 74 if (likely(get_nulls_value(head) == st->bucket)) { 78 75 if (++st->bucket >= net->ct.htable_size) 79 76 return NULL; 80 77 } 81 - head = rcu_dereference(net->ct.hash[st->bucket].first); 78 + head = rcu_dereference( 79 + hlist_nulls_first_rcu( 80 + &net->ct.hash[st->bucket])); 82 81 } 83 82 return head; 84 83 } ··· 98 93 static void *ct_seq_start(struct seq_file *seq, loff_t *pos) 99 94 __acquires(RCU) 100 95 { 96 + struct ct_iter_state *st = seq->private; 97 + 98 + st->time_now = ktime_to_ns(ktime_get_real()); 101 99 rcu_read_lock(); 102 100 return ct_get_idx(seq, *pos); 103 101 } ··· 135 127 } 136 128 #else 137 129 static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) 130 + { 131 + return 0; 132 + } 133 + #endif 134 + 135 + #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP 136 + static int ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 137 + { 138 + struct ct_iter_state *st = s->private; 139 + struct nf_conn_tstamp *tstamp; 140 + s64 
delta_time; 141 + 142 + tstamp = nf_conn_tstamp_find(ct); 143 + if (tstamp) { 144 + delta_time = st->time_now - tstamp->start; 145 + if (delta_time > 0) 146 + delta_time = div_s64(delta_time, NSEC_PER_SEC); 147 + else 148 + delta_time = 0; 149 + 150 + return seq_printf(s, "delta-time=%llu ", 151 + (unsigned long long)delta_time); 152 + } 153 + return 0; 154 + } 155 + #else 156 + static inline int 157 + ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) 138 158 { 139 159 return 0; 140 160 } ··· 235 199 if (seq_printf(s, "zone=%u ", nf_ct_zone(ct))) 236 200 goto release; 237 201 #endif 202 + 203 + if (ct_show_delta_time(s, ct)) 204 + goto release; 238 205 239 206 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 240 207 goto release;

+120

net/netfilter/nf_conntrack_timestamp.c

··· 1 + /* 2 + * (C) 2010 Pablo Neira Ayuso <pablo@netfilter.org> 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation (or any later at your option). 7 + */ 8 + 9 + #include <linux/netfilter.h> 10 + #include <linux/slab.h> 11 + #include <linux/kernel.h> 12 + #include <linux/moduleparam.h> 13 + 14 + #include <net/netfilter/nf_conntrack.h> 15 + #include <net/netfilter/nf_conntrack_extend.h> 16 + #include <net/netfilter/nf_conntrack_timestamp.h> 17 + 18 + static int nf_ct_tstamp __read_mostly; 19 + 20 + module_param_named(tstamp, nf_ct_tstamp, bool, 0644); 21 + MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); 22 + 23 + #ifdef CONFIG_SYSCTL 24 + static struct ctl_table tstamp_sysctl_table[] = { 25 + { 26 + .procname = "nf_conntrack_timestamp", 27 + .data = &init_net.ct.sysctl_tstamp, 28 + .maxlen = sizeof(unsigned int), 29 + .mode = 0644, 30 + .proc_handler = proc_dointvec, 31 + }, 32 + {} 33 + }; 34 + #endif /* CONFIG_SYSCTL */ 35 + 36 + static struct nf_ct_ext_type tstamp_extend __read_mostly = { 37 + .len = sizeof(struct nf_conn_tstamp), 38 + .align = __alignof__(struct nf_conn_tstamp), 39 + .id = NF_CT_EXT_TSTAMP, 40 + }; 41 + 42 + #ifdef CONFIG_SYSCTL 43 + static int nf_conntrack_tstamp_init_sysctl(struct net *net) 44 + { 45 + struct ctl_table *table; 46 + 47 + table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table), 48 + GFP_KERNEL); 49 + if (!table) 50 + goto out; 51 + 52 + table[0].data = &net->ct.sysctl_tstamp; 53 + 54 + net->ct.tstamp_sysctl_header = register_net_sysctl_table(net, 55 + nf_net_netfilter_sysctl_path, table); 56 + if (!net->ct.tstamp_sysctl_header) { 57 + printk(KERN_ERR "nf_ct_tstamp: can't register to sysctl.\n"); 58 + goto out_register; 59 + } 60 + return 0; 61 + 62 + out_register: 63 + kfree(table); 64 + out: 65 + return -ENOMEM; 66 + } 67 + 68 + static void 
nf_conntrack_tstamp_fini_sysctl(struct net *net) 69 + { 70 + struct ctl_table *table; 71 + 72 + table = net->ct.tstamp_sysctl_header->ctl_table_arg; 73 + unregister_net_sysctl_table(net->ct.tstamp_sysctl_header); 74 + kfree(table); 75 + } 76 + #else 77 + static int nf_conntrack_tstamp_init_sysctl(struct net *net) 78 + { 79 + return 0; 80 + } 81 + 82 + static void nf_conntrack_tstamp_fini_sysctl(struct net *net) 83 + { 84 + } 85 + #endif 86 + 87 + int nf_conntrack_tstamp_init(struct net *net) 88 + { 89 + int ret; 90 + 91 + net->ct.sysctl_tstamp = nf_ct_tstamp; 92 + 93 + if (net_eq(net, &init_net)) { 94 + ret = nf_ct_extend_register(&tstamp_extend); 95 + if (ret < 0) { 96 + printk(KERN_ERR "nf_ct_tstamp: Unable to register " 97 + "extension\n"); 98 + goto out_extend_register; 99 + } 100 + } 101 + 102 + ret = nf_conntrack_tstamp_init_sysctl(net); 103 + if (ret < 0) 104 + goto out_sysctl; 105 + 106 + return 0; 107 + 108 + out_sysctl: 109 + if (net_eq(net, &init_net)) 110 + nf_ct_extend_unregister(&tstamp_extend); 111 + out_extend_register: 112 + return ret; 113 + } 114 + 115 + void nf_conntrack_tstamp_fini(struct net *net) 116 + { 117 + nf_conntrack_tstamp_fini_sysctl(net); 118 + if (net_eq(net, &init_net)) 119 + nf_ct_extend_unregister(&tstamp_extend); 120 + }

+4 -2

net/netfilter/nf_log.c

··· 161 161 struct nf_logger *t; 162 162 int ret; 163 163 164 - logger = nf_loggers[*pos]; 164 + logger = rcu_dereference_protected(nf_loggers[*pos], 165 + lockdep_is_held(&nf_log_mutex)); 165 166 166 167 if (!logger) 167 168 ret = seq_printf(s, "%2lld NONE (", *pos); ··· 250 249 mutex_unlock(&nf_log_mutex); 251 250 } else { 252 251 mutex_lock(&nf_log_mutex); 253 - logger = nf_loggers[tindex]; 252 + logger = rcu_dereference_protected(nf_loggers[tindex], 253 + lockdep_is_held(&nf_log_mutex)); 254 254 if (!logger) 255 255 table->data = "NONE"; 256 256 else

+59 -23

net/netfilter/nf_queue.c

··· 27 27 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 28 28 { 29 29 int ret; 30 + const struct nf_queue_handler *old; 30 31 31 32 if (pf >= ARRAY_SIZE(queue_handler)) 32 33 return -EINVAL; 33 34 34 35 mutex_lock(&queue_handler_mutex); 35 - if (queue_handler[pf] == qh) 36 + old = rcu_dereference_protected(queue_handler[pf], 37 + lockdep_is_held(&queue_handler_mutex)); 38 + if (old == qh) 36 39 ret = -EEXIST; 37 - else if (queue_handler[pf]) 40 + else if (old) 38 41 ret = -EBUSY; 39 42 else { 40 43 rcu_assign_pointer(queue_handler[pf], qh); ··· 52 49 /* The caller must flush their queue before this */ 53 50 int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) 54 51 { 52 + const struct nf_queue_handler *old; 53 + 55 54 if (pf >= ARRAY_SIZE(queue_handler)) 56 55 return -EINVAL; 57 56 58 57 mutex_lock(&queue_handler_mutex); 59 - if (queue_handler[pf] && queue_handler[pf] != qh) { 58 + old = rcu_dereference_protected(queue_handler[pf], 59 + lockdep_is_held(&queue_handler_mutex)); 60 + if (old && old != qh) { 60 61 mutex_unlock(&queue_handler_mutex); 61 62 return -EINVAL; 62 63 } ··· 80 73 81 74 mutex_lock(&queue_handler_mutex); 82 75 for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { 83 - if (queue_handler[pf] == qh) 76 + if (rcu_dereference_protected( 77 + queue_handler[pf], 78 + lockdep_is_held(&queue_handler_mutex) 79 + ) == qh) 84 80 rcu_assign_pointer(queue_handler[pf], NULL); 85 81 } 86 82 mutex_unlock(&queue_handler_mutex); ··· 125 115 int (*okfn)(struct sk_buff *), 126 116 unsigned int queuenum) 127 117 { 128 - int status; 118 + int status = -ENOENT; 129 119 struct nf_queue_entry *entry = NULL; 130 120 #ifdef CONFIG_BRIDGE_NETFILTER 131 121 struct net_device *physindev; ··· 138 128 rcu_read_lock(); 139 129 140 130 qh = rcu_dereference(queue_handler[pf]); 141 - if (!qh) 131 + if (!qh) { 132 + status = -ESRCH; 142 133 goto err_unlock; 134 + } 143 135 144 136 afinfo = nf_get_afinfo(pf); 145 137 if 
(!afinfo) 146 138 goto err_unlock; 147 139 148 140 entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); 149 - if (!entry) 141 + if (!entry) { 142 + status = -ENOMEM; 150 143 goto err_unlock; 144 + } 151 145 152 146 *entry = (struct nf_queue_entry) { 153 147 .skb = skb, ··· 165 151 166 152 /* If it's going away, ignore hook. */ 167 153 if (!try_module_get(entry->elem->owner)) { 168 - rcu_read_unlock(); 169 - kfree(entry); 170 - return 0; 154 + status = -ECANCELED; 155 + goto err_unlock; 171 156 } 172 - 173 157 /* Bump dev refs so they don't vanish while packet is out */ 174 158 if (indev) 175 159 dev_hold(indev); ··· 194 182 goto err; 195 183 } 196 184 197 - return 1; 185 + return 0; 198 186 199 187 err_unlock: 200 188 rcu_read_unlock(); 201 189 err: 202 - kfree_skb(skb); 203 190 kfree(entry); 204 - return 1; 191 + return status; 205 192 } 206 193 207 194 int nf_queue(struct sk_buff *skb, ··· 212 201 unsigned int queuenum) 213 202 { 214 203 struct sk_buff *segs; 204 + int err; 205 + unsigned int queued; 215 206 216 207 if (!skb_is_gso(skb)) 217 208 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, ··· 229 216 } 230 217 231 218 segs = skb_gso_segment(skb, 0); 232 - kfree_skb(skb); 219 + /* Does not use PTR_ERR to limit the number of error codes that can be 220 + * returned by nf_queue. For instance, callers rely on -ECANCELED to mean 221 + * 'ignore this hook'. 
222 + */ 233 223 if (IS_ERR(segs)) 234 - return 1; 224 + return -EINVAL; 235 225 226 + queued = 0; 227 + err = 0; 236 228 do { 237 229 struct sk_buff *nskb = segs->next; 238 230 239 231 segs->next = NULL; 240 - if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, 241 - queuenum)) 232 + if (err == 0) 233 + err = __nf_queue(segs, elem, pf, hook, indev, 234 + outdev, okfn, queuenum); 235 + if (err == 0) 236 + queued++; 237 + else 242 238 kfree_skb(segs); 243 239 segs = nskb; 244 240 } while (segs); 245 - return 1; 241 + 242 + /* also free orig skb if only some segments were queued */ 243 + if (unlikely(err && queued)) 244 + err = 0; 245 + if (err == 0) 246 + kfree_skb(skb); 247 + return err; 246 248 } 247 249 248 250 void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) ··· 265 237 struct sk_buff *skb = entry->skb; 266 238 struct list_head *elem = &entry->elem->list; 267 239 const struct nf_afinfo *afinfo; 240 + int err; 268 241 269 242 rcu_read_lock(); 270 243 ··· 299 270 local_bh_enable(); 300 271 break; 301 272 case NF_QUEUE: 302 - if (!__nf_queue(skb, elem, entry->pf, entry->hook, 303 - entry->indev, entry->outdev, entry->okfn, 304 - verdict >> NF_VERDICT_BITS)) 305 - goto next_hook; 273 + err = __nf_queue(skb, elem, entry->pf, entry->hook, 274 + entry->indev, entry->outdev, entry->okfn, 275 + verdict >> NF_VERDICT_QBITS); 276 + if (err < 0) { 277 + if (err == -ECANCELED) 278 + goto next_hook; 279 + if (err == -ESRCH && 280 + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) 281 + goto next_hook; 282 + kfree_skb(skb); 283 + } 306 284 break; 307 285 case NF_STOLEN: 308 286 default:

+3 -3

net/netfilter/nfnetlink_log.c

··· 874 874 875 875 for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { 876 876 if (!hlist_empty(&instance_table[st->bucket])) 877 - return rcu_dereference_bh(instance_table[st->bucket].first); 877 + return rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 878 878 } 879 879 return NULL; 880 880 } 881 881 882 882 static struct hlist_node *get_next(struct iter_state *st, struct hlist_node *h) 883 883 { 884 - h = rcu_dereference_bh(h->next); 884 + h = rcu_dereference_bh(hlist_next_rcu(h)); 885 885 while (!h) { 886 886 if (++st->bucket >= INSTANCE_BUCKETS) 887 887 return NULL; 888 888 889 - h = rcu_dereference_bh(instance_table[st->bucket].first); 889 + h = rcu_dereference_bh(hlist_first_rcu(&instance_table[st->bucket])); 890 890 } 891 891 return h; 892 892 }

+14 -8

net/netfilter/nfnetlink_queue.c

··· 387 387 { 388 388 struct sk_buff *nskb; 389 389 struct nfqnl_instance *queue; 390 - int err; 390 + int err = -ENOBUFS; 391 391 392 392 /* rcu_read_lock()ed by nf_hook_slow() */ 393 393 queue = instance_lookup(queuenum); 394 - if (!queue) 394 + if (!queue) { 395 + err = -ESRCH; 395 396 goto err_out; 397 + } 396 398 397 - if (queue->copy_mode == NFQNL_COPY_NONE) 399 + if (queue->copy_mode == NFQNL_COPY_NONE) { 400 + err = -EINVAL; 398 401 goto err_out; 402 + } 399 403 400 404 nskb = nfqnl_build_packet_message(queue, entry); 401 - if (nskb == NULL) 405 + if (nskb == NULL) { 406 + err = -ENOMEM; 402 407 goto err_out; 403 - 408 + } 404 409 spin_lock_bh(&queue->lock); 405 410 406 - if (!queue->peer_pid) 411 + if (!queue->peer_pid) { 412 + err = -EINVAL; 407 413 goto err_out_free_nskb; 408 - 414 + } 409 415 if (queue->queue_total >= queue->queue_maxlen) { 410 416 queue->queue_dropped++; 411 417 if (net_ratelimit()) ··· 438 432 err_out_unlock: 439 433 spin_unlock_bh(&queue->lock); 440 434 err_out: 441 - return -1; 435 + return err; 442 436 } 443 437 444 438 static int

+64 -34

net/netfilter/x_tables.c

··· 23 23 #include <linux/mutex.h> 24 24 #include <linux/mm.h> 25 25 #include <linux/slab.h> 26 + #include <linux/audit.h> 26 27 #include <net/net_namespace.h> 27 28 28 29 #include <linux/netfilter/x_tables.h> ··· 39 38 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) 40 39 41 40 struct compat_delta { 42 - struct compat_delta *next; 43 - unsigned int offset; 44 - int delta; 41 + unsigned int offset; /* offset in kernel */ 42 + int delta; /* delta in 32bit user land */ 45 43 }; 46 44 47 45 struct xt_af { ··· 49 49 struct list_head target; 50 50 #ifdef CONFIG_COMPAT 51 51 struct mutex compat_mutex; 52 - struct compat_delta *compat_offsets; 52 + struct compat_delta *compat_tab; 53 + unsigned int number; /* number of slots in compat_tab[] */ 54 + unsigned int cur; /* number of used slots in compat_tab[] */ 53 55 #endif 54 56 }; 55 57 ··· 416 414 EXPORT_SYMBOL_GPL(xt_check_match); 417 415 418 416 #ifdef CONFIG_COMPAT 419 - int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) 417 + int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta) 420 418 { 421 - struct compat_delta *tmp; 419 + struct xt_af *xp = &xt[af]; 422 420 423 - tmp = kmalloc(sizeof(struct compat_delta), GFP_KERNEL); 424 - if (!tmp) 425 - return -ENOMEM; 426 - 427 - tmp->offset = offset; 428 - tmp->delta = delta; 429 - 430 - if (xt[af].compat_offsets) { 431 - tmp->next = xt[af].compat_offsets->next; 432 - xt[af].compat_offsets->next = tmp; 433 - } else { 434 - xt[af].compat_offsets = tmp; 435 - tmp->next = NULL; 421 + if (!xp->compat_tab) { 422 + if (!xp->number) 423 + return -EINVAL; 424 + xp->compat_tab = vmalloc(sizeof(struct compat_delta) * xp->number); 425 + if (!xp->compat_tab) 426 + return -ENOMEM; 427 + xp->cur = 0; 436 428 } 429 + 430 + if (xp->cur >= xp->number) 431 + return -EINVAL; 432 + 433 + if (xp->cur) 434 + delta += xp->compat_tab[xp->cur - 1].delta; 435 + xp->compat_tab[xp->cur].offset = offset; 436 + xp->compat_tab[xp->cur].delta = 
delta; 437 + xp->cur++; 437 438 return 0; 438 439 } 439 440 EXPORT_SYMBOL_GPL(xt_compat_add_offset); 440 441 441 442 void xt_compat_flush_offsets(u_int8_t af) 442 443 { 443 - struct compat_delta *tmp, *next; 444 - 445 - if (xt[af].compat_offsets) { 446 - for (tmp = xt[af].compat_offsets; tmp; tmp = next) { 447 - next = tmp->next; 448 - kfree(tmp); 449 - } 450 - xt[af].compat_offsets = NULL; 444 + if (xt[af].compat_tab) { 445 + vfree(xt[af].compat_tab); 446 + xt[af].compat_tab = NULL; 447 + xt[af].number = 0; 451 448 } 452 449 } 453 450 EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); 454 451 455 452 int xt_compat_calc_jump(u_int8_t af, unsigned int offset) 456 453 { 457 - struct compat_delta *tmp; 458 - int delta; 454 + struct compat_delta *tmp = xt[af].compat_tab; 455 + int mid, left = 0, right = xt[af].cur - 1; 459 456 460 - for (tmp = xt[af].compat_offsets, delta = 0; tmp; tmp = tmp->next) 461 - if (tmp->offset < offset) 462 - delta += tmp->delta; 463 - return delta; 457 + while (left <= right) { 458 + mid = (left + right) >> 1; 459 + if (offset > tmp[mid].offset) 460 + left = mid + 1; 461 + else if (offset < tmp[mid].offset) 462 + right = mid - 1; 463 + else 464 + return mid ? tmp[mid - 1].delta : 0; 465 + } 466 + WARN_ON_ONCE(1); 467 + return 0; 464 468 } 465 469 EXPORT_SYMBOL_GPL(xt_compat_calc_jump); 470 + 471 + void xt_compat_init_offsets(u_int8_t af, unsigned int number) 472 + { 473 + xt[af].number = number; 474 + xt[af].cur = 0; 475 + } 476 + EXPORT_SYMBOL(xt_compat_init_offsets); 466 477 467 478 int xt_compat_match_offset(const struct xt_match *match) 468 479 { ··· 834 819 * during the get_counters() routine. 
835 820 */ 836 821 local_bh_enable(); 822 + 823 + #ifdef CONFIG_AUDIT 824 + if (audit_enabled) { 825 + struct audit_buffer *ab; 826 + 827 + ab = audit_log_start(current->audit_context, GFP_KERNEL, 828 + AUDIT_NETFILTER_CFG); 829 + if (ab) { 830 + audit_log_format(ab, "table=%s family=%u entries=%u", 831 + table->name, table->af, 832 + private->number); 833 + audit_log_end(ab); 834 + } 835 + } 836 + #endif 837 837 838 838 return private; 839 839 } ··· 1368 1338 mutex_init(&xt[i].mutex); 1369 1339 #ifdef CONFIG_COMPAT 1370 1340 mutex_init(&xt[i].compat_mutex); 1371 - xt[i].compat_offsets = NULL; 1341 + xt[i].compat_tab = NULL; 1372 1342 #endif 1373 1343 INIT_LIST_HEAD(&xt[i].target); 1374 1344 INIT_LIST_HEAD(&xt[i].match);

+204

net/netfilter/xt_AUDIT.c

··· 1 + /* 2 + * Creates audit record for dropped/accepted packets 3 + * 4 + * (C) 2010-2011 Thomas Graf <tgraf@redhat.com> 5 + * (C) 2010-2011 Red Hat, Inc. 6 + * 7 + * This program is free software; you can redistribute it and/or modify 8 + * it under the terms of the GNU General Public License version 2 as 9 + * published by the Free Software Foundation. 10 + */ 11 + 12 + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 + 14 + #include <linux/audit.h> 15 + #include <linux/module.h> 16 + #include <linux/skbuff.h> 17 + #include <linux/tcp.h> 18 + #include <linux/udp.h> 19 + #include <linux/if_arp.h> 20 + #include <linux/netfilter/x_tables.h> 21 + #include <linux/netfilter/xt_AUDIT.h> 22 + #include <net/ipv6.h> 23 + #include <net/ip.h> 24 + 25 + MODULE_LICENSE("GPL"); 26 + MODULE_AUTHOR("Thomas Graf <tgraf@redhat.com>"); 27 + MODULE_DESCRIPTION("Xtables: creates audit records for dropped/accepted packets"); 28 + MODULE_ALIAS("ipt_AUDIT"); 29 + MODULE_ALIAS("ip6t_AUDIT"); 30 + MODULE_ALIAS("ebt_AUDIT"); 31 + MODULE_ALIAS("arpt_AUDIT"); 32 + 33 + static void audit_proto(struct audit_buffer *ab, struct sk_buff *skb, 34 + unsigned int proto, unsigned int offset) 35 + { 36 + switch (proto) { 37 + case IPPROTO_TCP: 38 + case IPPROTO_UDP: 39 + case IPPROTO_UDPLITE: { 40 + const __be16 *pptr; 41 + __be16 _ports[2]; 42 + 43 + pptr = skb_header_pointer(skb, offset, sizeof(_ports), _ports); 44 + if (pptr == NULL) { 45 + audit_log_format(ab, " truncated=1"); 46 + return; 47 + } 48 + 49 + audit_log_format(ab, " sport=%hu dport=%hu", 50 + ntohs(pptr[0]), ntohs(pptr[1])); 51 + } 52 + break; 53 + 54 + case IPPROTO_ICMP: 55 + case IPPROTO_ICMPV6: { 56 + const u8 *iptr; 57 + u8 _ih[2]; 58 + 59 + iptr = skb_header_pointer(skb, offset, sizeof(_ih), &_ih); 60 + if (iptr == NULL) { 61 + audit_log_format(ab, " truncated=1"); 62 + return; 63 + } 64 + 65 + audit_log_format(ab, " icmptype=%hhu icmpcode=%hhu", 66 + iptr[0], iptr[1]); 67 + 68 + } 69 + break; 70 + } 71 + } 72 + 73 + static void 
audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) 74 + { 75 + struct iphdr _iph; 76 + const struct iphdr *ih; 77 + 78 + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); 79 + if (!ih) { 80 + audit_log_format(ab, " truncated=1"); 81 + return; 82 + } 83 + 84 + audit_log_format(ab, " saddr=%pI4 daddr=%pI4 ipid=%hu proto=%hhu", 85 + &ih->saddr, &ih->daddr, ntohs(ih->id), ih->protocol); 86 + 87 + if (ntohs(ih->frag_off) & IP_OFFSET) { 88 + audit_log_format(ab, " frag=1"); 89 + return; 90 + } 91 + 92 + audit_proto(ab, skb, ih->protocol, ih->ihl * 4); 93 + } 94 + 95 + static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) 96 + { 97 + struct ipv6hdr _ip6h; 98 + const struct ipv6hdr *ih; 99 + u8 nexthdr; 100 + int offset; 101 + 102 + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); 103 + if (!ih) { 104 + audit_log_format(ab, " truncated=1"); 105 + return; 106 + } 107 + 108 + nexthdr = ih->nexthdr; 109 + offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), 110 + &nexthdr); 111 + 112 + audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", 113 + &ih->saddr, &ih->daddr, nexthdr); 114 + 115 + if (offset) 116 + audit_proto(ab, skb, nexthdr, offset); 117 + } 118 + 119 + static unsigned int 120 + audit_tg(struct sk_buff *skb, const struct xt_action_param *par) 121 + { 122 + const struct xt_audit_info *info = par->targinfo; 123 + struct audit_buffer *ab; 124 + 125 + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); 126 + if (ab == NULL) 127 + goto errout; 128 + 129 + audit_log_format(ab, "action=%hhu hook=%u len=%u inif=%s outif=%s", 130 + info->type, par->hooknum, skb->len, 131 + par->in ? par->in->name : "?", 132 + par->out ? 
par->out->name : "?"); 133 + 134 + if (skb->mark) 135 + audit_log_format(ab, " mark=%#x", skb->mark); 136 + 137 + if (skb->dev && skb->dev->type == ARPHRD_ETHER) { 138 + audit_log_format(ab, " smac=%pM dmac=%pM macproto=0x%04x", 139 + eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 140 + ntohs(eth_hdr(skb)->h_proto)); 141 + 142 + if (par->family == NFPROTO_BRIDGE) { 143 + switch (eth_hdr(skb)->h_proto) { 144 + case __constant_htons(ETH_P_IP): 145 + audit_ip4(ab, skb); 146 + break; 147 + 148 + case __constant_htons(ETH_P_IPV6): 149 + audit_ip6(ab, skb); 150 + break; 151 + } 152 + } 153 + } 154 + 155 + switch (par->family) { 156 + case NFPROTO_IPV4: 157 + audit_ip4(ab, skb); 158 + break; 159 + 160 + case NFPROTO_IPV6: 161 + audit_ip6(ab, skb); 162 + break; 163 + } 164 + 165 + audit_log_end(ab); 166 + 167 + errout: 168 + return XT_CONTINUE; 169 + } 170 + 171 + static int audit_tg_check(const struct xt_tgchk_param *par) 172 + { 173 + const struct xt_audit_info *info = par->targinfo; 174 + 175 + if (info->type > XT_AUDIT_TYPE_MAX) { 176 + pr_info("Audit type out of range (valid range: 0..%hhu)\n", 177 + XT_AUDIT_TYPE_MAX); 178 + return -ERANGE; 179 + } 180 + 181 + return 0; 182 + } 183 + 184 + static struct xt_target audit_tg_reg __read_mostly = { 185 + .name = "AUDIT", 186 + .family = NFPROTO_UNSPEC, 187 + .target = audit_tg, 188 + .targetsize = sizeof(struct xt_audit_info), 189 + .checkentry = audit_tg_check, 190 + .me = THIS_MODULE, 191 + }; 192 + 193 + static int __init audit_tg_init(void) 194 + { 195 + return xt_register_target(&audit_tg_reg); 196 + } 197 + 198 + static void __exit audit_tg_exit(void) 199 + { 200 + xt_unregister_target(&audit_tg_reg); 201 + } 202 + 203 + module_init(audit_tg_init); 204 + module_exit(audit_tg_exit);

+24 -12

net/netfilter/xt_CLASSIFY.c

··· 19 19 #include <linux/netfilter_ipv6.h> 20 20 #include <linux/netfilter/x_tables.h> 21 21 #include <linux/netfilter/xt_CLASSIFY.h> 22 + #include <linux/netfilter_arp.h> 22 23 23 24 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 24 25 MODULE_LICENSE("GPL"); 25 26 MODULE_DESCRIPTION("Xtables: Qdisc classification"); 26 27 MODULE_ALIAS("ipt_CLASSIFY"); 27 28 MODULE_ALIAS("ip6t_CLASSIFY"); 29 + MODULE_ALIAS("arpt_CLASSIFY"); 28 30 29 31 static unsigned int 30 32 classify_tg(struct sk_buff *skb, const struct xt_action_param *par) ··· 37 35 return XT_CONTINUE; 38 36 } 39 37 40 - static struct xt_target classify_tg_reg __read_mostly = { 41 - .name = "CLASSIFY", 42 - .revision = 0, 43 - .family = NFPROTO_UNSPEC, 44 - .table = "mangle", 45 - .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | 46 - (1 << NF_INET_POST_ROUTING), 47 - .target = classify_tg, 48 - .targetsize = sizeof(struct xt_classify_target_info), 49 - .me = THIS_MODULE, 38 + static struct xt_target classify_tg_reg[] __read_mostly = { 39 + { 40 + .name = "CLASSIFY", 41 + .revision = 0, 42 + .family = NFPROTO_UNSPEC, 43 + .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | 44 + (1 << NF_INET_POST_ROUTING), 45 + .target = classify_tg, 46 + .targetsize = sizeof(struct xt_classify_target_info), 47 + .me = THIS_MODULE, 48 + }, 49 + { 50 + .name = "CLASSIFY", 51 + .revision = 0, 52 + .family = NFPROTO_ARP, 53 + .hooks = (1 << NF_ARP_OUT) | (1 << NF_ARP_FORWARD), 54 + .target = classify_tg, 55 + .targetsize = sizeof(struct xt_classify_target_info), 56 + .me = THIS_MODULE, 57 + }, 50 58 }; 51 59 52 60 static int __init classify_tg_init(void) 53 61 { 54 - return xt_register_target(&classify_tg_reg); 62 + return xt_register_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); 55 63 } 56 64 57 65 static void __exit classify_tg_exit(void) 58 66 { 59 - xt_unregister_target(&classify_tg_reg); 67 + xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); 60 68 } 61 69 62 70 
module_init(classify_tg_init);

+2

net/netfilter/xt_IDLETIMER.c

··· 313 313 MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>"); 314 314 MODULE_DESCRIPTION("Xtables: idle time monitor"); 315 315 MODULE_LICENSE("GPL v2"); 316 + MODULE_ALIAS("ipt_IDLETIMER"); 317 + MODULE_ALIAS("ip6t_IDLETIMER");

+2

net/netfilter/xt_LED.c

··· 31 31 MODULE_LICENSE("GPL"); 32 32 MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); 33 33 MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); 34 + MODULE_ALIAS("ipt_LED"); 35 + MODULE_ALIAS("ip6t_LED"); 34 36 35 37 static LIST_HEAD(xt_led_triggers); 36 38 static DEFINE_MUTEX(xt_led_mutex);

+29 -5

net/netfilter/xt_NFQUEUE.c

··· 72 72 73 73 if (info->queues_total > 1) { 74 74 if (par->family == NFPROTO_IPV4) 75 - queue = hash_v4(skb) % info->queues_total + queue; 75 + queue = (((u64) hash_v4(skb) * info->queues_total) >> 76 + 32) + queue; 76 77 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) 77 78 else if (par->family == NFPROTO_IPV6) 78 - queue = hash_v6(skb) % info->queues_total + queue; 79 + queue = (((u64) hash_v6(skb) * info->queues_total) >> 80 + 32) + queue; 79 81 #endif 80 82 } 81 83 return NF_QUEUE_NR(queue); 82 84 } 83 85 84 - static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par) 86 + static unsigned int 87 + nfqueue_tg_v2(struct sk_buff *skb, const struct xt_action_param *par) 85 88 { 86 - const struct xt_NFQ_info_v1 *info = par->targinfo; 89 + const struct xt_NFQ_info_v2 *info = par->targinfo; 90 + unsigned int ret = nfqueue_tg_v1(skb, par); 91 + 92 + if (info->bypass) 93 + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; 94 + return ret; 95 + } 96 + 97 + static int nfqueue_tg_check(const struct xt_tgchk_param *par) 98 + { 99 + const struct xt_NFQ_info_v2 *info = par->targinfo; 87 100 u32 maxid; 88 101 89 102 if (unlikely(!rnd_inited)) { ··· 113 100 info->queues_total, maxid); 114 101 return -ERANGE; 115 102 } 103 + if (par->target->revision == 2 && info->bypass > 1) 104 + return -EINVAL; 116 105 return 0; 117 106 } 118 107 ··· 130 115 .name = "NFQUEUE", 131 116 .revision = 1, 132 117 .family = NFPROTO_UNSPEC, 133 - .checkentry = nfqueue_tg_v1_check, 118 + .checkentry = nfqueue_tg_check, 134 119 .target = nfqueue_tg_v1, 135 120 .targetsize = sizeof(struct xt_NFQ_info_v1), 121 + .me = THIS_MODULE, 122 + }, 123 + { 124 + .name = "NFQUEUE", 125 + .revision = 2, 126 + .family = NFPROTO_UNSPEC, 127 + .checkentry = nfqueue_tg_check, 128 + .target = nfqueue_tg_v2, 129 + .targetsize = sizeof(struct xt_NFQ_info_v2), 136 130 .me = THIS_MODULE, 137 131 }, 138 132 };

+2 -4

net/netfilter/xt_connlimit.c

··· 204 204 &info->mask, par->family); 205 205 spin_unlock_bh(&info->data->lock); 206 206 207 - if (connections < 0) { 207 + if (connections < 0) 208 208 /* kmalloc failed, drop it entirely */ 209 - par->hotdrop = true; 210 - return false; 211 - } 209 + goto hotdrop; 212 210 213 211 return (connections > info->limit) ^ info->inverse; 214 212

+73 -2

net/netfilter/xt_conntrack.c

··· 112 112 return true; 113 113 } 114 114 115 + static inline bool 116 + port_match(u16 min, u16 max, u16 port, bool invert) 117 + { 118 + return (port >= min && port <= max) ^ invert; 119 + } 120 + 121 + static inline bool 122 + ct_proto_port_check_v3(const struct xt_conntrack_mtinfo3 *info, 123 + const struct nf_conn *ct) 124 + { 125 + const struct nf_conntrack_tuple *tuple; 126 + 127 + tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; 128 + if ((info->match_flags & XT_CONNTRACK_PROTO) && 129 + (nf_ct_protonum(ct) == info->l4proto) ^ 130 + !(info->invert_flags & XT_CONNTRACK_PROTO)) 131 + return false; 132 + 133 + /* Shortcut to match all recognized protocols by using ->src.all. */ 134 + if ((info->match_flags & XT_CONNTRACK_ORIGSRC_PORT) && 135 + !port_match(info->origsrc_port, info->origsrc_port_high, 136 + ntohs(tuple->src.u.all), 137 + info->invert_flags & XT_CONNTRACK_ORIGSRC_PORT)) 138 + return false; 139 + 140 + if ((info->match_flags & XT_CONNTRACK_ORIGDST_PORT) && 141 + !port_match(info->origdst_port, info->origdst_port_high, 142 + ntohs(tuple->dst.u.all), 143 + info->invert_flags & XT_CONNTRACK_ORIGDST_PORT)) 144 + return false; 145 + 146 + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; 147 + 148 + if ((info->match_flags & XT_CONNTRACK_REPLSRC_PORT) && 149 + !port_match(info->replsrc_port, info->replsrc_port_high, 150 + ntohs(tuple->src.u.all), 151 + info->invert_flags & XT_CONNTRACK_REPLSRC_PORT)) 152 + return false; 153 + 154 + if ((info->match_flags & XT_CONNTRACK_REPLDST_PORT) && 155 + !port_match(info->repldst_port, info->repldst_port_high, 156 + ntohs(tuple->dst.u.all), 157 + info->invert_flags & XT_CONNTRACK_REPLDST_PORT)) 158 + return false; 159 + 160 + return true; 161 + } 162 + 115 163 static bool 116 164 conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, 117 165 u16 state_mask, u16 status_mask) ··· 218 170 !(info->invert_flags & XT_CONNTRACK_REPLDST)) 219 171 return false; 220 172 221 - if (!ct_proto_port_check(info, ct)) 
222 - return false; 173 + if (par->match->revision != 3) { 174 + if (!ct_proto_port_check(info, ct)) 175 + return false; 176 + } else { 177 + if (!ct_proto_port_check_v3(par->matchinfo, ct)) 178 + return false; 179 + } 223 180 224 181 if ((info->match_flags & XT_CONNTRACK_STATUS) && 225 182 (!!(status_mask & ct->status) ^ ··· 256 203 conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) 257 204 { 258 205 const struct xt_conntrack_mtinfo2 *info = par->matchinfo; 206 + 207 + return conntrack_mt(skb, par, info->state_mask, info->status_mask); 208 + } 209 + 210 + static bool 211 + conntrack_mt_v3(const struct sk_buff *skb, struct xt_action_param *par) 212 + { 213 + const struct xt_conntrack_mtinfo3 *info = par->matchinfo; 259 214 260 215 return conntrack_mt(skb, par, info->state_mask, info->status_mask); 261 216 } ··· 301 240 .family = NFPROTO_UNSPEC, 302 241 .matchsize = sizeof(struct xt_conntrack_mtinfo2), 303 242 .match = conntrack_mt_v2, 243 + .checkentry = conntrack_mt_check, 244 + .destroy = conntrack_mt_destroy, 245 + .me = THIS_MODULE, 246 + }, 247 + { 248 + .name = "conntrack", 249 + .revision = 3, 250 + .family = NFPROTO_UNSPEC, 251 + .matchsize = sizeof(struct xt_conntrack_mtinfo3), 252 + .match = conntrack_mt_v3, 304 253 .checkentry = conntrack_mt_check, 305 254 .destroy = conntrack_mt_destroy, 306 255 .me = THIS_MODULE,

+2

net/netfilter/xt_cpu.c

··· 22 22 MODULE_LICENSE("GPL"); 23 23 MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>"); 24 24 MODULE_DESCRIPTION("Xtables: CPU match"); 25 + MODULE_ALIAS("ipt_cpu"); 26 + MODULE_ALIAS("ip6t_cpu"); 25 27 26 28 static int cpu_mt_check(const struct xt_mtchk_param *par) 27 29 {

+1 -1

net/netfilter/xt_ipvs.c

··· 85 85 /* 86 86 * Check if the packet belongs to an existing entry 87 87 */ 88 - cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); 88 + cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); 89 89 if (unlikely(cp == NULL)) { 90 90 match = false; 91 91 goto out;

+1 -4

net/sched/Kconfig

··· 255 255 256 256 config NET_CLS_ROUTE4 257 257 tristate "Routing decision (ROUTE)" 258 - select NET_CLS_ROUTE 258 + select IP_ROUTE_CLASSID 259 259 select NET_CLS 260 260 ---help--- 261 261 If you say Y here, you will be able to classify packets ··· 263 263 264 264 To compile this code as a module, choose M here: the 265 265 module will be called cls_route. 266 - 267 - config NET_CLS_ROUTE 268 - bool 269 266 270 267 config NET_CLS_FW 271 268 tristate "Netfilter mark (FW)"

+1 -1

net/sched/cls_flow.c

··· 276 276 277 277 static u32 flow_get_rtclassid(const struct sk_buff *skb) 278 278 { 279 - #ifdef CONFIG_NET_CLS_ROUTE 279 + #ifdef CONFIG_IP_ROUTE_CLASSID 280 280 if (skb_dst(skb)) 281 281 return skb_dst(skb)->tclassid; 282 282 #endif

+1 -1

net/sched/em_meta.c

··· 252 252 if (unlikely(skb_dst(skb) == NULL)) 253 253 *err = -1; 254 254 else 255 - #ifdef CONFIG_NET_CLS_ROUTE 255 + #ifdef CONFIG_IP_ROUTE_CLASSID 256 256 dst->value = skb_dst(skb)->tclassid; 257 257 #else 258 258 dst->value = 0;