···103103#define AUDIT_BPRM_FCAPS 1321 /* Information about fcaps increasing perms */104104#define AUDIT_CAPSET 1322 /* Record showing argument to sys_capset */105105#define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */106106+#define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */107107+#define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */106108107109#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */108110#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
+8
include/linux/ip_vs.h
···8989#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */9090#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */91919292+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \9393+ IP_VS_CONN_F_NOOUTPUT | \9494+ IP_VS_CONN_F_INACTIVE | \9595+ IP_VS_CONN_F_SEQ_MASK | \9696+ IP_VS_CONN_F_NO_CPORT | \9797+ IP_VS_CONN_F_TEMPLATE \9898+ )9999+92100/* Flags that are not sent to backup server start from bit 16 */93101#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */94102
+19-8
include/linux/netfilter.h
···2424#define NF_MAX_VERDICT NF_STOP25252626/* we overload the higher bits for encoding auxiliary data such as the queue2727- * number. Not nice, but better than additional function arguments. */2828-#define NF_VERDICT_MASK 0x0000ffff2929-#define NF_VERDICT_BITS 162727+ * number or errno values. Not nice, but better than additional function2828+ * arguments. */2929+#define NF_VERDICT_MASK 0x000000ff30303131+/* extra verdict flags have mask 0x0000ff00 */3232+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x000080003333+3434+/* queue number (NF_QUEUE) or errno (NF_DROP) */3135#define NF_VERDICT_QMASK 0xffff00003236#define NF_VERDICT_QBITS 1633373434-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)3838+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)35393636-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)4040+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)37413842/* only for userspace compatibility */3943#ifndef __KERNEL__···4541 <= 0x2000 is used for protocol-flags. 
*/4642#define NFC_UNKNOWN 0x40004743#define NFC_ALTERED 0x80004444+4545+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */4646+#define NF_VERDICT_BITS 164847#endif49485049enum nf_inet_hooks {···79728073#ifdef __KERNEL__8174#ifdef CONFIG_NETFILTER7575+static inline int NF_DROP_GETERR(int verdict)7676+{7777+ return -(verdict >> NF_VERDICT_QBITS);7878+}82798380static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,8481 const union nf_inet_addr *a2)···278267 int route_key_size;279268};280269281281-extern const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO];270270+extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];282271static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)283272{284273 return rcu_dereference(nf_afinfo[family]);···368357#endif /*CONFIG_NETFILTER*/369358370359#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)371371-extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);360360+extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu;372361extern void nf_ct_attach(struct sk_buff *, struct sk_buff *);373373-extern void (*nf_ct_destroy)(struct nf_conntrack *);362362+extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu;374363#else375364static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}376365#endif
···611611extern void xt_compat_lock(u_int8_t af);612612extern void xt_compat_unlock(u_int8_t af);613613614614-extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta);614614+extern int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta);615615extern void xt_compat_flush_offsets(u_int8_t af);616616+extern void xt_compat_init_offsets(u_int8_t af, unsigned int number);616617extern int xt_compat_calc_jump(u_int8_t af, unsigned int offset);617618618619extern int xt_compat_match_offset(const struct xt_match *match);
+30
include/linux/netfilter/xt_AUDIT.h
/*
 * Header file for iptables xt_AUDIT target
 *
 * (C) 2010-2011 Thomas Graf <tgraf@redhat.com>
 * (C) 2010-2011 Red Hat, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _XT_AUDIT_TARGET_H
#define _XT_AUDIT_TARGET_H

#include <linux/types.h>

/*
 * Values accepted in xt_audit_info.type.
 *
 * __XT_AUDIT_TYPE_MAX is a sentinel that must stay last; it is only used
 * to derive XT_AUDIT_TYPE_MAX below.
 */
enum {
	XT_AUDIT_TYPE_ACCEPT = 0,
	XT_AUDIT_TYPE_DROP,
	XT_AUDIT_TYPE_REJECT,
	__XT_AUDIT_TYPE_MAX,
};

/* Highest valid XT_AUDIT_TYPE_* value (currently XT_AUDIT_TYPE_REJECT). */
#define XT_AUDIT_TYPE_MAX (__XT_AUDIT_TYPE_MAX - 1)

/* Target parameters shared between userspace and the kernel. */
struct xt_audit_info {
	__u8 type; /* XT_AUDIT_TYPE_* */
};

#endif /* _XT_AUDIT_TARGET_H */
···55 * redirection. We can get rid of that whenever we get support for66 * multiple targets in the same rule. */77struct xt_tproxy_target_info {88- u_int32_t mark_mask;99- u_int32_t mark_value;88+ __u32 mark_mask;99+ __u32 mark_value;1010 __be32 laddr;1111 __be16 lport;1212};13131414struct xt_tproxy_target_info_v1 {1515- u_int32_t mark_mask;1616- u_int32_t mark_value;1515+ __u32 mark_mask;1616+ __u32 mark_value;1717 union nf_inet_addr laddr;1818 __be16 lport;1919};
···1010 seconds, or one every 59 hours. */11111212struct ebt_limit_info {1313- u_int32_t avg; /* Average secs between packets * scale */1414- u_int32_t burst; /* Period multiplier for upper limit. */1313+ __u32 avg; /* Average secs between packets * scale */1414+ __u32 burst; /* Period multiplier for upper limit. */15151616 /* Used internally by the kernel */1717 unsigned long prev;1818- u_int32_t credit;1919- u_int32_t credit_cap, cost;1818+ __u32 credit;1919+ __u32 credit_cap, cost;2020};21212222#endif
···2121 int sysctl_events;2222 unsigned int sysctl_events_retry_timeout;2323 int sysctl_acct;2424+ int sysctl_tstamp;2425 int sysctl_checksum;2526 unsigned int sysctl_log_invalid; /* Log invalid packets */2627#ifdef CONFIG_SYSCTL2728 struct ctl_table_header *sysctl_header;2829 struct ctl_table_header *acct_sysctl_header;3030+ struct ctl_table_header *tstamp_sysctl_header;2931 struct ctl_table_header *event_sysctl_header;3032#endif3131- int hash_vmalloc;3232- int expect_vmalloc;3333 char *slabname;3434};3535#endif
+143
include/net/netns/ip_vs.h
···11+/*22+ * IP Virtual Server33+ * Data structure for network namspace44+ *55+ */66+77+#ifndef IP_VS_H_88+#define IP_VS_H_99+1010+#include <linux/list.h>1111+#include <linux/mutex.h>1212+#include <linux/list_nulls.h>1313+#include <linux/ip_vs.h>1414+#include <asm/atomic.h>1515+#include <linux/in.h>1616+1717+struct ip_vs_stats;1818+struct ip_vs_sync_buff;1919+struct ctl_table_header;2020+2121+struct netns_ipvs {2222+ int gen; /* Generation */2323+ /*2424+ * Hash table: for real service lookups2525+ */2626+ #define IP_VS_RTAB_BITS 42727+ #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)2828+ #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)2929+3030+ struct list_head rs_table[IP_VS_RTAB_SIZE];3131+ /* ip_vs_app */3232+ struct list_head app_list;3333+ struct mutex app_mutex;3434+ struct lock_class_key app_key; /* mutex debuging */3535+3636+ /* ip_vs_proto */3737+ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */3838+ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];3939+ /* ip_vs_proto_tcp */4040+#ifdef CONFIG_IP_VS_PROTO_TCP4141+ #define TCP_APP_TAB_BITS 44242+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)4343+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)4444+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];4545+ spinlock_t tcp_app_lock;4646+#endif4747+ /* ip_vs_proto_udp */4848+#ifdef CONFIG_IP_VS_PROTO_UDP4949+ #define UDP_APP_TAB_BITS 45050+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)5151+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)5252+ struct list_head udp_apps[UDP_APP_TAB_SIZE];5353+ spinlock_t udp_app_lock;5454+#endif5555+ /* ip_vs_proto_sctp */5656+#ifdef CONFIG_IP_VS_PROTO_SCTP5757+ #define SCTP_APP_TAB_BITS 45858+ #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)5959+ #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)6060+ /* Hash table for SCTP application incarnations */6161+ struct list_head sctp_apps[SCTP_APP_TAB_SIZE];6262+ spinlock_t sctp_app_lock;6363+#endif6464+ /* ip_vs_conn */6565+ atomic_t conn_count; /* connection 
counter */6666+6767+ /* ip_vs_ctl */6868+ struct ip_vs_stats *tot_stats; /* Statistics & est. */6969+ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */7070+ seqcount_t *ustats_seq; /* u64 read retry */7171+7272+ int num_services; /* no of virtual services */7373+ /* 1/rate drop and drop-entry variables */7474+ struct delayed_work defense_work; /* Work handler */7575+ int drop_rate;7676+ int drop_counter;7777+ atomic_t dropentry;7878+ /* locks in ctl.c */7979+ spinlock_t dropentry_lock; /* drop entry handling */8080+ spinlock_t droppacket_lock; /* drop packet handling */8181+ spinlock_t securetcp_lock; /* state and timeout tables */8282+ rwlock_t rs_lock; /* real services table */8383+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */8484+ struct lock_class_key ctl_key; /* ctl_mutex debuging */8585+ /* Trash for destinations */8686+ struct list_head dest_trash;8787+ /* Service counters */8888+ atomic_t ftpsvc_counter;8989+ atomic_t nullsvc_counter;9090+9191+ /* sys-ctl struct */9292+ struct ctl_table_header *sysctl_hdr;9393+ struct ctl_table *sysctl_tbl;9494+ /* sysctl variables */9595+ int sysctl_amemthresh;9696+ int sysctl_am_droprate;9797+ int sysctl_drop_entry;9898+ int sysctl_drop_packet;9999+ int sysctl_secure_tcp;100100+#ifdef CONFIG_IP_VS_NFCT101101+ int sysctl_conntrack;102102+#endif103103+ int sysctl_snat_reroute;104104+ int sysctl_sync_ver;105105+ int sysctl_cache_bypass;106106+ int sysctl_expire_nodest_conn;107107+ int sysctl_expire_quiescent_template;108108+ int sysctl_sync_threshold[2];109109+ int sysctl_nat_icmp_send;110110+111111+ /* ip_vs_lblc */112112+ int sysctl_lblc_expiration;113113+ struct ctl_table_header *lblc_ctl_header;114114+ struct ctl_table *lblc_ctl_table;115115+ /* ip_vs_lblcr */116116+ int sysctl_lblcr_expiration;117117+ struct ctl_table_header *lblcr_ctl_header;118118+ struct ctl_table *lblcr_ctl_table;119119+ /* ip_vs_est */120120+ struct list_head est_list; /* estimator list */121121+ spinlock_t 
est_lock;122122+ struct timer_list est_timer; /* Estimation timer */123123+ /* ip_vs_sync */124124+ struct list_head sync_queue;125125+ spinlock_t sync_lock;126126+ struct ip_vs_sync_buff *sync_buff;127127+ spinlock_t sync_buff_lock;128128+ struct sockaddr_in sync_mcast_addr;129129+ struct task_struct *master_thread;130130+ struct task_struct *backup_thread;131131+ int send_mesg_maxlen;132132+ int recv_mesg_maxlen;133133+ volatile int sync_state;134134+ volatile int master_syncid;135135+ volatile int backup_syncid;136136+ /* multicast interface name */137137+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];138138+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];139139+ /* net name space ptr */140140+ struct net *net; /* Needed by timer routines */141141+};142142+143143+#endif /* IP_VS_H_ */
-1
include/net/netns/ipv4.h
···4343 struct xt_table *nat_table;4444 struct hlist_head *nat_bysource;4545 unsigned int nat_htable_size;4646- int nat_vmalloced;4746#endif48474948 int sysctl_icmp_echo_ignore_all;
+2
kernel/audit.c
···7474int audit_enabled;7575int audit_ever_enabled;76767777+EXPORT_SYMBOL_GPL(audit_enabled);7878+7779/* Default state when kernel boots without any parameters. */7880static int audit_default;7981
···140140 handled by the klogd daemon which is responsible for kernel messages141141 ("man klogd").142142143143+config IP_ROUTE_CLASSID144144+ bool145145+143146config IP_PNP144147 bool "IP: kernel level autoconfiguration"145148 help···660657 on the Internet.661658662659 If unsure, say N.663663-
···300300 * that the ->target() function isn't called after ->destroy() */301301302302 ct = nf_ct_get(skb, &ctinfo);303303- if (ct == NULL) {304304- pr_info("no conntrack!\n");305305- /* FIXME: need to drop invalid ones, since replies306306- * to outgoing connections of other nodes will be307307- * marked as INVALID */303303+ if (ct == NULL)308304 return NF_DROP;309309- }310305311306 /* special case: ICMP error handling. conntrack distinguishes between312307 * error messages (RELATED) and information requests (see below) */
+1-2
net/ipv4/netfilter/ipt_LOG.c
···442442 }443443#endif444444445445- /* MAC logging for input path only. */446446- if (in && !out)445445+ if (in != NULL)447446 dump_mac_header(m, loginfo, skb);448447449448 dump_packet(m, loginfo, skb, 0);
···2020#include <net/netfilter/nf_conntrack_l4proto.h>2121#include <net/netfilter/nf_conntrack_expect.h>2222#include <net/netfilter/nf_conntrack_acct.h>2323+#include <linux/rculist_nulls.h>23242425struct ct_iter_state {2526 struct seq_net_private p;···3635 for (st->bucket = 0;3736 st->bucket < net->ct.htable_size;3837 st->bucket++) {3939- n = rcu_dereference(net->ct.hash[st->bucket].first);3838+ n = rcu_dereference(3939+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));4040 if (!is_a_nulls(n))4141 return n;4242 }···5048 struct net *net = seq_file_net(seq);5149 struct ct_iter_state *st = seq->private;52505353- head = rcu_dereference(head->next);5151+ head = rcu_dereference(hlist_nulls_next_rcu(head));5452 while (is_a_nulls(head)) {5553 if (likely(get_nulls_value(head) == st->bucket)) {5654 if (++st->bucket >= net->ct.htable_size)5755 return NULL;5856 }5959- head = rcu_dereference(net->ct.hash[st->bucket].first);5757+ head = rcu_dereference(5858+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));6059 }6160 return head;6261}···220217 struct hlist_node *n;221218222219 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {223223- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);220220+ n = rcu_dereference(221221+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));224222 if (n)225223 return n;226224 }···234230 struct net *net = seq_file_net(seq);235231 struct ct_expect_iter_state *st = seq->private;236232237237- head = rcu_dereference(head->next);233233+ head = rcu_dereference(hlist_next_rcu(head));238234 while (head == NULL) {239235 if (++st->bucket >= nf_ct_expect_hsize)240236 return NULL;241241- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);237237+ head = rcu_dereference(238238+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));242239 }243240 return head;244241}
+4-4
net/ipv4/netfilter/nf_nat_amanda.c
···44444545 /* Try to get same port: if not, try to change it. */4646 for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {4747- int ret;4747+ int res;48484949 exp->tuple.dst.u.tcp.port = htons(port);5050- ret = nf_ct_expect_related(exp);5151- if (ret == 0)5050+ res = nf_ct_expect_related(exp);5151+ if (res == 0)5252 break;5353- else if (ret != -EBUSY) {5353+ else if (res != -EBUSY) {5454 port = 0;5555 break;5656 }
+8-7
net/ipv4/netfilter/nf_nat_core.c
···323323324324 /* It's done. */325325 if (maniptype == IP_NAT_MANIP_DST)326326- set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);326326+ ct->status |= IPS_DST_NAT_DONE;327327 else328328- set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);328328+ ct->status |= IPS_SRC_NAT_DONE;329329330330 return NF_ACCEPT;331331}···502502 int ret = 0;503503504504 spin_lock_bh(&nf_nat_lock);505505- if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {505505+ if (rcu_dereference_protected(506506+ nf_nat_protos[proto->protonum],507507+ lockdep_is_held(&nf_nat_lock)508508+ ) != &nf_nat_unknown_protocol) {506509 ret = -EBUSY;507510 goto out;508511 }···682679{683680 /* Leave them the same for the moment. */684681 net->ipv4.nat_htable_size = net->ct.htable_size;685685- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,686686- &net->ipv4.nat_vmalloced, 0);682682+ net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);687683 if (!net->ipv4.nat_bysource)688684 return -ENOMEM;689685 return 0;···704702{705703 nf_ct_iterate_cleanup(net, &clean_nat, NULL);706704 synchronize_rcu();707707- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,708708- net->ipv4.nat_htable_size);705705+ nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);709706}710707711708static struct pernet_operations nf_nat_net_ops = {
+5-4
net/ipv4/netfilter/nf_nat_snmp_basic.c
···5454#include <net/netfilter/nf_conntrack_expect.h>5555#include <net/netfilter/nf_conntrack_helper.h>5656#include <net/netfilter/nf_nat_helper.h>5757+#include <linux/netfilter/nf_conntrack_snmp.h>57585859MODULE_LICENSE("GPL");5960MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");···13111310{13121311 int ret = 0;1313131213141314- ret = nf_conntrack_helper_register(&snmp_helper);13151315- if (ret < 0)13161316- return ret;13131313+ BUG_ON(nf_nat_snmp_hook != NULL);13141314+ rcu_assign_pointer(nf_nat_snmp_hook, help);13151315+13171316 ret = nf_conntrack_helper_register(&snmp_trap_helper);13181317 if (ret < 0) {13191318 nf_conntrack_helper_unregister(&snmp_helper);···1324132313251324static void __exit nf_nat_snmp_basic_fini(void)13261325{13271327- nf_conntrack_helper_unregister(&snmp_helper);13261326+ rcu_assign_pointer(nf_nat_snmp_hook, NULL);13281327 nf_conntrack_helper_unregister(&snmp_trap_helper);13291328}13301329
···452452 in ? in->name : "",453453 out ? out->name : "");454454455455- /* MAC logging for input path only. */456456- if (in && !out)455455+ if (in != NULL)457456 dump_mac_header(m, loginfo, skb);458457459458 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
···85858686 If unsure, say `N'.87878888+config NF_CONNTRACK_TIMESTAMP8989+ bool 'Connection tracking timestamping'9090+ depends on NETFILTER_ADVANCED9191+ help9292+ This option enables support for connection tracking timestamping.9393+ This allows you to store the flow start-time and to obtain9494+ the flow-stop time (once it has been destroyed) via Connection9595+ tracking events.9696+9797+ If unsure, say `N'.9898+8899config NF_CT_PROTO_DCCP89100 tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'90101 depends on EXPERIMENTAL···196185197186 To compile it as a module, choose M here. If unsure, say N.198187188188+config NF_CONNTRACK_BROADCAST189189+ tristate190190+199191config NF_CONNTRACK_NETBIOS_NS200192 tristate "NetBIOS name service protocol support"201193 depends on NETFILTER_ADVANCED194194+ select NF_CONNTRACK_BROADCAST202195 help203196 NetBIOS name service requests are sent as broadcast messages from an204197 unprivileged port and responded to with unicast messages to the···216201 $ ip -4 address show eth0217202 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000218203 inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0204204+205205+ To compile it as a module, choose M here. If unsure, say N.206206+207207+config NF_CONNTRACK_SNMP208208+ tristate "SNMP service protocol support"209209+ depends on NETFILTER_ADVANCED210210+ select NF_CONNTRACK_BROADCAST211211+ help212212+ SNMP service requests are sent as broadcast messages from an213213+ unprivileged port and responded to with unicast messages to the214214+ same port. This make them hard to firewall properly because connection215215+ tracking doesn't deal with broadcasts. This helper tracks locally216216+ originating SNMP service requests and the corresponding217217+ responses. It relies on correct IP address configuration, specifically218218+ netmask and broadcast address.219219220220 To compile it as a module, choose M here. 
If unsure, say N.221221···355325# alphabetically ordered list of targets356326357327comment "Xtables targets"328328+329329+config NETFILTER_XT_TARGET_AUDIT330330+ tristate "AUDIT target support"331331+ depends on AUDIT332332+ depends on NETFILTER_ADVANCED333333+ ---help---334334+ This option adds an 'AUDIT' target, which can be used to create335335+ audit records for packets dropped/accepted.336336+337337+ To compile it as a module, choose M here. If unsure, say N.338338359339config NETFILTER_XT_TARGET_CHECKSUM360340 tristate "CHECKSUM target support"···517477config NETFILTER_XT_TARGET_NFQUEUE518478 tristate '"NFQUEUE" target Support'519479 depends on NETFILTER_ADVANCED480480+ select NETFILTER_NETLINK_QUEUE521481 help522482 This target replaced the old obsolete QUEUE target.523483···927886config NETFILTER_XT_MATCH_REALM928887 tristate '"realm" match support'929888 depends on NETFILTER_ADVANCED930930- select NET_CLS_ROUTE889889+ select IP_ROUTE_CLASSID931890 help932891 This option adds a `realm' match, which allows you to use the realm933892 key from the routing subsystem inside iptables.
···175175 ret = 1;176176 } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {177177 kfree_skb(skb);178178- ret = -(verdict >> NF_VERDICT_BITS);178178+ ret = NF_DROP_GETERR(verdict);179179 if (ret == 0)180180 ret = -EPERM;181181 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {182182- if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,183183- verdict >> NF_VERDICT_BITS))184184- goto next_hook;182182+ ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,183183+ verdict >> NF_VERDICT_QBITS);184184+ if (ret < 0) {185185+ if (ret == -ECANCELED)186186+ goto next_hook;187187+ if (ret == -ESRCH &&188188+ (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))189189+ goto next_hook;190190+ kfree_skb(skb);191191+ }192192+ ret = 0;185193 }186194 rcu_read_unlock();187195 return ret;···222214/* This does not belong here, but locally generated errors need it if connection223215 tracking in use: without this, connection may not be in hash table, and hence224216 manufactured ICMP or RST packets will not be associated with it. */225225-void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *);217217+void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu __read_mostly;226218EXPORT_SYMBOL(ip_ct_attach);227219228220void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb)···239231}240232EXPORT_SYMBOL(nf_ct_attach);241233242242-void (*nf_ct_destroy)(struct nf_conntrack *);234234+void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;243235EXPORT_SYMBOL(nf_ct_destroy);244236245237void nf_conntrack_destroy(struct nf_conntrack *nfct)
+64-34
net/netfilter/ipvs/ip_vs_app.c
···4343EXPORT_SYMBOL(unregister_ip_vs_app);4444EXPORT_SYMBOL(register_ip_vs_app_inc);45454646-/* ipvs application list head */4747-static LIST_HEAD(ip_vs_app_list);4848-static DEFINE_MUTEX(__ip_vs_app_mutex);4949-5050-5146/*5247 * Get an ip_vs_app object5348 */···6267 * Allocate/initialize app incarnation and register it in proto apps.6368 */6469static int6565-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)7070+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,7171+ __u16 port)6672{6773 struct ip_vs_protocol *pp;6874 struct ip_vs_app *inc;···9498 }9599 }961009797- ret = pp->register_app(inc);101101+ ret = pp->register_app(net, inc);98102 if (ret)99103 goto out;100104···115119 * Release app incarnation116120 */117121static void118118-ip_vs_app_inc_release(struct ip_vs_app *inc)122122+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)119123{120124 struct ip_vs_protocol *pp;121125···123127 return;124128125129 if (pp->unregister_app)126126- pp->unregister_app(inc);130130+ pp->unregister_app(net, inc);127131128132 IP_VS_DBG(9, "%s App %s:%u unregistered\n",129133 pp->name, inc->name, ntohs(inc->port));···164168 * Register an application incarnation in protocol applications165169 */166170int167167-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)171171+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,172172+ __u16 port)168173{174174+ struct netns_ipvs *ipvs = net_ipvs(net);169175 int result;170176171171- mutex_lock(&__ip_vs_app_mutex);177177+ mutex_lock(&ipvs->app_mutex);172178173173- result = ip_vs_app_inc_new(app, proto, port);179179+ result = ip_vs_app_inc_new(net, app, proto, port);174180175175- mutex_unlock(&__ip_vs_app_mutex);181181+ mutex_unlock(&ipvs->app_mutex);176182177183 return result;178184}···183185/*184186 * ip_vs_app registration routine185187 */186186-int register_ip_vs_app(struct ip_vs_app *app)188188+int register_ip_vs_app(struct net *net, struct ip_vs_app 
*app)187189{190190+ struct netns_ipvs *ipvs = net_ipvs(net);188191 /* increase the module use count */189192 ip_vs_use_count_inc();190193191191- mutex_lock(&__ip_vs_app_mutex);194194+ mutex_lock(&ipvs->app_mutex);192195193193- list_add(&app->a_list, &ip_vs_app_list);196196+ list_add(&app->a_list, &ipvs->app_list);194197195195- mutex_unlock(&__ip_vs_app_mutex);198198+ mutex_unlock(&ipvs->app_mutex);196199197200 return 0;198201}···203204 * ip_vs_app unregistration routine204205 * We are sure there are no app incarnations attached to services205206 */206206-void unregister_ip_vs_app(struct ip_vs_app *app)207207+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)207208{209209+ struct netns_ipvs *ipvs = net_ipvs(net);208210 struct ip_vs_app *inc, *nxt;209211210210- mutex_lock(&__ip_vs_app_mutex);212212+ mutex_lock(&ipvs->app_mutex);211213212214 list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {213213- ip_vs_app_inc_release(inc);215215+ ip_vs_app_inc_release(net, inc);214216 }215217216218 list_del(&app->a_list);217219218218- mutex_unlock(&__ip_vs_app_mutex);220220+ mutex_unlock(&ipvs->app_mutex);219221220222 /* decrease the module use count */221223 ip_vs_use_count_dec();···226226/*227227 * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)228228 */229229-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)229229+int ip_vs_bind_app(struct ip_vs_conn *cp,230230+ struct ip_vs_protocol *pp)230231{231232 return pp->app_conn_bind(cp);232233}···482481 * /proc/net/ip_vs_app entry function483482 */484483485485-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)484484+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)486485{487486 struct ip_vs_app *app, *inc;488487489489- list_for_each_entry(app, &ip_vs_app_list, a_list) {488488+ list_for_each_entry(app, &ipvs->app_list, a_list) {490489 list_for_each_entry(inc, &app->incs_list, a_list) {491490 if (pos-- == 0)492491 return inc;···498497499498static void 
*ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)500499{501501- mutex_lock(&__ip_vs_app_mutex);500500+ struct net *net = seq_file_net(seq);501501+ struct netns_ipvs *ipvs = net_ipvs(net);502502503503- return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;503503+ mutex_lock(&ipvs->app_mutex);504504+505505+ return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;504506}505507506508static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)507509{508510 struct ip_vs_app *inc, *app;509511 struct list_head *e;512512+ struct net *net = seq_file_net(seq);513513+ struct netns_ipvs *ipvs = net_ipvs(net);510514511515 ++*pos;512516 if (v == SEQ_START_TOKEN)513513- return ip_vs_app_idx(0);517517+ return ip_vs_app_idx(ipvs, 0);514518515519 inc = v;516520 app = inc->app;···524518 return list_entry(e, struct ip_vs_app, a_list);525519526520 /* go on to next application */527527- for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {521521+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {528522 app = list_entry(e, struct ip_vs_app, a_list);529523 list_for_each_entry(inc, &app->incs_list, a_list) {530524 return inc;···535529536530static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)537531{538538- mutex_unlock(&__ip_vs_app_mutex);532532+ struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));533533+534534+ mutex_unlock(&ipvs->app_mutex);539535}540536541537static int ip_vs_app_seq_show(struct seq_file *seq, void *v)···565557566558static int ip_vs_app_open(struct inode *inode, struct file *file)567559{568568- return seq_open(file, &ip_vs_app_seq_ops);560560+ return seq_open_net(inode, file, &ip_vs_app_seq_ops,561561+ sizeof(struct seq_net_private));569562}570563571564static const struct file_operations ip_vs_app_fops = {···578569};579570#endif580571572572+static int __net_init __ip_vs_app_init(struct net *net)573573+{574574+ struct netns_ipvs *ipvs = net_ipvs(net);575575+576576+ INIT_LIST_HEAD(&ipvs->app_list);577577+ 
__mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);578578+ proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);579579+ return 0;580580+}581581+582582+static void __net_exit __ip_vs_app_cleanup(struct net *net)583583+{584584+ proc_net_remove(net, "ip_vs_app");585585+}586586+587587+static struct pernet_operations ip_vs_app_ops = {588588+ .init = __ip_vs_app_init,589589+ .exit = __ip_vs_app_cleanup,590590+};591591+581592int __init ip_vs_app_init(void)582593{583583- /* we will replace it with proc_net_ipvs_create() soon */584584- proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);585585- return 0;594594+ int rv;595595+596596+ rv = register_pernet_subsys(&ip_vs_app_ops);597597+ return rv;586598}587599588600589601void ip_vs_app_cleanup(void)590602{591591- proc_net_remove(&init_net, "ip_vs_app");603603+ unregister_pernet_subsys(&ip_vs_app_ops);592604}
+121-74
net/netfilter/ipvs/ip_vs_conn.c
···4848/*4949 * Connection hash size. Default is what was selected at compile time.5050*/5151-int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;5151+static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;5252module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444);5353MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size");54545555/* size and mask values */5656-int ip_vs_conn_tab_size;5757-int ip_vs_conn_tab_mask;5656+int ip_vs_conn_tab_size __read_mostly;5757+static int ip_vs_conn_tab_mask __read_mostly;58585959/*6060 * Connection hash table: for input and output packets lookups of IPVS6161 */6262-static struct list_head *ip_vs_conn_tab;6262+static struct list_head *ip_vs_conn_tab __read_mostly;63636464/* SLAB cache for IPVS connections */6565static struct kmem_cache *ip_vs_conn_cachep __read_mostly;6666-6767-/* counter for current IPVS connections */6868-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);69667067/* counter for no client port connections */7168static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);72697370/* random value for IPVS connection hash */7474-static unsigned int ip_vs_conn_rnd;7171+static unsigned int ip_vs_conn_rnd __read_mostly;75727673/*7774 * Fine locking granularity for big connection hash table7875 */7979-#define CT_LOCKARRAY_BITS 47676+#define CT_LOCKARRAY_BITS 58077#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)8178#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)8279···130133/*131134 * Returns hash value for IPVS connection entry132135 */133133-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,136136+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,134137 const union nf_inet_addr *addr,135138 __be16 port)136139{137140#ifdef CONFIG_IP_VS_IPV6138141 if (af == AF_INET6)139139- return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),140140- (__force u32)port, proto, ip_vs_conn_rnd)141141- & ip_vs_conn_tab_mask;142142+ return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),143143+ 
(__force u32)port, proto, ip_vs_conn_rnd) ^144144+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;142145#endif143143- return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,144144- ip_vs_conn_rnd)145145- & ip_vs_conn_tab_mask;146146+ return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,147147+ ip_vs_conn_rnd) ^148148+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;146149}147150148151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,···163166 port = p->vport;164167 }165168166166- return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);169169+ return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);167170}168171169172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)170173{171174 struct ip_vs_conn_param p;172175173173- ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,174174- NULL, 0, &p);176176+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,177177+ &cp->caddr, cp->cport, NULL, 0, &p);175178176176- if (cp->dest && cp->dest->svc->pe) {177177- p.pe = cp->dest->svc->pe;179179+ if (cp->pe) {180180+ p.pe = cp->pe;178181 p.pe_data = cp->pe_data;179182 p.pe_data_len = cp->pe_data_len;180183 }···183186}184187185188/*186186- * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.189189+ * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.187190 * returns bool success.188191 */189192static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)···266269267270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {268271 if (cp->af == p->af &&272272+ p->cport == cp->cport && p->vport == cp->vport &&269273 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&270274 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&271271- p->cport == cp->cport && p->vport == cp->vport &&272275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&273273- p->protocol == cp->protocol) {276276+ p->protocol == cp->protocol &&277277+ ip_vs_conn_net_eq(cp, p->net)) {274278 /* HIT */275279 
atomic_inc(&cp->refcnt);276280 ct_read_unlock(hash);···311313 struct ip_vs_conn_param *p)312314{313315 __be16 _ports[2], *pptr;316316+ struct net *net = skb_net(skb);314317315318 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);316319 if (pptr == NULL)317320 return 1;318321319322 if (likely(!inverse))320320- ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],321321- &iph->daddr, pptr[1], p);323323+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,324324+ pptr[0], &iph->daddr, pptr[1], p);322325 else323323- ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],324324- &iph->saddr, pptr[0], p);326326+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,327327+ pptr[1], &iph->saddr, pptr[0], p);325328 return 0;326329}327330328331struct ip_vs_conn *329332ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,330330- struct ip_vs_protocol *pp,331333 const struct ip_vs_iphdr *iph,332334 unsigned int proto_off, int inverse)333335{···351353 ct_read_lock(hash);352354353355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {356356+ if (!ip_vs_conn_net_eq(cp, p->net))357357+ continue;354358 if (p->pe_data && p->pe->ct_match) {355355- if (p->pe->ct_match(p, cp))359359+ if (p->pe == cp->pe && p->pe->ct_match(p, cp))356360 goto out;357361 continue;358362 }···404404405405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {406406 if (cp->af == p->af &&407407+ p->vport == cp->cport && p->cport == cp->dport &&407408 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&408409 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&409409- p->vport == cp->cport && p->cport == cp->dport &&410410- p->protocol == cp->protocol) {410410+ p->protocol == cp->protocol &&411411+ ip_vs_conn_net_eq(cp, p->net)) {411412 /* HIT */412413 atomic_inc(&cp->refcnt);413414 ret = cp;···429428430429struct ip_vs_conn *431430ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,432432- struct ip_vs_protocol *pp,433431 const struct ip_vs_iphdr 
*iph,434432 unsigned int proto_off, int inverse)435433{···611611 struct ip_vs_dest *dest;612612613613 if ((cp) && (!cp->dest)) {614614- dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,615615- &cp->vaddr, cp->vport,616616- cp->protocol);614614+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,615615+ cp->dport, &cp->vaddr, cp->vport,616616+ cp->protocol, cp->fwmark);617617 ip_vs_bind_dest(cp, dest);618618 return dest;619619 } else···686686int ip_vs_check_template(struct ip_vs_conn *ct)687687{688688 struct ip_vs_dest *dest = ct->dest;689689+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));689690690691 /*691692 * Checking the dest server status.692693 */693694 if ((dest == NULL) ||694695 !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||695695- (sysctl_ip_vs_expire_quiescent_template &&696696+ (ipvs->sysctl_expire_quiescent_template &&696697 (atomic_read(&dest->weight) == 0))) {697698 IP_VS_DBG_BUF(9, "check_template: dest not available for "698699 "protocol %s s:%s:%d v:%s:%d "···731730static void ip_vs_conn_expire(unsigned long data)732731{733732 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;733733+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));734734735735 cp->timeout = 60*HZ;736736···767765 if (cp->flags & IP_VS_CONN_F_NFCT)768766 ip_vs_conn_drop_conntrack(cp);769767768768+ ip_vs_pe_put(cp->pe);770769 kfree(cp->pe_data);771770 if (unlikely(cp->app != NULL))772771 ip_vs_unbind_app(cp);773772 ip_vs_unbind_dest(cp);774773 if (cp->flags & IP_VS_CONN_F_NO_CPORT)775774 atomic_dec(&ip_vs_conn_no_cport_cnt);776776- atomic_dec(&ip_vs_conn_count);775775+ atomic_dec(&ipvs->conn_count);777776778777 kmem_cache_free(ip_vs_conn_cachep, cp);779778 return;···805802struct ip_vs_conn *806803ip_vs_conn_new(const struct ip_vs_conn_param *p,807804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,808808- struct ip_vs_dest *dest)805805+ struct ip_vs_dest *dest, __u32 fwmark)809806{810807 struct ip_vs_conn *cp;811811- struct ip_vs_protocol *pp = 
ip_vs_proto_get(p->protocol);808808+ struct netns_ipvs *ipvs = net_ipvs(p->net);809809+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,810810+ p->protocol);812811813812 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);814813 if (cp == NULL) {···820815821816 INIT_LIST_HEAD(&cp->c_list);822817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);818818+ ip_vs_conn_net_set(cp, p->net);823819 cp->af = p->af;824820 cp->protocol = p->protocol;825821 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);···832826 &cp->daddr, daddr);833827 cp->dport = dport;834828 cp->flags = flags;835835- if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {829829+ cp->fwmark = fwmark;830830+ if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {831831+ ip_vs_pe_get(p->pe);832832+ cp->pe = p->pe;836833 cp->pe_data = p->pe_data;837834 cp->pe_data_len = p->pe_data_len;838835 }···851842 atomic_set(&cp->n_control, 0);852843 atomic_set(&cp->in_pkts, 0);853844854854- atomic_inc(&ip_vs_conn_count);845845+ atomic_inc(&ipvs->conn_count);855846 if (flags & IP_VS_CONN_F_NO_CPORT)856847 atomic_inc(&ip_vs_conn_no_cport_cnt);857848···870861#endif871862 ip_vs_bind_xmit(cp);872863873873- if (unlikely(pp && atomic_read(&pp->appcnt)))874874- ip_vs_bind_app(cp, pp);864864+ if (unlikely(pd && atomic_read(&pd->appcnt)))865865+ ip_vs_bind_app(cp, pd->pp);875866876867 /*877868 * Allow conntrack to be preserved. 
By default, conntrack···880871 * IP_VS_CONN_F_ONE_PACKET too.881872 */882873883883- if (ip_vs_conntrack_enabled())874874+ if (ip_vs_conntrack_enabled(ipvs))884875 cp->flags |= IP_VS_CONN_F_NFCT;885876886877 /* Hash it in the ip_vs_conn_tab finally */···893884 * /proc/net/ip_vs_conn entries894885 */895886#ifdef CONFIG_PROC_FS887887+struct ip_vs_iter_state {888888+ struct seq_net_private p;889889+ struct list_head *l;890890+};896891897892static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)898893{899894 int idx;900895 struct ip_vs_conn *cp;896896+ struct ip_vs_iter_state *iter = seq->private;901897902898 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {903899 ct_read_lock_bh(idx);904900 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {905901 if (pos-- == 0) {906906- seq->private = &ip_vs_conn_tab[idx];902902+ iter->l = &ip_vs_conn_tab[idx];907903 return cp;908904 }909905 }···920906921907static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)922908{923923- seq->private = NULL;909909+ struct ip_vs_iter_state *iter = seq->private;910910+911911+ iter->l = NULL;924912 return *pos ? 
ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;925913}926914927915static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)928916{929917 struct ip_vs_conn *cp = v;930930- struct list_head *e, *l = seq->private;918918+ struct ip_vs_iter_state *iter = seq->private;919919+ struct list_head *e, *l = iter->l;931920 int idx;932921933922 ++*pos;···947930 while (++idx < ip_vs_conn_tab_size) {948931 ct_read_lock_bh(idx);949932 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {950950- seq->private = &ip_vs_conn_tab[idx];933933+ iter->l = &ip_vs_conn_tab[idx];951934 return cp;952935 }953936 ct_read_unlock_bh(idx);954937 }955955- seq->private = NULL;938938+ iter->l = NULL;956939 return NULL;957940}958941959942static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)960943{961961- struct list_head *l = seq->private;944944+ struct ip_vs_iter_state *iter = seq->private;945945+ struct list_head *l = iter->l;962946963947 if (l)964948 ct_read_unlock_bh(l - ip_vs_conn_tab);···973955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");974956 else {975957 const struct ip_vs_conn *cp = v;958958+ struct net *net = seq_file_net(seq);976959 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];977960 size_t len = 0;978961979979- if (cp->dest && cp->pe_data &&980980- cp->dest->svc->pe->show_pe_data) {962962+ if (!ip_vs_conn_net_eq(cp, net))963963+ return 0;964964+ if (cp->pe_data) {981965 pe_data[0] = ' ';982982- len = strlen(cp->dest->svc->pe->name);983983- memcpy(pe_data + 1, cp->dest->svc->pe->name, len);966966+ len = strlen(cp->pe->name);967967+ memcpy(pe_data + 1, cp->pe->name, len);984968 pe_data[len + 1] = ' ';985969 len += 2;986986- len += cp->dest->svc->pe->show_pe_data(cp,987987- pe_data + len);970970+ len += cp->pe->show_pe_data(cp, pe_data + len);988971 }989972 pe_data[len] = '\0';990973···1023100410241005static int ip_vs_conn_open(struct inode *inode, struct file *file)10251006{10261026- return seq_open(file, 
&ip_vs_conn_seq_ops);10071007+ return seq_open_net(inode, file, &ip_vs_conn_seq_ops,10081008+ sizeof(struct ip_vs_iter_state));10271009}1028101010291011static const struct file_operations ip_vs_conn_fops = {···10511031 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");10521032 else {10531033 const struct ip_vs_conn *cp = v;10341034+ struct net *net = seq_file_net(seq);10351035+10361036+ if (!ip_vs_conn_net_eq(cp, net))10371037+ return 0;1054103810551039#ifdef CONFIG_IP_VS_IPV610561040 if (cp->af == AF_INET6)···1091106710921068static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)10931069{10941094- return seq_open(file, &ip_vs_conn_sync_seq_ops);10701070+ return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,10711071+ sizeof(struct ip_vs_iter_state));10951072}1096107310971074static const struct file_operations ip_vs_conn_sync_fops = {···11381113}1139111411401115/* Called from keventd and must protect itself from softirqs */11411141-void ip_vs_random_dropentry(void)11161116+void ip_vs_random_dropentry(struct net *net)11421117{11431118 int idx;11441119 struct ip_vs_conn *cp;···11581133 if (cp->flags & IP_VS_CONN_F_TEMPLATE)11591134 /* connection template */11601135 continue;11611161-11361136+ if (!ip_vs_conn_net_eq(cp, net))11371137+ continue;11621138 if (cp->protocol == IPPROTO_TCP) {11631139 switch(cp->state) {11641140 case IP_VS_TCP_S_SYN_RECV:···11941168/*11951169 * Flush all the connection entries in the ip_vs_conn_tab11961170 */11971197-static void ip_vs_conn_flush(void)11711171+static void ip_vs_conn_flush(struct net *net)11981172{11991173 int idx;12001174 struct ip_vs_conn *cp;11751175+ struct netns_ipvs *ipvs = net_ipvs(net);1201117612021202- flush_again:11771177+flush_again:12031178 for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {12041179 /*12051180 * Lock is actually needed in this loop.···12081181 ct_write_lock_bh(idx);1209118212101183 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {12111211-11841184+ if 
(!ip_vs_conn_net_eq(cp, net))11851185+ continue;12121186 IP_VS_DBG(4, "del connection\n");12131187 ip_vs_conn_expire_now(cp);12141188 if (cp->control) {···1222119412231195 /* the counter may be not NULL, because maybe some conn entries12241196 are run by slow timer handler or unhashed but still referred */12251225- if (atomic_read(&ip_vs_conn_count) != 0) {11971197+ if (atomic_read(&ipvs->conn_count) != 0) {12261198 schedule();12271199 goto flush_again;12281200 }12291201}12021202+/*12031203+ * per netns init and exit12041204+ */12051205+int __net_init __ip_vs_conn_init(struct net *net)12061206+{12071207+ struct netns_ipvs *ipvs = net_ipvs(net);1230120812091209+ atomic_set(&ipvs->conn_count, 0);12101210+12111211+ proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);12121212+ proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);12131213+ return 0;12141214+}12151215+12161216+static void __net_exit __ip_vs_conn_cleanup(struct net *net)12171217+{12181218+ /* flush all the connection entries first */12191219+ ip_vs_conn_flush(net);12201220+ proc_net_remove(net, "ip_vs_conn");12211221+ proc_net_remove(net, "ip_vs_conn_sync");12221222+}12231223+static struct pernet_operations ipvs_conn_ops = {12241224+ .init = __ip_vs_conn_init,12251225+ .exit = __ip_vs_conn_cleanup,12261226+};1231122712321228int __init ip_vs_conn_init(void)12331229{12341230 int idx;12311231+ int retc;1235123212361233 /* Compute size and mask */12371234 ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;···12941241 rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);12951242 }1296124312971297- proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);12981298- proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);12441244+ retc = register_pernet_subsys(&ipvs_conn_ops);1299124513001246 /* calculate the random value for connection hash */13011247 get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));1302124813031303- return 0;12491249+ return 
retc;13041250}13051305-1306125113071252void ip_vs_conn_cleanup(void)13081253{13091309- /* flush all the connection entries first */13101310- ip_vs_conn_flush();13111311-12541254+ unregister_pernet_subsys(&ipvs_conn_ops);13121255 /* Release the empty cache */13131256 kmem_cache_destroy(ip_vs_conn_cachep);13141314- proc_net_remove(&init_net, "ip_vs_conn");13151315- proc_net_remove(&init_net, "ip_vs_conn_sync");13161257 vfree(ip_vs_conn_tab);13171258}
+253-117
net/netfilter/ipvs/ip_vs_core.c
···4141#include <net/icmp.h> /* for icmp_send */4242#include <net/route.h>4343#include <net/ip6_checksum.h>4444+#include <net/netns/generic.h> /* net_generic() */44454546#include <linux/netfilter.h>4647#include <linux/netfilter_ipv4.h>···6968EXPORT_SYMBOL(ip_vs_get_debug_level);7069#endif71707171+int ip_vs_net_id __read_mostly;7272+#ifdef IP_VS_GENERIC_NETNS7373+EXPORT_SYMBOL(ip_vs_net_id);7474+#endif7575+/* netns cnt used for uniqueness */7676+static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);72777378/* ID used in ICMP lookups */7479#define icmp_id(icmph) (((icmph)->un).echo.id)···115108ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)116109{117110 struct ip_vs_dest *dest = cp->dest;111111+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));112112+118113 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {119119- spin_lock(&dest->stats.lock);120120- dest->stats.ustats.inpkts++;121121- dest->stats.ustats.inbytes += skb->len;122122- spin_unlock(&dest->stats.lock);114114+ struct ip_vs_cpu_stats *s;123115124124- spin_lock(&dest->svc->stats.lock);125125- dest->svc->stats.ustats.inpkts++;126126- dest->svc->stats.ustats.inbytes += skb->len;127127- spin_unlock(&dest->svc->stats.lock);116116+ s = this_cpu_ptr(dest->stats.cpustats);117117+ s->ustats.inpkts++;118118+ u64_stats_update_begin(&s->syncp);119119+ s->ustats.inbytes += skb->len;120120+ u64_stats_update_end(&s->syncp);128121129129- spin_lock(&ip_vs_stats.lock);130130- ip_vs_stats.ustats.inpkts++;131131- ip_vs_stats.ustats.inbytes += skb->len;132132- spin_unlock(&ip_vs_stats.lock);122122+ s = this_cpu_ptr(dest->svc->stats.cpustats);123123+ s->ustats.inpkts++;124124+ u64_stats_update_begin(&s->syncp);125125+ s->ustats.inbytes += skb->len;126126+ u64_stats_update_end(&s->syncp);127127+128128+ s = this_cpu_ptr(ipvs->cpustats);129129+ s->ustats.inpkts++;130130+ u64_stats_update_begin(&s->syncp);131131+ s->ustats.inbytes += skb->len;132132+ u64_stats_update_end(&s->syncp);133133 
}134134}135135···145131ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)146132{147133 struct ip_vs_dest *dest = cp->dest;134134+ struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));135135+148136 if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {149149- spin_lock(&dest->stats.lock);150150- dest->stats.ustats.outpkts++;151151- dest->stats.ustats.outbytes += skb->len;152152- spin_unlock(&dest->stats.lock);137137+ struct ip_vs_cpu_stats *s;153138154154- spin_lock(&dest->svc->stats.lock);155155- dest->svc->stats.ustats.outpkts++;156156- dest->svc->stats.ustats.outbytes += skb->len;157157- spin_unlock(&dest->svc->stats.lock);139139+ s = this_cpu_ptr(dest->stats.cpustats);140140+ s->ustats.outpkts++;141141+ u64_stats_update_begin(&s->syncp);142142+ s->ustats.outbytes += skb->len;143143+ u64_stats_update_end(&s->syncp);158144159159- spin_lock(&ip_vs_stats.lock);160160- ip_vs_stats.ustats.outpkts++;161161- ip_vs_stats.ustats.outbytes += skb->len;162162- spin_unlock(&ip_vs_stats.lock);145145+ s = this_cpu_ptr(dest->svc->stats.cpustats);146146+ s->ustats.outpkts++;147147+ u64_stats_update_begin(&s->syncp);148148+ s->ustats.outbytes += skb->len;149149+ u64_stats_update_end(&s->syncp);150150+151151+ s = this_cpu_ptr(ipvs->cpustats);152152+ s->ustats.outpkts++;153153+ u64_stats_update_begin(&s->syncp);154154+ s->ustats.outbytes += skb->len;155155+ u64_stats_update_end(&s->syncp);163156 }164157}165158···174153static inline void175154ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)176155{177177- spin_lock(&cp->dest->stats.lock);178178- cp->dest->stats.ustats.conns++;179179- spin_unlock(&cp->dest->stats.lock);156156+ struct netns_ipvs *ipvs = net_ipvs(svc->net);157157+ struct ip_vs_cpu_stats *s;180158181181- spin_lock(&svc->stats.lock);182182- svc->stats.ustats.conns++;183183- spin_unlock(&svc->stats.lock);159159+ s = this_cpu_ptr(cp->dest->stats.cpustats);160160+ s->ustats.conns++;184161185185- spin_lock(&ip_vs_stats.lock);186186- 
ip_vs_stats.ustats.conns++;187187- spin_unlock(&ip_vs_stats.lock);162162+ s = this_cpu_ptr(svc->stats.cpustats);163163+ s->ustats.conns++;164164+165165+ s = this_cpu_ptr(ipvs->cpustats);166166+ s->ustats.conns++;188167}189168190169191170static inline int192171ip_vs_set_state(struct ip_vs_conn *cp, int direction,193172 const struct sk_buff *skb,194194- struct ip_vs_protocol *pp)173173+ struct ip_vs_proto_data *pd)195174{196196- if (unlikely(!pp->state_transition))175175+ if (unlikely(!pd->pp->state_transition))197176 return 0;198198- return pp->state_transition(cp, direction, skb, pp);177177+ return pd->pp->state_transition(cp, direction, skb, pd);199178}200179201201-static inline void180180+static inline int202181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,203182 struct sk_buff *skb, int protocol,204183 const union nf_inet_addr *caddr, __be16 cport,205184 const union nf_inet_addr *vaddr, __be16 vport,206185 struct ip_vs_conn_param *p)207186{208208- ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);187187+ ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,188188+ vport, p);209189 p->pe = svc->pe;210190 if (p->pe && p->pe->fill_param)211211- p->pe->fill_param(p, skb);191191+ return p->pe->fill_param(p, skb);192192+193193+ return 0;212194}213195214196/*···224200static struct ip_vs_conn *225201ip_vs_sched_persist(struct ip_vs_service *svc,226202 struct sk_buff *skb,227227- __be16 ports[2])203203+ __be16 src_port, __be16 dst_port, int *ignored)228204{229205 struct ip_vs_conn *cp = NULL;230206 struct ip_vs_iphdr iph;···248224249225 IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "250226 "mnet %s\n",251251- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),252252- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),227227+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),228228+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),253229 IP_VS_DBG_ADDR(svc->af, &snet));254230255231 /*···271247 const 
union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };272248 __be16 vport = 0;273249274274- if (ports[1] == svc->port) {250250+ if (dst_port == svc->port) {275251 /* non-FTP template:276252 * <protocol, caddr, 0, vaddr, vport, daddr, dport>277253 * FTP template:278254 * <protocol, caddr, 0, vaddr, 0, daddr, 0>279255 */280256 if (svc->port != FTPPORT)281281- vport = ports[1];257257+ vport = dst_port;282258 } else {283259 /* Note: persistent fwmark-based services and284260 * persistent port zero service are handled here.···292268 vaddr = &fwmark;293269 }294270 }295295- ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,296296- vaddr, vport, ¶m);271271+ /* return *ignored = -1 so NF_DROP can be used */272272+ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,273273+ vaddr, vport, ¶m) < 0) {274274+ *ignored = -1;275275+ return NULL;276276+ }297277 }298278299279 /* Check if a template already exists */300280 ct = ip_vs_ct_in_get(¶m);301281 if (!ct || !ip_vs_check_template(ct)) {302302- /* No template found or the dest of the connection282282+ /*283283+ * No template found or the dest of the connection303284 * template is not available.285285+ * return *ignored=0 i.e. 
ICMP and NF_DROP304286 */305287 dest = svc->scheduler->schedule(svc, skb);306288 if (!dest) {307289 IP_VS_DBG(1, "p-schedule: no dest found.\n");308290 kfree(param.pe_data);291291+ *ignored = 0;309292 return NULL;310293 }311294312312- if (ports[1] == svc->port && svc->port != FTPPORT)295295+ if (dst_port == svc->port && svc->port != FTPPORT)313296 dport = dest->port;314297315298 /* Create a template···324293 * and thus param.pe_data will be destroyed325294 * when the template expires */326295 ct = ip_vs_conn_new(¶m, &dest->addr, dport,327327- IP_VS_CONN_F_TEMPLATE, dest);296296+ IP_VS_CONN_F_TEMPLATE, dest, skb->mark);328297 if (ct == NULL) {329298 kfree(param.pe_data);299299+ *ignored = -1;330300 return NULL;331301 }332302···338306 kfree(param.pe_data);339307 }340308341341- dport = ports[1];309309+ dport = dst_port;342310 if (dport == svc->port && dest->port)343311 dport = dest->port;344312···349317 /*350318 * Create a new connection according to the template351319 */352352- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],353353- &iph.daddr, ports[1], ¶m);354354- cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest);320320+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,321321+ src_port, &iph.daddr, dst_port, ¶m);322322+323323+ cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);355324 if (cp == NULL) {356325 ip_vs_conn_put(ct);326326+ *ignored = -1;357327 return NULL;358328 }359329···375341 * It selects a server according to the virtual service, and376342 * creates a connection entry.377343 * Protocols supported: TCP, UDP344344+ *345345+ * Usage of *ignored346346+ *347347+ * 1 : protocol tried to schedule (eg. 
on SYN), found svc but the348348+ * svc/scheduler decides that this packet should be accepted with349349+ * NF_ACCEPT because it must not be scheduled.350350+ *351351+ * 0 : scheduler can not find destination, so try bypass or352352+ * return ICMP and then NF_DROP (ip_vs_leave).353353+ *354354+ * -1 : scheduler tried to schedule but fatal error occurred, eg.355355+ * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param356356+ * failure such as missing Call-ID, ENOMEM on skb_linearize357357+ * or pe_data. In this case we should return NF_DROP without358358+ * any attempts to send ICMP with ip_vs_leave.378359 */379360struct ip_vs_conn *380361ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,381381- struct ip_vs_protocol *pp, int *ignored)362362+ struct ip_vs_proto_data *pd, int *ignored)382363{364364+ struct ip_vs_protocol *pp = pd->pp;383365 struct ip_vs_conn *cp = NULL;384366 struct ip_vs_iphdr iph;385367 struct ip_vs_dest *dest;···421371 }422372423373 /*424424- * Do not schedule replies from local real server. 
It is risky425425- * for fwmark services but mostly for persistent services.374374+ * Do not schedule replies from local real server.426375 */427376 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&428428- (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&429429- (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {377377+ (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {430378 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,431379 "Not scheduling reply for existing connection");432380 __ip_vs_conn_put(cp);···434386 /*435387 * Persistent service436388 */437437- if (svc->flags & IP_VS_SVC_F_PERSISTENT) {438438- *ignored = 0;439439- return ip_vs_sched_persist(svc, skb, pptr);440440- }389389+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)390390+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);391391+392392+ *ignored = 0;441393442394 /*443395 * Non-persistent service···449401 "check your ipvs configuration\n");450402 return NULL;451403 }452452-453453- *ignored = 0;454404455405 dest = svc->scheduler->schedule(svc, skb);456406 if (dest == NULL) {···465419 */466420 {467421 struct ip_vs_conn_param p;468468- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,469469- pptr[0], &iph.daddr, pptr[1], &p);422422+423423+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,424424+ &iph.saddr, pptr[0], &iph.daddr, pptr[1],425425+ &p);470426 cp = ip_vs_conn_new(&p, &dest->addr,471427 dest->port ? 
dest->port : pptr[1],472472- flags, dest);473473- if (!cp)428428+ flags, dest, skb->mark);429429+ if (!cp) {430430+ *ignored = -1;474431 return NULL;432432+ }475433 }476434477435 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "···497447 * no destination is available for a new connection.498448 */499449int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,500500- struct ip_vs_protocol *pp)450450+ struct ip_vs_proto_data *pd)501451{452452+ struct net *net;453453+ struct netns_ipvs *ipvs;502454 __be16 _ports[2], *pptr;503455 struct ip_vs_iphdr iph;504456 int unicast;457457+505458 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);506459507460 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);···512459 ip_vs_service_put(svc);513460 return NF_DROP;514461 }462462+ net = skb_net(skb);515463516464#ifdef CONFIG_IP_VS_IPV6517465 if (svc->af == AF_INET6)518466 unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;519467 else520468#endif521521- unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);469469+ unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);522470523471 /* if it is fwmark-based service, the cache_bypass sysctl is up524472 and the destination is a non-local unicast, then create525473 a cache_bypass connection entry */526526- if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {474474+ ipvs = net_ipvs(net);475475+ if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {527476 int ret, cs;528477 struct ip_vs_conn *cp;529478 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&···539484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);540485 {541486 struct ip_vs_conn_param p;542542- ip_vs_conn_fill_param(svc->af, iph.protocol,487487+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,543488 &iph.saddr, pptr[0],544489 &iph.daddr, pptr[1], &p);545490 cp = ip_vs_conn_new(&p, &daddr, 0,546491 IP_VS_CONN_F_BYPASS | flags,547547- NULL);492492+ NULL, skb->mark);548493 if (!cp)549494 
return NF_DROP;550495 }···553498 ip_vs_in_stats(cp, skb);554499555500 /* set state */556556- cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);501501+ cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);557502558503 /* transmit the first SYN packet */559559- ret = cp->packet_xmit(skb, cp, pp);504504+ ret = cp->packet_xmit(skb, cp, pd->pp);560505 /* do not touch skb anymore */561506562507 atomic_inc(&cp->in_pkts);···737682 struct ip_vs_protocol *pp,738683 unsigned int offset, unsigned int ihl)739684{685685+ struct netns_ipvs *ipvs;740686 unsigned int verdict = NF_DROP;741687742688 if (IP_VS_FWD_METHOD(cp) != 0) {···759703 if (!skb_make_writable(skb, offset))760704 goto out;761705706706+ ipvs = net_ipvs(skb_net(skb));707707+762708#ifdef CONFIG_IP_VS_IPV6763709 if (af == AF_INET6)764710 ip_vs_nat_icmp_v6(skb, pp, cp, 1);···770712771713#ifdef CONFIG_IP_VS_IPV6772714 if (af == AF_INET6) {773773- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)715715+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)774716 goto out;775717 } else776718#endif777777- if ((sysctl_ip_vs_snat_reroute ||719719+ if ((ipvs->sysctl_snat_reroute ||778720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&779721 ip_route_me_harder(skb, RTN_LOCAL) != 0)780722 goto out;···866808867809 ip_vs_fill_iphdr(AF_INET, cih, &ciph);868810 /* The embedded headers contain source and dest in reverse order */869869- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);811811+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);870812 if (!cp)871813 return NF_ACCEPT;872814···943885944886 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);945887 /* The embedded headers contain source and dest in reverse order */946946- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);888888+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);947889 if (!cp)948890 return NF_ACCEPT;949891···982924 * Used for NAT and local client.983925 */984926static unsigned int985985-handle_response(int af, struct sk_buff 
*skb, struct ip_vs_protocol *pp,927927+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,986928 struct ip_vs_conn *cp, int ihl)987929{930930+ struct ip_vs_protocol *pp = pd->pp;931931+ struct netns_ipvs *ipvs;932932+988933 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");989934990935 if (!skb_make_writable(skb, ihl))···1022961 * if it came from this machine itself. So re-compute1023962 * the routing information.1024963 */964964+ ipvs = net_ipvs(skb_net(skb));965965+1025966#ifdef CONFIG_IP_VS_IPV61026967 if (af == AF_INET6) {10271027- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)968968+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)1028969 goto drop;1029970 } else1030971#endif10311031- if ((sysctl_ip_vs_snat_reroute ||972972+ if ((ipvs->sysctl_snat_reroute ||1032973 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&1033974 ip_route_me_harder(skb, RTN_LOCAL) != 0)1034975 goto drop;···1038975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");10399761040977 ip_vs_out_stats(cp, skb);10411041- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);978978+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);1042979 skb->ipvs_property = 1;1043980 if (!(cp->flags & IP_VS_CONN_F_NFCT))1044981 ip_vs_notrack(skb);···1062999static unsigned int10631000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)10641001{10021002+ struct net *net = NULL;10651003 struct ip_vs_iphdr iph;10661004 struct ip_vs_protocol *pp;10051005+ struct ip_vs_proto_data *pd;10671006 struct ip_vs_conn *cp;10071007+ struct netns_ipvs *ipvs;1068100810691009 EnterFunction(11);10701010···10881022 if (unlikely(!skb_dst(skb)))10891023 return NF_ACCEPT;1090102410251025+ net = skb_net(skb);10911026 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);10921027#ifdef CONFIG_IP_VS_IPV610931028 if (af == AF_INET6) {···11121045 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);11131046 }1114104711151115- pp = ip_vs_proto_get(iph.protocol);11161116- if 
(unlikely(!pp))10481048+ pd = ip_vs_proto_data_get(net, iph.protocol);10491049+ if (unlikely(!pd))11171050 return NF_ACCEPT;10511051+ pp = pd->pp;1118105211191053 /* reassemble IP fragments */11201054#ifdef CONFIG_IP_VS_IPV6···11411073 /*11421074 * Check if the packet belongs to an existing entry11431075 */11441144- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);10761076+ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);10771077+ ipvs = net_ipvs(net);1145107811461079 if (likely(cp))11471147- return handle_response(af, skb, pp, cp, iph.len);11481148- if (sysctl_ip_vs_nat_icmp_send &&10801080+ return handle_response(af, skb, pd, cp, iph.len);10811081+ if (ipvs->sysctl_nat_icmp_send &&11491082 (pp->protocol == IPPROTO_TCP ||11501083 pp->protocol == IPPROTO_UDP ||11511084 pp->protocol == IPPROTO_SCTP)) {···11561087 sizeof(_ports), _ports);11571088 if (pptr == NULL)11581089 return NF_ACCEPT; /* Not for me */11591159- if (ip_vs_lookup_real_service(af, iph.protocol,10901090+ if (ip_vs_lookup_real_service(net, af, iph.protocol,11601091 &iph.saddr,11611092 pptr[0])) {11621093 /*···12711202static int12721203ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)12731204{12051205+ struct net *net = NULL;12741206 struct iphdr *iph;12751207 struct icmphdr _icmph, *ic;12761208 struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */12771209 struct ip_vs_iphdr ciph;12781210 struct ip_vs_conn *cp;12791211 struct ip_vs_protocol *pp;12121212+ struct ip_vs_proto_data *pd;12801213 unsigned int offset, ihl, verdict;12811214 union nf_inet_addr snet;12821215···13201249 if (cih == NULL)13211250 return NF_ACCEPT; /* The packet looks wrong, ignore */1322125113231323- pp = ip_vs_proto_get(cih->protocol);13241324- if (!pp)12521252+ net = skb_net(skb);12531253+ pd = ip_vs_proto_data_get(net, cih->protocol);12541254+ if (!pd)13251255 return NF_ACCEPT;12561256+ pp = pd->pp;1326125713271258 /* Is the embedded protocol header present? 
*/13281259 if (unlikely(cih->frag_off & htons(IP_OFFSET) &&···1338126513391266 ip_vs_fill_iphdr(AF_INET, cih, &ciph);13401267 /* The embedded headers contain source and dest in reverse order */13411341- cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);12681268+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);13421269 if (!cp) {13431270 /* The packet could also belong to a local client */13441344- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);12711271+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);13451272 if (cp) {13461273 snet.ip = iph->saddr;13471274 return handle_response_icmp(AF_INET, skb, &snet,···13851312static int13861313ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)13871314{13151315+ struct net *net = NULL;13881316 struct ipv6hdr *iph;13891317 struct icmp6hdr _icmph, *ic;13901318 struct ipv6hdr _ciph, *cih; /* The ip header contained···13931319 struct ip_vs_iphdr ciph;13941320 struct ip_vs_conn *cp;13951321 struct ip_vs_protocol *pp;13221322+ struct ip_vs_proto_data *pd;13961323 unsigned int offset, verdict;13971324 union nf_inet_addr snet;13981325 struct rt6_info *rt;···14361361 if (cih == NULL)14371362 return NF_ACCEPT; /* The packet looks wrong, ignore */1438136314391439- pp = ip_vs_proto_get(cih->nexthdr);14401440- if (!pp)13641364+ net = skb_net(skb);13651365+ pd = ip_vs_proto_data_get(net, cih->nexthdr);13661366+ if (!pd)14411367 return NF_ACCEPT;13681368+ pp = pd->pp;1442136914431370 /* Is the embedded protocol header present? 
*/14441371 /* TODO: we don't support fragmentation at the moment anyways */···1454137714551378 ip_vs_fill_iphdr(AF_INET6, cih, &ciph);14561379 /* The embedded headers contain source and dest in reverse order */14571457- cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);13801380+ cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);14581381 if (!cp) {14591382 /* The packet could also belong to a local client */14601460- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);13831383+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);14611384 if (cp) {14621385 ipv6_addr_copy(&snet.in6, &iph->saddr);14631386 return handle_response_icmp(AF_INET6, skb, &snet,···15001423static unsigned int15011424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)15021425{14261426+ struct net *net;15031427 struct ip_vs_iphdr iph;15041428 struct ip_vs_protocol *pp;14291429+ struct ip_vs_proto_data *pd;15051430 struct ip_vs_conn *cp;15061431 int ret, restart, pkts;14321432+ struct netns_ipvs *ipvs;1507143315081434 /* Already marked as IPVS request or reply? */15091435 if (skb->ipvs_property)···15601480 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);15611481 }1562148214831483+ net = skb_net(skb);15631484 /* Protocol supported? 
*/15641564- pp = ip_vs_proto_get(iph.protocol);15651565- if (unlikely(!pp))14851485+ pd = ip_vs_proto_data_get(net, iph.protocol);14861486+ if (unlikely(!pd))15661487 return NF_ACCEPT;15671567-14881488+ pp = pd->pp;15681489 /*15691490 * Check if the packet belongs to an existing connection entry15701491 */15711571- cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);14921492+ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);1572149315731494 if (unlikely(!cp)) {15741495 int v;1575149615761576- if (!pp->conn_schedule(af, skb, pp, &v, &cp))14971497+ if (!pp->conn_schedule(af, skb, pd, &v, &cp))15771498 return v;15781499 }15791500···15861505 }1587150615881507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");15891589-15081508+ net = skb_net(skb);15091509+ ipvs = net_ipvs(net);15901510 /* Check the server status */15911511 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {15921512 /* the destination server is not available */1593151315941594- if (sysctl_ip_vs_expire_nodest_conn) {15141514+ if (ipvs->sysctl_expire_nodest_conn) {15951515 /* try to expire the connection immediately */15961516 ip_vs_conn_expire_now(cp);15971517 }···16031521 }1604152216051523 ip_vs_in_stats(cp, skb);16061606- restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);15241524+ restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);16071525 if (cp->packet_xmit)16081526 ret = cp->packet_xmit(skb, cp, pp);16091527 /* do not touch skb anymore */···16171535 *16181536 * Sync connection if it is about to close to16191537 * encorage the standby servers to update the connections timeout15381538+ *15391539+ * For ONE_PKT let ip_vs_sync_conn() do the filter work.16201540 */16211621- pkts = atomic_add_return(1, &cp->in_pkts);16221622- if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&15411541+15421542+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)15431543+ pkts = ipvs->sysctl_sync_threshold[0];15441544+ else15451545+ pkts = atomic_add_return(1, &cp->in_pkts);15461546+15471547+ if 
((ipvs->sync_state & IP_VS_STATE_MASTER) &&16231548 cp->protocol == IPPROTO_SCTP) {16241549 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&16251625- (pkts % sysctl_ip_vs_sync_threshold[1]16261626- == sysctl_ip_vs_sync_threshold[0])) ||15501550+ (pkts % ipvs->sysctl_sync_threshold[1]15511551+ == ipvs->sysctl_sync_threshold[0])) ||16271552 (cp->old_state != cp->state &&16281553 ((cp->state == IP_VS_SCTP_S_CLOSED) ||16291554 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||16301555 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {16311631- ip_vs_sync_conn(cp);15561556+ ip_vs_sync_conn(net, cp);16321557 goto out;16331558 }16341559 }1635156016361561 /* Keep this block last: TCP and others with pp->num_states <= 1 */16371637- else if (af == AF_INET &&16381638- (ip_vs_sync_state & IP_VS_STATE_MASTER) &&15621562+ else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&16391563 (((cp->protocol != IPPROTO_TCP ||16401564 cp->state == IP_VS_TCP_S_ESTABLISHED) &&16411641- (pkts % sysctl_ip_vs_sync_threshold[1]16421642- == sysctl_ip_vs_sync_threshold[0])) ||15651565+ (pkts % ipvs->sysctl_sync_threshold[1]15661566+ == ipvs->sysctl_sync_threshold[0])) ||16431567 ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&16441568 ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||16451569 (cp->state == IP_VS_TCP_S_CLOSE) ||16461570 (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||16471571 (cp->state == IP_VS_TCP_S_TIME_WAIT)))))16481648- ip_vs_sync_conn(cp);15721572+ ip_vs_sync_conn(net, cp);16491573out:16501574 cp->old_state = cp->state;16511575···18701782 },18711783#endif18721784};17851785+/*17861786+ * Initialize IP Virtual Server netns mem.17871787+ */17881788+static int __net_init __ip_vs_init(struct net *net)17891789+{17901790+ struct netns_ipvs *ipvs;1873179117921792+ ipvs = net_generic(net, ip_vs_net_id);17931793+ if (ipvs == NULL) {17941794+ pr_err("%s(): no memory.\n", __func__);17951795+ return -ENOMEM;17961796+ }17971797+ ipvs->net = net;17981798+ /* Counters used for creating unique names */17991799+ 
ipvs->gen = atomic_read(&ipvs_netns_cnt);18001800+ atomic_inc(&ipvs_netns_cnt);18011801+ net->ipvs = ipvs;18021802+ printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n",18031803+ sizeof(struct netns_ipvs), ipvs->gen);18041804+ return 0;18051805+}18061806+18071807+static void __net_exit __ip_vs_cleanup(struct net *net)18081808+{18091809+ struct netns_ipvs *ipvs = net_ipvs(net);18101810+18111811+ IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen);18121812+}18131813+18141814+static struct pernet_operations ipvs_core_ops = {18151815+ .init = __ip_vs_init,18161816+ .exit = __ip_vs_cleanup,18171817+ .id = &ip_vs_net_id,18181818+ .size = sizeof(struct netns_ipvs),18191819+};1874182018751821/*18761822 * Initialize IP Virtual Server···19131791{19141792 int ret;1915179319161916- ip_vs_estimator_init();17941794+ ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */17951795+ if (ret < 0)17961796+ return ret;1917179717981798+ ip_vs_estimator_init();19181799 ret = ip_vs_control_init();19191800 if (ret < 0) {19201801 pr_err("can't setup control.\n");···19381813 goto cleanup_app;19391814 }1940181518161816+ ret = ip_vs_sync_init();18171817+ if (ret < 0) {18181818+ pr_err("can't setup sync data.\n");18191819+ goto cleanup_conn;18201820+ }18211821+19411822 ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));19421823 if (ret < 0) {19431824 pr_err("can't register hooks.\n");19441944- goto cleanup_conn;18251825+ goto cleanup_sync;19451826 }1946182719471828 pr_info("ipvs loaded.\n");19481829 return ret;1949183018311831+cleanup_sync:18321832+ ip_vs_sync_cleanup();19501833 cleanup_conn:19511834 ip_vs_conn_cleanup();19521835 cleanup_app:···19641831 ip_vs_control_cleanup();19651832 cleanup_estimator:19661833 ip_vs_estimator_cleanup();18341834+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */19671835 return ret;19681836}1969183719701838static void __exit ip_vs_cleanup(void)19711839{19721840 nf_unregister_hooks(ip_vs_ops, 
ARRAY_SIZE(ip_vs_ops));18411841+ ip_vs_sync_cleanup();19731842 ip_vs_conn_cleanup();19741843 ip_vs_app_cleanup();19751844 ip_vs_protocol_cleanup();19761845 ip_vs_control_cleanup();19771846 ip_vs_estimator_cleanup();18471847+ unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */19781848 pr_info("ipvs unloaded.\n");19791849}19801850
+575-370
net/netfilter/ipvs/ip_vs_ctl.c
···3838#include <linux/mutex.h>39394040#include <net/net_namespace.h>4141+#include <linux/nsproxy.h>4142#include <net/ip.h>4243#ifdef CONFIG_IP_VS_IPV64344#include <net/ipv6.h>···5857/* lock for service table */5958static DEFINE_RWLOCK(__ip_vs_svc_lock);60596161-/* lock for table with the real services */6262-static DEFINE_RWLOCK(__ip_vs_rs_lock);6363-6464-/* lock for state and timeout tables */6565-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);6666-6767-/* lock for drop entry handling */6868-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);6969-7070-/* lock for drop packet handling */7171-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);7272-7373-/* 1/rate drop and drop-entry variables */7474-int ip_vs_drop_rate = 0;7575-int ip_vs_drop_counter = 0;7676-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);7777-7878-/* number of virtual services */7979-static int ip_vs_num_services = 0;8080-8160/* sysctl variables */8282-static int sysctl_ip_vs_drop_entry = 0;8383-static int sysctl_ip_vs_drop_packet = 0;8484-static int sysctl_ip_vs_secure_tcp = 0;8585-static int sysctl_ip_vs_amemthresh = 1024;8686-static int sysctl_ip_vs_am_droprate = 10;8787-int sysctl_ip_vs_cache_bypass = 0;8888-int sysctl_ip_vs_expire_nodest_conn = 0;8989-int sysctl_ip_vs_expire_quiescent_template = 0;9090-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };9191-int sysctl_ip_vs_nat_icmp_send = 0;9292-#ifdef CONFIG_IP_VS_NFCT9393-int sysctl_ip_vs_conntrack;9494-#endif9595-int sysctl_ip_vs_snat_reroute = 1;9696-97619862#ifdef CONFIG_IP_VS_DEBUG9963static int sysctl_ip_vs_debug_level = 0;···7110572106#ifdef CONFIG_IP_VS_IPV673107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? 
*/7474-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)108108+static int __ip_vs_addr_is_local_v6(struct net *net,109109+ const struct in6_addr *addr)75110{76111 struct rt6_info *rt;77112 struct flowi fl = {···81114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },82115 };831168484- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);117117+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);85118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))86119 return 1;87120···92125 * update_defense_level is called from keventd and from sysctl,93126 * so it needs to protect itself from softirqs94127 */9595-static void update_defense_level(void)128128+static void update_defense_level(struct netns_ipvs *ipvs)96129{97130 struct sysinfo i;98131 static int old_secure_tcp = 0;···108141 /* si_swapinfo(&i); */109142 /* availmem = availmem - (i.totalswap - i.freeswap); */110143111111- nomem = (availmem < sysctl_ip_vs_amemthresh);144144+ nomem = (availmem < ipvs->sysctl_amemthresh);112145113146 local_bh_disable();114147115148 /* drop_entry */116116- spin_lock(&__ip_vs_dropentry_lock);117117- switch (sysctl_ip_vs_drop_entry) {149149+ spin_lock(&ipvs->dropentry_lock);150150+ switch (ipvs->sysctl_drop_entry) {118151 case 0:119119- atomic_set(&ip_vs_dropentry, 0);152152+ atomic_set(&ipvs->dropentry, 0);120153 break;121154 case 1:122155 if (nomem) {123123- atomic_set(&ip_vs_dropentry, 1);124124- sysctl_ip_vs_drop_entry = 2;156156+ atomic_set(&ipvs->dropentry, 1);157157+ ipvs->sysctl_drop_entry = 2;125158 } else {126126- atomic_set(&ip_vs_dropentry, 0);159159+ atomic_set(&ipvs->dropentry, 0);127160 }128161 break;129162 case 2:130163 if (nomem) {131131- atomic_set(&ip_vs_dropentry, 1);164164+ atomic_set(&ipvs->dropentry, 1);132165 } else {133133- atomic_set(&ip_vs_dropentry, 0);134134- sysctl_ip_vs_drop_entry = 1;166166+ atomic_set(&ipvs->dropentry, 0);167167+ ipvs->sysctl_drop_entry = 1;135168 };136169 break;137170 case 3:138138- 
atomic_set(&ip_vs_dropentry, 1);171171+ atomic_set(&ipvs->dropentry, 1);139172 break;140173 }141141- spin_unlock(&__ip_vs_dropentry_lock);174174+ spin_unlock(&ipvs->dropentry_lock);142175143176 /* drop_packet */144144- spin_lock(&__ip_vs_droppacket_lock);145145- switch (sysctl_ip_vs_drop_packet) {177177+ spin_lock(&ipvs->droppacket_lock);178178+ switch (ipvs->sysctl_drop_packet) {146179 case 0:147147- ip_vs_drop_rate = 0;180180+ ipvs->drop_rate = 0;148181 break;149182 case 1:150183 if (nomem) {151151- ip_vs_drop_rate = ip_vs_drop_counter152152- = sysctl_ip_vs_amemthresh /153153- (sysctl_ip_vs_amemthresh-availmem);154154- sysctl_ip_vs_drop_packet = 2;184184+ ipvs->drop_rate = ipvs->drop_counter185185+ = ipvs->sysctl_amemthresh /186186+ (ipvs->sysctl_amemthresh-availmem);187187+ ipvs->sysctl_drop_packet = 2;155188 } else {156156- ip_vs_drop_rate = 0;189189+ ipvs->drop_rate = 0;157190 }158191 break;159192 case 2:160193 if (nomem) {161161- ip_vs_drop_rate = ip_vs_drop_counter162162- = sysctl_ip_vs_amemthresh /163163- (sysctl_ip_vs_amemthresh-availmem);194194+ ipvs->drop_rate = ipvs->drop_counter195195+ = ipvs->sysctl_amemthresh /196196+ (ipvs->sysctl_amemthresh-availmem);164197 } else {165165- ip_vs_drop_rate = 0;166166- sysctl_ip_vs_drop_packet = 1;198198+ ipvs->drop_rate = 0;199199+ ipvs->sysctl_drop_packet = 1;167200 }168201 break;169202 case 3:170170- ip_vs_drop_rate = sysctl_ip_vs_am_droprate;203203+ ipvs->drop_rate = ipvs->sysctl_am_droprate;171204 break;172205 }173173- spin_unlock(&__ip_vs_droppacket_lock);206206+ spin_unlock(&ipvs->droppacket_lock);174207175208 /* secure_tcp */176176- spin_lock(&ip_vs_securetcp_lock);177177- switch (sysctl_ip_vs_secure_tcp) {209209+ spin_lock(&ipvs->securetcp_lock);210210+ switch (ipvs->sysctl_secure_tcp) {178211 case 0:179212 if (old_secure_tcp >= 2)180213 to_change = 0;···183216 if (nomem) {184217 if (old_secure_tcp < 2)185218 to_change = 1;186186- sysctl_ip_vs_secure_tcp = 2;219219+ ipvs->sysctl_secure_tcp = 2;187220 } else 
{188221 if (old_secure_tcp >= 2)189222 to_change = 0;···196229 } else {197230 if (old_secure_tcp >= 2)198231 to_change = 0;199199- sysctl_ip_vs_secure_tcp = 1;232232+ ipvs->sysctl_secure_tcp = 1;200233 }201234 break;202235 case 3:···204237 to_change = 1;205238 break;206239 }207207- old_secure_tcp = sysctl_ip_vs_secure_tcp;240240+ old_secure_tcp = ipvs->sysctl_secure_tcp;208241 if (to_change >= 0)209209- ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);210210- spin_unlock(&ip_vs_securetcp_lock);242242+ ip_vs_protocol_timeout_change(ipvs,243243+ ipvs->sysctl_secure_tcp > 1);244244+ spin_unlock(&ipvs->securetcp_lock);211245212246 local_bh_enable();213247}···218250 * Timer for checking the defense219251 */220252#define DEFENSE_TIMER_PERIOD 1*HZ221221-static void defense_work_handler(struct work_struct *work);222222-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);223253224254static void defense_work_handler(struct work_struct *work)225255{226226- update_defense_level();227227- if (atomic_read(&ip_vs_dropentry))228228- ip_vs_random_dropentry();256256+ struct netns_ipvs *ipvs =257257+ container_of(work, struct netns_ipvs, defense_work.work);229258230230- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);259259+ update_defense_level(ipvs);260260+ if (atomic_read(&ipvs->dropentry))261261+ ip_vs_random_dropentry(ipvs->net);262262+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);231263}232264233265int···255287/* the service table hashed by fwmark */256288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];257289258258-/*259259- * Hash table: for real service lookups260260- */261261-#define IP_VS_RTAB_BITS 4262262-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)263263-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)264264-265265-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];266266-267267-/*268268- * Trash for destinations269269- */270270-static LIST_HEAD(ip_vs_dest_trash);271271-272272-/*273273- * FTP & NULL 
virtual service counters274274- */275275-static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);276276-static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);277277-278290279291/*280292 * Returns hash value for virtual service281293 */282282-static __inline__ unsigned283283-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,284284- __be16 port)294294+static inline unsigned295295+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,296296+ const union nf_inet_addr *addr, __be16 port)285297{286298 register unsigned porth = ntohs(port);287299 __be32 addr_fold = addr->ip;···271323 addr_fold = addr->ip6[0]^addr->ip6[1]^272324 addr->ip6[2]^addr->ip6[3];273325#endif326326+ addr_fold ^= ((size_t)net>>8);274327275328 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)276329 & IP_VS_SVC_TAB_MASK;···280331/*281332 * Returns hash value of fwmark for virtual service lookup282333 */283283-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)334334+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)284335{285285- return fwmark & IP_VS_SVC_TAB_MASK;336336+ return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;286337}287338288339/*289289- * Hashes a service in the ip_vs_svc_table by <proto,addr,port>340340+ * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>290341 * or in the ip_vs_svc_fwm_table by fwmark.291342 * Should be called with locked tables.292343 */···302353303354 if (svc->fwmark == 0) {304355 /*305305- * Hash it by <protocol,addr,port> in ip_vs_svc_table356356+ * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table306357 */307307- hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,308308- svc->port);358358+ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,359359+ &svc->addr, svc->port);309360 list_add(&svc->s_list, &ip_vs_svc_table[hash]);310361 } else {311362 /*312312- * Hash it by fwmark in ip_vs_svc_fwm_table363363+ * Hash it by fwmark in 
svc_fwm_table313364 */314314- hash = ip_vs_svc_fwm_hashkey(svc->fwmark);365365+ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);315366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);316367 }317368···323374324375325376/*326326- * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.377377+ * Unhashes a service from svc_table / svc_fwm_table.327378 * Should be called with locked tables.328379 */329380static int ip_vs_svc_unhash(struct ip_vs_service *svc)···335386 }336387337388 if (svc->fwmark == 0) {338338- /* Remove it from the ip_vs_svc_table table */389389+ /* Remove it from the svc_table table */339390 list_del(&svc->s_list);340391 } else {341341- /* Remove it from the ip_vs_svc_fwm_table table */392392+ /* Remove it from the svc_fwm_table table */342393 list_del(&svc->f_list);343394 }344395···349400350401351402/*352352- * Get service by {proto,addr,port} in the service table.403403+ * Get service by {netns, proto,addr,port} in the service table.353404 */354405static inline struct ip_vs_service *355355-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,356356- __be16 vport)406406+__ip_vs_service_find(struct net *net, int af, __u16 protocol,407407+ const union nf_inet_addr *vaddr, __be16 vport)357408{358409 unsigned hash;359410 struct ip_vs_service *svc;360411361412 /* Check for "full" addressed entries */362362- hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);413413+ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);363414364415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){365416 if ((svc->af == af)366417 && ip_vs_addr_equal(af, &svc->addr, vaddr)367418 && (svc->port == vport)368368- && (svc->protocol == protocol)) {419419+ && (svc->protocol == protocol)420420+ && net_eq(svc->net, net)) {369421 /* HIT */370422 return svc;371423 }···380430 * Get service by {fwmark} in the service table.381431 */382432static inline struct ip_vs_service *383383-__ip_vs_svc_fwm_find(int af, __u32 
fwmark)433433+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)384434{385435 unsigned hash;386436 struct ip_vs_service *svc;387437388438 /* Check for fwmark addressed entries */389389- hash = ip_vs_svc_fwm_hashkey(fwmark);439439+ hash = ip_vs_svc_fwm_hashkey(net, fwmark);390440391441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {392392- if (svc->fwmark == fwmark && svc->af == af) {442442+ if (svc->fwmark == fwmark && svc->af == af443443+ && net_eq(svc->net, net)) {393444 /* HIT */394445 return svc;395446 }···400449}401450402451struct ip_vs_service *403403-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,452452+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,404453 const union nf_inet_addr *vaddr, __be16 vport)405454{406455 struct ip_vs_service *svc;456456+ struct netns_ipvs *ipvs = net_ipvs(net);407457408458 read_lock(&__ip_vs_svc_lock);409459410460 /*411461 * Check the table hashed by fwmark first412462 */413413- if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))463463+ svc = __ip_vs_svc_fwm_find(net, af, fwmark);464464+ if (fwmark && svc)414465 goto out;415466416467 /*417468 * Check the table hashed by <protocol,addr,port>418469 * for "full" addressed entries419470 */420420- svc = __ip_vs_service_find(af, protocol, vaddr, vport);471471+ svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);421472422473 if (svc == NULL423474 && protocol == IPPROTO_TCP424424- && atomic_read(&ip_vs_ftpsvc_counter)475475+ && atomic_read(&ipvs->ftpsvc_counter)425476 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {426477 /*427478 * Check if ftp service entry exists, the packet428479 * might belong to FTP data connections.429480 */430430- svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);481481+ svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);431482 }432483433484 if (svc == NULL434434- && atomic_read(&ip_vs_nullsvc_counter)) {485485+ && atomic_read(&ipvs->nullsvc_counter)) {435486 /*436487 * 
Check if the catch-all port (port zero) exists437488 */438438- svc = __ip_vs_service_find(af, protocol, vaddr, 0);489489+ svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);439490 }440491441492 out:···472519 svc->fwmark,473520 IP_VS_DBG_ADDR(svc->af, &svc->addr),474521 ntohs(svc->port), atomic_read(&svc->usecnt));522522+ free_percpu(svc->stats.cpustats);475523 kfree(svc);476524 }477525}···499545}500546501547/*502502- * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.548548+ * Hashes ip_vs_dest in rs_table by <proto,addr,port>.503549 * should be called with locked tables.504550 */505505-static int ip_vs_rs_hash(struct ip_vs_dest *dest)551551+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)506552{507553 unsigned hash;508554···516562 */517563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);518564519519- list_add(&dest->d_list, &ip_vs_rtable[hash]);565565+ list_add(&dest->d_list, &ipvs->rs_table[hash]);520566521567 return 1;522568}523569524570/*525525- * UNhashes ip_vs_dest from ip_vs_rtable.571571+ * UNhashes ip_vs_dest from rs_table.526572 * should be called with locked tables.527573 */528574static int ip_vs_rs_unhash(struct ip_vs_dest *dest)529575{530576 /*531531- * Remove it from the ip_vs_rtable table.577577+ * Remove it from the rs_table table.532578 */533579 if (!list_empty(&dest->d_list)) {534580 list_del(&dest->d_list);···542588 * Lookup real service by <proto,addr,port> in the real service table.543589 */544590struct ip_vs_dest *545545-ip_vs_lookup_real_service(int af, __u16 protocol,591591+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,546592 const union nf_inet_addr *daddr,547593 __be16 dport)548594{595595+ struct netns_ipvs *ipvs = net_ipvs(net);549596 unsigned hash;550597 struct ip_vs_dest *dest;551598···556601 */557602 hash = ip_vs_rs_hashkey(af, daddr, dport);558603559559- read_lock(&__ip_vs_rs_lock);560560- list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {604604+ 
read_lock(&ipvs->rs_lock);605605+ list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {561606 if ((dest->af == af)562607 && ip_vs_addr_equal(af, &dest->addr, daddr)563608 && (dest->port == dport)564609 && ((dest->protocol == protocol) ||565610 dest->vfwmark)) {566611 /* HIT */567567- read_unlock(&__ip_vs_rs_lock);612612+ read_unlock(&ipvs->rs_lock);568613 return dest;569614 }570615 }571571- read_unlock(&__ip_vs_rs_lock);616616+ read_unlock(&ipvs->rs_lock);572617573618 return NULL;574619}···607652 * ip_vs_lookup_real_service() looked promissing, but608653 * seems not working as expected.609654 */610610-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,655655+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,656656+ const union nf_inet_addr *daddr,611657 __be16 dport,612658 const union nf_inet_addr *vaddr,613613- __be16 vport, __u16 protocol)659659+ __be16 vport, __u16 protocol, __u32 fwmark)614660{615661 struct ip_vs_dest *dest;616662 struct ip_vs_service *svc;617663618618- svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);664664+ svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);619665 if (!svc)620666 return NULL;621667 dest = ip_vs_lookup_dest(svc, daddr, dport);···641685 __be16 dport)642686{643687 struct ip_vs_dest *dest, *nxt;688688+ struct netns_ipvs *ipvs = net_ipvs(svc->net);644689645690 /*646691 * Find the destination in trash647692 */648648- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {693693+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {649694 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "650695 "dest->refcnt=%d\n",651696 dest->vfwmark,···677720 list_del(&dest->n_list);678721 ip_vs_dst_reset(dest);679722 __ip_vs_unbind_svc(dest);723723+ free_percpu(dest->stats.cpustats);680724 kfree(dest);681725 }682726 }···695737 * are expired, and the refcnt of each destination in the trash must696738 * be 1, so we simply release them here.697739 */698698-static 
void ip_vs_trash_cleanup(void)740740+static void ip_vs_trash_cleanup(struct net *net)699741{700742 struct ip_vs_dest *dest, *nxt;743743+ struct netns_ipvs *ipvs = net_ipvs(net);701744702702- list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {745745+ list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {703746 list_del(&dest->n_list);704747 ip_vs_dst_reset(dest);705748 __ip_vs_unbind_svc(dest);749749+ free_percpu(dest->stats.cpustats);706750 kfree(dest);707751 }708752}···728768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,729769 struct ip_vs_dest_user_kern *udest, int add)730770{771771+ struct netns_ipvs *ipvs = net_ipvs(svc->net);731772 int conn_flags;732773733774 /* set the weight and the flags */···741780 conn_flags |= IP_VS_CONN_F_NOOUTPUT;742781 } else {743782 /*744744- * Put the real service in ip_vs_rtable if not present.783783+ * Put the real service in rs_table if not present.745784 * For now only for NAT!746785 */747747- write_lock_bh(&__ip_vs_rs_lock);748748- ip_vs_rs_hash(dest);749749- write_unlock_bh(&__ip_vs_rs_lock);786786+ write_lock_bh(&ipvs->rs_lock);787787+ ip_vs_rs_hash(ipvs, dest);788788+ write_unlock_bh(&ipvs->rs_lock);750789 }751790 atomic_set(&dest->conn_flags, conn_flags);752791···774813 spin_unlock(&dest->dst_lock);775814776815 if (add)777777- ip_vs_new_estimator(&dest->stats);816816+ ip_vs_new_estimator(svc->net, &dest->stats);778817779818 write_lock_bh(&__ip_vs_svc_lock);780819···811850 atype = ipv6_addr_type(&udest->addr.in6);812851 if ((!(atype & IPV6_ADDR_UNICAST) ||813852 atype & IPV6_ADDR_LINKLOCAL) &&814814- !__ip_vs_addr_is_local_v6(&udest->addr.in6))853853+ !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))815854 return -EINVAL;816855 } else817856#endif818857 {819819- atype = inet_addr_type(&init_net, udest->addr.ip);858858+ atype = inet_addr_type(svc->net, udest->addr.ip);820859 if (atype != RTN_LOCAL && atype != RTN_UNICAST)821860 return -EINVAL;822861 }···825864 if (dest == 
NULL) {826865 pr_err("%s(): no memory.\n", __func__);827866 return -ENOMEM;867867+ }868868+ dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);869869+ if (!dest->stats.cpustats) {870870+ pr_err("%s() alloc_percpu failed\n", __func__);871871+ goto err_alloc;828872 }829873830874 dest->af = svc->af;···854888855889 LeaveFunction(2);856890 return 0;891891+892892+err_alloc:893893+ kfree(dest);894894+ return -ENOMEM;857895}858896859897···9761006/*9771007 * Delete a destination (must be already unlinked from the service)9781008 */979979-static void __ip_vs_del_dest(struct ip_vs_dest *dest)10091009+static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)9801010{981981- ip_vs_kill_estimator(&dest->stats);10111011+ struct netns_ipvs *ipvs = net_ipvs(net);10121012+10131013+ ip_vs_kill_estimator(net, &dest->stats);98210149831015 /*9841016 * Remove it from the d-linked list with the real services.9851017 */986986- write_lock_bh(&__ip_vs_rs_lock);10181018+ write_lock_bh(&ipvs->rs_lock);9871019 ip_vs_rs_unhash(dest);988988- write_unlock_bh(&__ip_vs_rs_lock);10201020+ write_unlock_bh(&ipvs->rs_lock);98910219901022 /*9911023 * Decrease the refcnt of the dest, and free the dest···10061034 and only one user context can update virtual service at a10071035 time, so the operation here is OK */10081036 atomic_dec(&dest->svc->refcnt);10371037+ free_percpu(dest->stats.cpustats);10091038 kfree(dest);10101039 } else {10111040 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "···10141041 IP_VS_DBG_ADDR(dest->af, &dest->addr),10151042 ntohs(dest->port),10161043 atomic_read(&dest->refcnt));10171017- list_add(&dest->n_list, &ip_vs_dest_trash);10441044+ list_add(&dest->n_list, &ipvs->dest_trash);10181045 atomic_inc(&dest->refcnt);10191046 }10201047}···10781105 /*10791106 * Delete the destination10801107 */10811081- __ip_vs_del_dest(dest);11081108+ __ip_vs_del_dest(svc->net, dest);1082110910831110 LeaveFunction(2);10841111···10901117 * Add a service into the service hash 
table10911118 */10921119static int10931093-ip_vs_add_service(struct ip_vs_service_user_kern *u,11201120+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,10941121 struct ip_vs_service **svc_p)10951122{10961123 int ret = 0;10971124 struct ip_vs_scheduler *sched = NULL;10981125 struct ip_vs_pe *pe = NULL;10991126 struct ip_vs_service *svc = NULL;11271127+ struct netns_ipvs *ipvs = net_ipvs(net);1100112811011129 /* increase the module use count */11021130 ip_vs_use_count_inc();···11111137 }1112113811131139 if (u->pe_name && *u->pe_name) {11141114- pe = ip_vs_pe_get(u->pe_name);11401140+ pe = ip_vs_pe_getbyname(u->pe_name);11151141 if (pe == NULL) {11161142 pr_info("persistence engine module ip_vs_pe_%s "11171143 "not found\n", u->pe_name);···11331159 ret = -ENOMEM;11341160 goto out_err;11351161 }11621162+ svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);11631163+ if (!svc->stats.cpustats) {11641164+ pr_err("%s() alloc_percpu failed\n", __func__);11651165+ goto out_err;11661166+ }1136116711371168 /* I'm the first user of the service */11381169 atomic_set(&svc->usecnt, 0);···11511172 svc->flags = u->flags;11521173 svc->timeout = u->timeout * HZ;11531174 svc->netmask = u->netmask;11751175+ svc->net = net;1154117611551177 INIT_LIST_HEAD(&svc->destinations);11561178 rwlock_init(&svc->sched_lock);···1169118911701190 /* Update the virtual service counters */11711191 if (svc->port == FTPPORT)11721172- atomic_inc(&ip_vs_ftpsvc_counter);11921192+ atomic_inc(&ipvs->ftpsvc_counter);11731193 else if (svc->port == 0)11741174- atomic_inc(&ip_vs_nullsvc_counter);11941194+ atomic_inc(&ipvs->nullsvc_counter);1175119511761176- ip_vs_new_estimator(&svc->stats);11961196+ ip_vs_new_estimator(net, &svc->stats);1177119711781198 /* Count only IPv4 services for old get/setsockopt interface */11791199 if (svc->af == AF_INET)11801180- ip_vs_num_services++;12001200+ ipvs->num_services++;1181120111821202 /* Hash the service into the service table */11831203 
write_lock_bh(&__ip_vs_svc_lock);···11871207 *svc_p = svc;11881208 return 0;1189120912101210+11901211 out_err:11911212 if (svc != NULL) {11921213 ip_vs_unbind_scheduler(svc);···11961215 ip_vs_app_inc_put(svc->inc);11971216 local_bh_enable();11981217 }12181218+ if (svc->stats.cpustats)12191219+ free_percpu(svc->stats.cpustats);11991220 kfree(svc);12001221 }12011222 ip_vs_scheduler_put(sched);···12311248 old_sched = sched;1232124912331250 if (u->pe_name && *u->pe_name) {12341234- pe = ip_vs_pe_get(u->pe_name);12511251+ pe = ip_vs_pe_getbyname(u->pe_name);12351252 if (pe == NULL) {12361253 pr_info("persistence engine module ip_vs_pe_%s "12371254 "not found\n", u->pe_name);···13171334 struct ip_vs_dest *dest, *nxt;13181335 struct ip_vs_scheduler *old_sched;13191336 struct ip_vs_pe *old_pe;13371337+ struct netns_ipvs *ipvs = net_ipvs(svc->net);1320133813211339 pr_info("%s: enter\n", __func__);1322134013231341 /* Count only IPv4 services for old get/setsockopt interface */13241342 if (svc->af == AF_INET)13251325- ip_vs_num_services--;13431343+ ipvs->num_services--;1326134413271327- ip_vs_kill_estimator(&svc->stats);13451345+ ip_vs_kill_estimator(svc->net, &svc->stats);1328134613291347 /* Unbind scheduler */13301348 old_sched = svc->scheduler;···13481364 */13491365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {13501366 __ip_vs_unlink_dest(svc, dest, 0);13511351- __ip_vs_del_dest(dest);13671367+ __ip_vs_del_dest(svc->net, dest);13521368 }1353136913541370 /*13551371 * Update the virtual service counters13561372 */13571373 if (svc->port == FTPPORT)13581358- atomic_dec(&ip_vs_ftpsvc_counter);13741374+ atomic_dec(&ipvs->ftpsvc_counter);13591375 else if (svc->port == 0)13601360- atomic_dec(&ip_vs_nullsvc_counter);13761376+ atomic_dec(&ipvs->nullsvc_counter);1361137713621378 /*13631379 * Free the service if nobody refers to it···13671383 svc->fwmark,13681384 IP_VS_DBG_ADDR(svc->af, &svc->addr),13691385 ntohs(svc->port), atomic_read(&svc->usecnt));13861386+ 
free_percpu(svc->stats.cpustats);13701387 kfree(svc);13711388 }13721389···14131428/*14141429 * Flush all the virtual services14151430 */14161416-static int ip_vs_flush(void)14311431+static int ip_vs_flush(struct net *net)14171432{14181433 int idx;14191434 struct ip_vs_service *svc, *nxt;1420143514211436 /*14221422- * Flush the service table hashed by <protocol,addr,port>14371437+ * Flush the service table hashed by <netns,protocol,addr,port>14231438 */14241439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {14251425- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {14261426- ip_vs_unlink_service(svc);14401440+ list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],14411441+ s_list) {14421442+ if (net_eq(svc->net, net))14431443+ ip_vs_unlink_service(svc);14271444 }14281445 }14291446···14351448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {14361449 list_for_each_entry_safe(svc, nxt,14371450 &ip_vs_svc_fwm_table[idx], f_list) {14381438- ip_vs_unlink_service(svc);14511451+ if (net_eq(svc->net, net))14521452+ ip_vs_unlink_service(svc);14391453 }14401454 }14411455···14601472 return 0;14611473}1462147414631463-static int ip_vs_zero_all(void)14751475+static int ip_vs_zero_all(struct net *net)14641476{14651477 int idx;14661478 struct ip_vs_service *svc;1467147914681480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {14691481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {14701470- ip_vs_zero_service(svc);14821482+ if (net_eq(svc->net, net))14831483+ ip_vs_zero_service(svc);14711484 }14721485 }1473148614741487 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {14751488 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {14761476- ip_vs_zero_service(svc);14891489+ if (net_eq(svc->net, net))14901490+ ip_vs_zero_service(svc);14771491 }14781492 }1479149314801480- ip_vs_zero_stats(&ip_vs_stats);14941494+ ip_vs_zero_stats(net_ipvs(net)->tot_stats);14811495 return 0;14821496}14831497···14881498proc_do_defense_mode(ctl_table *table, int 
write,14891499 void __user *buffer, size_t *lenp, loff_t *ppos)14901500{15011501+ struct net *net = current->nsproxy->net_ns;14911502 int *valp = table->data;14921503 int val = *valp;14931504 int rc;···14991508 /* Restore the correct value */15001509 *valp = val;15011510 } else {15021502- update_defense_level();15111511+ update_defense_level(net_ipvs(net));15031512 }15041513 }15051514 return rc;···15251534 return rc;15261535}1527153615371537+static int15381538+proc_do_sync_mode(ctl_table *table, int write,15391539+ void __user *buffer, size_t *lenp, loff_t *ppos)15401540+{15411541+ int *valp = table->data;15421542+ int val = *valp;15431543+ int rc;15441544+15451545+ rc = proc_dointvec(table, write, buffer, lenp, ppos);15461546+ if (write && (*valp != val)) {15471547+ if ((*valp < 0) || (*valp > 1)) {15481548+ /* Restore the correct value */15491549+ *valp = val;15501550+ } else {15511551+ struct net *net = current->nsproxy->net_ns;15521552+ ip_vs_sync_switch_mode(net, val);15531553+ }15541554+ }15551555+ return rc;15561556+}1528155715291558/*15301559 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)15601560+ * Do not change order or insert new entries without15611561+ * align with netns init in __ip_vs_control_init()15311562 */1532156315331564static struct ctl_table vs_vars[] = {15341565 {15351566 .procname = "amemthresh",15361536- .data = &sysctl_ip_vs_amemthresh,15671567+ .maxlen = sizeof(int),15681568+ .mode = 0644,15691569+ .proc_handler = proc_dointvec,15701570+ },15711571+ {15721572+ .procname = "am_droprate",15731573+ .maxlen = sizeof(int),15741574+ .mode = 0644,15751575+ .proc_handler = proc_dointvec,15761576+ },15771577+ {15781578+ .procname = "drop_entry",15791579+ .maxlen = sizeof(int),15801580+ .mode = 0644,15811581+ .proc_handler = proc_do_defense_mode,15821582+ },15831583+ {15841584+ .procname = "drop_packet",15851585+ .maxlen = sizeof(int),15861586+ .mode = 0644,15871587+ .proc_handler = proc_do_defense_mode,15881588+ },15891589+#ifdef 
CONFIG_IP_VS_NFCT15901590+ {15911591+ .procname = "conntrack",15921592+ .maxlen = sizeof(int),15931593+ .mode = 0644,15941594+ .proc_handler = &proc_dointvec,15951595+ },15961596+#endif15971597+ {15981598+ .procname = "secure_tcp",15991599+ .maxlen = sizeof(int),16001600+ .mode = 0644,16011601+ .proc_handler = proc_do_defense_mode,16021602+ },16031603+ {16041604+ .procname = "snat_reroute",16051605+ .maxlen = sizeof(int),16061606+ .mode = 0644,16071607+ .proc_handler = &proc_dointvec,16081608+ },16091609+ {16101610+ .procname = "sync_version",16111611+ .maxlen = sizeof(int),16121612+ .mode = 0644,16131613+ .proc_handler = &proc_do_sync_mode,16141614+ },16151615+ {16161616+ .procname = "cache_bypass",16171617+ .maxlen = sizeof(int),16181618+ .mode = 0644,16191619+ .proc_handler = proc_dointvec,16201620+ },16211621+ {16221622+ .procname = "expire_nodest_conn",16231623+ .maxlen = sizeof(int),16241624+ .mode = 0644,16251625+ .proc_handler = proc_dointvec,16261626+ },16271627+ {16281628+ .procname = "expire_quiescent_template",16291629+ .maxlen = sizeof(int),16301630+ .mode = 0644,16311631+ .proc_handler = proc_dointvec,16321632+ },16331633+ {16341634+ .procname = "sync_threshold",16351635+ .maxlen =16361636+ sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),16371637+ .mode = 0644,16381638+ .proc_handler = proc_do_sync_threshold,16391639+ },16401640+ {16411641+ .procname = "nat_icmp_send",15371642 .maxlen = sizeof(int),15381643 .mode = 0644,15391644 .proc_handler = proc_dointvec,···16431556 .proc_handler = proc_dointvec,16441557 },16451558#endif16461646- {16471647- .procname = "am_droprate",16481648- .data = &sysctl_ip_vs_am_droprate,16491649- .maxlen = sizeof(int),16501650- .mode = 0644,16511651- .proc_handler = proc_dointvec,16521652- },16531653- {16541654- .procname = "drop_entry",16551655- .data = &sysctl_ip_vs_drop_entry,16561656- .maxlen = sizeof(int),16571657- .mode = 0644,16581658- .proc_handler = proc_do_defense_mode,16591659- },16601660- {16611661- 
.procname = "drop_packet",16621662- .data = &sysctl_ip_vs_drop_packet,16631663- .maxlen = sizeof(int),16641664- .mode = 0644,16651665- .proc_handler = proc_do_defense_mode,16661666- },16671667-#ifdef CONFIG_IP_VS_NFCT16681668- {16691669- .procname = "conntrack",16701670- .data = &sysctl_ip_vs_conntrack,16711671- .maxlen = sizeof(int),16721672- .mode = 0644,16731673- .proc_handler = &proc_dointvec,16741674- },16751675-#endif16761676- {16771677- .procname = "secure_tcp",16781678- .data = &sysctl_ip_vs_secure_tcp,16791679- .maxlen = sizeof(int),16801680- .mode = 0644,16811681- .proc_handler = proc_do_defense_mode,16821682- },16831683- {16841684- .procname = "snat_reroute",16851685- .data = &sysctl_ip_vs_snat_reroute,16861686- .maxlen = sizeof(int),16871687- .mode = 0644,16881688- .proc_handler = &proc_dointvec,16891689- },16901559#if 016911560 {16921561 .procname = "timeout_established",···17291686 .proc_handler = proc_dointvec_jiffies,17301687 },17311688#endif17321732- {17331733- .procname = "cache_bypass",17341734- .data = &sysctl_ip_vs_cache_bypass,17351735- .maxlen = sizeof(int),17361736- .mode = 0644,17371737- .proc_handler = proc_dointvec,17381738- },17391739- {17401740- .procname = "expire_nodest_conn",17411741- .data = &sysctl_ip_vs_expire_nodest_conn,17421742- .maxlen = sizeof(int),17431743- .mode = 0644,17441744- .proc_handler = proc_dointvec,17451745- },17461746- {17471747- .procname = "expire_quiescent_template",17481748- .data = &sysctl_ip_vs_expire_quiescent_template,17491749- .maxlen = sizeof(int),17501750- .mode = 0644,17511751- .proc_handler = proc_dointvec,17521752- },17531753- {17541754- .procname = "sync_threshold",17551755- .data = &sysctl_ip_vs_sync_threshold,17561756- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),17571757- .mode = 0644,17581758- .proc_handler = proc_do_sync_threshold,17591759- },17601760- {17611761- .procname = "nat_icmp_send",17621762- .data = &sysctl_ip_vs_nat_icmp_send,17631763- .maxlen = sizeof(int),17641764- .mode = 
0644,17651765- .proc_handler = proc_dointvec,17661766- },17671689 { }17681690};17691691···17401732};17411733EXPORT_SYMBOL_GPL(net_vs_ctl_path);1742173417431743-static struct ctl_table_header * sysctl_header;17441744-17451735#ifdef CONFIG_PROC_FS1746173617471737struct ip_vs_iter {17381738+ struct seq_net_private p; /* Do not move this, netns depends upon it*/17481739 struct list_head *table;17491740 int bucket;17501741};···17701763/* Get the Nth entry in the two lists */17711764static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)17721765{17661766+ struct net *net = seq_file_net(seq);17731767 struct ip_vs_iter *iter = seq->private;17741768 int idx;17751769 struct ip_vs_service *svc;···17781770 /* look in hash by protocol */17791771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {17801772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {17811781- if (pos-- == 0){17731773+ if (net_eq(svc->net, net) && pos-- == 0) {17821774 iter->table = ip_vs_svc_table;17831775 iter->bucket = idx;17841776 return svc;···17891781 /* keep looking in fwmark */17901782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {17911783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {17921792- if (pos-- == 0) {17841784+ if (net_eq(svc->net, net) && pos-- == 0) {17931785 iter->table = ip_vs_svc_fwm_table;17941786 iter->bucket = idx;17951787 return svc;···1943193519441936static int ip_vs_info_open(struct inode *inode, struct file *file)19451937{19461946- return seq_open_private(file, &ip_vs_info_seq_ops,19381938+ return seq_open_net(inode, file, &ip_vs_info_seq_ops,19471939 sizeof(struct ip_vs_iter));19481940}19491941···1957194919581950#endif1959195119601960-struct ip_vs_stats ip_vs_stats = {19611961- .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),19621962-};19631963-19641952#ifdef CONFIG_PROC_FS19651953static int ip_vs_stats_show(struct seq_file *seq, void *v)19661954{19551955+ struct net *net = seq_file_single_net(seq);19561956+ struct ip_vs_stats 
*tot_stats = net_ipvs(net)->tot_stats;1967195719681958/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */19691959 seq_puts(seq,···19691963 seq_printf(seq,19701964 " Conns Packets Packets Bytes Bytes\n");1971196519721972- spin_lock_bh(&ip_vs_stats.lock);19731973- seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,19741974- ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,19751975- (unsigned long long) ip_vs_stats.ustats.inbytes,19761976- (unsigned long long) ip_vs_stats.ustats.outbytes);19661966+ spin_lock_bh(&tot_stats->lock);19671967+ seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,19681968+ tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,19691969+ (unsigned long long) tot_stats->ustats.inbytes,19701970+ (unsigned long long) tot_stats->ustats.outbytes);1977197119781972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */19791973 seq_puts(seq,19801974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");19811975 seq_printf(seq,"%8X %8X %8X %16X %16X\n",19821982- ip_vs_stats.ustats.cps,19831983- ip_vs_stats.ustats.inpps,19841984- ip_vs_stats.ustats.outpps,19851985- ip_vs_stats.ustats.inbps,19861986- ip_vs_stats.ustats.outbps);19871987- spin_unlock_bh(&ip_vs_stats.lock);19761976+ tot_stats->ustats.cps,19771977+ tot_stats->ustats.inpps,19781978+ tot_stats->ustats.outpps,19791979+ tot_stats->ustats.inbps,19801980+ tot_stats->ustats.outbps);19811981+ spin_unlock_bh(&tot_stats->lock);1988198219891983 return 0;19901984}1991198519921986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)19931987{19941994- return single_open(file, ip_vs_stats_show, NULL);19881988+ return single_open_net(inode, file, ip_vs_stats_show);19951989}1996199019971991static const struct file_operations ip_vs_stats_fops = {···20021996 .release = single_release,20031997};2004199819991999+static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)20002000+{20012001+ struct net *net = 
seq_file_single_net(seq);20022002+ struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;20032003+ int i;20042004+20052005+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */20062006+ seq_puts(seq,20072007+ " Total Incoming Outgoing Incoming Outgoing\n");20082008+ seq_printf(seq,20092009+ "CPU Conns Packets Packets Bytes Bytes\n");20102010+20112011+ for_each_possible_cpu(i) {20122012+ struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);20132013+ seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",20142014+ i, u->ustats.conns, u->ustats.inpkts,20152015+ u->ustats.outpkts, (__u64)u->ustats.inbytes,20162016+ (__u64)u->ustats.outbytes);20172017+ }20182018+20192019+ spin_lock_bh(&tot_stats->lock);20202020+ seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",20212021+ tot_stats->ustats.conns, tot_stats->ustats.inpkts,20222022+ tot_stats->ustats.outpkts,20232023+ (unsigned long long) tot_stats->ustats.inbytes,20242024+ (unsigned long long) tot_stats->ustats.outbytes);20252025+20262026+/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */20272027+ seq_puts(seq,20282028+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");20292029+ seq_printf(seq, " %8X %8X %8X %16X %16X\n",20302030+ tot_stats->ustats.cps,20312031+ tot_stats->ustats.inpps,20322032+ tot_stats->ustats.outpps,20332033+ tot_stats->ustats.inbps,20342034+ tot_stats->ustats.outbps);20352035+ spin_unlock_bh(&tot_stats->lock);20362036+20372037+ return 0;20382038+}20392039+20402040+static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)20412041+{20422042+ return single_open_net(inode, file, ip_vs_stats_percpu_show);20432043+}20442044+20452045+static const struct file_operations ip_vs_stats_percpu_fops = {20462046+ .owner = THIS_MODULE,20472047+ .open = ip_vs_stats_percpu_seq_open,20482048+ .read = seq_read,20492049+ .llseek = seq_lseek,20502050+ .release = single_release,20512051+};20052052#endif2006205320072054/*20082055 * Set timeout values for tcp tcpfin udp in 
the timeout_table.20092056 */20102010-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)20572057+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)20112058{20592059+ struct ip_vs_proto_data *pd;20602060+20122061 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",20132062 u->tcp_timeout,20142063 u->tcp_fin_timeout,···2071201020722011#ifdef CONFIG_IP_VS_PROTO_TCP20732012 if (u->tcp_timeout) {20742074- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]20132013+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);20142014+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]20752015 = u->tcp_timeout * HZ;20762016 }2077201720782018 if (u->tcp_fin_timeout) {20792079- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]20192019+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);20202020+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]20802021 = u->tcp_fin_timeout * HZ;20812022 }20822023#endif2083202420842025#ifdef CONFIG_IP_VS_PROTO_UDP20852026 if (u->udp_timeout) {20862086- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]20272027+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);20282028+ pd->timeout_table[IP_VS_UDP_S_NORMAL]20872029 = u->udp_timeout * HZ;20882030 }20892031#endif···21512087static int21522088do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)21532089{20902090+ struct net *net = sock_net(sk);21542091 int ret;21552092 unsigned char arg[MAX_ARG_LEN];21562093 struct ip_vs_service_user *usvc_compat;···2186212121872122 if (cmd == IP_VS_SO_SET_FLUSH) {21882123 /* Flush the virtual service */21892189- ret = ip_vs_flush();21242124+ ret = ip_vs_flush(net);21902125 goto out_unlock;21912126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {21922127 /* Set timeout values for (tcp tcpfin udp) */21932193- ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);21282128+ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);21942129 goto out_unlock;21952130 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {21962131 struct 
ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;21972197- ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);21322132+ ret = start_sync_thread(net, dm->state, dm->mcast_ifn,21332133+ dm->syncid);21982134 goto out_unlock;21992135 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {22002136 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;22012201- ret = stop_sync_thread(dm->state);21372137+ ret = stop_sync_thread(net, dm->state);22022138 goto out_unlock;22032139 }22042140···22142148 if (cmd == IP_VS_SO_SET_ZERO) {22152149 /* if no service address is set, zero counters in all */22162150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {22172217- ret = ip_vs_zero_all();21512151+ ret = ip_vs_zero_all(net);22182152 goto out_unlock;22192153 }22202154 }···2231216522322166 /* Lookup the exact service by <protocol, addr, port> or fwmark */22332167 if (usvc.fwmark == 0)22342234- svc = __ip_vs_service_find(usvc.af, usvc.protocol,21682168+ svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,22352169 &usvc.addr, usvc.port);22362170 else22372237- svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);21712171+ svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);2238217222392173 if (cmd != IP_VS_SO_SET_ADD22402174 && (svc == NULL || svc->protocol != usvc.protocol)) {···22472181 if (svc != NULL)22482182 ret = -EEXIST;22492183 else22502250- ret = ip_vs_add_service(&usvc, &svc);21842184+ ret = ip_vs_add_service(net, &usvc, &svc);22512185 break;22522186 case IP_VS_SO_SET_EDIT:22532187 ret = ip_vs_edit_service(svc, &usvc);···23072241}2308224223092243static inline int23102310-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,22442244+__ip_vs_get_service_entries(struct net *net,22452245+ const struct ip_vs_get_services *get,23112246 struct ip_vs_get_services __user *uptr)23122247{23132248 int idx, count=0;···23192252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {23202253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {23212254 /* Only 
expose IPv4 entries to old interface */23222322- if (svc->af != AF_INET)22552255+ if (svc->af != AF_INET || !net_eq(svc->net, net))23232256 continue;2324225723252258 if (count >= get->num_services)···23382271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {23392272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {23402273 /* Only expose IPv4 entries to old interface */23412341- if (svc->af != AF_INET)22742274+ if (svc->af != AF_INET || !net_eq(svc->net, net))23422275 continue;2343227623442277 if (count >= get->num_services)···23582291}2359229223602293static inline int23612361-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,22942294+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,23622295 struct ip_vs_get_dests __user *uptr)23632296{23642297 struct ip_vs_service *svc;···23662299 int ret = 0;2367230023682301 if (get->fwmark)23692369- svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);23022302+ svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);23702303 else23712371- svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,23042304+ svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,23722305 get->port);2373230623742307 if (svc) {···24032336}2404233724052338static inline void24062406-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)23392339+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)24072340{23412341+ struct ip_vs_proto_data *pd;23422342+24082343#ifdef CONFIG_IP_VS_PROTO_TCP24092409- u->tcp_timeout =24102410- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;24112411- u->tcp_fin_timeout =24122412- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;23442344+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);23452345+ u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;23462346+ u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;24132347#endif24142348#ifdef CONFIG_IP_VS_PROTO_UDP23492349+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);24152350 
u->udp_timeout =24162416- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;23512351+ pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;24172352#endif24182353}24192354···24442375 unsigned char arg[128];24452376 int ret = 0;24462377 unsigned int copylen;23782378+ struct net *net = sock_net(sk);23792379+ struct netns_ipvs *ipvs = net_ipvs(net);2447238023812381+ BUG_ON(!net);24482382 if (!capable(CAP_NET_ADMIN))24492383 return -EPERM;24502384···24902418 struct ip_vs_getinfo info;24912419 info.version = IP_VS_VERSION_CODE;24922420 info.size = ip_vs_conn_tab_size;24932493- info.num_services = ip_vs_num_services;24212421+ info.num_services = ipvs->num_services;24942422 if (copy_to_user(user, &info, sizeof(info)) != 0)24952423 ret = -EFAULT;24962424 }···25092437 ret = -EINVAL;25102438 goto out;25112439 }25122512- ret = __ip_vs_get_service_entries(get, user);24402440+ ret = __ip_vs_get_service_entries(net, get, user);25132441 }25142442 break;25152443···25222450 entry = (struct ip_vs_service_entry *)arg;25232451 addr.ip = entry->addr;25242452 if (entry->fwmark)25252525- svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);24532453+ svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);25262454 else25272527- svc = __ip_vs_service_find(AF_INET, entry->protocol,25282528- &addr, entry->port);24552455+ svc = __ip_vs_service_find(net, AF_INET,24562456+ entry->protocol, &addr,24572457+ entry->port);25292458 if (svc) {25302459 ip_vs_copy_service(entry, svc);25312460 if (copy_to_user(user, entry, sizeof(*entry)) != 0)···25492476 ret = -EINVAL;25502477 goto out;25512478 }25522552- ret = __ip_vs_get_dest_entries(get, user);24792479+ ret = __ip_vs_get_dest_entries(net, get, user);25532480 }25542481 break;25552482···25572484 {25582485 struct ip_vs_timeout_user t;2559248625602560- __ip_vs_get_timeouts(&t);24872487+ __ip_vs_get_timeouts(net, &t);25612488 if (copy_to_user(user, &t, sizeof(t)) != 0)25622489 ret = -EFAULT;25632490 }···25682495 struct ip_vs_daemon_user d[2];2569249625702497 
memset(&d, 0, sizeof(d));25712571- if (ip_vs_sync_state & IP_VS_STATE_MASTER) {24982498+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {25722499 d[0].state = IP_VS_STATE_MASTER;25732573- strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));25742574- d[0].syncid = ip_vs_master_syncid;25002500+ strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,25012501+ sizeof(d[0].mcast_ifn));25022502+ d[0].syncid = ipvs->master_syncid;25752503 }25762576- if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {25042504+ if (ipvs->sync_state & IP_VS_STATE_BACKUP) {25772505 d[1].state = IP_VS_STATE_BACKUP;25782578- strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));25792579- d[1].syncid = ip_vs_backup_syncid;25062506+ strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,25072507+ sizeof(d[1].mcast_ifn));25082508+ d[1].syncid = ipvs->backup_syncid;25802509 }25812510 if (copy_to_user(user, &d, sizeof(d)) != 0)25822511 ret = -EFAULT;···26172542 .name = IPVS_GENL_NAME,26182543 .version = IPVS_GENL_VERSION,26192544 .maxattr = IPVS_CMD_MAX,25452545+ .netnsok = true, /* Make ipvsadm to work on netns */26202546};2621254726222548/* Policy used for first-level command attributes */···27722696 int idx = 0, i;27732697 int start = cb->args[0];27742698 struct ip_vs_service *svc;26992699+ struct net *net = skb_sknet(skb);2775270027762701 mutex_lock(&__ip_vs_mutex);27772702 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {27782703 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {27792779- if (++idx <= start)27042704+ if (++idx <= start || !net_eq(svc->net, net))27802705 continue;27812706 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {27822707 idx--;···2788271127892712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {27902713 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {27912791- if (++idx <= start)27142714+ if (++idx <= start || !net_eq(svc->net, net))27922715 continue;27932716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {27942717 idx--;···28042727 return 
skb->len;28052728}2806272928072807-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,27302730+static int ip_vs_genl_parse_service(struct net *net,27312731+ struct ip_vs_service_user_kern *usvc,28082732 struct nlattr *nla, int full_entry,28092733 struct ip_vs_service **ret_svc)28102734{···28482770 }2849277128502772 if (usvc->fwmark)28512851- svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);27732773+ svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);28522774 else28532853- svc = __ip_vs_service_find(usvc->af, usvc->protocol,27752775+ svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,28542776 &usvc->addr, usvc->port);28552777 *ret_svc = svc;28562778···28872809 return 0;28882810}2889281128902890-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)28122812+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,28132813+ struct nlattr *nla)28912814{28922815 struct ip_vs_service_user_kern usvc;28932816 struct ip_vs_service *svc;28942817 int ret;2895281828962896- ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);28192819+ ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);28972820 return ret ? 
ERR_PTR(ret) : svc;28982821}28992822···29622883 struct ip_vs_service *svc;29632884 struct ip_vs_dest *dest;29642885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];28862886+ struct net *net = skb_sknet(skb);2965288729662888 mutex_lock(&__ip_vs_mutex);29672889···29712891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))29722892 goto out_err;2973289329742974- svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);28942894+28952895+ svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);29752896 if (IS_ERR(svc) || svc == NULL)29762897 goto out_err;29772898···30863005static int ip_vs_genl_dump_daemons(struct sk_buff *skb,30873006 struct netlink_callback *cb)30883007{30083008+ struct net *net = skb_net(skb);30093009+ struct netns_ipvs *ipvs = net_ipvs(net);30103010+30893011 mutex_lock(&__ip_vs_mutex);30903090- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {30123012+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {30913013 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,30923092- ip_vs_master_mcast_ifn,30933093- ip_vs_master_syncid, cb) < 0)30143014+ ipvs->master_mcast_ifn,30153015+ ipvs->master_syncid, cb) < 0)30943016 goto nla_put_failure;3095301730963018 cb->args[0] = 1;30973019 }3098302030993099- if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {30213021+ if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {31003022 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,31013101- ip_vs_backup_mcast_ifn,31023102- ip_vs_backup_syncid, cb) < 0)30233023+ ipvs->backup_mcast_ifn,30243024+ ipvs->backup_syncid, cb) < 0)31033025 goto nla_put_failure;3104302631053027 cb->args[1] = 1;···31143030 return skb->len;31153031}3116303231173117-static int ip_vs_genl_new_daemon(struct nlattr **attrs)30333033+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)31183034{31193035 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&31203036 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&31213037 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))31223038 return 
-EINVAL;3123303931243124- return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),30403040+ return start_sync_thread(net,30413041+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),31253042 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),31263043 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));31273044}3128304531293129-static int ip_vs_genl_del_daemon(struct nlattr **attrs)30463046+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)31303047{31313048 if (!attrs[IPVS_DAEMON_ATTR_STATE])31323049 return -EINVAL;3133305031343134- return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));30513051+ return stop_sync_thread(net,30523052+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));31353053}3136305431373137-static int ip_vs_genl_set_config(struct nlattr **attrs)30553055+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)31383056{31393057 struct ip_vs_timeout_user t;3140305831413141- __ip_vs_get_timeouts(&t);30593059+ __ip_vs_get_timeouts(net, &t);3142306031433061 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])31443062 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);···31523066 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])31533067 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);3154306831553155- return ip_vs_set_timeout(&t);30693069+ return ip_vs_set_timeout(net, &t);31563070}3157307131583072static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)···31623076 struct ip_vs_dest_user_kern udest;31633077 int ret = 0, cmd;31643078 int need_full_svc = 0, need_full_dest = 0;30793079+ struct net *net;30803080+ struct netns_ipvs *ipvs;3165308130823082+ net = skb_sknet(skb);30833083+ ipvs = net_ipvs(net);31663084 cmd = info->genlhdr->cmd;3167308531683086 mutex_lock(&__ip_vs_mutex);3169308731703088 if (cmd == IPVS_CMD_FLUSH) {31713171- ret = ip_vs_flush();30893089+ ret = ip_vs_flush(net);31723090 goto out;31733091 } else if (cmd == IPVS_CMD_SET_CONFIG) {31743174- ret = 
ip_vs_genl_set_config(info->attrs);30923092+ ret = ip_vs_genl_set_config(net, info->attrs);31753093 goto out;31763094 } else if (cmd == IPVS_CMD_NEW_DAEMON ||31773095 cmd == IPVS_CMD_DEL_DAEMON) {···31913101 }3192310231933103 if (cmd == IPVS_CMD_NEW_DAEMON)31943194- ret = ip_vs_genl_new_daemon(daemon_attrs);31043104+ ret = ip_vs_genl_new_daemon(net, daemon_attrs);31953105 else31963196- ret = ip_vs_genl_del_daemon(daemon_attrs);31063106+ ret = ip_vs_genl_del_daemon(net, daemon_attrs);31973107 goto out;31983108 } else if (cmd == IPVS_CMD_ZERO &&31993109 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {32003200- ret = ip_vs_zero_all();31103110+ ret = ip_vs_zero_all(net);32013111 goto out;32023112 }32033113···32073117 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)32083118 need_full_svc = 1;3209311932103210- ret = ip_vs_genl_parse_service(&usvc,31203120+ ret = ip_vs_genl_parse_service(net, &usvc,32113121 info->attrs[IPVS_CMD_ATTR_SERVICE],32123122 need_full_svc, &svc);32133123 if (ret)···32373147 switch (cmd) {32383148 case IPVS_CMD_NEW_SERVICE:32393149 if (svc == NULL)32403240- ret = ip_vs_add_service(&usvc, &svc);31503150+ ret = ip_vs_add_service(net, &usvc, &svc);32413151 else32423152 ret = -EEXIST;32433153 break;···32753185 struct sk_buff *msg;32763186 void *reply;32773187 int ret, cmd, reply_cmd;31883188+ struct net *net;31893189+ struct netns_ipvs *ipvs;3278319031913191+ net = skb_sknet(skb);31923192+ ipvs = net_ipvs(net);32793193 cmd = info->genlhdr->cmd;3280319432813195 if (cmd == IPVS_CMD_GET_SERVICE)···33083214 {33093215 struct ip_vs_service *svc;3310321633113311- svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);32173217+ svc = ip_vs_genl_find_service(net,32183218+ info->attrs[IPVS_CMD_ATTR_SERVICE]);33123219 if (IS_ERR(svc)) {33133220 ret = PTR_ERR(svc);33143221 goto out_err;···33293234 {33303235 struct ip_vs_timeout_user t;3331323633323332- __ip_vs_get_timeouts(&t);32373237+ __ip_vs_get_timeouts(net, &t);33333238#ifdef 
CONFIG_IP_VS_PROTO_TCP33343239 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);33353240 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,···3475338034763381/* End of Generic Netlink interface definitions */3477338233833383+/*33843384+ * per netns intit/exit func.33853385+ */33863386+int __net_init __ip_vs_control_init(struct net *net)33873387+{33883388+ int idx;33893389+ struct netns_ipvs *ipvs = net_ipvs(net);33903390+ struct ctl_table *tbl;33913391+33923392+ atomic_set(&ipvs->dropentry, 0);33933393+ spin_lock_init(&ipvs->dropentry_lock);33943394+ spin_lock_init(&ipvs->droppacket_lock);33953395+ spin_lock_init(&ipvs->securetcp_lock);33963396+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);33973397+33983398+ /* Initialize rs_table */33993399+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)34003400+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);34013401+34023402+ INIT_LIST_HEAD(&ipvs->dest_trash);34033403+ atomic_set(&ipvs->ftpsvc_counter, 0);34043404+ atomic_set(&ipvs->nullsvc_counter, 0);34053405+34063406+ /* procfs stats */34073407+ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);34083408+ if (ipvs->tot_stats == NULL) {34093409+ pr_err("%s(): no memory.\n", __func__);34103410+ return -ENOMEM;34113411+ }34123412+ ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);34133413+ if (!ipvs->cpustats) {34143414+ pr_err("%s() alloc_percpu failed\n", __func__);34153415+ goto err_alloc;34163416+ }34173417+ spin_lock_init(&ipvs->tot_stats->lock);34183418+34193419+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)34203420+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);34213421+34223422+ proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);34233423+ proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);34243424+ proc_net_fops_create(net, "ip_vs_stats_percpu", 0,34253425+ &ip_vs_stats_percpu_fops);34263426+34273427+ if (!net_eq(net, &init_net)) {34283428+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);34293429+ if (tbl == NULL)34303430+ goto 
err_dup;34313431+ } else34323432+ tbl = vs_vars;34333433+ /* Initialize sysctl defaults */34343434+ idx = 0;34353435+ ipvs->sysctl_amemthresh = 1024;34363436+ tbl[idx++].data = &ipvs->sysctl_amemthresh;34373437+ ipvs->sysctl_am_droprate = 10;34383438+ tbl[idx++].data = &ipvs->sysctl_am_droprate;34393439+ tbl[idx++].data = &ipvs->sysctl_drop_entry;34403440+ tbl[idx++].data = &ipvs->sysctl_drop_packet;34413441+#ifdef CONFIG_IP_VS_NFCT34423442+ tbl[idx++].data = &ipvs->sysctl_conntrack;34433443+#endif34443444+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;34453445+ ipvs->sysctl_snat_reroute = 1;34463446+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;34473447+ ipvs->sysctl_sync_ver = 1;34483448+ tbl[idx++].data = &ipvs->sysctl_sync_ver;34493449+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;34503450+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;34513451+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;34523452+ ipvs->sysctl_sync_threshold[0] = 3;34533453+ ipvs->sysctl_sync_threshold[1] = 50;34543454+ tbl[idx].data = &ipvs->sysctl_sync_threshold;34553455+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);34563456+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;34573457+34583458+34593459+ ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,34603460+ vs_vars);34613461+ if (ipvs->sysctl_hdr == NULL)34623462+ goto err_reg;34633463+ ip_vs_new_estimator(net, ipvs->tot_stats);34643464+ ipvs->sysctl_tbl = tbl;34653465+ /* Schedule defense work */34663466+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);34673467+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);34683468+ return 0;34693469+34703470+err_reg:34713471+ if (!net_eq(net, &init_net))34723472+ kfree(tbl);34733473+err_dup:34743474+ free_percpu(ipvs->cpustats);34753475+err_alloc:34763476+ kfree(ipvs->tot_stats);34773477+ return -ENOMEM;34783478+}34793479+34803480+static void __net_exit __ip_vs_control_cleanup(struct net *net)34813481+{34823482+ struct netns_ipvs 
*ipvs = net_ipvs(net);34833483+34843484+ ip_vs_trash_cleanup(net);34853485+ ip_vs_kill_estimator(net, ipvs->tot_stats);34863486+ cancel_delayed_work_sync(&ipvs->defense_work);34873487+ cancel_work_sync(&ipvs->defense_work.work);34883488+ unregister_net_sysctl_table(ipvs->sysctl_hdr);34893489+ proc_net_remove(net, "ip_vs_stats_percpu");34903490+ proc_net_remove(net, "ip_vs_stats");34913491+ proc_net_remove(net, "ip_vs");34923492+ free_percpu(ipvs->cpustats);34933493+ kfree(ipvs->tot_stats);34943494+}34953495+34963496+static struct pernet_operations ipvs_control_ops = {34973497+ .init = __ip_vs_control_init,34983498+ .exit = __ip_vs_control_cleanup,34993499+};3478350034793501int __init ip_vs_control_init(void)34803502{34813481- int ret;34823503 int idx;35043504+ int ret;3483350534843506 EnterFunction(2);3485350734863486- /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */35083508+ /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */34873509 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {34883510 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);34893511 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);34903512 }34913491- for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {34923492- INIT_LIST_HEAD(&ip_vs_rtable[idx]);35133513+35143514+ ret = register_pernet_subsys(&ipvs_control_ops);35153515+ if (ret) {35163516+ pr_err("cannot register namespace.\n");35173517+ goto err;34933518 }34943494- smp_wmb();35193519+35203520+ smp_wmb(); /* Do we really need it now ? 
*/3495352134963522 ret = nf_register_sockopt(&ip_vs_sockopts);34973523 if (ret) {34983524 pr_err("cannot register sockopt.\n");34993499- return ret;35253525+ goto err_net;35003526 }3501352735023528 ret = ip_vs_genl_register();35033529 if (ret) {35043530 pr_err("cannot register Generic Netlink interface.\n");35053531 nf_unregister_sockopt(&ip_vs_sockopts);35063506- return ret;35323532+ goto err_net;35073533 }35083508-35093509- proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);35103510- proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);35113511-35123512- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);35133513-35143514- ip_vs_new_estimator(&ip_vs_stats);35153515-35163516- /* Hook the defense timer */35173517- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);3518353435193535 LeaveFunction(2);35203536 return 0;35373537+35383538+err_net:35393539+ unregister_pernet_subsys(&ipvs_control_ops);35403540+err:35413541+ return ret;35213542}352235433523354435243545void ip_vs_control_cleanup(void)35253546{35263547 EnterFunction(2);35273527- ip_vs_trash_cleanup();35283528- cancel_delayed_work_sync(&defense_work);35293529- cancel_work_sync(&defense_work.work);35303530- ip_vs_kill_estimator(&ip_vs_stats);35313531- unregister_sysctl_table(sysctl_header);35323532- proc_net_remove(&init_net, "ip_vs_stats");35333533- proc_net_remove(&init_net, "ip_vs");35483548+ unregister_pernet_subsys(&ipvs_control_ops);35343549 ip_vs_genl_unregister();35353550 nf_unregister_sockopt(&ip_vs_sockopts);35363551 LeaveFunction(2);
···157157 int ret = 0;158158 enum ip_conntrack_info ctinfo;159159 struct nf_conn *ct;160160+ struct net *net;160161161162#ifdef CONFIG_IP_VS_IPV6162163 /* This application helper doesn't work with IPv6 yet,···198197 */199198 {200199 struct ip_vs_conn_param p;201201- ip_vs_conn_fill_param(AF_INET, iph->protocol,202202- &from, port, &cp->caddr, 0, &p);200200+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,201201+ iph->protocol, &from, port,202202+ &cp->caddr, 0, &p);203203 n_cp = ip_vs_conn_out_get(&p);204204 }205205 if (!n_cp) {206206 struct ip_vs_conn_param p;207207- ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,207207+ ip_vs_conn_fill_param(ip_vs_conn_net(cp),208208+ AF_INET, IPPROTO_TCP, &cp->caddr,208209 0, &cp->vaddr, port, &p);209210 n_cp = ip_vs_conn_new(&p, &from, port,210211 IP_VS_CONN_F_NO_CPORT |211212 IP_VS_CONN_F_NFCT,212212- cp->dest);213213+ cp->dest, skb->mark);213214 if (!n_cp)214215 return 0;215216···260257 * would be adjusted twice.261258 */262259260260+ net = skb_net(skb);263261 cp->app_data = NULL;264264- ip_vs_tcp_conn_listen(n_cp);262262+ ip_vs_tcp_conn_listen(net, n_cp);265263 ip_vs_conn_put(n_cp);266264 return ret;267265 }···291287 union nf_inet_addr to;292288 __be16 port;293289 struct ip_vs_conn *n_cp;290290+ struct net *net;294291295292#ifdef CONFIG_IP_VS_IPV6296293 /* This application helper doesn't work with IPv6 yet,···363358364359 {365360 struct ip_vs_conn_param p;366366- ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,367367- &cp->vaddr, htons(ntohs(cp->vport)-1),368368- &p);361361+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,362362+ iph->protocol, &to, port, &cp->vaddr,363363+ htons(ntohs(cp->vport)-1), &p);369364 n_cp = ip_vs_conn_in_get(&p);370365 if (!n_cp) {371366 n_cp = ip_vs_conn_new(&p, &cp->daddr,372367 htons(ntohs(cp->dport)-1),373373- IP_VS_CONN_F_NFCT, cp->dest);368368+ IP_VS_CONN_F_NFCT, cp->dest,369369+ skb->mark);374370 if (!n_cp)375371 return 0;376372···383377 /*384378 * Move tunnel to listen 
state385379 */386386- ip_vs_tcp_conn_listen(n_cp);380380+ net = skb_net(skb);381381+ ip_vs_tcp_conn_listen(net, n_cp);387382 ip_vs_conn_put(n_cp);388383389384 return 1;···405398 .pkt_in = ip_vs_ftp_in,406399};407400408408-409401/*410410- * ip_vs_ftp initialization402402+ * per netns ip_vs_ftp initialization411403 */412412-static int __init ip_vs_ftp_init(void)404404+static int __net_init __ip_vs_ftp_init(struct net *net)413405{414406 int i, ret;415407 struct ip_vs_app *app = &ip_vs_ftp;416408417417- ret = register_ip_vs_app(app);409409+ ret = register_ip_vs_app(net, app);418410 if (ret)419411 return ret;420412421413 for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {422414 if (!ports[i])423415 continue;424424- ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);416416+ ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);425417 if (ret)426418 break;427419 pr_info("%s: loaded support on port[%d] = %d\n",···428422 }429423430424 if (ret)431431- unregister_ip_vs_app(app);425425+ unregister_ip_vs_app(net, app);432426433427 return ret;434428}429429+/*430430+ * netns exit431431+ */432432+static void __ip_vs_ftp_exit(struct net *net)433433+{434434+ struct ip_vs_app *app = &ip_vs_ftp;435435436436+ unregister_ip_vs_app(net, app);437437+}438438+439439+static struct pernet_operations ip_vs_ftp_ops = {440440+ .init = __ip_vs_ftp_init,441441+ .exit = __ip_vs_ftp_exit,442442+};443443+444444+int __init ip_vs_ftp_init(void)445445+{446446+ int rv;447447+448448+ rv = register_pernet_subsys(&ip_vs_ftp_ops);449449+ return rv;450450+}436451437452/*438453 * ip_vs_ftp finish.439454 */440455static void __exit ip_vs_ftp_exit(void)441456{442442- unregister_ip_vs_app(&ip_vs_ftp);457457+ unregister_pernet_subsys(&ip_vs_ftp_ops);443458}444459445460
+58-9
net/netfilter/ipvs/ip_vs_lblc.c
···7070 * entries that haven't been touched for a day.7171 */7272#define COUNT_FOR_FULL_EXPIRATION 307373-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;747375747675/*···116117static ctl_table vs_vars_table[] = {117118 {118119 .procname = "lblc_expiration",119119- .data = &sysctl_ip_vs_lblc_expiration,120120+ .data = NULL,120121 .maxlen = sizeof(int),121122 .mode = 0644,122123 .proc_handler = proc_dointvec_jiffies,123124 },124125 { }125126};126126-127127-static struct ctl_table_header * sysctl_header;128127129128static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)130129{···245248 struct ip_vs_lblc_entry *en, *nxt;246249 unsigned long now = jiffies;247250 int i, j;251251+ struct netns_ipvs *ipvs = net_ipvs(svc->net);248252249253 for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {250254 j = (j + 1) & IP_VS_LBLC_TAB_MASK;···253255 write_lock(&svc->sched_lock);254256 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {255257 if (time_before(now,256256- en->lastuse + sysctl_ip_vs_lblc_expiration))258258+ en->lastuse +259259+ ipvs->sysctl_lblc_expiration))257260 continue;258261259262 ip_vs_lblc_free(en);···542543 .schedule = ip_vs_lblc_schedule,543544};544545546546+/*547547+ * per netns init.548548+ */549549+static int __net_init __ip_vs_lblc_init(struct net *net)550550+{551551+ struct netns_ipvs *ipvs = net_ipvs(net);552552+553553+ if (!net_eq(net, &init_net)) {554554+ ipvs->lblc_ctl_table = kmemdup(vs_vars_table,555555+ sizeof(vs_vars_table),556556+ GFP_KERNEL);557557+ if (ipvs->lblc_ctl_table == NULL)558558+ goto err_dup;559559+ } else560560+ ipvs->lblc_ctl_table = vs_vars_table;561561+ ipvs->sysctl_lblc_expiration = 24*60*60*HZ;562562+ ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;563563+564564+ ipvs->lblc_ctl_header =565565+ register_net_sysctl_table(net, net_vs_ctl_path,566566+ ipvs->lblc_ctl_table);567567+ if (!ipvs->lblc_ctl_header)568568+ goto err_reg;569569+570570+ return 0;571571+572572+err_reg:573573+ if (!net_eq(net, 
&init_net))574574+ kfree(ipvs->lblc_ctl_table);575575+576576+err_dup:577577+ return -ENOMEM;578578+}579579+580580+static void __net_exit __ip_vs_lblc_exit(struct net *net)581581+{582582+ struct netns_ipvs *ipvs = net_ipvs(net);583583+584584+ unregister_net_sysctl_table(ipvs->lblc_ctl_header);585585+586586+ if (!net_eq(net, &init_net))587587+ kfree(ipvs->lblc_ctl_table);588588+}589589+590590+static struct pernet_operations ip_vs_lblc_ops = {591591+ .init = __ip_vs_lblc_init,592592+ .exit = __ip_vs_lblc_exit,593593+};545594546595static int __init ip_vs_lblc_init(void)547596{548597 int ret;549598550550- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);599599+ ret = register_pernet_subsys(&ip_vs_lblc_ops);600600+ if (ret)601601+ return ret;602602+551603 ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);552604 if (ret)553553- unregister_sysctl_table(sysctl_header);605605+ unregister_pernet_subsys(&ip_vs_lblc_ops);554606 return ret;555607}556608557557-558609static void __exit ip_vs_lblc_cleanup(void)559610{560560- unregister_sysctl_table(sysctl_header);561611 unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);612612+ unregister_pernet_subsys(&ip_vs_lblc_ops);562613}563614564615
+60-12
net/netfilter/ipvs/ip_vs_lblcr.c
···7070 * entries that haven't been touched for a day.7171 */7272#define COUNT_FOR_FULL_EXPIRATION 307373-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;7474-75737674/*7775 * for IPVS lblcr entry hash table···294296static ctl_table vs_vars_table[] = {295297 {296298 .procname = "lblcr_expiration",297297- .data = &sysctl_ip_vs_lblcr_expiration,299299+ .data = NULL,298300 .maxlen = sizeof(int),299301 .mode = 0644,300302 .proc_handler = proc_dointvec_jiffies,301303 },302304 { }303305};304304-305305-static struct ctl_table_header * sysctl_header;306306307307static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)308308{···421425 unsigned long now = jiffies;422426 int i, j;423427 struct ip_vs_lblcr_entry *en, *nxt;428428+ struct netns_ipvs *ipvs = net_ipvs(svc->net);424429425430 for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {426431 j = (j + 1) & IP_VS_LBLCR_TAB_MASK;427432428433 write_lock(&svc->sched_lock);429434 list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {430430- if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,431431- now))435435+ if (time_after(en->lastuse436436+ + ipvs->sysctl_lblcr_expiration, now))432437 continue;433438434439 ip_vs_lblcr_free(en);···661664 read_lock(&svc->sched_lock);662665 en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);663666 if (en) {667667+ struct netns_ipvs *ipvs = net_ipvs(svc->net);664668 /* We only hold a read lock, but this is atomic */665669 en->lastuse = jiffies;666670···673675 /* More than one destination + enough time passed by, cleanup */674676 if (atomic_read(&en->set.size) > 1 &&675677 time_after(jiffies, en->set.lastmod +676676- sysctl_ip_vs_lblcr_expiration)) {678678+ ipvs->sysctl_lblcr_expiration)) {677679 struct ip_vs_dest *m;678680679681 write_lock(&en->set.lock);···742744 .schedule = ip_vs_lblcr_schedule,743745};744746747747+/*748748+ * per netns init.749749+ */750750+static int __net_init __ip_vs_lblcr_init(struct net *net)751751+{752752+ struct netns_ipvs *ipvs = 
net_ipvs(net);753753+754754+ if (!net_eq(net, &init_net)) {755755+ ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,756756+ sizeof(vs_vars_table),757757+ GFP_KERNEL);758758+ if (ipvs->lblcr_ctl_table == NULL)759759+ goto err_dup;760760+ } else761761+ ipvs->lblcr_ctl_table = vs_vars_table;762762+ ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;763763+ ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;764764+765765+ ipvs->lblcr_ctl_header =766766+ register_net_sysctl_table(net, net_vs_ctl_path,767767+ ipvs->lblcr_ctl_table);768768+ if (!ipvs->lblcr_ctl_header)769769+ goto err_reg;770770+771771+ return 0;772772+773773+err_reg:774774+ if (!net_eq(net, &init_net))775775+ kfree(ipvs->lblcr_ctl_table);776776+777777+err_dup:778778+ return -ENOMEM;779779+}780780+781781+static void __net_exit __ip_vs_lblcr_exit(struct net *net)782782+{783783+ struct netns_ipvs *ipvs = net_ipvs(net);784784+785785+ unregister_net_sysctl_table(ipvs->lblcr_ctl_header);786786+787787+ if (!net_eq(net, &init_net))788788+ kfree(ipvs->lblcr_ctl_table);789789+}790790+791791+static struct pernet_operations ip_vs_lblcr_ops = {792792+ .init = __ip_vs_lblcr_init,793793+ .exit = __ip_vs_lblcr_exit,794794+};745795746796static int __init ip_vs_lblcr_init(void)747797{748798 int ret;749799750750- sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);800800+ ret = register_pernet_subsys(&ip_vs_lblcr_ops);801801+ if (ret)802802+ return ret;803803+751804 ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);752805 if (ret)753753- unregister_sysctl_table(sysctl_header);806806+ unregister_pernet_subsys(&ip_vs_lblcr_ops);754807 return ret;755808}756809757757-758810static void __exit ip_vs_lblcr_cleanup(void)759811{760760- unregister_sysctl_table(sysctl_header);761812 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);813813+ unregister_pernet_subsys(&ip_vs_lblcr_ops);762814}763815764816
+4-2
net/netfilter/ipvs/ip_vs_nfct.c
···141141 struct nf_conntrack_tuple *orig, new_reply;142142 struct ip_vs_conn *cp;143143 struct ip_vs_conn_param p;144144+ struct net *net = nf_ct_net(ct);144145145146 if (exp->tuple.src.l3num != PF_INET)146147 return;···156155157156 /* RS->CLIENT */158157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;159159- ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,158158+ ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,160159 &orig->src.u3, orig->src.u.tcp.port,161160 &orig->dst.u3, orig->dst.u.tcp.port, &p);162161 cp = ip_vs_conn_out_get(&p);···269268 " for conn " FMT_CONN "\n",270269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));271270272272- h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);271271+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,272272+ &tuple);273273 if (h) {274274 ct = nf_ct_tuplehash_to_ctrack(h);275275 /* Show what happens instead of calling nf_ct_kill() */
+5-12
net/netfilter/ipvs/ip_vs_pe.c
···2929}30303131/* Get pe in the pe list by name */3232-static struct ip_vs_pe *3333-ip_vs_pe_getbyname(const char *pe_name)3232+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)3433{3534 struct ip_vs_pe *pe;36353737- IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,3636+ IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,3837 pe_name);39384039 spin_lock_bh(&ip_vs_pe_lock);···5960}60616162/* Lookup pe and try to load it if it doesn't exist */6262-struct ip_vs_pe *ip_vs_pe_get(const char *name)6363+struct ip_vs_pe *ip_vs_pe_getbyname(const char *name)6364{6465 struct ip_vs_pe *pe;65666667 /* Search for the pe by name */6767- pe = ip_vs_pe_getbyname(name);6868+ pe = __ip_vs_pe_getbyname(name);68696970 /* If pe not found, load the module and search again */7071 if (!pe) {7172 request_module("ip_vs_pe_%s", name);7272- pe = ip_vs_pe_getbyname(name);7373+ pe = __ip_vs_pe_getbyname(name);7374 }74757576 return pe;7676-}7777-7878-void ip_vs_pe_put(struct ip_vs_pe *pe)7979-{8080- if (pe && pe->module)8181- module_put(pe->module);8277}83788479/* Register a pe in the pe list */
+3
net/netfilter/ipvs/ip_vs_pe_sip.c
···7171 struct ip_vs_iphdr iph;7272 unsigned int dataoff, datalen, matchoff, matchlen;7373 const char *dptr;7474+ int retc;74757576 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);7677···8483 if (dataoff >= skb->len)8584 return -EINVAL;86858686+ if ((retc=skb_linearize(skb)) < 0)8787+ return retc;8788 dptr = skb->data + dataoff;8889 datalen = skb->len - dataoff;8990
+120-5
net/netfilter/ipvs/ip_vs_proto.c
···6060 return 0;6161}62626363+/*6464+ * register an ipvs protocols netns related data6565+ */6666+static int6767+register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)6868+{6969+ struct netns_ipvs *ipvs = net_ipvs(net);7070+ unsigned hash = IP_VS_PROTO_HASH(pp->protocol);7171+ struct ip_vs_proto_data *pd =7272+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);7373+7474+ if (!pd) {7575+ pr_err("%s(): no memory.\n", __func__);7676+ return -ENOMEM;7777+ }7878+ pd->pp = pp; /* For speed issues */7979+ pd->next = ipvs->proto_data_table[hash];8080+ ipvs->proto_data_table[hash] = pd;8181+ atomic_set(&pd->appcnt, 0); /* Init app counter */8282+8383+ if (pp->init_netns != NULL)8484+ pp->init_netns(net, pd);8585+8686+ return 0;8787+}63886489/*6590 * unregister an ipvs protocol···10782 return -ESRCH;10883}109848585+/*8686+ * unregister an ipvs protocols netns data8787+ */8888+static int8989+unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)9090+{9191+ struct netns_ipvs *ipvs = net_ipvs(net);9292+ struct ip_vs_proto_data **pd_p;9393+ unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol);9494+9595+ pd_p = &ipvs->proto_data_table[hash];9696+ for (; *pd_p; pd_p = &(*pd_p)->next) {9797+ if (*pd_p == pd) {9898+ *pd_p = pd->next;9999+ if (pd->pp->exit_netns != NULL)100100+ pd->pp->exit_netns(net, pd);101101+ kfree(pd);102102+ return 0;103103+ }104104+ }105105+106106+ return -ESRCH;107107+}110108111109/*112110 * get ip_vs_protocol object by its proto.···148100}149101EXPORT_SYMBOL(ip_vs_proto_get);150102103103+/*104104+ * get ip_vs_protocol object data by netns and proto105105+ */106106+struct ip_vs_proto_data *107107+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)108108+{109109+ struct ip_vs_proto_data *pd;110110+ unsigned hash = IP_VS_PROTO_HASH(proto);111111+112112+ for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) {113113+ if (pd->pp->protocol == proto)114114+ return pd;115115+ }116116+117117+ return 
NULL;118118+}119119+120120+struct ip_vs_proto_data *121121+ip_vs_proto_data_get(struct net *net, unsigned short proto)122122+{123123+ struct netns_ipvs *ipvs = net_ipvs(net);124124+125125+ return __ipvs_proto_data_get(ipvs, proto);126126+}127127+EXPORT_SYMBOL(ip_vs_proto_data_get);151128152129/*153130 * Propagate event for state change to all protocols154131 */155155-void ip_vs_protocol_timeout_change(int flags)132132+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)156133{157157- struct ip_vs_protocol *pp;134134+ struct ip_vs_proto_data *pd;158135 int i;159136160137 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {161161- for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {162162- if (pp->timeout_change)163163- pp->timeout_change(pp, flags);138138+ for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {139139+ if (pd->pp->timeout_change)140140+ pd->pp->timeout_change(pd, flags);164141 }165142 }166143}···309236 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);310237}311238239239+/*240240+ * per network name-space init241241+ */242242+static int __net_init __ip_vs_protocol_init(struct net *net)243243+{244244+#ifdef CONFIG_IP_VS_PROTO_TCP245245+ register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);246246+#endif247247+#ifdef CONFIG_IP_VS_PROTO_UDP248248+ register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);249249+#endif250250+#ifdef CONFIG_IP_VS_PROTO_SCTP251251+ register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);252252+#endif253253+#ifdef CONFIG_IP_VS_PROTO_AH254254+ register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);255255+#endif256256+#ifdef CONFIG_IP_VS_PROTO_ESP257257+ register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);258258+#endif259259+ return 0;260260+}261261+262262+static void __net_exit __ip_vs_protocol_cleanup(struct net *net)263263+{264264+ struct netns_ipvs *ipvs = net_ipvs(net);265265+ struct ip_vs_proto_data *pd;266266+ int i;267267+268268+ /* unregister all the ipvs proto data for this netns */269269+ for (i = 0; i < 
IP_VS_PROTO_TAB_SIZE; i++) {270270+ while ((pd = ipvs->proto_data_table[i]) != NULL)271271+ unregister_ip_vs_proto_netns(net, pd);272272+ }273273+}274274+275275+static struct pernet_operations ipvs_proto_ops = {276276+ .init = __ip_vs_protocol_init,277277+ .exit = __ip_vs_protocol_cleanup,278278+};312279313280int __init ip_vs_protocol_init(void)314281{···378265 REGISTER_PROTOCOL(&ip_vs_protocol_esp);379266#endif380267 pr_info("Registered protocols (%s)\n", &protocols[2]);268268+ return register_pernet_subsys(&ipvs_proto_ops);381269382270 return 0;383271}···389275 struct ip_vs_protocol *pp;390276 int i;391277278278+ unregister_pernet_subsys(&ipvs_proto_ops);392279 /* unregister all the ipvs protocols */393280 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {394281 while ((pp = ip_vs_proto_table[i]) != NULL)
+17-28
net/netfilter/ipvs/ip_vs_proto_ah_esp.c
···4141#define PORT_ISAKMP 50042424343static void4444-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,4545- int inverse, struct ip_vs_conn_param *p)4444+ah_esp_conn_fill_param_proto(struct net *net, int af,4545+ const struct ip_vs_iphdr *iph, int inverse,4646+ struct ip_vs_conn_param *p)4647{4748 if (likely(!inverse))4848- ip_vs_conn_fill_param(af, IPPROTO_UDP,4949+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,4950 &iph->saddr, htons(PORT_ISAKMP),5051 &iph->daddr, htons(PORT_ISAKMP), p);5152 else5252- ip_vs_conn_fill_param(af, IPPROTO_UDP,5353+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,5354 &iph->daddr, htons(PORT_ISAKMP),5455 &iph->saddr, htons(PORT_ISAKMP), p);5556}56575758static struct ip_vs_conn *5858-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,5959+ah_esp_conn_in_get(int af, const struct sk_buff *skb,5960 const struct ip_vs_iphdr *iph, unsigned int proto_off,6061 int inverse)6162{6263 struct ip_vs_conn *cp;6364 struct ip_vs_conn_param p;6565+ struct net *net = skb_net(skb);64666565- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);6767+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);6668 cp = ip_vs_conn_in_get(&p);6769 if (!cp) {6870 /*···7472 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "7573 "%s%s %s->%s\n",7674 inverse ? 
"ICMP+" : "",7777- pp->name,7575+ ip_vs_proto_get(iph->protocol)->name,7876 IP_VS_DBG_ADDR(af, &iph->saddr),7977 IP_VS_DBG_ADDR(af, &iph->daddr));8078 }···85838684static struct ip_vs_conn *8785ah_esp_conn_out_get(int af, const struct sk_buff *skb,8888- struct ip_vs_protocol *pp,8986 const struct ip_vs_iphdr *iph,9087 unsigned int proto_off,9188 int inverse)9289{9390 struct ip_vs_conn *cp;9491 struct ip_vs_conn_param p;9292+ struct net *net = skb_net(skb);95939696- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);9494+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);9795 cp = ip_vs_conn_out_get(&p);9896 if (!cp) {9997 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "10098 "%s%s %s->%s\n",10199 inverse ? "ICMP+" : "",102102- pp->name,100100+ ip_vs_proto_get(iph->protocol)->name,103101 IP_VS_DBG_ADDR(af, &iph->saddr),104102 IP_VS_DBG_ADDR(af, &iph->daddr));105103 }···109107110108111109static int112112-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,110110+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,113111 int *verdict, struct ip_vs_conn **cpp)114112{115113 /*···119117 return 0;120118}121119122122-static void ah_esp_init(struct ip_vs_protocol *pp)123123-{124124- /* nothing to do now */125125-}126126-127127-128128-static void ah_esp_exit(struct ip_vs_protocol *pp)129129-{130130- /* nothing to do now */131131-}132132-133133-134120#ifdef CONFIG_IP_VS_PROTO_AH135121struct ip_vs_protocol ip_vs_protocol_ah = {136122 .name = "AH",137123 .protocol = IPPROTO_AH,138124 .num_states = 1,139125 .dont_defrag = 1,140140- .init = ah_esp_init,141141- .exit = ah_esp_exit,126126+ .init = NULL,127127+ .exit = NULL,142128 .conn_schedule = ah_esp_conn_schedule,143129 .conn_in_get = ah_esp_conn_in_get,144130 .conn_out_get = ah_esp_conn_out_get,···139149 .app_conn_bind = NULL,140150 .debug_packet = ip_vs_tcpudp_debug_packet,141151 .timeout_change = NULL, /* ISAKMP */142142- .set_state_timeout = 
NULL,143152};144153#endif145154···148159 .protocol = IPPROTO_ESP,149160 .num_states = 1,150161 .dont_defrag = 1,151151- .init = ah_esp_init,152152- .exit = ah_esp_exit,162162+ .init = NULL,163163+ .exit = NULL,153164 .conn_schedule = ah_esp_conn_schedule,154165 .conn_in_get = ah_esp_conn_in_get,155166 .conn_out_get = ah_esp_conn_out_get,
+76-77
net/netfilter/ipvs/ip_vs_proto_sctp.c
···99#include <net/ip_vs.h>10101111static int1212-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,1212+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,1313 int *verdict, struct ip_vs_conn **cpp)1414{1515+ struct net *net;1516 struct ip_vs_service *svc;1617 sctp_chunkhdr_t _schunkh, *sch;1718 sctp_sctphdr_t *sh, _sctph;···2827 sizeof(_schunkh), &_schunkh);2928 if (sch == NULL)3029 return 0;3131-3030+ net = skb_net(skb);3231 if ((sch->type == SCTP_CID_INIT) &&3333- (svc = ip_vs_service_get(af, skb->mark, iph.protocol,3232+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,3433 &iph.daddr, sh->dest))) {3534 int ignored;36353737- if (ip_vs_todrop()) {3636+ if (ip_vs_todrop(net_ipvs(net))) {3837 /*3938 * It seems that we are very loaded.4039 * We have to drop this packet :(···4746 * Let the virtual server select a real server for the4847 * incoming connection, and create a connection entry.4948 */5050- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);5151- if (!*cpp && !ignored) {5252- *verdict = ip_vs_leave(svc, skb, pp);4949+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);5050+ if (!*cpp && ignored <= 0) {5151+ if (!ignored)5252+ *verdict = ip_vs_leave(svc, skb, pd);5353+ else {5454+ ip_vs_service_put(svc);5555+ *verdict = NF_DROP;5656+ }5357 return 0;5458 }5559 ip_vs_service_put(svc);5660 }5757-6161+ /* NF_ACCEPT */5862 return 1;5963}6064···862856/*863857 * Timeout table[state]864858 */865865-static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {859859+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {866860 [IP_VS_SCTP_S_NONE] = 2 * HZ,867861 [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ,868862 [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ,···906900 return "?";907901}908902909909-static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags)910910-{911911-}912912-913913-static int914914-sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)915915-{916916-917917-return 
ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST,918918- sctp_state_name_table, sname, to);919919-}920920-921903static inline int922922-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,904904+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,923905 int direction, const struct sk_buff *skb)924906{925907 sctp_chunkhdr_t _sctpch, *sch;···965971966972 IP_VS_DBG_BUF(8, "%s %s %s:%d->"967973 "%s:%d state: %s->%s conn->refcnt:%d\n",968968- pp->name,974974+ pd->pp->name,969975 ((direction == IP_VS_DIR_OUTPUT) ?970976 "output " : "input "),971977 IP_VS_DBG_ADDR(cp->af, &cp->daddr),···989995 }990996 }991997 }998998+ if (likely(pd))999999+ cp->timeout = pd->timeout_table[cp->state = next_state];10001000+ else /* What to do ? */10011001+ cp->timeout = sctp_timeouts[cp->state = next_state];9921002993993- cp->timeout = pp->timeout_table[cp->state = next_state];994994-995995- return 1;10031003+ return 1;9961004}99710059981006static int9991007sctp_state_transition(struct ip_vs_conn *cp, int direction,10001000- const struct sk_buff *skb, struct ip_vs_protocol *pp)10081008+ const struct sk_buff *skb, struct ip_vs_proto_data *pd)10011009{10021010 int ret = 0;1003101110041012 spin_lock(&cp->lock);10051005- ret = set_sctp_state(pp, cp, direction, skb);10131013+ ret = set_sctp_state(pd, cp, direction, skb);10061014 spin_unlock(&cp->lock);1007101510081016 return ret;10091017}10101010-10111011-/*10121012- * Hash table for SCTP application incarnations10131013- */10141014-#define SCTP_APP_TAB_BITS 410151015-#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)10161016-#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)10171017-10181018-static struct list_head sctp_apps[SCTP_APP_TAB_SIZE];10191019-static DEFINE_SPINLOCK(sctp_app_lock);1020101810211019static inline __u16 sctp_app_hashkey(__be16 port)10221020{···10161030 & SCTP_APP_TAB_MASK;10171031}1018103210191019-static int sctp_register_app(struct ip_vs_app *inc)10331033+static int 
sctp_register_app(struct net *net, struct ip_vs_app *inc)10201034{10211035 struct ip_vs_app *i;10221036 __u16 hash;10231037 __be16 port = inc->port;10241038 int ret = 0;10391039+ struct netns_ipvs *ipvs = net_ipvs(net);10401040+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);1025104110261042 hash = sctp_app_hashkey(port);1027104310281028- spin_lock_bh(&sctp_app_lock);10291029- list_for_each_entry(i, &sctp_apps[hash], p_list) {10441044+ spin_lock_bh(&ipvs->sctp_app_lock);10451045+ list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {10301046 if (i->port == port) {10311047 ret = -EEXIST;10321048 goto out;10331049 }10341050 }10351035- list_add(&inc->p_list, &sctp_apps[hash]);10361036- atomic_inc(&ip_vs_protocol_sctp.appcnt);10511051+ list_add(&inc->p_list, &ipvs->sctp_apps[hash]);10521052+ atomic_inc(&pd->appcnt);10371053out:10381038- spin_unlock_bh(&sctp_app_lock);10541054+ spin_unlock_bh(&ipvs->sctp_app_lock);1039105510401056 return ret;10411057}1042105810431043-static void sctp_unregister_app(struct ip_vs_app *inc)10591059+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)10441060{10451045- spin_lock_bh(&sctp_app_lock);10461046- atomic_dec(&ip_vs_protocol_sctp.appcnt);10611061+ struct netns_ipvs *ipvs = net_ipvs(net);10621062+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);10631063+10641064+ spin_lock_bh(&ipvs->sctp_app_lock);10651065+ atomic_dec(&pd->appcnt);10471066 list_del(&inc->p_list);10481048- spin_unlock_bh(&sctp_app_lock);10671067+ spin_unlock_bh(&ipvs->sctp_app_lock);10491068}1050106910511070static int sctp_app_conn_bind(struct ip_vs_conn *cp)10521071{10721072+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));10531073 int hash;10541074 struct ip_vs_app *inc;10551075 int result = 0;···10661074 /* Lookup application incarnations and bind the right one */10671075 hash = sctp_app_hashkey(cp->vport);1068107610691069- spin_lock(&sctp_app_lock);10701070- list_for_each_entry(inc, 
&sctp_apps[hash], p_list) {10771077+ spin_lock(&ipvs->sctp_app_lock);10781078+ list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) {10711079 if (inc->port == cp->vport) {10721080 if (unlikely(!ip_vs_app_inc_get(inc)))10731081 break;10741074- spin_unlock(&sctp_app_lock);10821082+ spin_unlock(&ipvs->sctp_app_lock);1075108310761084 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"10771085 "%s:%u to app %s on port %u\n",···10871095 goto out;10881096 }10891097 }10901090- spin_unlock(&sctp_app_lock);10981098+ spin_unlock(&ipvs->sctp_app_lock);10911099out:10921100 return result;10931101}1094110210951095-static void ip_vs_sctp_init(struct ip_vs_protocol *pp)11031103+/* ---------------------------------------------11041104+ * timeouts is netns related now.11051105+ * ---------------------------------------------11061106+ */11071107+static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)10961108{10971097- IP_VS_INIT_HASH_TABLE(sctp_apps);10981098- pp->timeout_table = sctp_timeouts;11091109+ struct netns_ipvs *ipvs = net_ipvs(net);11101110+11111111+ ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);11121112+ spin_lock_init(&ipvs->tcp_app_lock);11131113+ pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,11141114+ sizeof(sctp_timeouts));10991115}1100111611011101-11021102-static void ip_vs_sctp_exit(struct ip_vs_protocol *pp)11171117+static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)11031118{11041104-11191119+ kfree(pd->timeout_table);11051120}1106112111071122struct ip_vs_protocol ip_vs_protocol_sctp = {11081108- .name = "SCTP",11091109- .protocol = IPPROTO_SCTP,11101110- .num_states = IP_VS_SCTP_S_LAST,11111111- .dont_defrag = 0,11121112- .appcnt = ATOMIC_INIT(0),11131113- .init = ip_vs_sctp_init,11141114- .exit = ip_vs_sctp_exit,11151115- .register_app = sctp_register_app,11231123+ .name = "SCTP",11241124+ .protocol = IPPROTO_SCTP,11251125+ .num_states = IP_VS_SCTP_S_LAST,11261126+ .dont_defrag = 0,11271127+ 
.init = NULL,11281128+ .exit = NULL,11291129+ .init_netns = __ip_vs_sctp_init,11301130+ .exit_netns = __ip_vs_sctp_exit,11311131+ .register_app = sctp_register_app,11161132 .unregister_app = sctp_unregister_app,11171117- .conn_schedule = sctp_conn_schedule,11181118- .conn_in_get = ip_vs_conn_in_get_proto,11191119- .conn_out_get = ip_vs_conn_out_get_proto,11201120- .snat_handler = sctp_snat_handler,11211121- .dnat_handler = sctp_dnat_handler,11221122- .csum_check = sctp_csum_check,11231123- .state_name = sctp_state_name,11331133+ .conn_schedule = sctp_conn_schedule,11341134+ .conn_in_get = ip_vs_conn_in_get_proto,11351135+ .conn_out_get = ip_vs_conn_out_get_proto,11361136+ .snat_handler = sctp_snat_handler,11371137+ .dnat_handler = sctp_dnat_handler,11381138+ .csum_check = sctp_csum_check,11391139+ .state_name = sctp_state_name,11241140 .state_transition = sctp_state_transition,11251125- .app_conn_bind = sctp_app_conn_bind,11261126- .debug_packet = ip_vs_tcpudp_debug_packet,11271127- .timeout_change = sctp_timeout_change,11281128- .set_state_timeout = sctp_set_state_timeout,11411141+ .app_conn_bind = sctp_app_conn_bind,11421142+ .debug_packet = ip_vs_tcpudp_debug_packet,11431143+ .timeout_change = NULL,11291144};
+76-66
net/netfilter/ipvs/ip_vs_proto_tcp.c
···99 * as published by the Free Software Foundation; either version1010 * 2 of the License, or (at your option) any later version.1111 *1212- * Changes:1212+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>1313 *1414+ * Network name space (netns) aware.1515+ * Global data moved to netns i.e struct netns_ipvs1616+ * tcp_timeouts table has copy per netns in a hash table per1717+ * protocol ip_vs_proto_data and is handled by netns1418 */15191620#define KMSG_COMPONENT "IPVS"···3228#include <net/ip_vs.h>33293430static int3535-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,3131+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,3632 int *verdict, struct ip_vs_conn **cpp)3733{3434+ struct net *net;3835 struct ip_vs_service *svc;3936 struct tcphdr _tcph, *th;4037 struct ip_vs_iphdr iph;···4742 *verdict = NF_DROP;4843 return 0;4944 }5050-4545+ net = skb_net(skb);5146 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */5247 if (th->syn &&5353- (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,5454- th->dest))) {4848+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,4949+ &iph.daddr, th->dest))) {5550 int ignored;56515757- if (ip_vs_todrop()) {5252+ if (ip_vs_todrop(net_ipvs(net))) {5853 /*5954 * It seems that we are very loaded.6055 * We have to drop this packet :(···6863 * Let the virtual server select a real server for the6964 * incoming connection, and create a connection entry.7065 */7171- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);7272- if (!*cpp && !ignored) {7373- *verdict = ip_vs_leave(svc, skb, pp);6666+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);6767+ if (!*cpp && ignored <= 0) {6868+ if (!ignored)6969+ *verdict = ip_vs_leave(svc, skb, pd);7070+ else {7171+ ip_vs_service_put(svc);7272+ *verdict = NF_DROP;7373+ }7474 return 0;7575 }7676 ip_vs_service_put(svc);7777 }7878+ /* NF_ACCEPT */7879 return 1;7980}8081···349338/*350339 * Timeout table[state]351340 
*/352352-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {341341+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {353342 [IP_VS_TCP_S_NONE] = 2*HZ,354343 [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,355344 [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,···448437/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},449438};450439451451-static struct tcp_states_t *tcp_state_table = tcp_states;452452-453453-454454-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)440440+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)455441{456442 int on = (flags & 1); /* secure_tcp */457443···458450 ** for most if not for all of the applications. Something459451 ** like "capabilities" (flags) for each object.460452 */461461- tcp_state_table = (on? tcp_states_dos : tcp_states);462462-}463463-464464-static int465465-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)466466-{467467- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,468468- tcp_state_name_table, sname, to);453453+ pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);469454}470455471456static inline int tcp_state_idx(struct tcphdr *th)···475474}476475477476static inline void478478-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,477477+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,479478 int direction, struct tcphdr *th)480479{481480 int state_idx;···498497 goto tcp_state_out;499498 }500499501501- new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];500500+ new_state =501501+ pd->tcp_state_table[state_off+state_idx].next_state[cp->state];502502503503 tcp_state_out:504504 if (new_state != cp->state) {···507505508506 IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"509507 "%s:%d state: %s->%s conn->refcnt:%d\n",510510- pp->name,508508+ pd->pp->name,511509 ((state_off == TCP_DIR_OUTPUT) ?512510 "output " : "input "),513511 th->syn ? 
'S' : '.',···537535 }538536 }539537540540- cp->timeout = pp->timeout_table[cp->state = new_state];538538+ if (likely(pd))539539+ cp->timeout = pd->timeout_table[cp->state = new_state];540540+ else /* What to do ? */541541+ cp->timeout = tcp_timeouts[cp->state = new_state];541542}542542-543543544544/*545545 * Handle state transitions···549545static int550546tcp_state_transition(struct ip_vs_conn *cp, int direction,551547 const struct sk_buff *skb,552552- struct ip_vs_protocol *pp)548548+ struct ip_vs_proto_data *pd)553549{554550 struct tcphdr _tcph, *th;555551···564560 return 0;565561566562 spin_lock(&cp->lock);567567- set_tcp_state(pp, cp, direction, th);563563+ set_tcp_state(pd, cp, direction, th);568564 spin_unlock(&cp->lock);569565570566 return 1;571567}572572-573573-574574-/*575575- * Hash table for TCP application incarnations576576- */577577-#define TCP_APP_TAB_BITS 4578578-#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)579579-#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)580580-581581-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];582582-static DEFINE_SPINLOCK(tcp_app_lock);583568584569static inline __u16 tcp_app_hashkey(__be16 port)585570{···577584}578585579586580580-static int tcp_register_app(struct ip_vs_app *inc)587587+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)581588{582589 struct ip_vs_app *i;583590 __u16 hash;584591 __be16 port = inc->port;585592 int ret = 0;593593+ struct netns_ipvs *ipvs = net_ipvs(net);594594+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);586595587596 hash = tcp_app_hashkey(port);588597589589- spin_lock_bh(&tcp_app_lock);590590- list_for_each_entry(i, &tcp_apps[hash], p_list) {598598+ spin_lock_bh(&ipvs->tcp_app_lock);599599+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {591600 if (i->port == port) {592601 ret = -EEXIST;593602 goto out;594603 }595604 }596596- list_add(&inc->p_list, &tcp_apps[hash]);597597- atomic_inc(&ip_vs_protocol_tcp.appcnt);605605+ 
list_add(&inc->p_list, &ipvs->tcp_apps[hash]);606606+ atomic_inc(&pd->appcnt);598607599608 out:600600- spin_unlock_bh(&tcp_app_lock);609609+ spin_unlock_bh(&ipvs->tcp_app_lock);601610 return ret;602611}603612604613605614static void606606-tcp_unregister_app(struct ip_vs_app *inc)615615+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)607616{608608- spin_lock_bh(&tcp_app_lock);609609- atomic_dec(&ip_vs_protocol_tcp.appcnt);617617+ struct netns_ipvs *ipvs = net_ipvs(net);618618+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);619619+620620+ spin_lock_bh(&ipvs->tcp_app_lock);621621+ atomic_dec(&pd->appcnt);610622 list_del(&inc->p_list);611611- spin_unlock_bh(&tcp_app_lock);623623+ spin_unlock_bh(&ipvs->tcp_app_lock);612624}613625614626615627static int616628tcp_app_conn_bind(struct ip_vs_conn *cp)617629{630630+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));618631 int hash;619632 struct ip_vs_app *inc;620633 int result = 0;···632633 /* Lookup application incarnations and bind the right one */633634 hash = tcp_app_hashkey(cp->vport);634635635635- spin_lock(&tcp_app_lock);636636- list_for_each_entry(inc, &tcp_apps[hash], p_list) {636636+ spin_lock(&ipvs->tcp_app_lock);637637+ list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {637638 if (inc->port == cp->vport) {638639 if (unlikely(!ip_vs_app_inc_get(inc)))639640 break;640640- spin_unlock(&tcp_app_lock);641641+ spin_unlock(&ipvs->tcp_app_lock);641642642643 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"643644 "%s:%u to app %s on port %u\n",···654655 goto out;655656 }656657 }657657- spin_unlock(&tcp_app_lock);658658+ spin_unlock(&ipvs->tcp_app_lock);658659659660 out:660661 return result;···664665/*665666 * Set LISTEN timeout. 
(ip_vs_conn_put will setup timer)666667 */667667-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)668668+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)668669{670670+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);671671+669672 spin_lock(&cp->lock);670673 cp->state = IP_VS_TCP_S_LISTEN;671671- cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];674674+ cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]675675+ : tcp_timeouts[IP_VS_TCP_S_LISTEN]);672676 spin_unlock(&cp->lock);673677}674678675675-676676-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)679679+/* ---------------------------------------------680680+ * timeouts is netns related now.681681+ * ---------------------------------------------682682+ */683683+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)677684{678678- IP_VS_INIT_HASH_TABLE(tcp_apps);679679- pp->timeout_table = tcp_timeouts;685685+ struct netns_ipvs *ipvs = net_ipvs(net);686686+687687+ ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);688688+ spin_lock_init(&ipvs->tcp_app_lock);689689+ pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,690690+ sizeof(tcp_timeouts));691691+ pd->tcp_state_table = tcp_states;680692}681693682682-683683-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)694694+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)684695{696696+ kfree(pd->timeout_table);685697}686698687699···701691 .protocol = IPPROTO_TCP,702692 .num_states = IP_VS_TCP_S_LAST,703693 .dont_defrag = 0,704704- .appcnt = ATOMIC_INIT(0),705705- .init = ip_vs_tcp_init,706706- .exit = ip_vs_tcp_exit,694694+ .init = NULL,695695+ .exit = NULL,696696+ .init_netns = __ip_vs_tcp_init,697697+ .exit_netns = __ip_vs_tcp_exit,707698 .register_app = tcp_register_app,708699 .unregister_app = tcp_unregister_app,709700 .conn_schedule = tcp_conn_schedule,···718707 .app_conn_bind = tcp_app_conn_bind,719708 .debug_packet = 
ip_vs_tcpudp_debug_packet,720709 .timeout_change = tcp_timeout_change,721721- .set_state_timeout = tcp_set_state_timeout,722710};
+57-53
net/netfilter/ipvs/ip_vs_proto_udp.c
···99 * as published by the Free Software Foundation; either version1010 * 2 of the License, or (at your option) any later version.1111 *1212- * Changes:1212+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>1313+ * Network name space (netns) aware.1314 *1415 */1516···2928#include <net/ip6_checksum.h>30293130static int3232-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,3131+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,3332 int *verdict, struct ip_vs_conn **cpp)3433{3434+ struct net *net;3535 struct ip_vs_service *svc;3636 struct udphdr _udph, *uh;3737 struct ip_vs_iphdr iph;···4442 *verdict = NF_DROP;4543 return 0;4644 }4747-4848- svc = ip_vs_service_get(af, skb->mark, iph.protocol,4545+ net = skb_net(skb);4646+ svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,4947 &iph.daddr, uh->dest);5048 if (svc) {5149 int ignored;52505353- if (ip_vs_todrop()) {5151+ if (ip_vs_todrop(net_ipvs(net))) {5452 /*5553 * It seems that we are very loaded.5654 * We have to drop this packet :(···6462 * Let the virtual server select a real server for the6563 * incoming connection, and create a connection entry.6664 */6767- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);6868- if (!*cpp && !ignored) {6969- *verdict = ip_vs_leave(svc, skb, pp);6565+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);6666+ if (!*cpp && ignored <= 0) {6767+ if (!ignored)6868+ *verdict = ip_vs_leave(svc, skb, pd);6969+ else {7070+ ip_vs_service_put(svc);7171+ *verdict = NF_DROP;7272+ }7073 return 0;7174 }7275 ip_vs_service_put(svc);7376 }7777+ /* NF_ACCEPT */7478 return 1;7579}7680···346338 return 1;347339}348340349349-350350-/*351351- * Note: the caller guarantees that only one of register_app,352352- * unregister_app or app_conn_bind is called each time.353353- */354354-355355-#define UDP_APP_TAB_BITS 4356356-#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)357357-#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)358358-359359-static struct 
list_head udp_apps[UDP_APP_TAB_SIZE];360360-static DEFINE_SPINLOCK(udp_app_lock);361361-362341static inline __u16 udp_app_hashkey(__be16 port)363342{364343 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)···353358}354359355360356356-static int udp_register_app(struct ip_vs_app *inc)361361+static int udp_register_app(struct net *net, struct ip_vs_app *inc)357362{358363 struct ip_vs_app *i;359364 __u16 hash;360365 __be16 port = inc->port;361366 int ret = 0;367367+ struct netns_ipvs *ipvs = net_ipvs(net);368368+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);362369363370 hash = udp_app_hashkey(port);364371365372366366- spin_lock_bh(&udp_app_lock);367367- list_for_each_entry(i, &udp_apps[hash], p_list) {373373+ spin_lock_bh(&ipvs->udp_app_lock);374374+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {368375 if (i->port == port) {369376 ret = -EEXIST;370377 goto out;371378 }372379 }373373- list_add(&inc->p_list, &udp_apps[hash]);374374- atomic_inc(&ip_vs_protocol_udp.appcnt);380380+ list_add(&inc->p_list, &ipvs->udp_apps[hash]);381381+ atomic_inc(&pd->appcnt);375382376383 out:377377- spin_unlock_bh(&udp_app_lock);384384+ spin_unlock_bh(&ipvs->udp_app_lock);378385 return ret;379386}380387381388382389static void383383-udp_unregister_app(struct ip_vs_app *inc)390390+udp_unregister_app(struct net *net, struct ip_vs_app *inc)384391{385385- spin_lock_bh(&udp_app_lock);386386- atomic_dec(&ip_vs_protocol_udp.appcnt);392392+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);393393+ struct netns_ipvs *ipvs = net_ipvs(net);394394+395395+ spin_lock_bh(&ipvs->udp_app_lock);396396+ atomic_dec(&pd->appcnt);387397 list_del(&inc->p_list);388388- spin_unlock_bh(&udp_app_lock);398398+ spin_unlock_bh(&ipvs->udp_app_lock);389399}390400391401392402static int udp_app_conn_bind(struct ip_vs_conn *cp)393403{404404+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));394405 int hash;395406 struct ip_vs_app *inc;396407 int 
result = 0;···408407 /* Lookup application incarnations and bind the right one */409408 hash = udp_app_hashkey(cp->vport);410409411411- spin_lock(&udp_app_lock);412412- list_for_each_entry(inc, &udp_apps[hash], p_list) {410410+ spin_lock(&ipvs->udp_app_lock);411411+ list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {413412 if (inc->port == cp->vport) {414413 if (unlikely(!ip_vs_app_inc_get(inc)))415414 break;416416- spin_unlock(&udp_app_lock);415415+ spin_unlock(&ipvs->udp_app_lock);417416418417 IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"419418 "%s:%u to app %s on port %u\n",···430429 goto out;431430 }432431 }433433- spin_unlock(&udp_app_lock);432432+ spin_unlock(&ipvs->udp_app_lock);434433435434 out:436435 return result;437436}438437439438440440-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {439439+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {441440 [IP_VS_UDP_S_NORMAL] = 5*60*HZ,442441 [IP_VS_UDP_S_LAST] = 2*HZ,443442};···446445 [IP_VS_UDP_S_NORMAL] = "UDP",447446 [IP_VS_UDP_S_LAST] = "BUG!",448447};449449-450450-451451-static int452452-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)453453-{454454- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,455455- udp_state_name_table, sname, to);456456-}457448458449static const char * udp_state_name(int state)459450{···457464static int458465udp_state_transition(struct ip_vs_conn *cp, int direction,459466 const struct sk_buff *skb,460460- struct ip_vs_protocol *pp)467467+ struct ip_vs_proto_data *pd)461468{462462- cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];469469+ if (unlikely(!pd)) {470470+ pr_err("UDP no ns data\n");471471+ return 0;472472+ }473473+474474+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];463475 return 1;464476}465477466466-static void udp_init(struct ip_vs_protocol *pp)478478+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)467479{468468- IP_VS_INIT_HASH_TABLE(udp_apps);469469- pp->timeout_table = udp_timeouts;480480+ 
struct netns_ipvs *ipvs = net_ipvs(net);481481+482482+ ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);483483+ spin_lock_init(&ipvs->udp_app_lock);484484+ pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,485485+ sizeof(udp_timeouts));470486}471487472472-static void udp_exit(struct ip_vs_protocol *pp)488488+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)473489{490490+ kfree(pd->timeout_table);474491}475492476493···489486 .protocol = IPPROTO_UDP,490487 .num_states = IP_VS_UDP_S_LAST,491488 .dont_defrag = 0,492492- .init = udp_init,493493- .exit = udp_exit,489489+ .init = NULL,490490+ .exit = NULL,491491+ .init_netns = __udp_init,492492+ .exit_netns = __udp_exit,494493 .conn_schedule = udp_conn_schedule,495494 .conn_in_get = ip_vs_conn_in_get_proto,496495 .conn_out_get = ip_vs_conn_out_get_proto,···506501 .app_conn_bind = udp_app_conn_bind,507502 .debug_packet = ip_vs_tcpudp_debug_packet,508503 .timeout_change = NULL,509509- .set_state_timeout = udp_set_state_timeout,510504};
+1000-275
net/netfilter/ipvs/ip_vs_sync.c
···55 * high-performance and highly available server based on a66 * cluster of servers.77 *88+ * Version 1, is capable of handling both version 0 and 1 messages.99+ * Version 0 is the plain old format.1010+ * Note Version 0 receivers will just drop Ver 1 messages.1111+ * Version 1 is capable of handle IPv6, Persistence data,1212+ * time-outs, and firewall marks.1313+ * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.1414+ * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=01515+ *1616+ * Definitions Message: is a complete datagram1717+ * Sync_conn: is a part of a Message1818+ * Param Data is an option to a Sync_conn.1919+ *820 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>921 *1022 * ip_vs_sync: sync connection info from master load balancer to backups···2715 * Alexandre Cassen : Added SyncID support for incoming sync2816 * messages filtering.2917 * Justin Ossevoort : Fix endian problem on sync message size.1818+ * Hans Schillstrom : Added Version 1: i.e. IPv6,1919+ * Persistence support, fwmark and time-out.3020 */31213222#define KMSG_COMPONENT "IPVS"···4935#include <linux/wait.h>5036#include <linux/kernel.h>51373838+#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */3939+5240#include <net/ip.h>5341#include <net/sock.h>5442···5943#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */6044#define IP_VS_SYNC_PORT 8848 /* multicast port */61454646+#define SYNC_PROTO_VER 1 /* Protocol version in header */62476348/*6449 * IPVS sync connection entry5050+ * Version 0, i.e. original version.6551 */6666-struct ip_vs_sync_conn {5252+struct ip_vs_sync_conn_v0 {6753 __u8 reserved;68546955 /* Protocol, addresses and port numbers */···8971 struct ip_vs_seq out_seq; /* outgoing seq. 
struct */9072};91739292-struct ip_vs_sync_thread_data {9393- struct socket *sock;9494- char *buf;9595-};9696-9797-#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))9898-#define FULL_CONN_SIZE \9999-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))100100-101101-10274/*103103- The master mulitcasts messages to the backup load balancers in the104104- following format.7575+ Sync Connection format (sync_conn)1057610677 0 1 2 310778 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 110879 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+109109- | Count Conns | SyncID | Size |8080+ | Type | Protocol | Ver. | Size |8181+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+8282+ | Flags |8383+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+8484+ | State | cport |8585+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+8686+ | vport | dport |8787+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+8888+ | fwmark |8989+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+9090+ | timeout (in sec.) |9191+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+9292+ | ... |9393+ | IP-Addresses (v4 or v6) |9494+ | ... |9595+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+9696+ Optional Parameters.9797+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+9898+ | Param. Type | Param. Length | Param. data |9999+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |100100+ | ... |101101+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+102102+ | | Param Type | Param. 
Length |103103+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+104104+ | Param data |105105+ | Last Param data should be padded for 32 bit alignment |106106+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+107107+*/108108+109109+/*110110+ * Type 0, IPv4 sync connection format111111+ */112112+struct ip_vs_sync_v4 {113113+ __u8 type;114114+ __u8 protocol; /* Which protocol (TCP/UDP) */115115+ __be16 ver_size; /* Version msb 4 bits */116116+ /* Flags and state transition */117117+ __be32 flags; /* status flags */118118+ __be16 state; /* state info */119119+ /* Protocol, addresses and port numbers */120120+ __be16 cport;121121+ __be16 vport;122122+ __be16 dport;123123+ __be32 fwmark; /* Firewall mark from skb */124124+ __be32 timeout; /* cp timeout */125125+ __be32 caddr; /* client address */126126+ __be32 vaddr; /* virtual address */127127+ __be32 daddr; /* destination address */128128+ /* The sequence options start here */129129+ /* PE data padded to 32bit alignment after seq. options */130130+};131131+/*132132+ * Type 2 messages IPv6133133+ */134134+struct ip_vs_sync_v6 {135135+ __u8 type;136136+ __u8 protocol; /* Which protocol (TCP/UDP) */137137+ __be16 ver_size; /* Version msb 4 bits */138138+ /* Flags and state transition */139139+ __be32 flags; /* status flags */140140+ __be16 state; /* state info */141141+ /* Protocol, addresses and port numbers */142142+ __be16 cport;143143+ __be16 vport;144144+ __be16 dport;145145+ __be32 fwmark; /* Firewall mark from skb */146146+ __be32 timeout; /* cp timeout */147147+ struct in6_addr caddr; /* client address */148148+ struct in6_addr vaddr; /* virtual address */149149+ struct in6_addr daddr; /* destination address */150150+ /* The sequence options start here */151151+ /* PE data padded to 32bit alignment after seq. 
options */152152+};153153+154154+union ip_vs_sync_conn {155155+ struct ip_vs_sync_v4 v4;156156+ struct ip_vs_sync_v6 v6;157157+};158158+159159+/* Bits in Type field in above */160160+#define STYPE_INET6 0161161+#define STYPE_F_INET6 (1 << STYPE_INET6)162162+163163+#define SVER_SHIFT 12 /* Shift to get version */164164+#define SVER_MASK 0x0fff /* Mask to strip version */165165+166166+#define IPVS_OPT_SEQ_DATA 1167167+#define IPVS_OPT_PE_DATA 2168168+#define IPVS_OPT_PE_NAME 3169169+#define IPVS_OPT_PARAM 7170170+171171+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))172172+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))173173+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))174174+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))175175+176176+struct ip_vs_sync_thread_data {177177+ struct net *net;178178+ struct socket *sock;179179+ char *buf;180180+};181181+182182+/* Version 0 definition of packet sizes */183183+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))184184+#define FULL_CONN_SIZE \185185+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))186186+187187+188188+/*189189+ The master mulitcasts messages (Datagrams) to the backup load balancers190190+ in the following format.191191+192192+ Version 1:193193+ Note, first byte should be Zero, so ver 0 receivers will drop the packet.194194+195195+ 0 1 2 3196196+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1197197+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+198198+ | 0 | SyncID | Size |199199+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+200200+ | Count Conns | Version | Reserved, set to Zero |110201 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+111202 | |112203 | IPVS Sync Connection (1) |113204 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+114205 | . |115115- | . |206206+ ~ . ~116207 | . 
|117208 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+118209 | |119210 | IPVS Sync Connection (n) |120211 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+212212+213213+ Version 0 Header214214+ 0 1 2 3215215+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1216216+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+217217+ | Count Conns | SyncID | Size |218218+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+219219+ | IPVS Sync Connection (1) |121220*/122221123222#define SYNC_MESG_HEADER_LEN 4124223#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */125224126126-struct ip_vs_sync_mesg {225225+/* Version 0 header */226226+struct ip_vs_sync_mesg_v0 {127227 __u8 nr_conns;128228 __u8 syncid;129229 __u16 size;···249113 /* ip_vs_sync_conn entries start here */250114};251115252252-/* the maximum length of sync (sending/receiving) message */253253-static int sync_send_mesg_maxlen;254254-static int sync_recv_mesg_maxlen;116116+/* Version 1 header */117117+struct ip_vs_sync_mesg {118118+ __u8 reserved; /* must be zero */119119+ __u8 syncid;120120+ __u16 size;121121+ __u8 nr_conns;122122+ __s8 version; /* SYNC_PROTO_VER */123123+ __u16 spare;124124+ /* ip_vs_sync_conn entries start here */125125+};255126256127struct ip_vs_sync_buff {257128 struct list_head list;···270127 unsigned char *end;271128};272129273273-274274-/* the sync_buff list head and the lock */275275-static LIST_HEAD(ip_vs_sync_queue);276276-static DEFINE_SPINLOCK(ip_vs_sync_lock);277277-278278-/* current sync_buff for accepting new conn entries */279279-static struct ip_vs_sync_buff *curr_sb = NULL;280280-static DEFINE_SPINLOCK(curr_sb_lock);281281-282282-/* ipvs sync daemon state */283283-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;284284-volatile int ip_vs_master_syncid = 0;285285-volatile int ip_vs_backup_syncid = 0;286286-287287-/* multicast interface name */288288-char 
ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];289289-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];290290-291291-/* sync daemon tasks */292292-static struct task_struct *sync_master_thread;293293-static struct task_struct *sync_backup_thread;294294-295130/* multicast addr */296131static struct sockaddr_in mcast_addr = {297132 .sin_family = AF_INET,···277156 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),278157};279158159159+/*160160+ * Copy of struct ip_vs_seq161161+ * From unaligned network order to aligned host order162162+ */163163+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)164164+{165165+ ho->init_seq = get_unaligned_be32(&no->init_seq);166166+ ho->delta = get_unaligned_be32(&no->delta);167167+ ho->previous_delta = get_unaligned_be32(&no->previous_delta);168168+}280169281281-static inline struct ip_vs_sync_buff *sb_dequeue(void)170170+/*171171+ * Copy of struct ip_vs_seq172172+ * From Aligned host order to unaligned network order173173+ */174174+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)175175+{176176+ put_unaligned_be32(ho->init_seq, &no->init_seq);177177+ put_unaligned_be32(ho->delta, &no->delta);178178+ put_unaligned_be32(ho->previous_delta, &no->previous_delta);179179+}180180+181181+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)282182{283183 struct ip_vs_sync_buff *sb;284184285285- spin_lock_bh(&ip_vs_sync_lock);286286- if (list_empty(&ip_vs_sync_queue)) {185185+ spin_lock_bh(&ipvs->sync_lock);186186+ if (list_empty(&ipvs->sync_queue)) {287187 sb = NULL;288188 } else {289289- sb = list_entry(ip_vs_sync_queue.next,189189+ sb = list_entry(ipvs->sync_queue.next,290190 struct ip_vs_sync_buff,291191 list);292192 list_del(&sb->list);293193 }294294- spin_unlock_bh(&ip_vs_sync_lock);194194+ spin_unlock_bh(&ipvs->sync_lock);295195296196 return sb;297197}298198299299-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)199199+/*200200+ * Create a new sync buffer for Version 1 
proto.201201+ */202202+static inline struct ip_vs_sync_buff *203203+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)300204{301205 struct ip_vs_sync_buff *sb;302206303207 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))304208 return NULL;305209306306- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {210210+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);211211+ if (!sb->mesg) {307212 kfree(sb);308213 return NULL;309214 }215215+ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */216216+ sb->mesg->version = SYNC_PROTO_VER;217217+ sb->mesg->syncid = ipvs->master_syncid;218218+ sb->mesg->size = sizeof(struct ip_vs_sync_mesg);310219 sb->mesg->nr_conns = 0;311311- sb->mesg->syncid = ip_vs_master_syncid;312312- sb->mesg->size = 4;313313- sb->head = (unsigned char *)sb->mesg + 4;314314- sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;220220+ sb->mesg->spare = 0;221221+ sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);222222+ sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;223223+315224 sb->firstuse = jiffies;316225 return sb;317226}···352201 kfree(sb);353202}354203355355-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)204204+static inline void sb_queue_tail(struct netns_ipvs *ipvs)356205{357357- spin_lock(&ip_vs_sync_lock);358358- if (ip_vs_sync_state & IP_VS_STATE_MASTER)359359- list_add_tail(&sb->list, &ip_vs_sync_queue);206206+ struct ip_vs_sync_buff *sb = ipvs->sync_buff;207207+208208+ spin_lock(&ipvs->sync_lock);209209+ if (ipvs->sync_state & IP_VS_STATE_MASTER)210210+ list_add_tail(&sb->list, &ipvs->sync_queue);360211 else361212 ip_vs_sync_buff_release(sb);362362- spin_unlock(&ip_vs_sync_lock);213213+ spin_unlock(&ipvs->sync_lock);363214}364215365216/*···369216 * than the specified time or the specified time is zero.370217 */371218static inline struct ip_vs_sync_buff *372372-get_curr_sync_buff(unsigned long time)219219+get_curr_sync_buff(struct netns_ipvs *ipvs, 
unsigned long time)373220{374221 struct ip_vs_sync_buff *sb;375222376376- spin_lock_bh(&curr_sb_lock);377377- if (curr_sb && (time == 0 ||378378- time_before(jiffies - curr_sb->firstuse, time))) {379379- sb = curr_sb;380380- curr_sb = NULL;223223+ spin_lock_bh(&ipvs->sync_buff_lock);224224+ if (ipvs->sync_buff && (time == 0 ||225225+ time_before(jiffies - ipvs->sync_buff->firstuse, time))) {226226+ sb = ipvs->sync_buff;227227+ ipvs->sync_buff = NULL;381228 } else382229 sb = NULL;383383- spin_unlock_bh(&curr_sb_lock);230230+ spin_unlock_bh(&ipvs->sync_buff_lock);384231 return sb;385232}386233234234+/*235235+ * Switch mode from sending version 0 or 1236236+ * - must handle sync_buf237237+ */238238+void ip_vs_sync_switch_mode(struct net *net, int mode)239239+{240240+ struct netns_ipvs *ipvs = net_ipvs(net);241241+242242+ if (!ipvs->sync_state & IP_VS_STATE_MASTER)243243+ return;244244+ if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)245245+ return;246246+247247+ spin_lock_bh(&ipvs->sync_buff_lock);248248+ /* Buffer empty ? 
then let buf_create do the job */249249+ if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {250250+ kfree(ipvs->sync_buff);251251+ ipvs->sync_buff = NULL;252252+ } else {253253+ spin_lock_bh(&ipvs->sync_lock);254254+ if (ipvs->sync_state & IP_VS_STATE_MASTER)255255+ list_add_tail(&ipvs->sync_buff->list,256256+ &ipvs->sync_queue);257257+ else258258+ ip_vs_sync_buff_release(ipvs->sync_buff);259259+ spin_unlock_bh(&ipvs->sync_lock);260260+ }261261+ spin_unlock_bh(&ipvs->sync_buff_lock);262262+}387263388264/*389389- * Add an ip_vs_conn information into the current sync_buff.390390- * Called by ip_vs_in.265265+ * Create a new sync buffer for Version 0 proto.391266 */392392-void ip_vs_sync_conn(struct ip_vs_conn *cp)267267+static inline struct ip_vs_sync_buff *268268+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)393269{394394- struct ip_vs_sync_mesg *m;395395- struct ip_vs_sync_conn *s;270270+ struct ip_vs_sync_buff *sb;271271+ struct ip_vs_sync_mesg_v0 *mesg;272272+273273+ if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))274274+ return NULL;275275+276276+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);277277+ if (!sb->mesg) {278278+ kfree(sb);279279+ return NULL;280280+ }281281+ mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;282282+ mesg->nr_conns = 0;283283+ mesg->syncid = ipvs->master_syncid;284284+ mesg->size = sizeof(struct ip_vs_sync_mesg_v0);285285+ sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);286286+ sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;287287+ sb->firstuse = jiffies;288288+ return sb;289289+}290290+291291+/*292292+ * Version 0 , could be switched in by sys_ctl.293293+ * Add an ip_vs_conn information into the current sync_buff.294294+ */295295+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)296296+{297297+ struct netns_ipvs *ipvs = net_ipvs(net);298298+ struct ip_vs_sync_mesg_v0 *m;299299+ struct ip_vs_sync_conn_v0 *s;396300 int len;397301398398- 
spin_lock(&curr_sb_lock);399399- if (!curr_sb) {400400- if (!(curr_sb=ip_vs_sync_buff_create())) {401401- spin_unlock(&curr_sb_lock);302302+ if (unlikely(cp->af != AF_INET))303303+ return;304304+ /* Do not sync ONE PACKET */305305+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)306306+ return;307307+308308+ spin_lock(&ipvs->sync_buff_lock);309309+ if (!ipvs->sync_buff) {310310+ ipvs->sync_buff =311311+ ip_vs_sync_buff_create_v0(ipvs);312312+ if (!ipvs->sync_buff) {313313+ spin_unlock(&ipvs->sync_buff_lock);402314 pr_err("ip_vs_sync_buff_create failed.\n");403315 return;404316 }···471253472254 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :473255 SIMPLE_CONN_SIZE;474474- m = curr_sb->mesg;475475- s = (struct ip_vs_sync_conn *)curr_sb->head;256256+ m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;257257+ s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;476258477259 /* copy members */260260+ s->reserved = 0;478261 s->protocol = cp->protocol;479262 s->cport = cp->cport;480263 s->vport = cp->vport;···493274494275 m->nr_conns++;495276 m->size += len;496496- curr_sb->head += len;277277+ ipvs->sync_buff->head += len;497278498279 /* check if there is a space for next one */499499- if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {500500- sb_queue_tail(curr_sb);501501- curr_sb = NULL;280280+ if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {281281+ sb_queue_tail(ipvs);282282+ ipvs->sync_buff = NULL;502283 }503503- spin_unlock(&curr_sb_lock);284284+ spin_unlock(&ipvs->sync_buff_lock);504285505286 /* synchronize its controller if it has */506287 if (cp->control)507507- ip_vs_sync_conn(cp->control);288288+ ip_vs_sync_conn(net, cp->control);508289}509290510510-static inline int511511-ip_vs_conn_fill_param_sync(int af, int protocol,512512- const union nf_inet_addr *caddr, __be16 cport,513513- const union nf_inet_addr *vaddr, __be16 vport,514514- struct ip_vs_conn_param *p)291291+/*292292+ * Add an ip_vs_conn information into the current 
sync_buff.293293+ * Called by ip_vs_in.294294+ * Sending Version 1 messages295295+ */296296+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)515297{516516- /* XXX: Need to take into account persistence engine */517517- ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);298298+ struct netns_ipvs *ipvs = net_ipvs(net);299299+ struct ip_vs_sync_mesg *m;300300+ union ip_vs_sync_conn *s;301301+ __u8 *p;302302+ unsigned int len, pe_name_len, pad;303303+304304+ /* Handle old version of the protocol */305305+ if (ipvs->sysctl_sync_ver == 0) {306306+ ip_vs_sync_conn_v0(net, cp);307307+ return;308308+ }309309+ /* Do not sync ONE PACKET */310310+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)311311+ goto control;312312+sloop:313313+ /* Sanity checks */314314+ pe_name_len = 0;315315+ if (cp->pe_data_len) {316316+ if (!cp->pe_data || !cp->dest) {317317+ IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");318318+ return;319319+ }320320+ pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);321321+ }322322+323323+ spin_lock(&ipvs->sync_buff_lock);324324+325325+#ifdef CONFIG_IP_VS_IPV6326326+ if (cp->af == AF_INET6)327327+ len = sizeof(struct ip_vs_sync_v6);328328+ else329329+#endif330330+ len = sizeof(struct ip_vs_sync_v4);331331+332332+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK)333333+ len += sizeof(struct ip_vs_sync_conn_options) + 2;334334+335335+ if (cp->pe_data_len)336336+ len += cp->pe_data_len + 2; /* + Param hdr field */337337+ if (pe_name_len)338338+ len += pe_name_len + 2;339339+340340+ /* check if there is a space for this one */341341+ pad = 0;342342+ if (ipvs->sync_buff) {343343+ pad = (4 - (size_t)ipvs->sync_buff->head) & 3;344344+ if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {345345+ sb_queue_tail(ipvs);346346+ ipvs->sync_buff = NULL;347347+ pad = 0;348348+ }349349+ }350350+351351+ if (!ipvs->sync_buff) {352352+ ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);353353+ if (!ipvs->sync_buff) {354354+ 
spin_unlock(&ipvs->sync_buff_lock);355355+ pr_err("ip_vs_sync_buff_create failed.\n");356356+ return;357357+ }358358+ }359359+360360+ m = ipvs->sync_buff->mesg;361361+ p = ipvs->sync_buff->head;362362+ ipvs->sync_buff->head += pad + len;363363+ m->size += pad + len;364364+ /* Add ev. padding from prev. sync_conn */365365+ while (pad--)366366+ *(p++) = 0;367367+368368+ s = (union ip_vs_sync_conn *)p;369369+370370+ /* Set message type & copy members */371371+ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);372372+ s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */373373+ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);374374+ s->v4.state = htons(cp->state);375375+ s->v4.protocol = cp->protocol;376376+ s->v4.cport = cp->cport;377377+ s->v4.vport = cp->vport;378378+ s->v4.dport = cp->dport;379379+ s->v4.fwmark = htonl(cp->fwmark);380380+ s->v4.timeout = htonl(cp->timeout / HZ);381381+ m->nr_conns++;382382+383383+#ifdef CONFIG_IP_VS_IPV6384384+ if (cp->af == AF_INET6) {385385+ p += sizeof(struct ip_vs_sync_v6);386386+ ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);387387+ ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);388388+ ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);389389+ } else390390+#endif391391+ {392392+ p += sizeof(struct ip_vs_sync_v4); /* options ptr */393393+ s->v4.caddr = cp->caddr.ip;394394+ s->v4.vaddr = cp->vaddr.ip;395395+ s->v4.daddr = cp->daddr.ip;396396+ }397397+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {398398+ *(p++) = IPVS_OPT_SEQ_DATA;399399+ *(p++) = sizeof(struct ip_vs_sync_conn_options);400400+ hton_seq((struct ip_vs_seq *)p, &cp->in_seq);401401+ p += sizeof(struct ip_vs_seq);402402+ hton_seq((struct ip_vs_seq *)p, &cp->out_seq);403403+ p += sizeof(struct ip_vs_seq);404404+ }405405+ /* Handle pe data */406406+ if (cp->pe_data_len && cp->pe_data) {407407+ *(p++) = IPVS_OPT_PE_DATA;408408+ *(p++) = cp->pe_data_len;409409+ memcpy(p, cp->pe_data, cp->pe_data_len);410410+ p += cp->pe_data_len;411411+ if (pe_name_len) {412412+ /* Add 
PE_NAME */413413+ *(p++) = IPVS_OPT_PE_NAME;414414+ *(p++) = pe_name_len;415415+ memcpy(p, cp->pe->name, pe_name_len);416416+ p += pe_name_len;417417+ }418418+ }419419+420420+ spin_unlock(&ipvs->sync_buff_lock);421421+422422+control:423423+ /* synchronize its controller if it has */424424+ cp = cp->control;425425+ if (!cp)426426+ return;427427+ /*428428+ * Reduce sync rate for templates429429+ * i.e only increment in_pkts for Templates.430430+ */431431+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {432432+ int pkts = atomic_add_return(1, &cp->in_pkts);433433+434434+ if (pkts % ipvs->sysctl_sync_threshold[1] != 1)435435+ return;436436+ }437437+ goto sloop;438438+}439439+440440+/*441441+ * fill_param used by version 1442442+ */443443+static inline int444444+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,445445+ struct ip_vs_conn_param *p,446446+ __u8 *pe_data, unsigned int pe_data_len,447447+ __u8 *pe_name, unsigned int pe_name_len)448448+{449449+#ifdef CONFIG_IP_VS_IPV6450450+ if (af == AF_INET6)451451+ ip_vs_conn_fill_param(net, af, sc->v6.protocol,452452+ (const union nf_inet_addr *)&sc->v6.caddr,453453+ sc->v6.cport,454454+ (const union nf_inet_addr *)&sc->v6.vaddr,455455+ sc->v6.vport, p);456456+ else457457+#endif458458+ ip_vs_conn_fill_param(net, af, sc->v4.protocol,459459+ (const union nf_inet_addr *)&sc->v4.caddr,460460+ sc->v4.cport,461461+ (const union nf_inet_addr *)&sc->v4.vaddr,462462+ sc->v4.vport, p);463463+ /* Handle pe data */464464+ if (pe_data_len) {465465+ if (pe_name_len) {466466+ char buff[IP_VS_PENAME_MAXLEN+1];467467+468468+ memcpy(buff, pe_name, pe_name_len);469469+ buff[pe_name_len]=0;470470+ p->pe = __ip_vs_pe_getbyname(buff);471471+ if (!p->pe) {472472+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",473473+ buff);474474+ return 1;475475+ }476476+ } else {477477+ IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");478478+ return 1;479479+ }480480+481481+ p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);482482+ if 
(!p->pe_data) {483483+ if (p->pe->module)484484+ module_put(p->pe->module);485485+ return -ENOMEM;486486+ }487487+ memcpy(p->pe_data, pe_data, pe_data_len);488488+ p->pe_data_len = pe_data_len;489489+ }518490 return 0;519491}520492521493/*522522- * Process received multicast message and create the corresponding523523- * ip_vs_conn entries.494494+ * Connection Add / Update.495495+ * Common for version 0 and 1 reception of backup sync_conns.496496+ * Param: ...497497+ * timeout is in sec.524498 */525525-static void ip_vs_process_message(const char *buffer, const size_t buflen)499499+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,500500+ unsigned int flags, unsigned int state,501501+ unsigned int protocol, unsigned int type,502502+ const union nf_inet_addr *daddr, __be16 dport,503503+ unsigned long timeout, __u32 fwmark,504504+ struct ip_vs_sync_conn_options *opt)526505{527527- struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;528528- struct ip_vs_sync_conn *s;529529- struct ip_vs_sync_conn_options *opt;530530- struct ip_vs_conn *cp;531531- struct ip_vs_protocol *pp;532506 struct ip_vs_dest *dest;507507+ struct ip_vs_conn *cp;508508+ struct netns_ipvs *ipvs = net_ipvs(net);509509+510510+ if (!(flags & IP_VS_CONN_F_TEMPLATE))511511+ cp = ip_vs_conn_in_get(param);512512+ else513513+ cp = ip_vs_ct_in_get(param);514514+515515+ if (cp && param->pe_data) /* Free pe_data */516516+ kfree(param->pe_data);517517+ if (!cp) {518518+ /*519519+ * Find the appropriate destination for the connection.520520+ * If it is not found the connection will remain unbound521521+ * but still handled.522522+ */523523+ dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,524524+ param->vport, protocol, fwmark);525525+526526+ /* Set the approprite ativity flag */527527+ if (protocol == IPPROTO_TCP) {528528+ if (state != IP_VS_TCP_S_ESTABLISHED)529529+ flags |= IP_VS_CONN_F_INACTIVE;530530+ else531531+ flags &= ~IP_VS_CONN_F_INACTIVE;532532+ } else if 
(protocol == IPPROTO_SCTP) {533533+ if (state != IP_VS_SCTP_S_ESTABLISHED)534534+ flags |= IP_VS_CONN_F_INACTIVE;535535+ else536536+ flags &= ~IP_VS_CONN_F_INACTIVE;537537+ }538538+ cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);539539+ if (dest)540540+ atomic_dec(&dest->refcnt);541541+ if (!cp) {542542+ if (param->pe_data)543543+ kfree(param->pe_data);544544+ IP_VS_DBG(2, "BACKUP, add new conn. failed\n");545545+ return;546546+ }547547+ } else if (!cp->dest) {548548+ dest = ip_vs_try_bind_dest(cp);549549+ if (dest)550550+ atomic_dec(&dest->refcnt);551551+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&552552+ (cp->state != state)) {553553+ /* update active/inactive flag for the connection */554554+ dest = cp->dest;555555+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&556556+ (state != IP_VS_TCP_S_ESTABLISHED)) {557557+ atomic_dec(&dest->activeconns);558558+ atomic_inc(&dest->inactconns);559559+ cp->flags |= IP_VS_CONN_F_INACTIVE;560560+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&561561+ (state == IP_VS_TCP_S_ESTABLISHED)) {562562+ atomic_inc(&dest->activeconns);563563+ atomic_dec(&dest->inactconns);564564+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;565565+ }566566+ } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&567567+ (cp->state != state)) {568568+ dest = cp->dest;569569+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&570570+ (state != IP_VS_SCTP_S_ESTABLISHED)) {571571+ atomic_dec(&dest->activeconns);572572+ atomic_inc(&dest->inactconns);573573+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;574574+ }575575+ }576576+577577+ if (opt)578578+ memcpy(&cp->in_seq, opt, sizeof(*opt));579579+ atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);580580+ cp->state = state;581581+ cp->old_state = cp->state;582582+ /*583583+ * For Ver 0 messages style584584+ * - Not possible to recover the right timeout for templates585585+ * - can not find the right fwmark586586+ * virtual service. 
If needed, we can do it for587587+ * non-fwmark persistent services.588588+ * Ver 1 messages style.589589+ * - No problem.590590+ */591591+ if (timeout) {592592+ if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)593593+ timeout = MAX_SCHEDULE_TIMEOUT / HZ;594594+ cp->timeout = timeout*HZ;595595+ } else {596596+ struct ip_vs_proto_data *pd;597597+598598+ pd = ip_vs_proto_data_get(net, protocol);599599+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)600600+ cp->timeout = pd->timeout_table[state];601601+ else602602+ cp->timeout = (3*60*HZ);603603+ }604604+ ip_vs_conn_put(cp);605605+}606606+607607+/*608608+ * Process received multicast message for Version 0609609+ */610610+static void ip_vs_process_message_v0(struct net *net, const char *buffer,611611+ const size_t buflen)612612+{613613+ struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;614614+ struct ip_vs_sync_conn_v0 *s;615615+ struct ip_vs_sync_conn_options *opt;616616+ struct ip_vs_protocol *pp;533617 struct ip_vs_conn_param param;534618 char *p;535619 int i;536620537537- if (buflen < sizeof(struct ip_vs_sync_mesg)) {538538- IP_VS_ERR_RL("sync message header too short\n");539539- return;540540- }541541-542542- /* Convert size back to host byte order */543543- m->size = ntohs(m->size);544544-545545- if (buflen != m->size) {546546- IP_VS_ERR_RL("bogus sync message size\n");547547- return;548548- }549549-550550- /* SyncID sanity check */551551- if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {552552- IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",553553- m->syncid);554554- return;555555- }556556-557557- p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);621621+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);558622 for (i=0; i<m->nr_conns; i++) {559623 unsigned flags, state;560624561625 if (p + SIMPLE_CONN_SIZE > buffer+buflen) {562562- IP_VS_ERR_RL("bogus conn in sync message\n");626626+ IP_VS_ERR_RL("BACKUP v0, bogus conn\n");563627 return;564628 }565565- s 
= (struct ip_vs_sync_conn *) p;629629+ s = (struct ip_vs_sync_conn_v0 *) p;566630 flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;567631 flags &= ~IP_VS_CONN_F_HASHED;568632 if (flags & IP_VS_CONN_F_SEQ_MASK) {569633 opt = (struct ip_vs_sync_conn_options *)&s[1];570634 p += FULL_CONN_SIZE;571635 if (p > buffer+buflen) {572572- IP_VS_ERR_RL("bogus conn options in sync message\n");636636+ IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");573637 return;574638 }575639 } else {···864362 if (!(flags & IP_VS_CONN_F_TEMPLATE)) {865363 pp = ip_vs_proto_get(s->protocol);866364 if (!pp) {867867- IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",365365+ IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",868366 s->protocol);869367 continue;870368 }871369 if (state >= pp->num_states) {872872- IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",370370+ IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",873371 pp->name, state);874372 continue;875373 }876374 } else {877375 /* protocol in templates is not used for state/timeout */878878- pp = NULL;879376 if (state > 0) {880880- IP_VS_DBG(2, "Invalid template state %u in sync msg\n",377377+ IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",881378 state);882379 state = 0;883380 }884381 }885382886886- {887887- if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,888888- (union nf_inet_addr *)&s->caddr,889889- s->cport,890890- (union nf_inet_addr *)&s->vaddr,891891- s->vport, ¶m)) {892892- pr_err("ip_vs_conn_fill_param_sync failed");893893- return;894894- }895895- if (!(flags & IP_VS_CONN_F_TEMPLATE))896896- cp = ip_vs_conn_in_get(¶m);897897- else898898- cp = ip_vs_ct_in_get(¶m);899899- }900900- if (!cp) {901901- /*902902- * Find the appropriate destination for the connection.903903- * If it is not found the connection will remain unbound904904- * but still handled.905905- */906906- dest = ip_vs_find_dest(AF_INET,907907- (union nf_inet_addr *)&s->daddr,908908- s->dport,909909- (union nf_inet_addr *)&s->vaddr,910910- 
s->vport,911911- s->protocol);912912- /* Set the approprite ativity flag */913913- if (s->protocol == IPPROTO_TCP) {914914- if (state != IP_VS_TCP_S_ESTABLISHED)915915- flags |= IP_VS_CONN_F_INACTIVE;916916- else917917- flags &= ~IP_VS_CONN_F_INACTIVE;918918- } else if (s->protocol == IPPROTO_SCTP) {919919- if (state != IP_VS_SCTP_S_ESTABLISHED)920920- flags |= IP_VS_CONN_F_INACTIVE;921921- else922922- flags &= ~IP_VS_CONN_F_INACTIVE;923923- }924924- cp = ip_vs_conn_new(¶m,925925- (union nf_inet_addr *)&s->daddr,926926- s->dport, flags, dest);927927- if (dest)928928- atomic_dec(&dest->refcnt);929929- if (!cp) {930930- pr_err("ip_vs_conn_new failed\n");931931- return;932932- }933933- } else if (!cp->dest) {934934- dest = ip_vs_try_bind_dest(cp);935935- if (dest)936936- atomic_dec(&dest->refcnt);937937- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&938938- (cp->state != state)) {939939- /* update active/inactive flag for the connection */940940- dest = cp->dest;941941- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&942942- (state != IP_VS_TCP_S_ESTABLISHED)) {943943- atomic_dec(&dest->activeconns);944944- atomic_inc(&dest->inactconns);945945- cp->flags |= IP_VS_CONN_F_INACTIVE;946946- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&947947- (state == IP_VS_TCP_S_ESTABLISHED)) {948948- atomic_inc(&dest->activeconns);949949- atomic_dec(&dest->inactconns);950950- cp->flags &= ~IP_VS_CONN_F_INACTIVE;951951- }952952- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&953953- (cp->state != state)) {954954- dest = cp->dest;955955- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&956956- (state != IP_VS_SCTP_S_ESTABLISHED)) {957957- atomic_dec(&dest->activeconns);958958- atomic_inc(&dest->inactconns);959959- cp->flags &= ~IP_VS_CONN_F_INACTIVE;960960- }961961- }383383+ ip_vs_conn_fill_param(net, AF_INET, s->protocol,384384+ (const union nf_inet_addr *)&s->caddr,385385+ s->cport,386386+ (const union nf_inet_addr *)&s->vaddr,387387+ s->vport, ¶m);962388963963- if 
(opt)964964- memcpy(&cp->in_seq, opt, sizeof(*opt));965965- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);966966- cp->state = state;967967- cp->old_state = cp->state;968968- /*969969- * We can not recover the right timeout for templates970970- * in all cases, we can not find the right fwmark971971- * virtual service. If needed, we can do it for972972- * non-fwmark persistent services.973973- */974974- if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)975975- cp->timeout = pp->timeout_table[state];976976- else977977- cp->timeout = (3*60*HZ);978978- ip_vs_conn_put(cp);389389+ /* Send timeout as Zero */390390+ ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,391391+ (union nf_inet_addr *)&s->daddr, s->dport,392392+ 0, 0, opt);393393+ }394394+}395395+396396+/*397397+ * Handle options398398+ */399399+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,400400+ __u32 *opt_flags,401401+ struct ip_vs_sync_conn_options *opt)402402+{403403+ struct ip_vs_sync_conn_options *topt;404404+405405+ topt = (struct ip_vs_sync_conn_options *)p;406406+407407+ if (plen != sizeof(struct ip_vs_sync_conn_options)) {408408+ IP_VS_DBG(2, "BACKUP, bogus conn options length\n");409409+ return -EINVAL;410410+ }411411+ if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {412412+ IP_VS_DBG(2, "BACKUP, conn options found twice\n");413413+ return -EINVAL;414414+ }415415+ ntoh_seq(&topt->in_seq, &opt->in_seq);416416+ ntoh_seq(&topt->out_seq, &opt->out_seq);417417+ *opt_flags |= IPVS_OPT_F_SEQ_DATA;418418+ return 0;419419+}420420+421421+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,422422+ __u8 **data, unsigned int maxlen,423423+ __u32 *opt_flags, __u32 flag)424424+{425425+ if (plen > maxlen) {426426+ IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);427427+ return -EINVAL;428428+ }429429+ if (*opt_flags & flag) {430430+ IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);431431+ return -EINVAL;432432+ }433433+ *data_len = 
plen;434434+ *data = p;435435+ *opt_flags |= flag;436436+ return 0;437437+}438438+/*439439+ * Process a Version 1 sync. connection440440+ */441441+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)442442+{443443+ struct ip_vs_sync_conn_options opt;444444+ union ip_vs_sync_conn *s;445445+ struct ip_vs_protocol *pp;446446+ struct ip_vs_conn_param param;447447+ __u32 flags;448448+ unsigned int af, state, pe_data_len=0, pe_name_len=0;449449+ __u8 *pe_data=NULL, *pe_name=NULL;450450+ __u32 opt_flags=0;451451+ int retc=0;452452+453453+ s = (union ip_vs_sync_conn *) p;454454+455455+ if (s->v6.type & STYPE_F_INET6) {456456+#ifdef CONFIG_IP_VS_IPV6457457+ af = AF_INET6;458458+ p += sizeof(struct ip_vs_sync_v6);459459+#else460460+ IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");461461+ retc = 10;462462+ goto out;463463+#endif464464+ } else if (!s->v4.type) {465465+ af = AF_INET;466466+ p += sizeof(struct ip_vs_sync_v4);467467+ } else {468468+ return -10;469469+ }470470+ if (p > msg_end)471471+ return -20;472472+473473+ /* Process optional params check Type & Len. 
*/474474+ while (p < msg_end) {475475+ int ptype;476476+ int plen;477477+478478+ if (p+2 > msg_end)479479+ return -30;480480+ ptype = *(p++);481481+ plen = *(p++);482482+483483+ if (!plen || ((p + plen) > msg_end))484484+ return -40;485485+ /* Handle seq option p = param data */486486+ switch (ptype & ~IPVS_OPT_F_PARAM) {487487+ case IPVS_OPT_SEQ_DATA:488488+ if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))489489+ return -50;490490+ break;491491+492492+ case IPVS_OPT_PE_DATA:493493+ if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,494494+ IP_VS_PEDATA_MAXLEN, &opt_flags,495495+ IPVS_OPT_F_PE_DATA))496496+ return -60;497497+ break;498498+499499+ case IPVS_OPT_PE_NAME:500500+ if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,501501+ IP_VS_PENAME_MAXLEN, &opt_flags,502502+ IPVS_OPT_F_PE_NAME))503503+ return -70;504504+ break;505505+506506+ default:507507+ /* Param data mandatory ? */508508+ if (!(ptype & IPVS_OPT_F_PARAM)) {509509+ IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",510510+ ptype & ~IPVS_OPT_F_PARAM);511511+ retc = 20;512512+ goto out;513513+ }514514+ }515515+ p += plen; /* Next option */516516+ }517517+518518+ /* Get flags and Mask off unsupported */519519+ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;520520+ flags |= IP_VS_CONN_F_SYNC;521521+ state = ntohs(s->v4.state);522522+523523+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {524524+ pp = ip_vs_proto_get(s->v4.protocol);525525+ if (!pp) {526526+ IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",527527+ s->v4.protocol);528528+ retc = 30;529529+ goto out;530530+ }531531+ if (state >= pp->num_states) {532532+ IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",533533+ pp->name, state);534534+ retc = 40;535535+ goto out;536536+ }537537+ } else {538538+ /* protocol in templates is not used for state/timeout */539539+ if (state > 0) {540540+ IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",541541+ state);542542+ state = 0;543543+ }544544+ }545545+ if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, 
pe_data,546546+ pe_data_len, pe_name, pe_name_len)) {547547+ retc = 50;548548+ goto out;549549+ }550550+ /* If only IPv4, just silent skip IPv6 */551551+ if (af == AF_INET)552552+ ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,553553+ (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,554554+ ntohl(s->v4.timeout), ntohl(s->v4.fwmark),555555+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)556556+ );557557+#ifdef CONFIG_IP_VS_IPV6558558+ else559559+ ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,560560+ (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,561561+ ntohl(s->v6.timeout), ntohl(s->v6.fwmark),562562+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)563563+ );564564+#endif565565+ return 0;566566+ /* Error exit */567567+out:568568+ IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);569569+ return retc;570570+571571+}572572+/*573573+ * Process received multicast message and create the corresponding574574+ * ip_vs_conn entries.575575+ * Handles Version 0 & 1576576+ */577577+static void ip_vs_process_message(struct net *net, __u8 *buffer,578578+ const size_t buflen)579579+{580580+ struct netns_ipvs *ipvs = net_ipvs(net);581581+ struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;582582+ __u8 *p, *msg_end;583583+ int i, nr_conns;584584+585585+ if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {586586+ IP_VS_DBG(2, "BACKUP, message header too short\n");587587+ return;588588+ }589589+ /* Convert size back to host byte order */590590+ m2->size = ntohs(m2->size);591591+592592+ if (buflen != m2->size) {593593+ IP_VS_DBG(2, "BACKUP, bogus message size\n");594594+ return;595595+ }596596+ /* SyncID sanity check */597597+ if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {598598+ IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);599599+ return;600600+ }601601+ /* Handle version 1 message */602602+ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)603603+ && (m2->spare == 0)) {604604+605605+ msg_end = buffer + 
sizeof(struct ip_vs_sync_mesg);606606+ nr_conns = m2->nr_conns;607607+608608+ for (i=0; i<nr_conns; i++) {609609+ union ip_vs_sync_conn *s;610610+ unsigned size;611611+ int retc;612612+613613+ p = msg_end;614614+ if (p + sizeof(s->v4) > buffer+buflen) {615615+ IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");616616+ return;617617+ }618618+ s = (union ip_vs_sync_conn *)p;619619+ size = ntohs(s->v4.ver_size) & SVER_MASK;620620+ msg_end = p + size;621621+ /* Basic sanity checks */622622+ if (msg_end > buffer+buflen) {623623+ IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");624624+ return;625625+ }626626+ if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {627627+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",628628+ ntohs(s->v4.ver_size) >> SVER_SHIFT);629629+ return;630630+ }631631+ /* Process a single sync_conn */632632+ retc = ip_vs_proc_sync_conn(net, p, msg_end);633633+ if (retc < 0) {634634+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",635635+ retc);636636+ return;637637+ }638638+ /* Make sure we have 32 bit alignment */639639+ msg_end = p + ((size + 3) & ~3);640640+ }641641+ } else {642642+ /* Old type of message */643643+ ip_vs_process_message_v0(net, buffer, buflen);644644+ return;979645 }980646}981647···1181511{1182512 struct net_device *dev;1183513 struct inet_sock *inet = inet_sk(sk);514514+ struct net *net = sock_net(sk);118451511851185- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)516516+ dev = __dev_get_by_name(net, ifname);517517+ if (!dev)1186518 return -ENODEV;11875191188520 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)···1203531 * Set the maximum length of sync message according to the1204532 * specified interface's MTU.1205533 */12061206-static int set_sync_mesg_maxlen(int sync_state)534534+static int set_sync_mesg_maxlen(struct net *net, int sync_state)1207535{536536+ struct netns_ipvs *ipvs = net_ipvs(net);1208537 struct net_device *dev;1209538 int num;12105391211540 if (sync_state == 
IP_VS_STATE_MASTER) {12121212- if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)541541+ dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);542542+ if (!dev)1213543 return -ENODEV;12145441215545 num = (dev->mtu - sizeof(struct iphdr) -1216546 sizeof(struct udphdr) -1217547 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;12181218- sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +548548+ ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +1219549 SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);1220550 IP_VS_DBG(7, "setting the maximum length of sync sending "12211221- "message %d.\n", sync_send_mesg_maxlen);551551+ "message %d.\n", ipvs->send_mesg_maxlen);1222552 } else if (sync_state == IP_VS_STATE_BACKUP) {12231223- if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)553553+ dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);554554+ if (!dev)1224555 return -ENODEV;122555612261226- sync_recv_mesg_maxlen = dev->mtu -557557+ ipvs->recv_mesg_maxlen = dev->mtu -1227558 sizeof(struct iphdr) - sizeof(struct udphdr);1228559 IP_VS_DBG(7, "setting the maximum length of sync receiving "12291229- "message %d.\n", sync_recv_mesg_maxlen);560560+ "message %d.\n", ipvs->recv_mesg_maxlen);1230561 }12315621232563 return 0;···1244569static int1245570join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)1246571{572572+ struct net *net = sock_net(sk);1247573 struct ip_mreqn mreq;1248574 struct net_device *dev;1249575 int ret;···1252576 memset(&mreq, 0, sizeof(mreq));1253577 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));125457812551255- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)579579+ dev = __dev_get_by_name(net, ifname);580580+ if (!dev)1256581 return -ENODEV;1257582 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)1258583 return -EINVAL;···12705931271594static int bind_mcastif_addr(struct socket *sock, char *ifname)1272595{596596+ struct net *net = sock_net(sock->sk);1273597 struct 
net_device *dev;1274598 __be32 addr;1275599 struct sockaddr_in sin;127660012771277- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)601601+ dev = __dev_get_by_name(net, ifname);602602+ if (!dev)1278603 return -ENODEV;12796041280605 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);···1298619/*1299620 * Set up sending multicast socket over UDP1300621 */13011301-static struct socket * make_send_sock(void)622622+static struct socket *make_send_sock(struct net *net)1302623{624624+ struct netns_ipvs *ipvs = net_ipvs(net);1303625 struct socket *sock;1304626 int result;1305627···1311631 return ERR_PTR(result);1312632 }131363313141314- result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);634634+ result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);1315635 if (result < 0) {1316636 pr_err("Error setting outbound mcast interface\n");1317637 goto error;···1320640 set_mcast_loop(sock->sk, 0);1321641 set_mcast_ttl(sock->sk, 1);132264213231323- result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);643643+ result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);1324644 if (result < 0) {1325645 pr_err("Error binding address of the mcast interface\n");1326646 goto error;···1344664/*1345665 * Set up receiving multicast socket over UDP1346666 */13471347-static struct socket * make_receive_sock(void)667667+static struct socket *make_receive_sock(struct net *net)1348668{669669+ struct netns_ipvs *ipvs = net_ipvs(net);1349670 struct socket *sock;1350671 int result;1351672···1370689 /* join the multicast group */1371690 result = join_mcast_group(sock->sk,1372691 (struct in_addr *) &mcast_addr.sin_addr,13731373- ip_vs_backup_mcast_ifn);692692+ ipvs->backup_mcast_ifn);1374693 if (result < 0) {1375694 pr_err("Error joining to the multicast group\n");1376695 goto error;···1441760static int sync_thread_master(void *data)1442761{1443762 struct ip_vs_sync_thread_data *tinfo = data;763763+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);1444764 struct ip_vs_sync_buff 
*sb;14457651446766 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "1447767 "syncid = %d\n",14481448- ip_vs_master_mcast_ifn, ip_vs_master_syncid);768768+ ipvs->master_mcast_ifn, ipvs->master_syncid);14497691450770 while (!kthread_should_stop()) {14511451- while ((sb = sb_dequeue())) {771771+ while ((sb = sb_dequeue(ipvs))) {1452772 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);1453773 ip_vs_sync_buff_release(sb);1454774 }145577514561456- /* check if entries stay in curr_sb for 2 seconds */14571457- sb = get_curr_sync_buff(2 * HZ);776776+ /* check if entries stay in ipvs->sync_buff for 2 seconds */777777+ sb = get_curr_sync_buff(ipvs, 2 * HZ);1458778 if (sb) {1459779 ip_vs_send_sync_msg(tinfo->sock, sb->mesg);1460780 ip_vs_sync_buff_release(sb);···1465783 }14667841467785 /* clean up the sync_buff queue */14681468- while ((sb=sb_dequeue())) {786786+ while ((sb = sb_dequeue(ipvs)))1469787 ip_vs_sync_buff_release(sb);14701470- }14717881472789 /* clean up the current sync_buff */14731473- if ((sb = get_curr_sync_buff(0))) {790790+ sb = get_curr_sync_buff(ipvs, 0);791791+ if (sb)1474792 ip_vs_sync_buff_release(sb);14751475- }14767931477794 /* release the sending multicast socket */1478795 sock_release(tinfo->sock);···1484803static int sync_thread_backup(void *data)1485804{1486805 struct ip_vs_sync_thread_data *tinfo = data;806806+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);1487807 int len;14888081489809 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "1490810 "syncid = %d\n",14911491- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);811811+ ipvs->backup_mcast_ifn, ipvs->backup_syncid);14928121493813 while (!kthread_should_stop()) {1494814 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),···1499817 /* do we have data now? 
*/1500818 while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {1501819 len = ip_vs_receive(tinfo->sock, tinfo->buf,15021502- sync_recv_mesg_maxlen);820820+ ipvs->recv_mesg_maxlen);1503821 if (len <= 0) {1504822 pr_err("receiving message error\n");1505823 break;···1508826 /* disable bottom half, because it accesses the data1509827 shared by softirq while getting/creating conns */1510828 local_bh_disable();15111511- ip_vs_process_message(tinfo->buf, len);829829+ ip_vs_process_message(tinfo->net, tinfo->buf, len);1512830 local_bh_enable();1513831 }1514832 }···1522840}1523841152484215251525-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)843843+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)1526844{1527845 struct ip_vs_sync_thread_data *tinfo;1528846 struct task_struct **realtask, *task;1529847 struct socket *sock;848848+ struct netns_ipvs *ipvs = net_ipvs(net);1530849 char *name, *buf = NULL;1531850 int (*threadfn)(void *data);1532851 int result = -ENOMEM;15338521534853 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));1535854 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",15361536- sizeof(struct ip_vs_sync_conn));855855+ sizeof(struct ip_vs_sync_conn_v0));15378561538857 if (state == IP_VS_STATE_MASTER) {15391539- if (sync_master_thread)858858+ if (ipvs->master_thread)1540859 return -EEXIST;154186015421542- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,15431543- sizeof(ip_vs_master_mcast_ifn));15441544- ip_vs_master_syncid = syncid;15451545- realtask = &sync_master_thread;15461546- name = "ipvs_syncmaster";861861+ strlcpy(ipvs->master_mcast_ifn, mcast_ifn,862862+ sizeof(ipvs->master_mcast_ifn));863863+ ipvs->master_syncid = syncid;864864+ realtask = &ipvs->master_thread;865865+ name = "ipvs_master:%d";1547866 threadfn = sync_thread_master;15481548- sock = make_send_sock();867867+ sock = make_send_sock(net);1549868 } else if (state == IP_VS_STATE_BACKUP) {15501550- if 
(sync_backup_thread)869869+ if (ipvs->backup_thread)1551870 return -EEXIST;155287115531553- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,15541554- sizeof(ip_vs_backup_mcast_ifn));15551555- ip_vs_backup_syncid = syncid;15561556- realtask = &sync_backup_thread;15571557- name = "ipvs_syncbackup";872872+ strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,873873+ sizeof(ipvs->backup_mcast_ifn));874874+ ipvs->backup_syncid = syncid;875875+ realtask = &ipvs->backup_thread;876876+ name = "ipvs_backup:%d";1558877 threadfn = sync_thread_backup;15591559- sock = make_receive_sock();878878+ sock = make_receive_sock(net);1560879 } else {1561880 return -EINVAL;1562881 }···1567884 goto out;1568885 }156988615701570- set_sync_mesg_maxlen(state);887887+ set_sync_mesg_maxlen(net, state);1571888 if (state == IP_VS_STATE_BACKUP) {15721572- buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);889889+ buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);1573890 if (!buf)1574891 goto outsocket;1575892 }···1578895 if (!tinfo)1579896 goto outbuf;1580897898898+ tinfo->net = net;1581899 tinfo->sock = sock;1582900 tinfo->buf = buf;158390115841584- task = kthread_run(threadfn, tinfo, name);902902+ task = kthread_run(threadfn, tinfo, name, ipvs->gen);1585903 if (IS_ERR(task)) {1586904 result = PTR_ERR(task);1587905 goto outtinfo;···15909061591907 /* mark as active */1592908 *realtask = task;15931593- ip_vs_sync_state |= state;909909+ ipvs->sync_state |= state;15949101595911 /* increase the module use count */1596912 ip_vs_use_count_inc();···1608924}1609925161092616111611-int stop_sync_thread(int state)927927+int stop_sync_thread(struct net *net, int state)1612928{929929+ struct netns_ipvs *ipvs = net_ipvs(net);930930+1613931 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));16149321615933 if (state == IP_VS_STATE_MASTER) {16161616- if (!sync_master_thread)934934+ if (!ipvs->master_thread)1617935 return -ESRCH;16189361619937 pr_info("stopping master sync thread %d ...\n",16201620- 
task_pid_nr(sync_master_thread));938938+ task_pid_nr(ipvs->master_thread));16219391622940 /*1623941 * The lock synchronizes with sb_queue_tail(), so that we don't···1627941 * progress of stopping the master sync daemon.1628942 */162994316301630- spin_lock_bh(&ip_vs_sync_lock);16311631- ip_vs_sync_state &= ~IP_VS_STATE_MASTER;16321632- spin_unlock_bh(&ip_vs_sync_lock);16331633- kthread_stop(sync_master_thread);16341634- sync_master_thread = NULL;944944+ spin_lock_bh(&ipvs->sync_lock);945945+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;946946+ spin_unlock_bh(&ipvs->sync_lock);947947+ kthread_stop(ipvs->master_thread);948948+ ipvs->master_thread = NULL;1635949 } else if (state == IP_VS_STATE_BACKUP) {16361636- if (!sync_backup_thread)950950+ if (!ipvs->backup_thread)1637951 return -ESRCH;16389521639953 pr_info("stopping backup sync thread %d ...\n",16401640- task_pid_nr(sync_backup_thread));954954+ task_pid_nr(ipvs->backup_thread));164195516421642- ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;16431643- kthread_stop(sync_backup_thread);16441644- sync_backup_thread = NULL;956956+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;957957+ kthread_stop(ipvs->backup_thread);958958+ ipvs->backup_thread = NULL;1645959 } else {1646960 return -EINVAL;1647961 }···1650964 ip_vs_use_count_dec();16519651652966 return 0;967967+}968968+969969+/*970970+ * Initialize data struct for each netns971971+ */972972+static int __net_init __ip_vs_sync_init(struct net *net)973973+{974974+ struct netns_ipvs *ipvs = net_ipvs(net);975975+976976+ INIT_LIST_HEAD(&ipvs->sync_queue);977977+ spin_lock_init(&ipvs->sync_lock);978978+ spin_lock_init(&ipvs->sync_buff_lock);979979+980980+ ipvs->sync_mcast_addr.sin_family = AF_INET;981981+ ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);982982+ ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);983983+ return 0;984984+}985985+986986+static void __ip_vs_sync_cleanup(struct net *net)987987+{988988+ stop_sync_thread(net, 
IP_VS_STATE_MASTER);989989+ stop_sync_thread(net, IP_VS_STATE_BACKUP);990990+}991991+992992+static struct pernet_operations ipvs_sync_ops = {993993+ .init = __ip_vs_sync_init,994994+ .exit = __ip_vs_sync_cleanup,995995+};996996+997997+998998+int __init ip_vs_sync_init(void)999999+{10001000+ return register_pernet_subsys(&ipvs_sync_ops);10011001+}10021002+10031003+void __exit ip_vs_sync_cleanup(void)10041004+{10051005+ unregister_pernet_subsys(&ipvs_sync_ops);16531006}
+15-11
net/netfilter/ipvs/ip_vs_xmit.c
···175175 .fl4_tos = RT_TOS(iph->tos),176176 .mark = skb->mark,177177 };178178- struct rtable *rt;179178180179 if (ip_route_output_key(net, &rt, &fl))181180 return 0;···389390390391 /* MTU checking */391392 mtu = dst_mtu(&rt->dst);392392- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {393393+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&394394+ !skb_is_gso(skb)) {393395 ip_rt_put(rt);394396 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));395397 IP_VS_DBG_RL("%s(): frag needed\n", __func__);···443443444444 /* MTU checking */445445 mtu = dst_mtu(&rt->dst);446446- if (skb->len > mtu) {446446+ if (skb->len > mtu && !skb_is_gso(skb)) {447447 if (!skb->dev) {448448 struct net *net = dev_net(skb_dst(skb)->dev);449449···543543544544 /* MTU checking */545545 mtu = dst_mtu(&rt->dst);546546- if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {546546+ if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF)) &&547547+ !skb_is_gso(skb)) {547548 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));548549 IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,549550 "ip_vs_nat_xmit(): frag needed for");···659658660659 /* MTU checking */661660 mtu = dst_mtu(&rt->dst);662662- if (skb->len > mtu) {661661+ if (skb->len > mtu && !skb_is_gso(skb)) {663662 if (!skb->dev) {664663 struct net *net = dev_net(skb_dst(skb)->dev);665664···774773775774 df |= (old_iph->frag_off & htons(IP_DF));776775777777- if ((old_iph->frag_off & htons(IP_DF))778778- && mtu < ntohs(old_iph->tot_len)) {776776+ if ((old_iph->frag_off & htons(IP_DF) &&777777+ mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {779778 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));780779 IP_VS_DBG_RL("%s(): frag needed\n", __func__);781780 goto tx_error_put;···887886 if (skb_dst(skb))888887 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);889888890890- if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {889889+ if (mtu < ntohs(old_iph->payload_len) + sizeof(struct 
ipv6hdr) &&890890+ !skb_is_gso(skb)) {891891 if (!skb->dev) {892892 struct net *net = dev_net(skb_dst(skb)->dev);893893···993991994992 /* MTU checking */995993 mtu = dst_mtu(&rt->dst);996996- if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {994994+ if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu &&995995+ !skb_is_gso(skb)) {997996 icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));998997 ip_rt_put(rt);999998 IP_VS_DBG_RL("%s(): frag needed\n", __func__);···1161115811621159 /* MTU checking */11631160 mtu = dst_mtu(&rt->dst);11641164- if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {11611161+ if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF)) &&11621162+ !skb_is_gso(skb)) {11651163 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));11661164 IP_VS_DBG_RL("%s(): frag needed\n", __func__);11671165 goto tx_error_put;···1276127212771273 /* MTU checking */12781274 mtu = dst_mtu(&rt->dst);12791279- if (skb->len > mtu) {12751275+ if (skb->len > mtu && !skb_is_gso(skb)) {12801276 if (!skb->dev) {12811277 struct net *net = dev_net(skb_dst(skb)->dev);12821278
+82
net/netfilter/nf_conntrack_broadcast.c
···11+/*22+ * broadcast connection tracking helper33+ *44+ * (c) 2005 Patrick McHardy <kaber@trash.net>55+ *66+ * This program is free software; you can redistribute it and/or77+ * modify it under the terms of the GNU General Public License88+ * as published by the Free Software Foundation; either version99+ * 2 of the License, or (at your option) any later version.1010+ */1111+1212+#include <linux/module.h>1313+#include <linux/ip.h>1414+#include <net/route.h>1515+#include <linux/inetdevice.h>1616+#include <linux/skbuff.h>1717+1818+#include <net/netfilter/nf_conntrack.h>1919+#include <net/netfilter/nf_conntrack_helper.h>2020+#include <net/netfilter/nf_conntrack_expect.h>2121+2222+int nf_conntrack_broadcast_help(struct sk_buff *skb,2323+ unsigned int protoff,2424+ struct nf_conn *ct,2525+ enum ip_conntrack_info ctinfo,2626+ unsigned int timeout)2727+{2828+ struct nf_conntrack_expect *exp;2929+ struct iphdr *iph = ip_hdr(skb);3030+ struct rtable *rt = skb_rtable(skb);3131+ struct in_device *in_dev;3232+ struct nf_conn_help *help = nfct_help(ct);3333+ __be32 mask = 0;3434+3535+ /* we're only interested in locally generated packets */3636+ if (skb->sk == NULL)3737+ goto out;3838+ if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))3939+ goto out;4040+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)4141+ goto out;4242+4343+ rcu_read_lock();4444+ in_dev = __in_dev_get_rcu(rt->dst.dev);4545+ if (in_dev != NULL) {4646+ for_primary_ifa(in_dev) {4747+ if (ifa->ifa_broadcast == iph->daddr) {4848+ mask = ifa->ifa_mask;4949+ break;5050+ }5151+ } endfor_ifa(in_dev);5252+ }5353+ rcu_read_unlock();5454+5555+ if (mask == 0)5656+ goto out;5757+5858+ exp = nf_ct_expect_alloc(ct);5959+ if (exp == NULL)6060+ goto out;6161+6262+ exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;6363+ exp->tuple.src.u.udp.port = help->helper->tuple.src.u.udp.port;6464+6565+ exp->mask.src.u3.ip = mask;6666+ exp->mask.src.u.udp.port = htons(0xFFFF);6767+6868+ exp->expectfn = NULL;6969+ exp->flags = 
NF_CT_EXPECT_PERMANENT;7070+ exp->class = NF_CT_EXPECT_CLASS_DEFAULT;7171+ exp->helper = NULL;7272+7373+ nf_ct_expect_related(exp);7474+ nf_ct_expect_put(exp);7575+7676+ nf_ct_refresh(ct, skb, timeout * HZ);7777+out:7878+ return NF_ACCEPT;7979+}8080+EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);8181+8282+MODULE_LICENSE("GPL");
+38-19
net/netfilter/nf_conntrack_core.c
···4343#include <net/netfilter/nf_conntrack_acct.h>4444#include <net/netfilter/nf_conntrack_ecache.h>4545#include <net/netfilter/nf_conntrack_zones.h>4646+#include <net/netfilter/nf_conntrack_timestamp.h>4647#include <net/netfilter/nf_nat.h>4748#include <net/netfilter/nf_nat_core.h>4849···283282static void death_by_timeout(unsigned long ul_conntrack)284283{285284 struct nf_conn *ct = (void *)ul_conntrack;285285+ struct nf_conn_tstamp *tstamp;286286+287287+ tstamp = nf_conn_tstamp_find(ct);288288+ if (tstamp && tstamp->stop == 0)289289+ tstamp->stop = ktime_to_ns(ktime_get_real());286290287291 if (!test_bit(IPS_DYING_BIT, &ct->status) &&288292 unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {···425419 struct nf_conntrack_tuple_hash *h;426420 struct nf_conn *ct;427421 struct nf_conn_help *help;422422+ struct nf_conn_tstamp *tstamp;428423 struct hlist_nulls_node *n;429424 enum ip_conntrack_info ctinfo;430425 struct net *net;···493486 ct->timeout.expires += jiffies;494487 add_timer(&ct->timeout);495488 atomic_inc(&ct->ct_general.use);496496- set_bit(IPS_CONFIRMED_BIT, &ct->status);489489+ ct->status |= IPS_CONFIRMED;497490491491+ /* set conntrack timestamp, if enabled. */492492+ tstamp = nf_conn_tstamp_find(ct);493493+ if (tstamp) {494494+ if (skb->tstamp.tv64 == 0)495495+ __net_timestamp((struct sk_buff *)skb);496496+497497+ tstamp->start = ktime_to_ns(skb->tstamp);498498+ }498499 /* Since the lookup is lockless, hash insertion must be done after499500 * starting the timer and setting the CONFIRMED bit. 
The RCU barriers500501 * guarantee that no other CPU can find the conntrack before the above···670655 * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.671656 */672657 memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,673673- sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));658658+ offsetof(struct nf_conn, proto) -659659+ offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));674660 spin_lock_init(&ct->lock);675661 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;676662 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;···761745 }762746763747 nf_ct_acct_ext_add(ct, GFP_ATOMIC);748748+ nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);764749765750 ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;766751 nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,···12021185static int kill_report(struct nf_conn *i, void *data)12031186{12041187 struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;11881188+ struct nf_conn_tstamp *tstamp;11891189+11901190+ tstamp = nf_conn_tstamp_find(i);11911191+ if (tstamp && tstamp->stop == 0)11921192+ tstamp->stop = ktime_to_ns(ktime_get_real());1205119312061194 /* If we fail to deliver the event, death_by_timeout() will retry */12071195 if (nf_conntrack_event_report(IPCT_DESTROY, i,···12231201 return 1;12241202}1225120312261226-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)12041204+void nf_ct_free_hashtable(void *hash, unsigned int size)12271205{12281228- if (vmalloced)12061206+ if (is_vmalloc_addr(hash))12291207 vfree(hash);12301208 else12311209 free_pages((unsigned long)hash,···12921270 goto i_see_dead_people;12931271 }1294127212951295- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,12961296- net->ct.htable_size);12731273+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);12971274 nf_conntrack_ecache_fini(net);12981275 nf_conntrack_acct_fini(net);12991276 nf_conntrack_expect_fini(net);···13211300 }13221301}1323130213241324-void *nf_ct_alloc_hashtable(unsigned int *sizep, int 
*vmalloced, int nulls)13031303+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)13251304{13261305 struct hlist_nulls_head *hash;13271306 unsigned int nr_slots, i;13281307 size_t sz;13291329-13301330- *vmalloced = 0;1331130813321309 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));13331310 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));···13331314 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,13341315 get_order(sz));13351316 if (!hash) {13361336- *vmalloced = 1;13371317 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");13381318 hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,13391319 PAGE_KERNEL);···1348133013491331int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)13501332{13511351- int i, bucket, vmalloced, old_vmalloced;13331333+ int i, bucket;13521334 unsigned int hashsize, old_size;13531335 struct hlist_nulls_head *hash, *old_hash;13541336 struct nf_conntrack_tuple_hash *h;···13651347 if (!hashsize)13661348 return -EINVAL;1367134913681368- hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);13501350+ hash = nf_ct_alloc_hashtable(&hashsize, 1);13691351 if (!hash)13701352 return -ENOMEM;13711353···13871369 }13881370 }13891371 old_size = init_net.ct.htable_size;13901390- old_vmalloced = init_net.ct.hash_vmalloc;13911372 old_hash = init_net.ct.hash;1392137313931374 init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;13941394- init_net.ct.hash_vmalloc = vmalloced;13951375 init_net.ct.hash = hash;13961376 spin_unlock_bh(&nf_conntrack_lock);1397137713981398- nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);13781378+ nf_ct_free_hashtable(old_hash, old_size);13991379 return 0;14001380}14011381EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);···15061490 }1507149115081492 net->ct.htable_size = nf_conntrack_htable_size;15091509- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,15101510- &net->ct.hash_vmalloc, 
1);14931493+ net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);15111494 if (!net->ct.hash) {15121495 ret = -ENOMEM;15131496 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");···15181503 ret = nf_conntrack_acct_init(net);15191504 if (ret < 0)15201505 goto err_acct;15061506+ ret = nf_conntrack_tstamp_init(net);15071507+ if (ret < 0)15081508+ goto err_tstamp;15211509 ret = nf_conntrack_ecache_init(net);15221510 if (ret < 0)15231511 goto err_ecache;···15281510 return 0;1529151115301512err_ecache:15131513+ nf_conntrack_tstamp_fini(net);15141514+err_tstamp:15311515 nf_conntrack_acct_fini(net);15321516err_acct:15331517 nf_conntrack_expect_fini(net);15341518err_expect:15351535- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,15361536- net->ct.htable_size);15191519+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);15371520err_hash:15381521 kmem_cache_destroy(net->ct.nf_conntrack_cachep);15391522err_cache:
+20-14
net/netfilter/nf_conntrack_expect.c
···319319 const struct nf_conntrack_expect_policy *p;320320 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);321321322322- atomic_inc(&exp->use);322322+ /* two references : one for hash insert, one for the timer */323323+ atomic_add(2, &exp->use);323324324325 if (master_help) {325326 hlist_add_head(&exp->lnode, &master_help->expectations);···334333 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,335334 (unsigned long)exp);336335 if (master_help) {337337- p = &master_help->helper->expect_policy[exp->class];336336+ p = &rcu_dereference_protected(337337+ master_help->helper,338338+ lockdep_is_held(&nf_conntrack_lock)339339+ )->expect_policy[exp->class];338340 exp->timeout.expires = jiffies + p->timeout * HZ;339341 }340342 add_timer(&exp->timeout);341343342342- atomic_inc(&exp->use);343344 NF_CT_STAT_INC(net, expect_create);344345}345346···372369 if (!del_timer(&i->timeout))373370 return 0;374371375375- p = &master_help->helper->expect_policy[i->class];372372+ p = &rcu_dereference_protected(373373+ master_help->helper,374374+ lockdep_is_held(&nf_conntrack_lock)375375+ )->expect_policy[i->class];376376 i->timeout.expires = jiffies + p->timeout * HZ;377377 add_timer(&i->timeout);378378 return 1;···413407 }414408 /* Will be over limit? 
*/415409 if (master_help) {416416- p = &master_help->helper->expect_policy[expect->class];410410+ p = &rcu_dereference_protected(411411+ master_help->helper,412412+ lockdep_is_held(&nf_conntrack_lock)413413+ )->expect_policy[expect->class];417414 if (p->max_expected &&418415 master_help->expecting[expect->class] >= p->max_expected) {419416 evict_oldest_expect(master, expect);···487478 struct hlist_node *n;488479489480 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {490490- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);481481+ n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));491482 if (n)492483 return n;493484 }···500491 struct net *net = seq_file_net(seq);501492 struct ct_expect_iter_state *st = seq->private;502493503503- head = rcu_dereference(head->next);494494+ head = rcu_dereference(hlist_next_rcu(head));504495 while (head == NULL) {505496 if (++st->bucket >= nf_ct_expect_hsize)506497 return NULL;507507- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);498498+ head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));508499 }509500 return head;510501}···639630 }640631641632 net->ct.expect_count = 0;642642- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,643643- &net->ct.expect_vmalloc, 0);633633+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);644634 if (net->ct.expect_hash == NULL)645635 goto err1;646636···661653 if (net_eq(net, &init_net))662654 kmem_cache_destroy(nf_ct_expect_cachep);663655err2:664664- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,665665- nf_ct_expect_hsize);656656+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);666657err1:667658 return err;668659}···673666 rcu_barrier(); /* Wait for call_rcu() before destroy */674667 kmem_cache_destroy(nf_ct_expect_cachep);675668 }676676- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,677677- nf_ct_expect_hsize);669669+ 
nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);678670}
+6-5
net/netfilter/nf_conntrack_extend.c
···140140 /* This assumes that extended areas in conntrack for the types141141 whose NF_CT_EXT_F_PREALLOC bit set are allocated in order */142142 for (i = min; i <= max; i++) {143143- t1 = nf_ct_ext_types[i];143143+ t1 = rcu_dereference_protected(nf_ct_ext_types[i],144144+ lockdep_is_held(&nf_ct_ext_type_mutex));144145 if (!t1)145146 continue;146147147147- t1->alloc_size = sizeof(struct nf_ct_ext)148148- + ALIGN(sizeof(struct nf_ct_ext), t1->align)149149- + t1->len;148148+ t1->alloc_size = ALIGN(sizeof(struct nf_ct_ext), t1->align) +149149+ t1->len;150150 for (j = 0; j < NF_CT_EXT_NUM; j++) {151151- t2 = nf_ct_ext_types[j];151151+ t2 = rcu_dereference_protected(nf_ct_ext_types[j],152152+ lockdep_is_held(&nf_ct_ext_type_mutex));152153 if (t2 == NULL || t2 == t1 ||153154 (t2->flags & NF_CT_EXT_F_PREALLOC) == 0)154155 continue;
+11-9
net/netfilter/nf_conntrack_helper.c
···3333static struct hlist_head *nf_ct_helper_hash __read_mostly;3434static unsigned int nf_ct_helper_hsize __read_mostly;3535static unsigned int nf_ct_helper_count __read_mostly;3636-static int nf_ct_helper_vmalloc;373638373938/* Stupid hash, but collision free for the default registrations of the···157158 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i);158159 struct nf_conn_help *help = nfct_help(ct);159160160160- if (help && help->helper == me) {161161+ if (help && rcu_dereference_protected(162162+ help->helper,163163+ lockdep_is_held(&nf_conntrack_lock)164164+ ) == me) {161165 nf_conntrack_event(IPCT_HELPER, ct);162166 rcu_assign_pointer(help->helper, NULL);163167 }···212210 hlist_for_each_entry_safe(exp, n, next,213211 &net->ct.expect_hash[i], hnode) {214212 struct nf_conn_help *help = nfct_help(exp->master);215215- if ((help->helper == me || exp->helper == me) &&213213+ if ((rcu_dereference_protected(214214+ help->helper,215215+ lockdep_is_held(&nf_conntrack_lock)216216+ ) == me || exp->helper == me) &&216217 del_timer(&exp->timeout)) {217218 nf_ct_unlink_expect(exp);218219 nf_ct_expect_put(exp);···266261 int err;267262268263 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */269269- nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,270270- &nf_ct_helper_vmalloc, 0);264264+ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);271265 if (!nf_ct_helper_hash)272266 return -ENOMEM;273267···277273 return 0;278274279275err1:280280- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,281281- nf_ct_helper_hsize);276276+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);282277 return err;283278}284279285280void nf_conntrack_helper_fini(void)286281{287282 nf_ct_extend_unregister(&helper_extend);288288- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,289289- nf_ct_helper_hsize);283283+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);290284}
···2222MODULE_LICENSE("GPL");2323MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");2424MODULE_DESCRIPTION("Xtables: CPU match");2525+MODULE_ALIAS("ipt_cpu");2626+MODULE_ALIAS("ip6t_cpu");25272628static int cpu_mt_check(const struct xt_mtchk_param *par)2729{
+1-1
net/netfilter/xt_ipvs.c
···8585 /*8686 * Check if the packet belongs to an existing entry8787 */8888- cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);8888+ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);8989 if (unlikely(cp == NULL)) {9090 match = false;9191 goto out;
+1-4
net/sched/Kconfig
···255255256256config NET_CLS_ROUTE4257257 tristate "Routing decision (ROUTE)"258258- select NET_CLS_ROUTE258258+ select IP_ROUTE_CLASSID259259 select NET_CLS260260 ---help---261261 If you say Y here, you will be able to classify packets···263263264264 To compile this code as a module, choose M here: the265265 module will be called cls_route.266266-267267-config NET_CLS_ROUTE268268- bool269266270267config NET_CLS_FW271268 tristate "Netfilter mark (FW)"