···11+What: ip_queue22+Date: finally removed in kernel v3.5.033+Contact: Pablo Neira Ayuso <pablo@netfilter.org>44+Description:55+ ip_queue has been replaced by nfnetlink_queue which provides66+ more advanced queueing mechanism to user-space. The ip_queue77+ module was already announced to become obsolete years ago.88+99+Users:
+11-2
Documentation/networking/ip-sysctl.txt
···13011301bridge-nf-filter-vlan-tagged - BOOLEAN13021302 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.13031303 0 : disable this.13041304- Default: 113041304+ Default: 01305130513061306bridge-nf-filter-pppoe-tagged - BOOLEAN13071307 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.13081308 0 : disable this.13091309- Default: 113091309+ Default: 01310131013111311+bridge-nf-pass-vlan-input-dev - BOOLEAN13121312+ 1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan13131313+ interface on the bridge and set the netfilter input device to the vlan.13141314+ This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT13151315+ target work with vlan-on-top-of-bridge interfaces. When no matching13161316+ vlan interface is found, or this switch is off, the input device is13171317+ set to the bridge interface.13181318+ 0: disable bridge netfilter vlan interface lookup.13191319+ Default: 01311132013121321proc/sys/net/sctp/* Variables:13131322
+5
include/linux/ip_vs.h
···8989#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */9090#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */91919292+/* Initial bits allowed in backup server */9293#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \9394 IP_VS_CONN_F_NOOUTPUT | \9495 IP_VS_CONN_F_INACTIVE | \···9796 IP_VS_CONN_F_NO_CPORT | \9897 IP_VS_CONN_F_TEMPLATE \9998 )9999+100100+/* Bits allowed to update in backup server */101101+#define IP_VS_CONN_F_BACKUP_UPD_MASK (IP_VS_CONN_F_INACTIVE | \102102+ IP_VS_CONN_F_SEQ_MASK)100103101104/* Flags that are not sent to backup server start from bit 16 */102105#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
···6060 return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);6161}62626363-extern int nf_conntrack_helper_init(void);6464-extern void nf_conntrack_helper_fini(void);6363+extern int nf_conntrack_helper_init(struct net *net);6464+extern void nf_conntrack_helper_fini(struct net *net);65656666extern int nf_conntrack_broadcast_help(struct sk_buff *skb,6767 unsigned int protoff,
+3
include/net/netns/conntrack.h
···2626 int sysctl_tstamp;2727 int sysctl_checksum;2828 unsigned int sysctl_log_invalid; /* Log invalid packets */2929+ int sysctl_auto_assign_helper;3030+ bool auto_assign_helper_warned;2931#ifdef CONFIG_SYSCTL3032 struct ctl_table_header *sysctl_header;3133 struct ctl_table_header *acct_sysctl_header;3234 struct ctl_table_header *tstamp_sysctl_header;3335 struct ctl_table_header *event_sysctl_header;3636+ struct ctl_table_header *helper_sysctl_header;3437#endif3538 char *slabname;3639};
···66666767# just filtering instance of ARP tables for now6868obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o6969-7070-obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o7171-
-639
net/ipv4/netfilter/ip_queue.c
···11-/*22- * This is a module which is used for queueing IPv4 packets and33- * communicating with userspace via netlink.44- *55- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>66- * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>77- *88- * This program is free software; you can redistribute it and/or modify99- * it under the terms of the GNU General Public License version 2 as1010- * published by the Free Software Foundation.1111- */1212-#include <linux/module.h>1313-#include <linux/skbuff.h>1414-#include <linux/init.h>1515-#include <linux/ip.h>1616-#include <linux/notifier.h>1717-#include <linux/netdevice.h>1818-#include <linux/netfilter.h>1919-#include <linux/netfilter_ipv4/ip_queue.h>2020-#include <linux/netfilter_ipv4/ip_tables.h>2121-#include <linux/netlink.h>2222-#include <linux/spinlock.h>2323-#include <linux/sysctl.h>2424-#include <linux/proc_fs.h>2525-#include <linux/seq_file.h>2626-#include <linux/security.h>2727-#include <linux/net.h>2828-#include <linux/mutex.h>2929-#include <linux/slab.h>3030-#include <net/net_namespace.h>3131-#include <net/sock.h>3232-#include <net/route.h>3333-#include <net/netfilter/nf_queue.h>3434-#include <net/ip.h>3535-3636-#define IPQ_QMAX_DEFAULT 10243737-#define IPQ_PROC_FS_NAME "ip_queue"3838-#define NET_IPQ_QMAX 20883939-#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"4040-4141-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);4242-4343-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;4444-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;4545-static DEFINE_SPINLOCK(queue_lock);4646-static int peer_pid __read_mostly;4747-static unsigned int copy_range __read_mostly;4848-static unsigned int queue_total;4949-static unsigned int queue_dropped = 0;5050-static unsigned int queue_user_dropped = 0;5151-static struct sock *ipqnl __read_mostly;5252-static LIST_HEAD(queue_list);5353-static DEFINE_MUTEX(ipqnl_mutex);5454-5555-static inline void5656-__ipq_enqueue_entry(struct nf_queue_entry *entry)5757-{5858- list_add_tail(&entry->list, &queue_list);5959- queue_total++;6060-}6161-6262-static inline int6363-__ipq_set_mode(unsigned char mode, unsigned int range)6464-{6565- int status = 0;6666-6767- switch(mode) {6868- case IPQ_COPY_NONE:6969- case IPQ_COPY_META:7070- copy_mode = mode;7171- copy_range = 0;7272- break;7373-7474- case IPQ_COPY_PACKET:7575- if (range > 0xFFFF)7676- range = 0xFFFF;7777- copy_range = range;7878- copy_mode = mode;7979- break;8080-8181- default:8282- status = -EINVAL;8383-8484- }8585- return status;8686-}8787-8888-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);8989-9090-static inline void9191-__ipq_reset(void)9292-{9393- peer_pid = 0;9494- net_disable_timestamp();9595- __ipq_set_mode(IPQ_COPY_NONE, 0);9696- __ipq_flush(NULL, 0);9797-}9898-9999-static struct nf_queue_entry *100100-ipq_find_dequeue_entry(unsigned long id)101101-{102102- struct nf_queue_entry *entry = NULL, *i;103103-104104- spin_lock_bh(&queue_lock);105105-106106- list_for_each_entry(i, &queue_list, list) {107107- if ((unsigned long)i == id) {108108- entry = i;109109- break;110110- }111111- }112112-113113- if (entry) {114114- list_del(&entry->list);115115- queue_total--;116116- }117117-118118- spin_unlock_bh(&queue_lock);119119- return entry;120120-}121121-122122-static void123123-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)124124-{125125- struct nf_queue_entry *entry, *next;126126-127127- list_for_each_entry_safe(entry, next, &queue_list, list) {128128- if (!cmpfn || cmpfn(entry, data)) {129129- list_del(&entry->list);130130- queue_total--;131131- nf_reinject(entry, NF_DROP);132132- }133133- }134134-}135135-136136-static void137137-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)138138-{139139- spin_lock_bh(&queue_lock);140140- __ipq_flush(cmpfn, data);141141- spin_unlock_bh(&queue_lock);142142-}143143-144144-static struct sk_buff *145145-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)146146-{147147- sk_buff_data_t old_tail;148148- size_t size = 0;149149- size_t data_len = 0;150150- struct sk_buff *skb;151151- struct ipq_packet_msg *pmsg;152152- struct nlmsghdr *nlh;153153- struct timeval tv;154154-155155- switch (ACCESS_ONCE(copy_mode)) {156156- case IPQ_COPY_META:157157- case IPQ_COPY_NONE:158158- size = NLMSG_SPACE(sizeof(*pmsg));159159- break;160160-161161- case IPQ_COPY_PACKET:162162- if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&163163- (*errp = skb_checksum_help(entry->skb)))164164- return NULL;165165-166166- data_len = ACCESS_ONCE(copy_range);167167- if (data_len == 0 || data_len > entry->skb->len)168168- data_len = entry->skb->len;169169-170170- size = NLMSG_SPACE(sizeof(*pmsg) + data_len);171171- break;172172-173173- default:174174- *errp = -EINVAL;175175- return NULL;176176- }177177-178178- skb = alloc_skb(size, GFP_ATOMIC);179179- if (!skb)180180- goto nlmsg_failure;181181-182182- old_tail = skb->tail;183183- nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));184184- pmsg = NLMSG_DATA(nlh);185185- memset(pmsg, 0, sizeof(*pmsg));186186-187187- pmsg->packet_id = (unsigned long )entry;188188- pmsg->data_len = data_len;189189- tv = ktime_to_timeval(entry->skb->tstamp);190190- pmsg->timestamp_sec = tv.tv_sec;191191- pmsg->timestamp_usec = tv.tv_usec;192192- pmsg->mark = entry->skb->mark;193193- pmsg->hook = entry->hook;194194- pmsg->hw_protocol = entry->skb->protocol;195195-196196- if (entry->indev)197197- strcpy(pmsg->indev_name, entry->indev->name);198198- else199199- pmsg->indev_name[0] = '\0';200200-201201- if (entry->outdev)202202- strcpy(pmsg->outdev_name, entry->outdev->name);203203- else204204- pmsg->outdev_name[0] = '\0';205205-206206- if (entry->indev && entry->skb->dev &&207207- entry->skb->mac_header != entry->skb->network_header) {208208- pmsg->hw_type = entry->skb->dev->type;209209- pmsg->hw_addrlen = dev_parse_header(entry->skb,210210- pmsg->hw_addr);211211- }212212-213213- if (data_len)214214- if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))215215- BUG();216216-217217- nlh->nlmsg_len = skb->tail - old_tail;218218- return skb;219219-220220-nlmsg_failure:221221- kfree_skb(skb);222222- *errp = -EINVAL;223223- printk(KERN_ERR "ip_queue: error creating packet message\n");224224- return NULL;225225-}226226-227227-static int228228-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)229229-{230230- int status = -EINVAL;231231- struct sk_buff *nskb;232232-233233- if (copy_mode == IPQ_COPY_NONE)234234- return -EAGAIN;235235-236236- nskb = ipq_build_packet_message(entry, &status);237237- if (nskb == NULL)238238- return status;239239-240240- spin_lock_bh(&queue_lock);241241-242242- if (!peer_pid)243243- goto err_out_free_nskb;244244-245245- if (queue_total >= queue_maxlen) {246246- queue_dropped++;247247- status = -ENOSPC;248248- if (net_ratelimit())249249- printk (KERN_WARNING "ip_queue: full at %d entries, "250250- "dropping packets(s). Dropped: %d\n", queue_total,251251- queue_dropped);252252- goto err_out_free_nskb;253253- }254254-255255- /* netlink_unicast will either free the nskb or attach it to a socket */256256- status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);257257- if (status < 0) {258258- queue_user_dropped++;259259- goto err_out_unlock;260260- }261261-262262- __ipq_enqueue_entry(entry);263263-264264- spin_unlock_bh(&queue_lock);265265- return status;266266-267267-err_out_free_nskb:268268- kfree_skb(nskb);269269-270270-err_out_unlock:271271- spin_unlock_bh(&queue_lock);272272- return status;273273-}274274-275275-static int276276-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)277277-{278278- int diff;279279- struct iphdr *user_iph = (struct iphdr *)v->payload;280280- struct sk_buff *nskb;281281-282282- if (v->data_len < sizeof(*user_iph))283283- return 0;284284- diff = v->data_len - e->skb->len;285285- if (diff < 0) {286286- if (pskb_trim(e->skb, v->data_len))287287- return -ENOMEM;288288- } else if (diff > 0) {289289- if (v->data_len > 0xFFFF)290290- return -EINVAL;291291- if (diff > skb_tailroom(e->skb)) {292292- nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),293293- diff, GFP_ATOMIC);294294- if (!nskb) {295295- printk(KERN_WARNING "ip_queue: error "296296- "in mangle, dropping packet\n");297297- return -ENOMEM;298298- }299299- kfree_skb(e->skb);300300- e->skb = nskb;301301- }302302- skb_put(e->skb, diff);303303- }304304- if (!skb_make_writable(e->skb, v->data_len))305305- return -ENOMEM;306306- skb_copy_to_linear_data(e->skb, v->payload, v->data_len);307307- e->skb->ip_summed = CHECKSUM_NONE;308308-309309- return 0;310310-}311311-312312-static int313313-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)314314-{315315- struct nf_queue_entry *entry;316316-317317- if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)318318- return -EINVAL;319319-320320- entry = ipq_find_dequeue_entry(vmsg->id);321321- if (entry == NULL)322322- return -ENOENT;323323- else {324324- int verdict = vmsg->value;325325-326326- if (vmsg->data_len && vmsg->data_len == len)327327- if (ipq_mangle_ipv4(vmsg, entry) < 0)328328- verdict = NF_DROP;329329-330330- nf_reinject(entry, verdict);331331- return 0;332332- }333333-}334334-335335-static int336336-ipq_set_mode(unsigned char mode, unsigned int range)337337-{338338- int status;339339-340340- spin_lock_bh(&queue_lock);341341- status = __ipq_set_mode(mode, range);342342- spin_unlock_bh(&queue_lock);343343- return status;344344-}345345-346346-static int347347-ipq_receive_peer(struct ipq_peer_msg *pmsg,348348- unsigned char type, unsigned int len)349349-{350350- int status = 0;351351-352352- if (len < sizeof(*pmsg))353353- return -EINVAL;354354-355355- switch (type) {356356- case IPQM_MODE:357357- status = ipq_set_mode(pmsg->msg.mode.value,358358- pmsg->msg.mode.range);359359- break;360360-361361- case IPQM_VERDICT:362362- status = ipq_set_verdict(&pmsg->msg.verdict,363363- len - sizeof(*pmsg));364364- break;365365- default:366366- status = -EINVAL;367367- }368368- return status;369369-}370370-371371-static int372372-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)373373-{374374- if (entry->indev)375375- if (entry->indev->ifindex == ifindex)376376- return 1;377377- if (entry->outdev)378378- if (entry->outdev->ifindex == ifindex)379379- return 1;380380-#ifdef CONFIG_BRIDGE_NETFILTER381381- if (entry->skb->nf_bridge) {382382- if (entry->skb->nf_bridge->physindev &&383383- entry->skb->nf_bridge->physindev->ifindex == ifindex)384384- return 1;385385- if (entry->skb->nf_bridge->physoutdev &&386386- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)387387- return 1;388388- }389389-#endif390390- return 0;391391-}392392-393393-static void394394-ipq_dev_drop(int ifindex)395395-{396396- ipq_flush(dev_cmp, ifindex);397397-}398398-399399-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)400400-401401-static inline void402402-__ipq_rcv_skb(struct sk_buff *skb)403403-{404404- int status, type, pid, flags;405405- unsigned int nlmsglen, skblen;406406- struct nlmsghdr *nlh;407407- bool enable_timestamp = false;408408-409409- skblen = skb->len;410410- if (skblen < sizeof(*nlh))411411- return;412412-413413- nlh = nlmsg_hdr(skb);414414- nlmsglen = nlh->nlmsg_len;415415- if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)416416- return;417417-418418- pid = nlh->nlmsg_pid;419419- flags = nlh->nlmsg_flags;420420-421421- if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)422422- RCV_SKB_FAIL(-EINVAL);423423-424424- if (flags & MSG_TRUNC)425425- RCV_SKB_FAIL(-ECOMM);426426-427427- type = nlh->nlmsg_type;428428- if (type < NLMSG_NOOP || type >= IPQM_MAX)429429- RCV_SKB_FAIL(-EINVAL);430430-431431- if (type <= IPQM_BASE)432432- return;433433-434434- if (!capable(CAP_NET_ADMIN))435435- RCV_SKB_FAIL(-EPERM);436436-437437- spin_lock_bh(&queue_lock);438438-439439- if (peer_pid) {440440- if (peer_pid != pid) {441441- spin_unlock_bh(&queue_lock);442442- RCV_SKB_FAIL(-EBUSY);443443- }444444- } else {445445- enable_timestamp = true;446446- peer_pid = pid;447447- }448448-449449- spin_unlock_bh(&queue_lock);450450- if (enable_timestamp)451451- net_enable_timestamp();452452- status = ipq_receive_peer(NLMSG_DATA(nlh), type,453453- nlmsglen - NLMSG_LENGTH(0));454454- if (status < 0)455455- RCV_SKB_FAIL(status);456456-457457- if (flags & NLM_F_ACK)458458- netlink_ack(skb, nlh, 0);459459-}460460-461461-static void462462-ipq_rcv_skb(struct sk_buff *skb)463463-{464464- mutex_lock(&ipqnl_mutex);465465- __ipq_rcv_skb(skb);466466- mutex_unlock(&ipqnl_mutex);467467-}468468-469469-static int470470-ipq_rcv_dev_event(struct notifier_block *this,471471- unsigned long event, void *ptr)472472-{473473- struct net_device *dev = ptr;474474-475475- if (!net_eq(dev_net(dev), &init_net))476476- return NOTIFY_DONE;477477-478478- /* Drop any packets associated with the downed device */479479- if (event == NETDEV_DOWN)480480- ipq_dev_drop(dev->ifindex);481481- return NOTIFY_DONE;482482-}483483-484484-static struct notifier_block ipq_dev_notifier = {485485- .notifier_call = ipq_rcv_dev_event,486486-};487487-488488-static int489489-ipq_rcv_nl_event(struct notifier_block *this,490490- unsigned long event, void *ptr)491491-{492492- struct netlink_notify *n = ptr;493493-494494- if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) {495495- spin_lock_bh(&queue_lock);496496- if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))497497- __ipq_reset();498498- spin_unlock_bh(&queue_lock);499499- }500500- return NOTIFY_DONE;501501-}502502-503503-static struct notifier_block ipq_nl_notifier = {504504- .notifier_call = ipq_rcv_nl_event,505505-};506506-507507-#ifdef CONFIG_SYSCTL508508-static struct ctl_table_header *ipq_sysctl_header;509509-510510-static ctl_table ipq_table[] = {511511- {512512- .procname = NET_IPQ_QMAX_NAME,513513- .data = &queue_maxlen,514514- .maxlen = sizeof(queue_maxlen),515515- .mode = 0644,516516- .proc_handler = proc_dointvec517517- },518518- { }519519-};520520-#endif521521-522522-#ifdef CONFIG_PROC_FS523523-static int ip_queue_show(struct seq_file *m, void *v)524524-{525525- spin_lock_bh(&queue_lock);526526-527527- seq_printf(m,528528- "Peer PID : %d\n"529529- "Copy mode : %hu\n"530530- "Copy range : %u\n"531531- "Queue length : %u\n"532532- "Queue max. length : %u\n"533533- "Queue dropped : %u\n"534534- "Netlink dropped : %u\n",535535- peer_pid,536536- copy_mode,537537- copy_range,538538- queue_total,539539- queue_maxlen,540540- queue_dropped,541541- queue_user_dropped);542542-543543- spin_unlock_bh(&queue_lock);544544- return 0;545545-}546546-547547-static int ip_queue_open(struct inode *inode, struct file *file)548548-{549549- return single_open(file, ip_queue_show, NULL);550550-}551551-552552-static const struct file_operations ip_queue_proc_fops = {553553- .open = ip_queue_open,554554- .read = seq_read,555555- .llseek = seq_lseek,556556- .release = single_release,557557- .owner = THIS_MODULE,558558-};559559-#endif560560-561561-static const struct nf_queue_handler nfqh = {562562- .name = "ip_queue",563563- .outfn = &ipq_enqueue_packet,564564-};565565-566566-static int __init ip_queue_init(void)567567-{568568- int status = -ENOMEM;569569- struct proc_dir_entry *proc __maybe_unused;570570-571571- netlink_register_notifier(&ipq_nl_notifier);572572- ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,573573- ipq_rcv_skb, NULL, THIS_MODULE);574574- if (ipqnl == NULL) {575575- printk(KERN_ERR "ip_queue: failed to create netlink socket\n");576576- goto cleanup_netlink_notifier;577577- }578578-579579-#ifdef CONFIG_PROC_FS580580- proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,581581- &ip_queue_proc_fops);582582- if (!proc) {583583- printk(KERN_ERR "ip_queue: failed to create proc entry\n");584584- goto cleanup_ipqnl;585585- }586586-#endif587587- register_netdevice_notifier(&ipq_dev_notifier);588588-#ifdef CONFIG_SYSCTL589589- ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv4", ipq_table);590590-#endif591591- status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh);592592- if (status < 0) {593593- printk(KERN_ERR "ip_queue: failed to register queue handler\n");594594- goto cleanup_sysctl;595595- }596596- return status;597597-598598-cleanup_sysctl:599599-#ifdef CONFIG_SYSCTL600600- unregister_net_sysctl_table(ipq_sysctl_header);601601-#endif602602- unregister_netdevice_notifier(&ipq_dev_notifier);603603- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);604604-cleanup_ipqnl: __maybe_unused605605- netlink_kernel_release(ipqnl);606606- mutex_lock(&ipqnl_mutex);607607- mutex_unlock(&ipqnl_mutex);608608-609609-cleanup_netlink_notifier:610610- netlink_unregister_notifier(&ipq_nl_notifier);611611- return status;612612-}613613-614614-static void __exit ip_queue_fini(void)615615-{616616- nf_unregister_queue_handlers(&nfqh);617617-618618- ipq_flush(NULL, 0);619619-620620-#ifdef CONFIG_SYSCTL621621- unregister_net_sysctl_table(ipq_sysctl_header);622622-#endif623623- unregister_netdevice_notifier(&ipq_dev_notifier);624624- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);625625-626626- netlink_kernel_release(ipqnl);627627- mutex_lock(&ipqnl_mutex);628628- mutex_unlock(&ipqnl_mutex);629629-630630- netlink_unregister_notifier(&ipq_nl_notifier);631631-}632632-633633-MODULE_DESCRIPTION("IPv4 packet queue handler");634634-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");635635-MODULE_LICENSE("GPL");636636-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);637637-638638-module_init(ip_queue_init);639639-module_exit(ip_queue_fini);
-22
net/ipv6/netfilter/Kconfig
···25252626 To compile it as a module, choose M here. If unsure, say N.27272828-config IP6_NF_QUEUE2929- tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)"3030- depends on INET && IPV6 && NETFILTER3131- depends on NETFILTER_ADVANCED3232- ---help---3333-3434- This option adds a queue handler to the kernel for IPv63535- packets which enables users to receive the filtered packets3636- with QUEUE target using libipq.3737-3838- This option enables the old IPv6-only "ip6_queue" implementation3939- which has been obsoleted by the new "nfnetlink_queue" code (see4040- CONFIG_NETFILTER_NETLINK_QUEUE).4141-4242- (C) Fernando Anton 20014343- IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.4444- Universidad Carlos III de Madrid4545- Universidad Politecnica de Alcala de Henares4646- email: <fanton@it.uc3m.es>.4747-4848- To compile it as a module, choose M here. If unsure, say N.4949-5028config IP6_NF_IPTABLES5129 tristate "IP6 tables support (required for filtering)"5230 depends on INET && IPV6
···11-/*22- * This is a module which is used for queueing IPv6 packets and33- * communicating with userspace via netlink.44- *55- * (C) 2001 Fernando Anton, this code is GPL.66- * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.77- * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain88- * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain99- * email: fanton@it.uc3m.es1010- *1111- * This program is free software; you can redistribute it and/or modify1212- * it under the terms of the GNU General Public License version 2 as1313- * published by the Free Software Foundation.1414- */1515-#include <linux/module.h>1616-#include <linux/skbuff.h>1717-#include <linux/init.h>1818-#include <linux/ipv6.h>1919-#include <linux/notifier.h>2020-#include <linux/netdevice.h>2121-#include <linux/netfilter.h>2222-#include <linux/netlink.h>2323-#include <linux/spinlock.h>2424-#include <linux/sysctl.h>2525-#include <linux/proc_fs.h>2626-#include <linux/seq_file.h>2727-#include <linux/mutex.h>2828-#include <linux/slab.h>2929-#include <net/net_namespace.h>3030-#include <net/sock.h>3131-#include <net/ipv6.h>3232-#include <net/ip6_route.h>3333-#include <net/netfilter/nf_queue.h>3434-#include <linux/netfilter_ipv4/ip_queue.h>3535-#include <linux/netfilter_ipv4/ip_tables.h>3636-#include <linux/netfilter_ipv6/ip6_tables.h>3737-3838-#define IPQ_QMAX_DEFAULT 10243939-#define IPQ_PROC_FS_NAME "ip6_queue"4040-#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"4141-4242-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);4343-4444-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;4545-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;4646-static DEFINE_SPINLOCK(queue_lock);4747-static int peer_pid __read_mostly;4848-static unsigned int copy_range __read_mostly;4949-static unsigned int queue_total;5050-static unsigned int queue_dropped = 0;5151-static unsigned int queue_user_dropped = 0;5252-static struct sock *ipqnl __read_mostly;5353-static LIST_HEAD(queue_list);5454-static DEFINE_MUTEX(ipqnl_mutex);5555-5656-static inline void5757-__ipq_enqueue_entry(struct nf_queue_entry *entry)5858-{5959- list_add_tail(&entry->list, &queue_list);6060- queue_total++;6161-}6262-6363-static inline int6464-__ipq_set_mode(unsigned char mode, unsigned int range)6565-{6666- int status = 0;6767-6868- switch(mode) {6969- case IPQ_COPY_NONE:7070- case IPQ_COPY_META:7171- copy_mode = mode;7272- copy_range = 0;7373- break;7474-7575- case IPQ_COPY_PACKET:7676- if (range > 0xFFFF)7777- range = 0xFFFF;7878- copy_range = range;7979- copy_mode = mode;8080- break;8181-8282- default:8383- status = -EINVAL;8484-8585- }8686- return status;8787-}8888-8989-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);9090-9191-static inline void9292-__ipq_reset(void)9393-{9494- peer_pid = 0;9595- net_disable_timestamp();9696- __ipq_set_mode(IPQ_COPY_NONE, 0);9797- __ipq_flush(NULL, 0);9898-}9999-100100-static struct nf_queue_entry *101101-ipq_find_dequeue_entry(unsigned long id)102102-{103103- struct nf_queue_entry *entry = NULL, *i;104104-105105- spin_lock_bh(&queue_lock);106106-107107- list_for_each_entry(i, &queue_list, list) {108108- if ((unsigned long)i == id) {109109- entry = i;110110- break;111111- }112112- }113113-114114- if (entry) {115115- list_del(&entry->list);116116- queue_total--;117117- }118118-119119- spin_unlock_bh(&queue_lock);120120- return entry;121121-}122122-123123-static void124124-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)125125-{126126- struct nf_queue_entry *entry, *next;127127-128128- list_for_each_entry_safe(entry, next, &queue_list, list) {129129- if (!cmpfn || cmpfn(entry, data)) {130130- list_del(&entry->list);131131- queue_total--;132132- nf_reinject(entry, NF_DROP);133133- }134134- }135135-}136136-137137-static void138138-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)139139-{140140- spin_lock_bh(&queue_lock);141141- __ipq_flush(cmpfn, data);142142- spin_unlock_bh(&queue_lock);143143-}144144-145145-static struct sk_buff *146146-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)147147-{148148- sk_buff_data_t old_tail;149149- size_t size = 0;150150- size_t data_len = 0;151151- struct sk_buff *skb;152152- struct ipq_packet_msg *pmsg;153153- struct nlmsghdr *nlh;154154- struct timeval tv;155155-156156- switch (ACCESS_ONCE(copy_mode)) {157157- case IPQ_COPY_META:158158- case IPQ_COPY_NONE:159159- size = NLMSG_SPACE(sizeof(*pmsg));160160- break;161161-162162- case IPQ_COPY_PACKET:163163- if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&164164- (*errp = skb_checksum_help(entry->skb)))165165- return NULL;166166-167167- data_len = ACCESS_ONCE(copy_range);168168- if (data_len == 0 || data_len > entry->skb->len)169169- data_len = entry->skb->len;170170-171171- size = NLMSG_SPACE(sizeof(*pmsg) + data_len);172172- break;173173-174174- default:175175- *errp = -EINVAL;176176- return NULL;177177- }178178-179179- skb = alloc_skb(size, GFP_ATOMIC);180180- if (!skb)181181- goto nlmsg_failure;182182-183183- old_tail = skb->tail;184184- nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));185185- pmsg = NLMSG_DATA(nlh);186186- memset(pmsg, 0, sizeof(*pmsg));187187-188188- pmsg->packet_id = (unsigned long )entry;189189- pmsg->data_len = data_len;190190- tv = ktime_to_timeval(entry->skb->tstamp);191191- pmsg->timestamp_sec = tv.tv_sec;192192- pmsg->timestamp_usec = tv.tv_usec;193193- pmsg->mark = entry->skb->mark;194194- pmsg->hook = entry->hook;195195- pmsg->hw_protocol = entry->skb->protocol;196196-197197- if (entry->indev)198198- strcpy(pmsg->indev_name, entry->indev->name);199199- else200200- pmsg->indev_name[0] = '\0';201201-202202- if (entry->outdev)203203- strcpy(pmsg->outdev_name, entry->outdev->name);204204- else205205- pmsg->outdev_name[0] = '\0';206206-207207- if (entry->indev && entry->skb->dev &&208208- entry->skb->mac_header != entry->skb->network_header) {209209- pmsg->hw_type = entry->skb->dev->type;210210- pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr);211211- }212212-213213- if (data_len)214214- if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))215215- BUG();216216-217217- nlh->nlmsg_len = skb->tail - old_tail;218218- return skb;219219-220220-nlmsg_failure:221221- kfree_skb(skb);222222- *errp = -EINVAL;223223- printk(KERN_ERR "ip6_queue: error creating packet message\n");224224- return NULL;225225-}226226-227227-static int228228-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)229229-{230230- int status = -EINVAL;231231- struct sk_buff *nskb;232232-233233- if (copy_mode == IPQ_COPY_NONE)234234- return -EAGAIN;235235-236236- nskb = ipq_build_packet_message(entry, &status);237237- if (nskb == NULL)238238- return status;239239-240240- spin_lock_bh(&queue_lock);241241-242242- if (!peer_pid)243243- goto err_out_free_nskb;244244-245245- if (queue_total >= queue_maxlen) {246246- queue_dropped++;247247- status = -ENOSPC;248248- if (net_ratelimit())249249- printk (KERN_WARNING "ip6_queue: fill at %d entries, "250250- "dropping packet(s). Dropped: %d\n", queue_total,251251- queue_dropped);252252- goto err_out_free_nskb;253253- }254254-255255- /* netlink_unicast will either free the nskb or attach it to a socket */256256- status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);257257- if (status < 0) {258258- queue_user_dropped++;259259- goto err_out_unlock;260260- }261261-262262- __ipq_enqueue_entry(entry);263263-264264- spin_unlock_bh(&queue_lock);265265- return status;266266-267267-err_out_free_nskb:268268- kfree_skb(nskb);269269-270270-err_out_unlock:271271- spin_unlock_bh(&queue_lock);272272- return status;273273-}274274-275275-static int276276-ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e)277277-{278278- int diff;279279- struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;280280- struct sk_buff *nskb;281281-282282- if (v->data_len < sizeof(*user_iph))283283- return 0;284284- diff = v->data_len - e->skb->len;285285- if (diff < 0) {286286- if (pskb_trim(e->skb, v->data_len))287287- return -ENOMEM;288288- } else if (diff > 0) {289289- if (v->data_len > 0xFFFF)290290- return -EINVAL;291291- if (diff > skb_tailroom(e->skb)) {292292- nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),293293- diff, GFP_ATOMIC);294294- if (!nskb) {295295- printk(KERN_WARNING "ip6_queue: OOM "296296- "in mangle, dropping packet\n");297297- return -ENOMEM;298298- }299299- kfree_skb(e->skb);300300- e->skb = nskb;301301- }302302- skb_put(e->skb, diff);303303- }304304- if (!skb_make_writable(e->skb, v->data_len))305305- return -ENOMEM;306306- skb_copy_to_linear_data(e->skb, v->payload, v->data_len);307307- e->skb->ip_summed = CHECKSUM_NONE;308308-309309- return 0;310310-}311311-312312-static int313313-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)314314-{315315- struct nf_queue_entry *entry;316316-317317- if (vmsg->value > NF_MAX_VERDICT || vmsg->value == NF_STOLEN)318318- return -EINVAL;319319-320320- entry = ipq_find_dequeue_entry(vmsg->id);321321- if (entry == NULL)322322- return -ENOENT;323323- else {324324- int verdict = vmsg->value;325325-326326- if (vmsg->data_len && vmsg->data_len == len)327327- if (ipq_mangle_ipv6(vmsg, entry) < 0)328328- verdict = NF_DROP;329329-330330- nf_reinject(entry, verdict);331331- return 0;332332- }333333-}334334-335335-static int336336-ipq_set_mode(unsigned char mode, unsigned int range)337337-{338338- int status;339339-340340- spin_lock_bh(&queue_lock);341341- status = __ipq_set_mode(mode, range);342342- spin_unlock_bh(&queue_lock);343343- return status;344344-}345345-346346-static int347347-ipq_receive_peer(struct ipq_peer_msg *pmsg,348348- unsigned char type, unsigned int len)349349-{350350- int status = 0;351351-352352- if (len < sizeof(*pmsg))353353- return -EINVAL;354354-355355- switch (type) {356356- case IPQM_MODE:357357- status = ipq_set_mode(pmsg->msg.mode.value,358358- pmsg->msg.mode.range);359359- break;360360-361361- case IPQM_VERDICT:362362- status = ipq_set_verdict(&pmsg->msg.verdict,363363- len - sizeof(*pmsg));364364- break;365365- default:366366- status = -EINVAL;367367- }368368- return status;369369-}370370-371371-static int372372-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)373373-{374374- if (entry->indev)375375- if (entry->indev->ifindex == ifindex)376376- return 1;377377-378378- if (entry->outdev)379379- if (entry->outdev->ifindex == ifindex)380380- return 1;381381-#ifdef CONFIG_BRIDGE_NETFILTER382382- if (entry->skb->nf_bridge) {383383- if (entry->skb->nf_bridge->physindev &&384384- entry->skb->nf_bridge->physindev->ifindex == ifindex)385385- return 1;386386- if (entry->skb->nf_bridge->physoutdev &&387387- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)388388- return 1;389389- }390390-#endif391391- return 0;392392-}393393-394394-static void395395-ipq_dev_drop(int ifindex)396396-{397397- ipq_flush(dev_cmp, ifindex);398398-}399399-400400-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)401401-402402-static inline void403403-__ipq_rcv_skb(struct sk_buff *skb)404404-{405405- int status, type, pid, flags;406406- unsigned int nlmsglen, skblen;407407- struct nlmsghdr *nlh;408408- bool enable_timestamp = false;409409-410410- skblen = skb->len;411411- if (skblen < sizeof(*nlh))412412- return;413413-414414- nlh = nlmsg_hdr(skb);415415- nlmsglen = nlh->nlmsg_len;416416- if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)417417- return;418418-419419- pid = nlh->nlmsg_pid;420420- flags = nlh->nlmsg_flags;421421-422422- if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)423423- RCV_SKB_FAIL(-EINVAL);424424-425425- if (flags & MSG_TRUNC)426426- RCV_SKB_FAIL(-ECOMM);427427-428428- type = nlh->nlmsg_type;429429- if (type < NLMSG_NOOP || type >= IPQM_MAX)430430- RCV_SKB_FAIL(-EINVAL);431431-432432- if (type <= IPQM_BASE)433433- return;434434-435435- if (!capable(CAP_NET_ADMIN))436436- RCV_SKB_FAIL(-EPERM);437437-438438- spin_lock_bh(&queue_lock);439439-440440- if (peer_pid) {441441- if (peer_pid != pid) {442442- spin_unlock_bh(&queue_lock);443443- RCV_SKB_FAIL(-EBUSY);444444- }445445- } else {446446- enable_timestamp = true;447447- peer_pid = pid;448448- }449449-450450- spin_unlock_bh(&queue_lock);451451- if (enable_timestamp)452452- net_enable_timestamp();453453-454454- status = ipq_receive_peer(NLMSG_DATA(nlh), type,455455- nlmsglen - NLMSG_LENGTH(0));456456- if (status < 0)457457- RCV_SKB_FAIL(status);458458-459459- if (flags & NLM_F_ACK)460460- netlink_ack(skb, nlh, 0);461461-}462462-463463-static void464464-ipq_rcv_skb(struct sk_buff *skb)465465-{466466- mutex_lock(&ipqnl_mutex);467467- __ipq_rcv_skb(skb);468468- mutex_unlock(&ipqnl_mutex);469469-}470470-471471-static int472472-ipq_rcv_dev_event(struct notifier_block *this,473473- unsigned long event, void *ptr)474474-{475475- struct net_device *dev = ptr;476476-477477- if (!net_eq(dev_net(dev), &init_net))478478- return NOTIFY_DONE;479479-480480- /* Drop any packets associated with the downed device */481481- if (event == NETDEV_DOWN)482482- ipq_dev_drop(dev->ifindex);483483- return NOTIFY_DONE;484484-}485485-486486-static struct notifier_block ipq_dev_notifier = {487487- .notifier_call = ipq_rcv_dev_event,488488-};489489-490490-static int491491-ipq_rcv_nl_event(struct notifier_block *this,492492- unsigned long event, void *ptr)493493-{494494- struct netlink_notify *n = ptr;495495-496496- if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) {497497- spin_lock_bh(&queue_lock);498498- if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid))499499- __ipq_reset();500500- spin_unlock_bh(&queue_lock);501501- }502502- return NOTIFY_DONE;503503-}504504-505505-static struct notifier_block ipq_nl_notifier = {506506- .notifier_call = ipq_rcv_nl_event,507507-};508508-509509-#ifdef CONFIG_SYSCTL510510-static struct ctl_table_header *ipq_sysctl_header;511511-512512-static ctl_table ipq_table[] = {513513- {514514- .procname = NET_IPQ_QMAX_NAME,515515- .data = &queue_maxlen,516516- .maxlen = sizeof(queue_maxlen),517517- .mode = 0644,518518- .proc_handler = proc_dointvec519519- },520520- { }521521-};522522-#endif523523-524524-#ifdef CONFIG_PROC_FS525525-static int ip6_queue_show(struct seq_file *m, void *v)526526-{527527- spin_lock_bh(&queue_lock);528528-529529- seq_printf(m,530530- "Peer PID : %d\n"531531- "Copy mode : %hu\n"532532- "Copy range : %u\n"533533- "Queue length : %u\n"534534- "Queue max. length : %u\n"535535- "Queue dropped : %u\n"536536- "Netfilter dropped : %u\n",537537- peer_pid,538538- copy_mode,539539- copy_range,540540- queue_total,541541- queue_maxlen,542542- queue_dropped,543543- queue_user_dropped);544544-545545- spin_unlock_bh(&queue_lock);546546- return 0;547547-}548548-549549-static int ip6_queue_open(struct inode *inode, struct file *file)550550-{551551- return single_open(file, ip6_queue_show, NULL);552552-}553553-554554-static const struct file_operations ip6_queue_proc_fops = {555555- .open = ip6_queue_open,556556- .read = seq_read,557557- .llseek = seq_lseek,558558- .release = single_release,559559- .owner = THIS_MODULE,560560-};561561-#endif562562-563563-static const struct nf_queue_handler nfqh = {564564- .name = "ip6_queue",565565- .outfn = &ipq_enqueue_packet,566566-};567567-568568-static int __init ip6_queue_init(void)569569-{570570- int status = -ENOMEM;571571- struct proc_dir_entry *proc __maybe_unused;572572-573573- netlink_register_notifier(&ipq_nl_notifier);574574- ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0,575575- ipq_rcv_skb, NULL, THIS_MODULE);576576- if (ipqnl == NULL) {577577- printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");578578- goto cleanup_netlink_notifier;579579- }580580-581581-#ifdef CONFIG_PROC_FS582582- proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,583583- &ip6_queue_proc_fops);584584- if (!proc) {585585- printk(KERN_ERR "ip6_queue: failed to create proc entry\n");586586- goto cleanup_ipqnl;587587- }588588-#endif589589- register_netdevice_notifier(&ipq_dev_notifier);590590-#ifdef CONFIG_SYSCTL591591- ipq_sysctl_header = register_net_sysctl(&init_net, "net/ipv6", ipq_table);592592-#endif593593- status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh);594594- if (status < 0) {595595- printk(KERN_ERR "ip6_queue: failed to register queue handler\n");596596- goto cleanup_sysctl;597597- }598598- return status;599599-600600-cleanup_sysctl:601601-#ifdef CONFIG_SYSCTL602602- unregister_net_sysctl_table(ipq_sysctl_header);603603-#endif604604- unregister_netdevice_notifier(&ipq_dev_notifier);605605- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);606606-607607-cleanup_ipqnl: __maybe_unused608608- netlink_kernel_release(ipqnl);609609- mutex_lock(&ipqnl_mutex);610610- mutex_unlock(&ipqnl_mutex);611611-612612-cleanup_netlink_notifier:613613- netlink_unregister_notifier(&ipq_nl_notifier);614614- return status;615615-}616616-617617-static void __exit ip6_queue_fini(void)618618-{619619- nf_unregister_queue_handlers(&nfqh);620620-621621- ipq_flush(NULL, 0);622622-623623-#ifdef CONFIG_SYSCTL624624- unregister_net_sysctl_table(ipq_sysctl_header);625625-#endif626626- unregister_netdevice_notifier(&ipq_dev_notifier);627627- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);628628-629629- netlink_kernel_release(ipqnl);630630- mutex_lock(&ipqnl_mutex);631631- mutex_unlock(&ipqnl_mutex);632632-633633- netlink_unregister_notifier(&ipq_nl_notifier);634634-}635635-636636-MODULE_DESCRIPTION("IPv6 packet queue handler");637637-MODULE_LICENSE("GPL");638638-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);639639-640640-module_init(ip6_queue_init);641641-module_exit(ip6_queue_fini);
+52-18
net/netfilter/ipvs/ip_vs_conn.c
···548548ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)549549{550550 unsigned int conn_flags;551551+ __u32 flags;551552552553 /* if dest is NULL, then return directly */553554 if (!dest)···560559 conn_flags = atomic_read(&dest->conn_flags);561560 if (cp->protocol != IPPROTO_UDP)562561 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;562562+ flags = cp->flags;563563 /* Bind with the destination and its corresponding transmitter */564564- if (cp->flags & IP_VS_CONN_F_SYNC) {564564+ if (flags & IP_VS_CONN_F_SYNC) {565565 /* if the connection is not template and is created566566 * by sync, preserve the activity flag.567567 */568568- if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))568568+ if (!(flags & IP_VS_CONN_F_TEMPLATE))569569 conn_flags &= ~IP_VS_CONN_F_INACTIVE;570570 /* connections inherit forwarding method from dest */571571- cp->flags &= ~IP_VS_CONN_F_FWD_MASK;571571+ flags &= ~(IP_VS_CONN_F_FWD_MASK | IP_VS_CONN_F_NOOUTPUT);572572 }573573- cp->flags |= conn_flags;573573+ flags |= conn_flags;574574+ cp->flags = flags;574575 cp->dest = dest;575576576577 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "···587584 atomic_read(&dest->refcnt));588585589586 /* Update the connection counters */590590- if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) {591591- /* It is a normal connection, so increase the inactive592592- connection counter because it is in TCP SYNRECV593593- state (inactive) or other protocol inacive state */594594- if ((cp->flags & IP_VS_CONN_F_SYNC) &&595595- (!(cp->flags & IP_VS_CONN_F_INACTIVE)))587587+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {588588+ /* It is a normal connection, so modify the counters589589+ * according to the flags, later the protocol can590590+ * update them on state change591591+ */592592+ if (!(flags & IP_VS_CONN_F_INACTIVE))596593 atomic_inc(&dest->activeconns);597594 else598595 atomic_inc(&dest->inactconns);···616613{617614 struct ip_vs_dest *dest;618615619619- if ((cp) && (!cp->dest)) {620620- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,621621- cp->dport, &cp->vaddr, cp->vport,622622- cp->protocol, cp->fwmark, cp->flags);616616+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,617617+ cp->dport, &cp->vaddr, cp->vport,618618+ cp->protocol, cp->fwmark, cp->flags);619619+ if (dest) {620620+ struct ip_vs_proto_data *pd;621621+622622+ spin_lock(&cp->lock);623623+ if (cp->dest) {624624+ spin_unlock(&cp->lock);625625+ return dest;626626+ }627627+628628+ /* Applications work depending on the forwarding method629629+ * but better to reassign them always when binding dest */630630+ if (cp->app)631631+ ip_vs_unbind_app(cp);632632+623633 ip_vs_bind_dest(cp, dest);624624- return dest;625625- } else626626- return NULL;634634+ spin_unlock(&cp->lock);635635+636636+ /* Update its packet transmitter */637637+ cp->packet_xmit = NULL;638638+#ifdef CONFIG_IP_VS_IPV6639639+ if (cp->af == AF_INET6)640640+ ip_vs_bind_xmit_v6(cp);641641+ else642642+#endif643643+ ip_vs_bind_xmit(cp);644644+645645+ pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);646646+ if (pd && atomic_read(&pd->appcnt))647647+ ip_vs_bind_app(cp, pd->pp);648648+ }649649+ return dest;627650}628651629652···772743static void ip_vs_conn_expire(unsigned long data)773744{774745 struct ip_vs_conn *cp = (struct ip_vs_conn *)data;775775- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));746746+ struct net *net = ip_vs_conn_net(cp);747747+ struct netns_ipvs *ipvs = net_ipvs(net);776748777749 cp->timeout = 60*HZ;778750···837807 IP_VS_DBG(7, "delayed: conn->refcnt-1=%d conn->n_control=%d\n",838808 atomic_read(&cp->refcnt)-1,839809 atomic_read(&cp->n_control));810810+811811+ if (ipvs->sync_state & IP_VS_STATE_MASTER)812812+ ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));840813841814 ip_vs_conn_put(cp);842815}···914881 /* Set its state and timeout */915882 cp->state = 0;916883 cp->timeout = 3*HZ;884884+ cp->sync_endtime = jiffies & ~3UL;917885918886 /* Bind its packet transmitter */919887#ifdef CONFIG_IP_VS_IPV6
···149149150150 /* allocate the DH table for this service */151151 tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE,152152- GFP_ATOMIC);152152+ GFP_KERNEL);153153 if (tbl == NULL)154154 return -ENOMEM;155155
···342342 /*343343 * Allocate the ip_vs_lblc_table for this service344344 */345345- tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);345345+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);346346 if (tbl == NULL)347347 return -ENOMEM;348348
+1-1
net/netfilter/ipvs/ip_vs_lblcr.c
···511511 /*512512 * Allocate the ip_vs_lblcr_table for this service513513 */514514- tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);514514+ tbl = kmalloc(sizeof(*tbl), GFP_KERNEL);515515 if (tbl == NULL)516516 return -ENOMEM;517517
+3-3
net/netfilter/ipvs/ip_vs_proto.c
···6868 struct netns_ipvs *ipvs = net_ipvs(net);6969 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);7070 struct ip_vs_proto_data *pd =7171- kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC);7171+ kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);72727373 if (!pd)7474 return -ENOMEM;···156156/*157157 * get ip_vs_protocol object data by netns and proto158158 */159159-struct ip_vs_proto_data *159159+static struct ip_vs_proto_data *160160__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)161161{162162 struct ip_vs_proto_data *pd;···199199int *200200ip_vs_create_timeout_table(int *table, int size)201201{202202- return kmemdup(table, size, GFP_ATOMIC);202202+ return kmemdup(table, size, GFP_KERNEL);203203}204204205205
+1-1
net/netfilter/ipvs/ip_vs_sh.c
···162162163163 /* allocate the SH table for this service */164164 tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE,165165- GFP_ATOMIC);165165+ GFP_KERNEL);166166 if (tbl == NULL)167167 return -ENOMEM;168168
+455-207
net/netfilter/ipvs/ip_vs_sync.c
···196196 struct net *net;197197 struct socket *sock;198198 char *buf;199199+ int id;199200};200201201202/* Version 0 definition of packet sizes */···272271 unsigned char *end;273272};274273275275-/* multicast addr */276276-static struct sockaddr_in mcast_addr = {277277- .sin_family = AF_INET,278278- .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),279279- .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),280280-};281281-282274/*283275 * Copy of struct ip_vs_seq284276 * From unaligned network order to aligned host order···294300 put_unaligned_be32(ho->previous_delta, &no->previous_delta);295301}296302297297-static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)303303+static inline struct ip_vs_sync_buff *304304+sb_dequeue(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)298305{299306 struct ip_vs_sync_buff *sb;300307301308 spin_lock_bh(&ipvs->sync_lock);302302- if (list_empty(&ipvs->sync_queue)) {309309+ if (list_empty(&ms->sync_queue)) {303310 sb = NULL;311311+ __set_current_state(TASK_INTERRUPTIBLE);304312 } else {305305- sb = list_entry(ipvs->sync_queue.next,306306- struct ip_vs_sync_buff,313313+ sb = list_entry(ms->sync_queue.next, struct ip_vs_sync_buff,307314 list);308315 list_del(&sb->list);316316+ ms->sync_queue_len--;317317+ if (!ms->sync_queue_len)318318+ ms->sync_queue_delay = 0;309319 }310320 spin_unlock_bh(&ipvs->sync_lock);311321···332334 kfree(sb);333335 return NULL;334336 }335335- sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */337337+ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zero now */336338 sb->mesg->version = SYNC_PROTO_VER;337339 sb->mesg->syncid = ipvs->master_syncid;338340 sb->mesg->size = sizeof(struct ip_vs_sync_mesg);···351353 kfree(sb);352354}353355354354-static inline void sb_queue_tail(struct netns_ipvs *ipvs)356356+static inline void sb_queue_tail(struct netns_ipvs *ipvs,357357+ struct ipvs_master_sync_state *ms)355358{356356- struct ip_vs_sync_buff *sb = ipvs->sync_buff;359359+ struct ip_vs_sync_buff *sb = ms->sync_buff;357360358361 spin_lock(&ipvs->sync_lock);359359- if (ipvs->sync_state & IP_VS_STATE_MASTER)360360- list_add_tail(&sb->list, &ipvs->sync_queue);361361- else362362+ if (ipvs->sync_state & IP_VS_STATE_MASTER &&363363+ ms->sync_queue_len < sysctl_sync_qlen_max(ipvs)) {364364+ if (!ms->sync_queue_len)365365+ schedule_delayed_work(&ms->master_wakeup_work,366366+ max(IPVS_SYNC_SEND_DELAY, 1));367367+ ms->sync_queue_len++;368368+ list_add_tail(&sb->list, &ms->sync_queue);369369+ if ((++ms->sync_queue_delay) == IPVS_SYNC_WAKEUP_RATE)370370+ wake_up_process(ms->master_thread);371371+ } else362372 ip_vs_sync_buff_release(sb);363373 spin_unlock(&ipvs->sync_lock);364374}···376370 * than the specified time or the specified time is zero.377371 */378372static inline struct ip_vs_sync_buff *379379-get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)373373+get_curr_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms,374374+ unsigned long time)380375{381376 struct ip_vs_sync_buff *sb;382377383378 spin_lock_bh(&ipvs->sync_buff_lock);384384- if (ipvs->sync_buff &&385385- time_after_eq(jiffies - ipvs->sync_buff->firstuse, time)) {386386- sb = ipvs->sync_buff;387387- ipvs->sync_buff = NULL;379379+ sb = ms->sync_buff;380380+ if (sb && time_after_eq(jiffies - sb->firstuse, time)) {381381+ ms->sync_buff = NULL;382382+ __set_current_state(TASK_RUNNING);388383 } else389384 sb = NULL;390385 spin_unlock_bh(&ipvs->sync_buff_lock);391386 return sb;392387}393388394394-/*395395- * Switch mode from sending version 0 or 1396396- * - must handle sync_buf397397- */398398-void ip_vs_sync_switch_mode(struct net *net, int mode)389389+static inline int390390+select_master_thread_id(struct netns_ipvs *ipvs, struct ip_vs_conn *cp)399391{400400- struct netns_ipvs *ipvs = net_ipvs(net);401401-402402- if (!(ipvs->sync_state & IP_VS_STATE_MASTER))403403- return;404404- if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)405405- return;406406-407407- spin_lock_bh(&ipvs->sync_buff_lock);408408- /* Buffer empty ? then let buf_create do the job */409409- if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {410410- kfree(ipvs->sync_buff);411411- ipvs->sync_buff = NULL;412412- } else {413413- spin_lock_bh(&ipvs->sync_lock);414414- if (ipvs->sync_state & IP_VS_STATE_MASTER)415415- list_add_tail(&ipvs->sync_buff->list,416416- &ipvs->sync_queue);417417- else418418- ip_vs_sync_buff_release(ipvs->sync_buff);419419- spin_unlock_bh(&ipvs->sync_lock);420420- }421421- spin_unlock_bh(&ipvs->sync_buff_lock);392392+ return ((long) cp >> (1 + ilog2(sizeof(*cp)))) & ipvs->threads_mask;422393}423394424395/*···425442 return sb;426443}427444445445+/* Check if conn should be synced.446446+ * pkts: conn packets, use sysctl_sync_threshold to avoid packet check447447+ * - (1) sync_refresh_period: reduce sync rate. Additionally, retry448448+ * sync_retries times with period of sync_refresh_period/8449449+ * - (2) if both sync_refresh_period and sync_period are 0 send sync only450450+ * for state changes or only once when pkts matches sync_threshold451451+ * - (3) templates: rate can be reduced only with sync_refresh_period or452452+ * with (2)453453+ */454454+static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs,455455+ struct ip_vs_conn *cp, int pkts)456456+{457457+ unsigned long orig = ACCESS_ONCE(cp->sync_endtime);458458+ unsigned long now = jiffies;459459+ unsigned long n = (now + cp->timeout) & ~3UL;460460+ unsigned int sync_refresh_period;461461+ int sync_period;462462+ int force;463463+464464+ /* Check if we sync in current state */465465+ if (unlikely(cp->flags & IP_VS_CONN_F_TEMPLATE))466466+ force = 0;467467+ else if (likely(cp->protocol == IPPROTO_TCP)) {468468+ if (!((1 << cp->state) &469469+ ((1 << IP_VS_TCP_S_ESTABLISHED) |470470+ (1 << IP_VS_TCP_S_FIN_WAIT) |471471+ (1 << IP_VS_TCP_S_CLOSE) |472472+ (1 << IP_VS_TCP_S_CLOSE_WAIT) |473473+ (1 << IP_VS_TCP_S_TIME_WAIT))))474474+ return 0;475475+ force = cp->state != cp->old_state;476476+ if (force && cp->state != IP_VS_TCP_S_ESTABLISHED)477477+ goto set;478478+ } else if (unlikely(cp->protocol == IPPROTO_SCTP)) {479479+ if (!((1 << cp->state) &480480+ ((1 << IP_VS_SCTP_S_ESTABLISHED) |481481+ (1 << IP_VS_SCTP_S_CLOSED) |482482+ (1 << IP_VS_SCTP_S_SHUT_ACK_CLI) |483483+ (1 << IP_VS_SCTP_S_SHUT_ACK_SER))))484484+ return 0;485485+ force = cp->state != cp->old_state;486486+ if (force && cp->state != IP_VS_SCTP_S_ESTABLISHED)487487+ goto set;488488+ } else {489489+ /* UDP or another protocol with single state */490490+ force = 0;491491+ }492492+493493+ sync_refresh_period = sysctl_sync_refresh_period(ipvs);494494+ if (sync_refresh_period > 0) {495495+ long diff = n - orig;496496+ long min_diff = max(cp->timeout >> 1, 10UL * HZ);497497+498498+ /* Avoid sync if difference is below sync_refresh_period499499+ * and below the half timeout.500500+ */501501+ if (abs(diff) < min_t(long, sync_refresh_period, min_diff)) {502502+ int retries = orig & 3;503503+504504+ if (retries >= sysctl_sync_retries(ipvs))505505+ return 0;506506+ if (time_before(now, orig - cp->timeout +507507+ (sync_refresh_period >> 3)))508508+ return 0;509509+ n |= retries + 1;510510+ }511511+ }512512+ sync_period = sysctl_sync_period(ipvs);513513+ if (sync_period > 0) {514514+ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE) &&515515+ pkts % sync_period != sysctl_sync_threshold(ipvs))516516+ return 0;517517+ } else if (sync_refresh_period <= 0 &&518518+ pkts != sysctl_sync_threshold(ipvs))519519+ return 0;520520+521521+set:522522+ cp->old_state = cp->state;523523+ n = cmpxchg(&cp->sync_endtime, orig, n);524524+ return n == orig || force;525525+}526526+428527/*429528 * Version 0 , could be switched in by sys_ctl.430529 * Add an ip_vs_conn information into the current sync_buff.431530 */432432-void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)531531+static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,532532+ int pkts)433533{434534 struct netns_ipvs *ipvs = net_ipvs(net);435535 struct ip_vs_sync_mesg_v0 *m;436536 struct ip_vs_sync_conn_v0 *s;537537+ struct ip_vs_sync_buff *buff;538538+ struct ipvs_master_sync_state *ms;539539+ int id;437540 int len;438541439542 if (unlikely(cp->af != AF_INET))···528459 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)529460 return;530461462462+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))463463+ return;464464+531465 spin_lock(&ipvs->sync_buff_lock);532532- if (!ipvs->sync_buff) {533533- ipvs->sync_buff =534534- ip_vs_sync_buff_create_v0(ipvs);535535- if (!ipvs->sync_buff) {466466+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {467467+ spin_unlock(&ipvs->sync_buff_lock);468468+ return;469469+ }470470+471471+ id = select_master_thread_id(ipvs, cp);472472+ ms = &ipvs->ms[id];473473+ buff = ms->sync_buff;474474+ if (buff) {475475+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;476476+ /* Send buffer if it is for v1 */477477+ if (!m->nr_conns) {478478+ sb_queue_tail(ipvs, ms);479479+ ms->sync_buff = NULL;480480+ buff = NULL;481481+ }482482+ }483483+ if (!buff) {484484+ buff = ip_vs_sync_buff_create_v0(ipvs);485485+ if (!buff) {536486 spin_unlock(&ipvs->sync_buff_lock);537487 pr_err("ip_vs_sync_buff_create failed.\n");538488 return;539489 }490490+ ms->sync_buff = buff;540491 }541492542493 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :543494 SIMPLE_CONN_SIZE;544544- m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;545545- s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;495495+ m = (struct ip_vs_sync_mesg_v0 *) buff->mesg;496496+ s = (struct ip_vs_sync_conn_v0 *) buff->head;546497547498 /* copy members */548499 s->reserved = 0;···583494584495 m->nr_conns++;585496 m->size += len;586586- ipvs->sync_buff->head += len;497497+ buff->head += len;587498588499 /* check if there is a space for next one */589589- if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {590590- sb_queue_tail(ipvs);591591- ipvs->sync_buff = NULL;500500+ if (buff->head + FULL_CONN_SIZE > buff->end) {501501+ sb_queue_tail(ipvs, ms);502502+ ms->sync_buff = NULL;592503 }593504 spin_unlock(&ipvs->sync_buff_lock);594505595506 /* synchronize its controller if it has */596596- if (cp->control)597597- ip_vs_sync_conn(net, cp->control);507507+ cp = cp->control;508508+ if (cp) {509509+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)510510+ pkts = atomic_add_return(1, &cp->in_pkts);511511+ else512512+ pkts = sysctl_sync_threshold(ipvs);513513+ ip_vs_sync_conn(net, cp->control, pkts);514514+ }598515}599516600517/*···608513 * Called by ip_vs_in.609514 * Sending Version 1 messages610515 */611611-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)516516+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)612517{613518 struct netns_ipvs *ipvs = net_ipvs(net);614519 struct ip_vs_sync_mesg *m;615520 union ip_vs_sync_conn *s;521521+ struct ip_vs_sync_buff *buff;522522+ struct ipvs_master_sync_state *ms;523523+ int id;616524 __u8 *p;617525 unsigned int len, pe_name_len, pad;618526619527 /* Handle old version of the protocol */620528 if (sysctl_sync_ver(ipvs) == 0) {621621- ip_vs_sync_conn_v0(net, cp);529529+ ip_vs_sync_conn_v0(net, cp, pkts);622530 return;623531 }624532 /* Do not sync ONE PACKET */625533 if (cp->flags & IP_VS_CONN_F_ONE_PACKET)626534 goto control;627535sloop:536536+ if (!ip_vs_sync_conn_needed(ipvs, cp, pkts))537537+ goto control;538538+628539 /* Sanity checks */629540 pe_name_len = 0;630541 if (cp->pe_data_len) {···642541 }643542644543 spin_lock(&ipvs->sync_buff_lock);544544+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {545545+ spin_unlock(&ipvs->sync_buff_lock);546546+ return;547547+ }548548+549549+ id = select_master_thread_id(ipvs, cp);550550+ ms = &ipvs->ms[id];645551646552#ifdef CONFIG_IP_VS_IPV6647553 if (cp->af == AF_INET6)···667559668560 /* check if there is a space for this one */669561 pad = 0;670670- if (ipvs->sync_buff) {671671- pad = (4 - (size_t)ipvs->sync_buff->head) & 3;672672- if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {673673- sb_queue_tail(ipvs);674674- ipvs->sync_buff = NULL;562562+ buff = ms->sync_buff;563563+ if (buff) {564564+ m = buff->mesg;565565+ pad = (4 - (size_t) buff->head) & 3;566566+ /* Send buffer if it is for v0 */567567+ if (buff->head + len + pad > buff->end || m->reserved) {568568+ sb_queue_tail(ipvs, ms);569569+ ms->sync_buff = NULL;570570+ buff = NULL;675571 pad = 0;676572 }677573 }678574679679- if (!ipvs->sync_buff) {680680- ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);681681- if (!ipvs->sync_buff) {575575+ if (!buff) {576576+ buff = ip_vs_sync_buff_create(ipvs);577577+ if (!buff) {682578 spin_unlock(&ipvs->sync_buff_lock);683579 pr_err("ip_vs_sync_buff_create failed.\n");684580 return;685581 }582582+ ms->sync_buff = buff;583583+ m = buff->mesg;686584 }687585688688- m = ipvs->sync_buff->mesg;689689- p = ipvs->sync_buff->head;690690- ipvs->sync_buff->head += pad + len;586586+ p = buff->head;587587+ buff->head += pad + len;691588 m->size += pad + len;692589 /* Add ev. padding from prev. sync_conn */693590 while (pad--)···757644 cp = cp->control;758645 if (!cp)759646 return;760760- /*761761- * Reduce sync rate for templates762762- * i.e only increment in_pkts for Templates.763763- */764764- if (cp->flags & IP_VS_CONN_F_TEMPLATE) {765765- int pkts = atomic_add_return(1, &cp->in_pkts);766766-767767- if (pkts % sysctl_sync_period(ipvs) != 1)768768- return;769769- }647647+ if (cp->flags & IP_VS_CONN_F_TEMPLATE)648648+ pkts = atomic_add_return(1, &cp->in_pkts);649649+ else650650+ pkts = sysctl_sync_threshold(ipvs);770651 goto sloop;771652}772653···838731 else839732 cp = ip_vs_ct_in_get(param);840733841841- if (cp && param->pe_data) /* Free pe_data */734734+ if (cp) {735735+ /* Free pe_data */842736 kfree(param->pe_data);843843- if (!cp) {737737+738738+ dest = cp->dest;739739+ spin_lock(&cp->lock);740740+ if ((cp->flags ^ flags) & IP_VS_CONN_F_INACTIVE &&741741+ !(flags & IP_VS_CONN_F_TEMPLATE) && dest) {742742+ if (flags & IP_VS_CONN_F_INACTIVE) {743743+ atomic_dec(&dest->activeconns);744744+ atomic_inc(&dest->inactconns);745745+ } else {746746+ atomic_inc(&dest->activeconns);747747+ atomic_dec(&dest->inactconns);748748+ }749749+ }750750+ flags &= IP_VS_CONN_F_BACKUP_UPD_MASK;751751+ flags |= cp->flags & ~IP_VS_CONN_F_BACKUP_UPD_MASK;752752+ cp->flags = flags;753753+ spin_unlock(&cp->lock);754754+ if (!dest) {755755+ dest = ip_vs_try_bind_dest(cp);756756+ if (dest)757757+ atomic_dec(&dest->refcnt);758758+ }759759+ } else {844760 /*845761 * Find the appropriate destination for the connection.846762 * If it is not found the connection will remain unbound···872742 dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,873743 param->vport, protocol, fwmark, flags);874744875875- /* Set the approprite ativity flag */876876- if (protocol == IPPROTO_TCP) {877877- if (state != IP_VS_TCP_S_ESTABLISHED)878878- flags |= IP_VS_CONN_F_INACTIVE;879879- else880880- flags &= ~IP_VS_CONN_F_INACTIVE;881881- } else if (protocol == IPPROTO_SCTP) {882882- if (state != IP_VS_SCTP_S_ESTABLISHED)883883- flags |= IP_VS_CONN_F_INACTIVE;884884- else885885- flags &= ~IP_VS_CONN_F_INACTIVE;886886- }887745 cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);888746 if (dest)889747 atomic_dec(&dest->refcnt);···880762 kfree(param->pe_data);881763 IP_VS_DBG(2, "BACKUP, add new conn. failed\n");882764 return;883883- }884884- } else if (!cp->dest) {885885- dest = ip_vs_try_bind_dest(cp);886886- if (dest)887887- atomic_dec(&dest->refcnt);888888- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&889889- (cp->state != state)) {890890- /* update active/inactive flag for the connection */891891- dest = cp->dest;892892- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&893893- (state != IP_VS_TCP_S_ESTABLISHED)) {894894- atomic_dec(&dest->activeconns);895895- atomic_inc(&dest->inactconns);896896- cp->flags |= IP_VS_CONN_F_INACTIVE;897897- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&898898- (state == IP_VS_TCP_S_ESTABLISHED)) {899899- atomic_inc(&dest->activeconns);900900- atomic_dec(&dest->inactconns);901901- cp->flags &= ~IP_VS_CONN_F_INACTIVE;902902- }903903- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&904904- (cp->state != state)) {905905- dest = cp->dest;906906- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&907907- (state != IP_VS_SCTP_S_ESTABLISHED)) {908908- atomic_dec(&dest->activeconns);909909- atomic_inc(&dest->inactconns);910910- cp->flags &= ~IP_VS_CONN_F_INACTIVE;911765 }912766 }913767···123911491240115012411151/*11521152+ * Setup sndbuf (mode=1) or rcvbuf (mode=0)11531153+ */11541154+static void set_sock_size(struct sock *sk, int mode, int val)11551155+{11561156+ /* setsockopt(sock, SOL_SOCKET, SO_SNDBUF, &val, sizeof(val)); */11571157+ /* setsockopt(sock, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)); */11581158+ lock_sock(sk);11591159+ if (mode) {11601160+ val = clamp_t(int, val, (SOCK_MIN_SNDBUF + 1) / 2,11611161+ sysctl_wmem_max);11621162+ sk->sk_sndbuf = val * 2;11631163+ sk->sk_userlocks |= SOCK_SNDBUF_LOCK;11641164+ } else {11651165+ val = clamp_t(int, val, (SOCK_MIN_RCVBUF + 1) / 2,11661166+ sysctl_rmem_max);11671167+ sk->sk_rcvbuf = val * 2;11681168+ sk->sk_userlocks |= SOCK_RCVBUF_LOCK;11691169+ }11701170+ release_sock(sk);11711171+}11721172+11731173+/*12421174 * Setup loopback of outgoing multicasts on a sending socket12431175 */12441176static void set_mcast_loop(struct sock *sk, u_char loop)···14101298/*14111299 * Set up sending multicast socket over UDP14121300 */14131413-static struct socket *make_send_sock(struct net *net)13011301+static struct socket *make_send_sock(struct net *net, int id)14141302{14151303 struct netns_ipvs *ipvs = net_ipvs(net);13041304+ /* multicast addr */13051305+ struct sockaddr_in mcast_addr = {13061306+ .sin_family = AF_INET,13071307+ .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),13081308+ .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),13091309+ };14161310 struct socket *sock;14171311 int result;14181312···1442132414431325 set_mcast_loop(sock->sk, 0);14441326 set_mcast_ttl(sock->sk, 1);13271327+ result = sysctl_sync_sock_size(ipvs);13281328+ if (result > 0)13291329+ set_sock_size(sock->sk, 1, result);1445133014461331 result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);14471332 if (result < 0) {···14701349/*14711350 * Set up receiving multicast socket over UDP14721351 */14731473-static struct socket *make_receive_sock(struct net *net)13521352+static struct socket *make_receive_sock(struct net *net, int id)14741353{14751354 struct netns_ipvs *ipvs = net_ipvs(net);13551355+ /* multicast addr */13561356+ struct sockaddr_in mcast_addr = {13571357+ .sin_family = AF_INET,13581358+ .sin_port = cpu_to_be16(IP_VS_SYNC_PORT + id),13591359+ .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),13601360+ };14761361 struct socket *sock;14771362 int result;14781363···14961369 sk_change_net(sock->sk, net);14971370 /* it is equivalent to the REUSEADDR option in user-space */14981371 sock->sk->sk_reuse = SK_CAN_REUSE;13721372+ result = sysctl_sync_sock_size(ipvs);13731373+ if (result > 0)13741374+ set_sock_size(sock->sk, 0, result);1499137515001376 result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,15011377 sizeof(struct sockaddr));···15411411 return len;15421412}1543141315441544-static void14141414+static int15451415ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg)15461416{15471417 int msize;14181418+ int ret;1548141915491420 msize = msg->size;1550142115511422 /* Put size in network byte order */15521423 msg->size = htons(msg->size);1553142415541554- if (ip_vs_send_async(sock, (char *)msg, msize) != msize)15551555- pr_err("ip_vs_send_async error\n");14251425+ ret = ip_vs_send_async(sock, (char *)msg, msize);14261426+ if (ret >= 0 || ret == -EAGAIN)14271427+ return ret;14281428+ pr_err("ip_vs_send_async error %d\n", ret);14291429+ return 0;15561430}1557143115581432static int···15721438 iov.iov_base = buffer;15731439 iov.iov_len = (size_t)buflen;1574144015751575- len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0);14411441+ len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, MSG_DONTWAIT);1576144215771443 if (len < 0)15781578- return -1;14441444+ return len;1579144515801446 LeaveFunction(7);15811447 return len;15821448}1583144914501450+/* Wakeup the master thread for sending */14511451+static void master_wakeup_work_handler(struct work_struct *work)14521452+{14531453+ struct ipvs_master_sync_state *ms =14541454+ container_of(work, struct ipvs_master_sync_state,14551455+ master_wakeup_work.work);14561456+ struct netns_ipvs *ipvs = ms->ipvs;14571457+14581458+ spin_lock_bh(&ipvs->sync_lock);14591459+ if (ms->sync_queue_len &&14601460+ ms->sync_queue_delay < IPVS_SYNC_WAKEUP_RATE) {14611461+ ms->sync_queue_delay = IPVS_SYNC_WAKEUP_RATE;14621462+ wake_up_process(ms->master_thread);14631463+ }14641464+ spin_unlock_bh(&ipvs->sync_lock);14651465+}14661466+14671467+/* Get next buffer to send */14681468+static inline struct ip_vs_sync_buff *14691469+next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)14701470+{14711471+ struct ip_vs_sync_buff *sb;14721472+14731473+ sb = sb_dequeue(ipvs, ms);14741474+ if (sb)14751475+ return sb;14761476+ /* Do not delay entries in buffer for more than 2 seconds */14771477+ return get_curr_sync_buff(ipvs, ms, IPVS_SYNC_FLUSH_TIME);14781478+}1584147915851480static int sync_thread_master(void *data)15861481{15871482 struct ip_vs_sync_thread_data *tinfo = data;15881483 struct netns_ipvs *ipvs = net_ipvs(tinfo->net);14841484+ struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];14851485+ struct sock *sk = tinfo->sock->sk;15891486 struct ip_vs_sync_buff *sb;1590148715911488 pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "15921592- "syncid = %d\n",15931593- ipvs->master_mcast_ifn, ipvs->master_syncid);14891489+ "syncid = %d, id = %d\n",14901490+ ipvs->master_mcast_ifn, ipvs->master_syncid, tinfo->id);1594149115951595- while (!kthread_should_stop()) {15961596- while ((sb = sb_dequeue(ipvs))) {15971597- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);15981598- ip_vs_sync_buff_release(sb);14921492+ for (;;) {14931493+ sb = next_sync_buff(ipvs, ms);14941494+ if (unlikely(kthread_should_stop()))14951495+ break;14961496+ if (!sb) {14971497+ schedule_timeout(IPVS_SYNC_CHECK_PERIOD);14981498+ continue;15991499 }15001500+ while (ip_vs_send_sync_msg(tinfo->sock, sb->mesg) < 0) {15011501+ int ret = 0;1600150216011601- /* check if entries stay in ipvs->sync_buff for 2 seconds */16021602- sb = get_curr_sync_buff(ipvs, 2 * HZ);16031603- if (sb) {16041604- ip_vs_send_sync_msg(tinfo->sock, sb->mesg);16051605- ip_vs_sync_buff_release(sb);15031503+ __wait_event_interruptible(*sk_sleep(sk),15041504+ sock_writeable(sk) ||15051505+ kthread_should_stop(),15061506+ ret);15071507+ if (unlikely(kthread_should_stop()))15081508+ goto done;16061509 }16071607-16081608- schedule_timeout_interruptible(HZ);15101510+ ip_vs_sync_buff_release(sb);16091511 }1610151216111611- /* clean up the sync_buff queue */16121612- while ((sb = sb_dequeue(ipvs)))15131513+done:15141514+ __set_current_state(TASK_RUNNING);15151515+ if (sb)16131516 ip_vs_sync_buff_release(sb);1614151715181518+ /* clean up the sync_buff queue */15191519+ while ((sb = sb_dequeue(ipvs, ms)))15201520+ ip_vs_sync_buff_release(sb);15211521+ __set_current_state(TASK_RUNNING);15221522+16151523 /* clean up the current sync_buff */16161616- sb = get_curr_sync_buff(ipvs, 0);15241524+ sb = get_curr_sync_buff(ipvs, ms, 0);16171525 if (sb)16181526 ip_vs_sync_buff_release(sb);16191527···16741498 int len;1675149916761500 pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "16771677- "syncid = %d\n",16781678- ipvs->backup_mcast_ifn, ipvs->backup_syncid);15011501+ "syncid = %d, id = %d\n",15021502+ ipvs->backup_mcast_ifn, ipvs->backup_syncid, tinfo->id);1679150316801504 while (!kthread_should_stop()) {16811505 wait_event_interruptible(*sk_sleep(tinfo->sock->sk),···16871511 len = ip_vs_receive(tinfo->sock, tinfo->buf,16881512 ipvs->recv_mesg_maxlen);16891513 if (len <= 0) {16901690- pr_err("receiving message error\n");15141514+ if (len != -EAGAIN)15151515+ pr_err("receiving message error\n");16911516 break;16921517 }16931518···17121535int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)17131536{17141537 struct ip_vs_sync_thread_data *tinfo;17151715- struct task_struct **realtask, *task;15381538+ struct task_struct **array = NULL, *task;17161539 struct socket *sock;17171540 struct netns_ipvs *ipvs = net_ipvs(net);17181718- char *name, *buf = NULL;15411541+ char *name;17191542 int (*threadfn)(void *data);15431543+ int id, count;17201544 int result = -ENOMEM;1721154517221546 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));17231547 IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",17241548 sizeof(struct ip_vs_sync_conn_v0));1725154915501550+ if (!ipvs->sync_state) {15511551+ count = clamp(sysctl_sync_ports(ipvs), 1, IPVS_SYNC_PORTS_MAX);15521552+ ipvs->threads_mask = count - 1;15531553+ } else15541554+ count = ipvs->threads_mask + 1;1726155517271556 if (state == IP_VS_STATE_MASTER) {17281728- if (ipvs->master_thread)15571557+ if (ipvs->ms)17291558 return -EEXIST;1730155917311560 strlcpy(ipvs->master_mcast_ifn, mcast_ifn,17321561 sizeof(ipvs->master_mcast_ifn));17331562 ipvs->master_syncid = syncid;17341734- realtask = &ipvs->master_thread;17351735- name = "ipvs_master:%d";15631563+ name = "ipvs-m:%d:%d";17361564 threadfn = sync_thread_master;17371737- sock = make_send_sock(net);17381565 } else if (state == IP_VS_STATE_BACKUP) {17391739- if (ipvs->backup_thread)15661566+ if (ipvs->backup_threads)17401567 return -EEXIST;1741156817421569 strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,17431570 sizeof(ipvs->backup_mcast_ifn));17441571 ipvs->backup_syncid = syncid;17451745- realtask = &ipvs->backup_thread;17461746- name = "ipvs_backup:%d";15721572+ name = "ipvs-b:%d:%d";17471573 threadfn = sync_thread_backup;17481748- sock = make_receive_sock(net);17491574 } else {17501575 return -EINVAL;17511576 }1752157717531753- if (IS_ERR(sock)) {17541754- result = PTR_ERR(sock);17551755- goto out;17561756- }15781578+ if (state == IP_VS_STATE_MASTER) {15791579+ struct ipvs_master_sync_state *ms;1757158015811581+ ipvs->ms = kzalloc(count * sizeof(ipvs->ms[0]), GFP_KERNEL);15821582+ if (!ipvs->ms)15831583+ goto out;15841584+ ms = ipvs->ms;15851585+ for (id = 0; id < count; id++, ms++) {15861586+ INIT_LIST_HEAD(&ms->sync_queue);15871587+ ms->sync_queue_len = 0;15881588+ ms->sync_queue_delay = 0;15891589+ INIT_DELAYED_WORK(&ms->master_wakeup_work,15901590+ master_wakeup_work_handler);15911591+ ms->ipvs = ipvs;15921592+ }15931593+ } else {15941594+ array = kzalloc(count * sizeof(struct task_struct *),15951595+ GFP_KERNEL);15961596+ if (!array)15971597+ goto out;15981598+ }17581599 set_sync_mesg_maxlen(net, state);17591759- if (state == IP_VS_STATE_BACKUP) {17601760- buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);17611761- if (!buf)16001600+16011601+ tinfo = NULL;16021602+ for (id = 0; id < count; id++) {16031603+ if (state == IP_VS_STATE_MASTER)16041604+ sock = make_send_sock(net, id);16051605+ else16061606+ sock = make_receive_sock(net, id);16071607+ if (IS_ERR(sock)) {16081608+ result = PTR_ERR(sock);16091609+ goto outtinfo;16101610+ }16111611+ tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);16121612+ if (!tinfo)17621613 goto outsocket;17631763- }16141614+ tinfo->net = net;16151615+ tinfo->sock = sock;16161616+ if (state == IP_VS_STATE_BACKUP) {16171617+ tinfo->buf = kmalloc(ipvs->recv_mesg_maxlen,16181618+ GFP_KERNEL);16191619+ if (!tinfo->buf)16201620+ goto outtinfo;16211621+ }16221622+ tinfo->id = id;1764162317651765- tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);17661766- if (!tinfo)17671767- goto outbuf;17681768-17691769- tinfo->net = net;17701770- tinfo->sock = sock;17711771- tinfo->buf = buf;17721772-17731773- task = kthread_run(threadfn, tinfo, name, ipvs->gen);17741774- if (IS_ERR(task)) {17751775- result = PTR_ERR(task);17761776- goto outtinfo;16241624+ task = kthread_run(threadfn, tinfo, name, ipvs->gen, id);16251625+ if (IS_ERR(task)) {16261626+ result = PTR_ERR(task);16271627+ goto outtinfo;16281628+ }16291629+ tinfo = NULL;16301630+ if (state == IP_VS_STATE_MASTER)16311631+ ipvs->ms[id].master_thread = task;16321632+ else16331633+ array[id] = task;17771634 }1778163517791636 /* mark as active */17801780- *realtask = task;16371637+16381638+ if (state == IP_VS_STATE_BACKUP)16391639+ ipvs->backup_threads = array;16401640+ spin_lock_bh(&ipvs->sync_buff_lock);17811641 ipvs->sync_state |= state;16421642+ spin_unlock_bh(&ipvs->sync_buff_lock);1782164317831644 /* increase the module use count */17841645 ip_vs_use_count_inc();1785164617861647 return 0;1787164817881788-outtinfo:17891789- kfree(tinfo);17901790-outbuf:17911791- kfree(buf);17921649outsocket:17931650 sk_release_kernel(sock->sk);16511651+16521652+outtinfo:16531653+ if (tinfo) {16541654+ sk_release_kernel(tinfo->sock->sk);16551655+ kfree(tinfo->buf);16561656+ kfree(tinfo);16571657+ }16581658+ count = id;16591659+ while (count-- > 0) {16601660+ if (state == IP_VS_STATE_MASTER)16611661+ kthread_stop(ipvs->ms[count].master_thread);16621662+ else16631663+ kthread_stop(array[count]);16641664+ }16651665+ kfree(array);16661666+17941667out:16681668+ if (!(ipvs->sync_state & IP_VS_STATE_MASTER)) {16691669+ kfree(ipvs->ms);16701670+ ipvs->ms = NULL;16711671+ }17951672 return result;17961673}17971674···18531622int stop_sync_thread(struct net *net, int state)18541623{18551624 struct netns_ipvs *ipvs = net_ipvs(net);16251625+ struct task_struct **array;16261626+ int id;18561627 int retc = -EINVAL;1857162818581629 IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));1859163018601631 if (state == IP_VS_STATE_MASTER) {18611861- if (!ipvs->master_thread)16321632+ if (!ipvs->ms)18621633 return -ESRCH;18631863-18641864- pr_info("stopping master sync thread %d ...\n",18651865- task_pid_nr(ipvs->master_thread));1866163418671635 /*18681636 * The lock synchronizes with sb_queue_tail(), so that we don't···18691639 * progress of stopping the master sync daemon.18701640 */1871164118721872- spin_lock_bh(&ipvs->sync_lock);16421642+ spin_lock_bh(&ipvs->sync_buff_lock);16431643+ spin_lock(&ipvs->sync_lock);18731644 ipvs->sync_state &= ~IP_VS_STATE_MASTER;18741874- spin_unlock_bh(&ipvs->sync_lock);18751875- retc = kthread_stop(ipvs->master_thread);18761876- ipvs->master_thread = NULL;16451645+ spin_unlock(&ipvs->sync_lock);16461646+ spin_unlock_bh(&ipvs->sync_buff_lock);16471647+16481648+ retc = 0;16491649+ for (id = ipvs->threads_mask; id >= 0; id--) {16501650+ struct ipvs_master_sync_state *ms = &ipvs->ms[id];16511651+ int ret;16521652+16531653+ pr_info("stopping master sync thread %d ...\n",16541654+ task_pid_nr(ms->master_thread));16551655+ cancel_delayed_work_sync(&ms->master_wakeup_work);16561656+ ret = kthread_stop(ms->master_thread);16571657+ if (retc >= 0)16581658+ retc = ret;16591659+ }16601660+ kfree(ipvs->ms);16611661+ ipvs->ms = NULL;18771662 } else if (state == IP_VS_STATE_BACKUP) {18781878- if (!ipvs->backup_thread)16631663+ if (!ipvs->backup_threads)18791664 return -ESRCH;1880166518811881- pr_info("stopping backup sync thread %d ...\n",18821882- task_pid_nr(ipvs->backup_thread));18831883-18841666 ipvs->sync_state &= ~IP_VS_STATE_BACKUP;18851885- retc = kthread_stop(ipvs->backup_thread);18861886- ipvs->backup_thread = NULL;16671667+ array = ipvs->backup_threads;16681668+ retc = 0;16691669+ for (id = ipvs->threads_mask; id >= 0; id--) {16701670+ int ret;16711671+16721672+ pr_info("stopping backup sync thread %d ...\n",16731673+ task_pid_nr(array[id]));16741674+ ret = kthread_stop(array[id]);16751675+ if (retc >= 0)16761676+ retc = ret;16771677+ }16781678+ kfree(array);16791679+ ipvs->backup_threads = NULL;18871680 }1888168118891682 /* decrease the module use count */···19231670 struct netns_ipvs *ipvs = net_ipvs(net);1924167119251672 __mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);19261926- INIT_LIST_HEAD(&ipvs->sync_queue);19271673 spin_lock_init(&ipvs->sync_lock);19281674 spin_lock_init(&ipvs->sync_buff_lock);19291929-19301930- ipvs->sync_mcast_addr.sin_family = AF_INET;19311931- ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);19321932- ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);19331675 return 0;19341676}19351677
+1-1
net/netfilter/ipvs/ip_vs_wrr.c
···8484 /*8585 * Allocate the mark variable for WRR scheduling8686 */8787- mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC);8787+ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL);8888 if (mark == NULL)8989 return -ENOMEM;9090
+7-8
net/netfilter/nf_conntrack_core.c
···13361336 while (untrack_refs() > 0)13371337 schedule();1338133813391339- nf_conntrack_helper_fini();13401339 nf_conntrack_proto_fini();13411340#ifdef CONFIG_NF_CONNTRACK_ZONES13421341 nf_ct_extend_unregister(&nf_ct_zone_extend);···13531354 }1354135513551356 nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);13571357+ nf_conntrack_helper_fini(net);13561358 nf_conntrack_timeout_fini(net);13571359 nf_conntrack_ecache_fini(net);13581360 nf_conntrack_tstamp_fini(net);···15041504 if (ret < 0)15051505 goto err_proto;1506150615071507- ret = nf_conntrack_helper_init();15081508- if (ret < 0)15091509- goto err_helper;15101510-15111507#ifdef CONFIG_NF_CONNTRACK_ZONES15121508 ret = nf_ct_extend_register(&nf_ct_zone_extend);15131509 if (ret < 0)···1521152515221526#ifdef CONFIG_NF_CONNTRACK_ZONES15231527err_extend:15241524- nf_conntrack_helper_fini();15251525-#endif15261526-err_helper:15271528 nf_conntrack_proto_fini();15291529+#endif15281530err_proto:15291531 return ret;15301532}···15831589 ret = nf_conntrack_timeout_init(net);15841590 if (ret < 0)15851591 goto err_timeout;15921592+ ret = nf_conntrack_helper_init(net);15931593+ if (ret < 0)15941594+ goto err_helper;1586159515871596 return 0;1588159715981598+err_helper:15991599+ nf_conntrack_timeout_fini(net);15891600err_timeout:15901601 nf_conntrack_ecache_fini(net);15911602err_ecache:
+4-6
net/netfilter/nf_conntrack_ecache.c
···8484int nf_conntrack_register_notifier(struct net *net,8585 struct nf_ct_event_notifier *new)8686{8787- int ret = 0;8787+ int ret;8888 struct nf_ct_event_notifier *notify;89899090 mutex_lock(&nf_ct_ecache_mutex);···9595 goto out_unlock;9696 }9797 rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);9898- mutex_unlock(&nf_ct_ecache_mutex);9999- return ret;9898+ ret = 0;10099101100out_unlock:102101 mutex_unlock(&nf_ct_ecache_mutex);···120121int nf_ct_expect_register_notifier(struct net *net,121122 struct nf_exp_event_notifier *new)122123{123123- int ret = 0;124124+ int ret;124125 struct nf_exp_event_notifier *notify;125126126127 mutex_lock(&nf_ct_ecache_mutex);···131132 goto out_unlock;132133 }133134 rcu_assign_pointer(net->ct.nf_expect_event_cb, new);134134- mutex_unlock(&nf_ct_ecache_mutex);135135- return ret;135135+ ret = 0;136136137137out_unlock:138138 mutex_unlock(&nf_ct_ecache_mutex);
+110-12
net/netfilter/nf_conntrack_helper.c
···3434static unsigned int nf_ct_helper_hsize __read_mostly;3535static unsigned int nf_ct_helper_count __read_mostly;36363737+static bool nf_ct_auto_assign_helper __read_mostly = true;3838+module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);3939+MODULE_PARM_DESC(nf_conntrack_helper,4040+ "Enable automatic conntrack helper assignment (default 1)");4141+4242+#ifdef CONFIG_SYSCTL4343+static struct ctl_table helper_sysctl_table[] = {4444+ {4545+ .procname = "nf_conntrack_helper",4646+ .data = &init_net.ct.sysctl_auto_assign_helper,4747+ .maxlen = sizeof(unsigned int),4848+ .mode = 0644,4949+ .proc_handler = proc_dointvec,5050+ },5151+ {}5252+};5353+5454+static int nf_conntrack_helper_init_sysctl(struct net *net)5555+{5656+ struct ctl_table *table;5757+5858+ table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),5959+ GFP_KERNEL);6060+ if (!table)6161+ goto out;6262+6363+ table[0].data = &net->ct.sysctl_auto_assign_helper;6464+6565+ net->ct.helper_sysctl_header =6666+ register_net_sysctl(net, "net/netfilter", table);6767+6868+ if (!net->ct.helper_sysctl_header) {6969+ pr_err("nf_conntrack_helper: can't register to sysctl.\n");7070+ goto out_register;7171+ }7272+ return 0;7373+7474+out_register:7575+ kfree(table);7676+out:7777+ return -ENOMEM;7878+}7979+8080+static void nf_conntrack_helper_fini_sysctl(struct net *net)8181+{8282+ struct ctl_table *table;8383+8484+ table = net->ct.helper_sysctl_header->ctl_table_arg;8585+ unregister_net_sysctl_table(net->ct.helper_sysctl_header);8686+ kfree(table);8787+}8888+#else8989+static int nf_conntrack_helper_init_sysctl(struct net *net)9090+{9191+ return 0;9292+}9393+9494+static void nf_conntrack_helper_fini_sysctl(struct net *net)9595+{9696+}9797+#endif /* CONFIG_SYSCTL */37983899/* Stupid hash, but collision free for the default registrations of the39100 * helpers currently in the kernel. */···179118{180119 struct nf_conntrack_helper *helper = NULL;181120 struct nf_conn_help *help;121121+ struct net *net = nf_ct_net(ct);182122 int ret = 0;123123+124124+ /* We already got a helper explicitly attached. The function125125+ * nf_conntrack_alter_reply - in case NAT is in use - asks for looking126126+ * the helper up again. Since now the user is in full control of127127+ * making consistent helper configurations, skip this automatic128128+ * re-lookup, otherwise we'll lose the helper.129129+ */130130+ if (test_bit(IPS_HELPER_BIT, &ct->status))131131+ return 0;183132184133 if (tmpl != NULL) {185134 help = nfct_help(tmpl);186186- if (help != NULL)135135+ if (help != NULL) {187136 helper = help->helper;137137+ set_bit(IPS_HELPER_BIT, &ct->status);138138+ }188139 }189140190141 help = nfct_help(ct);191191- if (helper == NULL)142142+ if (net->ct.sysctl_auto_assign_helper && helper == NULL) {192143 helper = __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);144144+ if (unlikely(!net->ct.auto_assign_helper_warned && helper)) {145145+ pr_info("nf_conntrack: automatic helper "146146+ "assignment is deprecated and it will "147147+ "be removed soon. Use the iptables CT target "148148+ "to attach helpers instead.\n");149149+ net->ct.auto_assign_helper_warned = true;150150+ }151151+ }152152+193153 if (helper == NULL) {194154 if (help)195155 RCU_INIT_POINTER(help->helper, NULL);···397315 .id = NF_CT_EXT_HELPER,398316};399317400400-int nf_conntrack_helper_init(void)318318+int nf_conntrack_helper_init(struct net *net)401319{402320 int err;403321404404- nf_ct_helper_hsize = 1; /* gets rounded up to use one page */405405- nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);406406- if (!nf_ct_helper_hash)407407- return -ENOMEM;322322+ net->ct.auto_assign_helper_warned = false;323323+ net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;408324409409- err = nf_ct_extend_register(&helper_extend);325325+ if (net_eq(net, &init_net)) {326326+ nf_ct_helper_hsize = 1; /* gets rounded up to use one page */327327+ nf_ct_helper_hash =328328+ nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);329329+ if (!nf_ct_helper_hash)330330+ return -ENOMEM;331331+332332+ err = nf_ct_extend_register(&helper_extend);333333+ if (err < 0)334334+ goto err1;335335+ }336336+337337+ err = nf_conntrack_helper_init_sysctl(net);410338 if (err < 0)411411- goto err1;339339+ goto out_sysctl;412340413341 return 0;414342343343+out_sysctl:344344+ if (net_eq(net, &init_net))345345+ nf_ct_extend_unregister(&helper_extend);415346err1:416347 nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);417348 return err;418349}419350420420-void nf_conntrack_helper_fini(void)351351+void nf_conntrack_helper_fini(struct net *net)421352{422422- nf_ct_extend_unregister(&helper_extend);423423- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);353353+ nf_conntrack_helper_fini_sysctl(net);354354+ if (net_eq(net, &init_net)) {355355+ nf_ct_extend_unregister(&helper_extend);356356+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);357357+ }424358}