Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree:

1) No need to set ttl from reject action for the bridge family, from
Taehee Yoo.

2) Use a fixed timeout for flows that are passed up from the flowtable
to conntrack, from Florian Westphal.

3) More preparation patches for tproxy support for nf_tables, from Mate
Eckl.

4) Remove unnecessary indirection in core IPv6 checksum function, from
Florian Westphal.

5) Use nf_ct_get_tuplepr() from openvswitch, instead of opencoding it.
From Florian Westphal.

6) socket match now selects socket infrastructure, instead of depending
on it. From Mate Eckl.

7) Patch series to simplify conntrack tuple building/parsing from packet
path and ctnetlink, from Florian Westphal.

8) Fetch timeout policy from protocol helpers, instead of doing it from
core, from Florian Westphal.

9) Merge IPv4 and IPv6 protocol trackers into conntrack core, from
Florian Westphal.

10) Depend on CONFIG_NF_TABLES_IPV6 and CONFIG_IP6_NF_IPTABLES
respectively, instead of IPV6. Patch from Mate Eckl.

11) Add specific function for garbage collection in conncount,
from Yi-Hung Wei.

12) Cache number of elements in the connlimit list, from Yi-Hung Wei.

13) Move locking to nf_conncount, from Yi-Hung Wei.

14) Series of patches to add lockless tree traversal in nf_conncount,
from Yi-Hung Wei.

15) Resolve clash in matching conntracks when race happens, from
Martynas Pumputis.

16) If connection entry times out, remove template entry from the
ip_vs_conn_tab table to improve behaviour under flood, from
Julian Anastasov.

17) Remove useless parameter from nf_ct_helper_ext_add(), from Gao feng.

18) Call abort from 2-phase commit protocol before requesting modules,
make sure this is done under the mutex, from Florian Westphal.

19) Grab module reference when starting transaction, also from Florian.

20) Dynamically allocate expression info array for pre-parsing, from
Florian.

21) Add per netns mutex for nf_tables, from Florian Westphal.

22) A couple of patches to simplify and refactor nf_osf code to prepare
for nft_osf support.

23) Break evaluation on missing socket, from Mate Eckl.

24) Allow to match socket mark from nft_socket, from Mate Eckl.

25) Remove dependency on nf_defrag_ipv6, now that IPv6 tracker is
built-in into nf_conntrack. From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2035 -2414
+1
include/linux/netfilter/nfnetlink.h
··· 29 29 __u8 subsys_id; /* nfnetlink subsystem ID */ 30 30 __u8 cb_count; /* number of callbacks */ 31 31 const struct nfnl_callback *cb; /* callback for individual types */ 32 + struct module *owner; 32 33 int (*commit)(struct net *net, struct sk_buff *skb); 33 34 int (*abort)(struct net *net, struct sk_buff *skb); 34 35 void (*cleanup)(struct net *net);
-11
include/linux/netfilter_ipv4.h
··· 23 23 #ifdef CONFIG_INET 24 24 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 25 25 unsigned int dataoff, u_int8_t protocol); 26 - __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 27 - unsigned int dataoff, unsigned int len, 28 - u_int8_t protocol); 29 26 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, 30 27 bool strict); 31 28 int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); 32 29 #else 33 30 static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 34 31 unsigned int dataoff, u_int8_t protocol) 35 - { 36 - return 0; 37 - } 38 - static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb, 39 - unsigned int hook, 40 - unsigned int dataoff, 41 - unsigned int len, 42 - u_int8_t protocol) 43 32 { 44 33 return 0; 45 34 }
-5
include/linux/netfilter_ipv6.h
··· 30 30 void (*route_input)(struct sk_buff *skb); 31 31 int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, 32 32 int (*output)(struct net *, struct sock *, struct sk_buff *)); 33 - __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook, 34 - unsigned int dataoff, u_int8_t protocol); 35 - __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook, 36 - unsigned int dataoff, unsigned int len, 37 - u_int8_t protocol); 38 33 int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, 39 34 bool strict); 40 35 int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+17 -1
include/net/ip_vs.h
··· 335 335 IP_VS_SCTP_S_LAST 336 336 }; 337 337 338 + /* Connection templates use bits from state */ 339 + #define IP_VS_CTPL_S_NONE 0x0000 340 + #define IP_VS_CTPL_S_ASSURED 0x0001 341 + #define IP_VS_CTPL_S_LAST 0x0002 342 + 338 343 /* Delta sequence info structure 339 344 * Each ip_vs_conn has 2 (output AND input seq. changes). 340 345 * Only used in the VS/NAT. ··· 1226 1221 struct ip_vs_dest *dest, __u32 fwmark); 1227 1222 void ip_vs_conn_expire_now(struct ip_vs_conn *cp); 1228 1223 1229 - const char *ip_vs_state_name(__u16 proto, int state); 1224 + const char *ip_vs_state_name(const struct ip_vs_conn *cp); 1230 1225 1231 1226 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); 1232 1227 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); ··· 1292 1287 1293 1288 cp->control = ctl_cp; 1294 1289 atomic_inc(&ctl_cp->n_control); 1290 + } 1291 + 1292 + /* Mark our template as assured */ 1293 + static inline void 1294 + ip_vs_control_assure_ct(struct ip_vs_conn *cp) 1295 + { 1296 + struct ip_vs_conn *ct = cp->control; 1297 + 1298 + if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) && 1299 + (ct->flags & IP_VS_CONN_F_TEMPLATE)) 1300 + ct->state |= IP_VS_CTPL_S_ASSURED; 1295 1301 } 1296 1302 1297 1303 /* IPVS netns init & cleanup functions */
-28
include/net/ipv6.h
··· 574 574 } 575 575 #endif 576 576 577 - struct inet_frag_queue; 578 - 579 - enum ip6_defrag_users { 580 - IP6_DEFRAG_LOCAL_DELIVER, 581 - IP6_DEFRAG_CONNTRACK_IN, 582 - __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, 583 - IP6_DEFRAG_CONNTRACK_OUT, 584 - __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, 585 - IP6_DEFRAG_CONNTRACK_BRIDGE_IN, 586 - __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, 587 - }; 588 - 589 - void ip6_frag_init(struct inet_frag_queue *q, const void *a); 590 - extern const struct rhashtable_params ip6_rhash_params; 591 - 592 - /* 593 - * Equivalent of ipv4 struct ip 594 - */ 595 - struct frag_queue { 596 - struct inet_frag_queue q; 597 - 598 - int iif; 599 - __u16 nhoffset; 600 - u8 ecn; 601 - }; 602 - 603 - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); 604 - 605 577 static inline bool ipv6_addr_any(const struct in6_addr *a) 606 578 { 607 579 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+104
include/net/ipv6_frag.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _IPV6_FRAG_H 3 + #define _IPV6_FRAG_H 4 + #include <linux/kernel.h> 5 + #include <net/addrconf.h> 6 + #include <net/ipv6.h> 7 + #include <net/inet_frag.h> 8 + 9 + enum ip6_defrag_users { 10 + IP6_DEFRAG_LOCAL_DELIVER, 11 + IP6_DEFRAG_CONNTRACK_IN, 12 + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, 13 + IP6_DEFRAG_CONNTRACK_OUT, 14 + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, 15 + IP6_DEFRAG_CONNTRACK_BRIDGE_IN, 16 + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, 17 + }; 18 + 19 + /* 20 + * Equivalent of ipv4 struct ip 21 + */ 22 + struct frag_queue { 23 + struct inet_frag_queue q; 24 + 25 + int iif; 26 + __u16 nhoffset; 27 + u8 ecn; 28 + }; 29 + 30 + #if IS_ENABLED(CONFIG_IPV6) 31 + static inline void ip6frag_init(struct inet_frag_queue *q, const void *a) 32 + { 33 + struct frag_queue *fq = container_of(q, struct frag_queue, q); 34 + const struct frag_v6_compare_key *key = a; 35 + 36 + q->key.v6 = *key; 37 + fq->ecn = 0; 38 + } 39 + 40 + static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed) 41 + { 42 + return jhash2(data, 43 + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 44 + } 45 + 46 + static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed) 47 + { 48 + const struct inet_frag_queue *fq = data; 49 + 50 + return jhash2((const u32 *)&fq->key.v6, 51 + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 52 + } 53 + 54 + static inline int 55 + ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) 56 + { 57 + const struct frag_v6_compare_key *key = arg->key; 58 + const struct inet_frag_queue *fq = ptr; 59 + 60 + return !!memcmp(&fq->key, key, sizeof(*key)); 61 + } 62 + 63 + static inline void 64 + ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) 65 + { 66 + struct net_device *dev = NULL; 67 + struct sk_buff *head; 68 + 69 + rcu_read_lock(); 70 + 
spin_lock(&fq->q.lock); 71 + 72 + if (fq->q.flags & INET_FRAG_COMPLETE) 73 + goto out; 74 + 75 + inet_frag_kill(&fq->q); 76 + 77 + dev = dev_get_by_index_rcu(net, fq->iif); 78 + if (!dev) 79 + goto out; 80 + 81 + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 82 + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); 83 + 84 + /* Don't send error if the first segment did not arrive. */ 85 + head = fq->q.fragments; 86 + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) 87 + goto out; 88 + 89 + head->dev = dev; 90 + skb_get(head); 91 + spin_unlock(&fq->q.lock); 92 + 93 + icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); 94 + kfree_skb(head); 95 + goto out_rcu_unlock; 96 + 97 + out: 98 + spin_unlock(&fq->q.lock); 99 + out_rcu_unlock: 100 + rcu_read_unlock(); 101 + inet_frag_put(&fq->q); 102 + } 103 + #endif 104 + #endif
-3
include/net/netfilter/ipv4/nf_conntrack_ipv4.h
··· 10 10 #ifndef _NF_CONNTRACK_IPV4_H 11 11 #define _NF_CONNTRACK_IPV4_H 12 12 13 - 14 - const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; 15 - 16 13 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; 17 14 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; 18 15 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+5
include/net/netfilter/nf_conntrack.h
··· 41 41 /* insert expect proto private data here */ 42 42 }; 43 43 44 + struct nf_conntrack_net { 45 + unsigned int users4; 46 + unsigned int users6; 47 + }; 48 + 44 49 #include <linux/types.h> 45 50 #include <linux/skbuff.h> 46 51
+2 -13
include/net/netfilter/nf_conntrack_core.h
··· 14 14 #define _NF_CONNTRACK_CORE_H 15 15 16 16 #include <linux/netfilter.h> 17 - #include <net/netfilter/nf_conntrack_l3proto.h> 18 17 #include <net/netfilter/nf_conntrack_l4proto.h> 19 18 #include <net/netfilter/nf_conntrack_ecache.h> 20 19 ··· 39 40 void nf_conntrack_init_end(void); 40 41 void nf_conntrack_cleanup_end(void); 41 42 42 - bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, 43 - unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, 44 - struct net *net, 45 - struct nf_conntrack_tuple *tuple, 46 - const struct nf_conntrack_l3proto *l3proto, 47 - const struct nf_conntrack_l4proto *l4proto); 48 - 49 43 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 50 44 const struct nf_conntrack_tuple *orig, 51 - const struct nf_conntrack_l3proto *l3proto, 52 45 const struct nf_conntrack_l4proto *l4proto); 53 46 54 47 /* Find a connection corresponding to a tuple. */ ··· 66 75 return ret; 67 76 } 68 77 69 - void 70 - print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 71 - const struct nf_conntrack_l3proto *l3proto, 72 - const struct nf_conntrack_l4proto *proto); 78 + void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 79 + const struct nf_conntrack_l4proto *proto); 73 80 74 81 #define CONNTRACK_LOCKS 1024 75 82
+29 -8
include/net/netfilter/nf_conntrack_count.h
··· 1 1 #ifndef _NF_CONNTRACK_COUNT_H 2 2 #define _NF_CONNTRACK_COUNT_H 3 3 4 + #include <linux/list.h> 5 + 4 6 struct nf_conncount_data; 7 + 8 + enum nf_conncount_list_add { 9 + NF_CONNCOUNT_ADDED, /* list add was ok */ 10 + NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ 11 + NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ 12 + }; 13 + 14 + struct nf_conncount_list { 15 + spinlock_t list_lock; 16 + struct list_head head; /* connections with the same filtering key */ 17 + unsigned int count; /* length of list */ 18 + bool dead; 19 + }; 5 20 6 21 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, 7 22 unsigned int keylen); ··· 29 14 const struct nf_conntrack_tuple *tuple, 30 15 const struct nf_conntrack_zone *zone); 31 16 32 - unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 33 - const struct nf_conntrack_tuple *tuple, 34 - const struct nf_conntrack_zone *zone, 35 - bool *addit); 17 + void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, 18 + const struct nf_conntrack_tuple *tuple, 19 + const struct nf_conntrack_zone *zone, 20 + bool *addit); 36 21 37 - bool nf_conncount_add(struct hlist_head *head, 38 - const struct nf_conntrack_tuple *tuple, 39 - const struct nf_conntrack_zone *zone); 22 + void nf_conncount_list_init(struct nf_conncount_list *list); 40 23 41 - void nf_conncount_cache_free(struct hlist_head *hhead); 24 + enum nf_conncount_list_add 25 + nf_conncount_add(struct nf_conncount_list *list, 26 + const struct nf_conntrack_tuple *tuple, 27 + const struct nf_conntrack_zone *zone); 28 + 29 + bool nf_conncount_gc_list(struct net *net, 30 + struct nf_conncount_list *list); 31 + 32 + void nf_conncount_cache_free(struct nf_conncount_list *list); 42 33 43 34 #endif
+1 -3
include/net/netfilter/nf_conntrack_helper.h
··· 103 103 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, 104 104 unsigned int); 105 105 106 - struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, 107 - struct nf_conntrack_helper *helper, 108 - gfp_t gfp); 106 + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); 109 107 110 108 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, 111 109 gfp_t flags);
-84
include/net/netfilter/nf_conntrack_l3proto.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * Copyright (C)2003,2004 USAGI/WIDE Project 4 - * 5 - * Header for use in defining a given L3 protocol for connection tracking. 6 - * 7 - * Author: 8 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 - * 10 - * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h 11 - */ 12 - 13 - #ifndef _NF_CONNTRACK_L3PROTO_H 14 - #define _NF_CONNTRACK_L3PROTO_H 15 - #include <linux/netlink.h> 16 - #include <net/netlink.h> 17 - #include <linux/seq_file.h> 18 - #include <net/netfilter/nf_conntrack.h> 19 - 20 - struct nf_conntrack_l3proto { 21 - /* L3 Protocol Family number. ex) PF_INET */ 22 - u_int16_t l3proto; 23 - 24 - /* size of tuple nlattr, fills a hole */ 25 - u16 nla_size; 26 - 27 - /* 28 - * Try to fill in the third arg: nhoff is offset of l3 proto 29 - * hdr. Return true if possible. 30 - */ 31 - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff, 32 - struct nf_conntrack_tuple *tuple); 33 - 34 - /* 35 - * Invert the per-proto part of the tuple: ie. turn xmit into reply. 36 - * Some packets can't be inverted: return 0 in that case. 37 - */ 38 - bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, 39 - const struct nf_conntrack_tuple *orig); 40 - 41 - /* 42 - * Called before tracking. 43 - * *dataoff: offset of protocol header (TCP, UDP,...) 
in skb 44 - * *protonum: protocol number 45 - */ 46 - int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, 47 - unsigned int *dataoff, u_int8_t *protonum); 48 - 49 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 50 - int (*tuple_to_nlattr)(struct sk_buff *skb, 51 - const struct nf_conntrack_tuple *t); 52 - int (*nlattr_to_tuple)(struct nlattr *tb[], 53 - struct nf_conntrack_tuple *t); 54 - const struct nla_policy *nla_policy; 55 - #endif 56 - 57 - /* Called when netns wants to use connection tracking */ 58 - int (*net_ns_get)(struct net *); 59 - void (*net_ns_put)(struct net *); 60 - 61 - /* Module (if any) which this is connected to. */ 62 - struct module *me; 63 - }; 64 - 65 - extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; 66 - 67 - /* Protocol global registration. */ 68 - int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); 69 - void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); 70 - 71 - const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); 72 - 73 - /* Existing built-in protocols */ 74 - extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; 75 - 76 - static inline struct nf_conntrack_l3proto * 77 - __nf_ct_l3proto_find(u_int16_t l3proto) 78 - { 79 - if (unlikely(l3proto >= NFPROTO_NUMPROTO)) 80 - return &nf_conntrack_l3proto_generic; 81 - return rcu_dereference(nf_ct_l3protos[l3proto]); 82 - } 83 - 84 - #endif /*_NF_CONNTRACK_L3PROTO_H*/
+3 -11
include/net/netfilter/nf_conntrack_l4proto.h
··· 36 36 struct net *net, struct nf_conntrack_tuple *tuple); 37 37 38 38 /* Invert the per-proto part of the tuple: ie. turn xmit into reply. 39 - * Some packets can't be inverted: return 0 in that case. 39 + * Only used by icmp, most protocols use a generic version. 40 40 */ 41 41 bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, 42 42 const struct nf_conntrack_tuple *orig); ··· 45 45 int (*packet)(struct nf_conn *ct, 46 46 const struct sk_buff *skb, 47 47 unsigned int dataoff, 48 - enum ip_conntrack_info ctinfo, 49 - unsigned int *timeouts); 48 + enum ip_conntrack_info ctinfo); 50 49 51 50 /* Called when a new connection for this protocol found; 52 51 * returns TRUE if it's OK. If so, packet() called next. */ 53 52 bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, 54 - unsigned int dataoff, unsigned int *timeouts); 53 + unsigned int dataoff); 55 54 56 55 /* Called when a conntrack entry is destroyed */ 57 56 void (*destroy)(struct nf_conn *ct); ··· 61 62 62 63 /* called by gc worker if table is full */ 63 64 bool (*can_early_drop)(const struct nf_conn *ct); 64 - 65 - /* Return the array of timeouts for this protocol. */ 66 - unsigned int *(*get_timeouts)(struct net *net); 67 65 68 66 /* convert protoinfo to nfnetink attributes */ 69 67 int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, ··· 130 134 /* Protocol global registration. */ 131 135 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); 132 136 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); 133 - int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], 134 - unsigned int num_proto); 135 - void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], 136 - unsigned int num_proto); 137 137 138 138 /* Generic netlink helpers */ 139 139 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
+4 -14
include/net/netfilter/nf_conntrack_timeout.h
··· 67 67 #endif 68 68 }; 69 69 70 - static inline unsigned int * 71 - nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, 72 - const struct nf_conntrack_l4proto *l4proto) 70 + static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) 73 71 { 72 + unsigned int *timeouts = NULL; 74 73 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 75 74 struct nf_conn_timeout *timeout_ext; 76 - unsigned int *timeouts; 77 75 78 76 timeout_ext = nf_ct_timeout_find(ct); 79 - if (timeout_ext) { 77 + if (timeout_ext) 80 78 timeouts = nf_ct_timeout_data(timeout_ext); 81 - if (unlikely(!timeouts)) 82 - timeouts = l4proto->get_timeouts(net); 83 - } else { 84 - timeouts = l4proto->get_timeouts(net); 85 - } 86 - 87 - return timeouts; 88 - #else 89 - return l4proto->get_timeouts(net); 90 79 #endif 80 + return timeouts; 91 81 } 92 82 93 83 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+8
include/net/netfilter/nf_tproxy.h
··· 17 17 return false; 18 18 } 19 19 20 + /* assign a socket to the skb -- consumes sk */ 21 + static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 22 + { 23 + skb_orphan(skb); 24 + skb->sk = sk; 25 + skb->destructor = sock_edemux; 26 + } 27 + 20 28 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); 21 29 22 30 /**
+1
include/net/netns/nftables.h
··· 7 7 struct netns_nftables { 8 8 struct list_head tables; 9 9 struct list_head commit_list; 10 + struct mutex commit_mutex; 10 11 unsigned int base_seq; 11 12 u8 gencursor; 12 13 u8 validate_state;
+11
include/uapi/linux/netfilter/nf_osf.h
··· 16 16 17 17 #define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */ 18 18 19 + /* Check if ip TTL is less than fingerprint one */ 20 + #define NF_OSF_TTL_LESS 1 21 + 19 22 /* Do not compare ip and fingerprint TTL at all */ 20 23 #define NF_OSF_TTL_NOCHECK 2 21 24 25 + #define NF_OSF_FLAGMASK (NF_OSF_GENRE | NF_OSF_TTL | \ 26 + NF_OSF_LOG | NF_OSF_INVERT) 22 27 /* Wildcard MSS (kind of). 23 28 * It is used to implement a state machine for the different wildcard values 24 29 * of the MSS and window sizes. ··· 86 81 87 82 /* Others are not used in the current OSF */ 88 83 OSFOPT_EMPTY = 255, 84 + }; 85 + 86 + enum nf_osf_attr_type { 87 + OSF_ATTR_UNSPEC, 88 + OSF_ATTR_FINGER, 89 + OSF_ATTR_MAX, 89 90 }; 90 91 91 92 #endif /* _NF_OSF_H */
+3 -1
include/uapi/linux/netfilter/nf_tables.h
··· 921 921 /* 922 922 * enum nft_socket_keys - nf_tables socket expression keys 923 923 * 924 - * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_ 924 + * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option 925 + * @NFT_SOCKET_MARK: Value of the socket mark 925 926 */ 926 927 enum nft_socket_keys { 927 928 NFT_SOCKET_TRANSPARENT, 929 + NFT_SOCKET_MARK, 928 930 __NFT_SOCKET_MAX 929 931 }; 930 932 #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
+2 -8
include/uapi/linux/netfilter/xt_osf.h
··· 37 37 38 38 #define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE 39 39 #define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK 40 - 41 - #define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */ 40 + #define XT_OSF_TTL_LESS NF_OSF_TTL_LESS 42 41 43 42 #define xt_osf_wc nf_osf_wc 44 43 #define xt_osf_opt nf_osf_opt ··· 46 47 #define xt_osf_finger nf_osf_finger 47 48 #define xt_osf_nlmsg nf_osf_nlmsg 48 49 50 + #define xt_osf_attr_type nf_osf_attr_type 49 51 /* 50 52 * Add/remove fingerprint from the kernel. 51 53 */ ··· 54 54 OSF_MSG_ADD, 55 55 OSF_MSG_REMOVE, 56 56 OSF_MSG_MAX, 57 - }; 58 - 59 - enum xt_osf_attr_type { 60 - OSF_ATTR_UNSPEC, 61 - OSF_ATTR_FINGER, 62 - OSF_ATTR_MAX, 63 57 }; 64 58 65 59 #endif /* _XT_OSF_H */
+1 -2
net/bridge/netfilter/nft_reject_bridge.c
··· 89 89 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, 90 90 net->ipv4.sysctl_ip_default_ttl); 91 91 nf_reject_ip_tcphdr_put(nskb, oldskb, oth); 92 - niph->ttl = net->ipv4.sysctl_ip_default_ttl; 93 - niph->tot_len = htons(nskb->len); 92 + niph->tot_len = htons(nskb->len); 94 93 ip_send_check(niph); 95 94 96 95 nft_reject_br_push_etherhdr(oldskb, nskb);
+1 -1
net/ieee802154/6lowpan/reassembly.c
··· 25 25 26 26 #include <net/ieee802154_netdev.h> 27 27 #include <net/6lowpan.h> 28 - #include <net/ipv6.h> 28 + #include <net/ipv6_frag.h> 29 29 #include <net/inet_frag.h> 30 30 31 31 #include "6lowpan_i.h"
-53
net/ipv4/netfilter.c
··· 98 98 } 99 99 EXPORT_SYMBOL_GPL(nf_ip_reroute); 100 100 101 - __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 102 - unsigned int dataoff, u_int8_t protocol) 103 - { 104 - const struct iphdr *iph = ip_hdr(skb); 105 - __sum16 csum = 0; 106 - 107 - switch (skb->ip_summed) { 108 - case CHECKSUM_COMPLETE: 109 - if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 110 - break; 111 - if ((protocol == 0 && !csum_fold(skb->csum)) || 112 - !csum_tcpudp_magic(iph->saddr, iph->daddr, 113 - skb->len - dataoff, protocol, 114 - skb->csum)) { 115 - skb->ip_summed = CHECKSUM_UNNECESSARY; 116 - break; 117 - } 118 - /* fall through */ 119 - case CHECKSUM_NONE: 120 - if (protocol == 0) 121 - skb->csum = 0; 122 - else 123 - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 124 - skb->len - dataoff, 125 - protocol, 0); 126 - csum = __skb_checksum_complete(skb); 127 - } 128 - return csum; 129 - } 130 - EXPORT_SYMBOL(nf_ip_checksum); 131 - 132 - __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 133 - unsigned int dataoff, unsigned int len, 134 - u_int8_t protocol) 135 - { 136 - const struct iphdr *iph = ip_hdr(skb); 137 - __sum16 csum = 0; 138 - 139 - switch (skb->ip_summed) { 140 - case CHECKSUM_COMPLETE: 141 - if (len == skb->len - dataoff) 142 - return nf_ip_checksum(skb, hook, dataoff, protocol); 143 - /* fall through */ 144 - case CHECKSUM_NONE: 145 - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 146 - skb->len - dataoff, 0); 147 - skb->ip_summed = CHECKSUM_NONE; 148 - return __skb_checksum_complete_head(skb, dataoff + len); 149 - } 150 - return csum; 151 - } 152 - EXPORT_SYMBOL_GPL(nf_ip_checksum_partial); 153 - 154 101 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, 155 102 bool strict __always_unused) 156 103 {
+3 -19
net/ipv4/netfilter/Kconfig
··· 9 9 tristate 10 10 default n 11 11 12 - config NF_CONNTRACK_IPV4 13 - tristate "IPv4 connection tracking support (required for NAT)" 14 - depends on NF_CONNTRACK 15 - default m if NETFILTER_ADVANCED=n 16 - select NF_DEFRAG_IPV4 17 - ---help--- 18 - Connection tracking keeps a record of what packets have passed 19 - through your machine, in order to figure out how they are related 20 - into connections. 21 - 22 - This is IPv4 support on Layer 3 independent connection tracking. 23 - Layer 3 independent connection tracking is experimental scheme 24 - which generalize ip_conntrack to support other layer 3 protocols. 25 - 26 - To compile it as a module, choose M here. If unsure, say N. 27 - 28 12 config NF_SOCKET_IPV4 29 13 tristate "IPv4 socket lookup support" 30 14 help ··· 96 112 97 113 config NF_NAT_IPV4 98 114 tristate "IPv4 NAT" 99 - depends on NF_CONNTRACK_IPV4 115 + depends on NF_CONNTRACK 100 116 default m if NETFILTER_ADVANCED=n 101 117 select NF_NAT 102 118 help ··· 263 279 # NAT + specific targets: nf_conntrack 264 280 config IP_NF_NAT 265 281 tristate "iptables NAT support" 266 - depends on NF_CONNTRACK_IPV4 282 + depends on NF_CONNTRACK 267 283 default m if NETFILTER_ADVANCED=n 268 284 select NF_NAT 269 285 select NF_NAT_IPV4 ··· 324 340 config IP_NF_TARGET_CLUSTERIP 325 341 tristate "CLUSTERIP target support" 326 342 depends on IP_NF_MANGLE 327 - depends on NF_CONNTRACK_IPV4 343 + depends on NF_CONNTRACK 328 344 depends on NETFILTER_ADVANCED 329 345 select NF_CONNTRACK_MARK 330 346 select NETFILTER_FAMILY_ARP
-6
net/ipv4/netfilter/Makefile
··· 3 3 # Makefile for the netfilter modules on top of IPv4. 4 4 # 5 5 6 - # objects for l3 independent conntrack 7 - nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o 8 - 9 - # connection tracking 10 - obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o 11 - 12 6 nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o 13 7 nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o 14 8 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
-472
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
··· 1 - 2 - /* (C) 1999-2001 Paul `Rusty' Russell 3 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 4 - * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 5 - * 6 - * This program is free software; you can redistribute it and/or modify 7 - * it under the terms of the GNU General Public License version 2 as 8 - * published by the Free Software Foundation. 9 - */ 10 - 11 - #include <linux/types.h> 12 - #include <linux/ip.h> 13 - #include <linux/netfilter.h> 14 - #include <linux/module.h> 15 - #include <linux/skbuff.h> 16 - #include <linux/icmp.h> 17 - #include <linux/sysctl.h> 18 - #include <net/route.h> 19 - #include <net/ip.h> 20 - 21 - #include <linux/netfilter_ipv4.h> 22 - #include <net/netfilter/nf_conntrack.h> 23 - #include <net/netfilter/nf_conntrack_helper.h> 24 - #include <net/netfilter/nf_conntrack_l4proto.h> 25 - #include <net/netfilter/nf_conntrack_l3proto.h> 26 - #include <net/netfilter/nf_conntrack_zones.h> 27 - #include <net/netfilter/nf_conntrack_core.h> 28 - #include <net/netfilter/nf_conntrack_seqadj.h> 29 - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 30 - #include <net/netfilter/nf_nat_helper.h> 31 - #include <net/netfilter/ipv4/nf_defrag_ipv4.h> 32 - #include <net/netfilter/nf_log.h> 33 - 34 - static int conntrack4_net_id __read_mostly; 35 - static DEFINE_MUTEX(register_ipv4_hooks); 36 - 37 - struct conntrack4_net { 38 - unsigned int users; 39 - }; 40 - 41 - static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 42 - struct nf_conntrack_tuple *tuple) 43 - { 44 - const __be32 *ap; 45 - __be32 _addrs[2]; 46 - ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 47 - sizeof(u_int32_t) * 2, _addrs); 48 - if (ap == NULL) 49 - return false; 50 - 51 - tuple->src.u3.ip = ap[0]; 52 - tuple->dst.u3.ip = ap[1]; 53 - 54 - return true; 55 - } 56 - 57 - static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, 58 - const struct nf_conntrack_tuple *orig) 59 - { 60 - tuple->src.u3.ip = 
orig->dst.u3.ip; 61 - tuple->dst.u3.ip = orig->src.u3.ip; 62 - 63 - return true; 64 - } 65 - 66 - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 67 - unsigned int *dataoff, u_int8_t *protonum) 68 - { 69 - const struct iphdr *iph; 70 - struct iphdr _iph; 71 - 72 - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 73 - if (iph == NULL) 74 - return -NF_ACCEPT; 75 - 76 - /* Conntrack defragments packets, we might still see fragments 77 - * inside ICMP packets though. */ 78 - if (iph->frag_off & htons(IP_OFFSET)) 79 - return -NF_ACCEPT; 80 - 81 - *dataoff = nhoff + (iph->ihl << 2); 82 - *protonum = iph->protocol; 83 - 84 - /* Check bogus IP headers */ 85 - if (*dataoff > skb->len) { 86 - pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " 87 - "nhoff %u, ihl %u, skblen %u\n", 88 - nhoff, iph->ihl << 2, skb->len); 89 - return -NF_ACCEPT; 90 - } 91 - 92 - return NF_ACCEPT; 93 - } 94 - 95 - static unsigned int ipv4_helper(void *priv, 96 - struct sk_buff *skb, 97 - const struct nf_hook_state *state) 98 - { 99 - struct nf_conn *ct; 100 - enum ip_conntrack_info ctinfo; 101 - const struct nf_conn_help *help; 102 - const struct nf_conntrack_helper *helper; 103 - 104 - /* This is where we call the helper: as the packet goes out. 
*/ 105 - ct = nf_ct_get(skb, &ctinfo); 106 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 107 - return NF_ACCEPT; 108 - 109 - help = nfct_help(ct); 110 - if (!help) 111 - return NF_ACCEPT; 112 - 113 - /* rcu_read_lock()ed by nf_hook_thresh */ 114 - helper = rcu_dereference(help->helper); 115 - if (!helper) 116 - return NF_ACCEPT; 117 - 118 - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 119 - ct, ctinfo); 120 - } 121 - 122 - static unsigned int ipv4_confirm(void *priv, 123 - struct sk_buff *skb, 124 - const struct nf_hook_state *state) 125 - { 126 - struct nf_conn *ct; 127 - enum ip_conntrack_info ctinfo; 128 - 129 - ct = nf_ct_get(skb, &ctinfo); 130 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 131 - goto out; 132 - 133 - /* adjust seqs for loopback traffic only in outgoing direction */ 134 - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 135 - !nf_is_loopback_packet(skb)) { 136 - if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { 137 - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 138 - return NF_DROP; 139 - } 140 - } 141 - out: 142 - /* We've seen it coming out the other side: confirm it */ 143 - return nf_conntrack_confirm(skb); 144 - } 145 - 146 - static unsigned int ipv4_conntrack_in(void *priv, 147 - struct sk_buff *skb, 148 - const struct nf_hook_state *state) 149 - { 150 - return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 151 - } 152 - 153 - static unsigned int ipv4_conntrack_local(void *priv, 154 - struct sk_buff *skb, 155 - const struct nf_hook_state *state) 156 - { 157 - if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */ 158 - enum ip_conntrack_info ctinfo; 159 - struct nf_conn *tmpl; 160 - 161 - tmpl = nf_ct_get(skb, &ctinfo); 162 - if (tmpl && nf_ct_is_template(tmpl)) { 163 - /* when skipping ct, clear templates to avoid fooling 164 - * later targets/matches 165 - */ 166 - skb->_nfct = 0; 167 - nf_ct_put(tmpl); 168 - } 169 - return NF_ACCEPT; 170 - } 171 - 172 - return nf_conntrack_in(state->net, 
PF_INET, state->hook, skb); 173 - } 174 - 175 - /* Connection tracking may drop packets, but never alters them, so 176 - make it the first hook. */ 177 - static const struct nf_hook_ops ipv4_conntrack_ops[] = { 178 - { 179 - .hook = ipv4_conntrack_in, 180 - .pf = NFPROTO_IPV4, 181 - .hooknum = NF_INET_PRE_ROUTING, 182 - .priority = NF_IP_PRI_CONNTRACK, 183 - }, 184 - { 185 - .hook = ipv4_conntrack_local, 186 - .pf = NFPROTO_IPV4, 187 - .hooknum = NF_INET_LOCAL_OUT, 188 - .priority = NF_IP_PRI_CONNTRACK, 189 - }, 190 - { 191 - .hook = ipv4_helper, 192 - .pf = NFPROTO_IPV4, 193 - .hooknum = NF_INET_POST_ROUTING, 194 - .priority = NF_IP_PRI_CONNTRACK_HELPER, 195 - }, 196 - { 197 - .hook = ipv4_confirm, 198 - .pf = NFPROTO_IPV4, 199 - .hooknum = NF_INET_POST_ROUTING, 200 - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 201 - }, 202 - { 203 - .hook = ipv4_helper, 204 - .pf = NFPROTO_IPV4, 205 - .hooknum = NF_INET_LOCAL_IN, 206 - .priority = NF_IP_PRI_CONNTRACK_HELPER, 207 - }, 208 - { 209 - .hook = ipv4_confirm, 210 - .pf = NFPROTO_IPV4, 211 - .hooknum = NF_INET_LOCAL_IN, 212 - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 213 - }, 214 - }; 215 - 216 - /* Fast function for those who don't want to parse /proc (and I don't 217 - blame them). */ 218 - /* Reversing the socket's dst/src point of view gives us the reply 219 - mapping. 
*/ 220 - static int 221 - getorigdst(struct sock *sk, int optval, void __user *user, int *len) 222 - { 223 - const struct inet_sock *inet = inet_sk(sk); 224 - const struct nf_conntrack_tuple_hash *h; 225 - struct nf_conntrack_tuple tuple; 226 - 227 - memset(&tuple, 0, sizeof(tuple)); 228 - 229 - lock_sock(sk); 230 - tuple.src.u3.ip = inet->inet_rcv_saddr; 231 - tuple.src.u.tcp.port = inet->inet_sport; 232 - tuple.dst.u3.ip = inet->inet_daddr; 233 - tuple.dst.u.tcp.port = inet->inet_dport; 234 - tuple.src.l3num = PF_INET; 235 - tuple.dst.protonum = sk->sk_protocol; 236 - release_sock(sk); 237 - 238 - /* We only do TCP and SCTP at the moment: is there a better way? */ 239 - if (tuple.dst.protonum != IPPROTO_TCP && 240 - tuple.dst.protonum != IPPROTO_SCTP) { 241 - pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 242 - return -ENOPROTOOPT; 243 - } 244 - 245 - if ((unsigned int) *len < sizeof(struct sockaddr_in)) { 246 - pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", 247 - *len, sizeof(struct sockaddr_in)); 248 - return -EINVAL; 249 - } 250 - 251 - h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 252 - if (h) { 253 - struct sockaddr_in sin; 254 - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 255 - 256 - sin.sin_family = AF_INET; 257 - sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 258 - .tuple.dst.u.tcp.port; 259 - sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 260 - .tuple.dst.u3.ip; 261 - memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 262 - 263 - pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 264 - &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 265 - nf_ct_put(ct); 266 - if (copy_to_user(user, &sin, sizeof(sin)) != 0) 267 - return -EFAULT; 268 - else 269 - return 0; 270 - } 271 - pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 272 - &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 273 - &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 274 - return -ENOENT; 275 - } 276 - 277 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 278 - 279 - #include 
<linux/netfilter/nfnetlink.h> 280 - #include <linux/netfilter/nfnetlink_conntrack.h> 281 - 282 - static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 283 - const struct nf_conntrack_tuple *tuple) 284 - { 285 - if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || 286 - nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) 287 - goto nla_put_failure; 288 - return 0; 289 - 290 - nla_put_failure: 291 - return -1; 292 - } 293 - 294 - static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { 295 - [CTA_IP_V4_SRC] = { .type = NLA_U32 }, 296 - [CTA_IP_V4_DST] = { .type = NLA_U32 }, 297 - }; 298 - 299 - static int ipv4_nlattr_to_tuple(struct nlattr *tb[], 300 - struct nf_conntrack_tuple *t) 301 - { 302 - if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 303 - return -EINVAL; 304 - 305 - t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); 306 - t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); 307 - 308 - return 0; 309 - } 310 - #endif 311 - 312 - static struct nf_sockopt_ops so_getorigdst = { 313 - .pf = PF_INET, 314 - .get_optmin = SO_ORIGINAL_DST, 315 - .get_optmax = SO_ORIGINAL_DST+1, 316 - .get = getorigdst, 317 - .owner = THIS_MODULE, 318 - }; 319 - 320 - static int ipv4_hooks_register(struct net *net) 321 - { 322 - struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); 323 - int err = 0; 324 - 325 - mutex_lock(&register_ipv4_hooks); 326 - 327 - cnet->users++; 328 - if (cnet->users > 1) 329 - goto out_unlock; 330 - 331 - err = nf_defrag_ipv4_enable(net); 332 - if (err) { 333 - cnet->users = 0; 334 - goto out_unlock; 335 - } 336 - 337 - err = nf_register_net_hooks(net, ipv4_conntrack_ops, 338 - ARRAY_SIZE(ipv4_conntrack_ops)); 339 - 340 - if (err) 341 - cnet->users = 0; 342 - out_unlock: 343 - mutex_unlock(&register_ipv4_hooks); 344 - return err; 345 - } 346 - 347 - static void ipv4_hooks_unregister(struct net *net) 348 - { 349 - struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); 350 - 351 - mutex_lock(&register_ipv4_hooks); 352 - 
if (cnet->users && (--cnet->users == 0)) 353 - nf_unregister_net_hooks(net, ipv4_conntrack_ops, 354 - ARRAY_SIZE(ipv4_conntrack_ops)); 355 - mutex_unlock(&register_ipv4_hooks); 356 - } 357 - 358 - const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { 359 - .l3proto = PF_INET, 360 - .pkt_to_tuple = ipv4_pkt_to_tuple, 361 - .invert_tuple = ipv4_invert_tuple, 362 - .get_l4proto = ipv4_get_l4proto, 363 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 364 - .tuple_to_nlattr = ipv4_tuple_to_nlattr, 365 - .nlattr_to_tuple = ipv4_nlattr_to_tuple, 366 - .nla_policy = ipv4_nla_policy, 367 - .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */ 368 - NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */ 369 - #endif 370 - .net_ns_get = ipv4_hooks_register, 371 - .net_ns_put = ipv4_hooks_unregister, 372 - .me = THIS_MODULE, 373 - }; 374 - 375 - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 376 - &nf_conntrack_htable_size, 0600); 377 - 378 - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 379 - MODULE_ALIAS("ip_conntrack"); 380 - MODULE_LICENSE("GPL"); 381 - 382 - static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = { 383 - &nf_conntrack_l4proto_tcp4, 384 - &nf_conntrack_l4proto_udp4, 385 - &nf_conntrack_l4proto_icmp, 386 - #ifdef CONFIG_NF_CT_PROTO_DCCP 387 - &nf_conntrack_l4proto_dccp4, 388 - #endif 389 - #ifdef CONFIG_NF_CT_PROTO_SCTP 390 - &nf_conntrack_l4proto_sctp4, 391 - #endif 392 - #ifdef CONFIG_NF_CT_PROTO_UDPLITE 393 - &nf_conntrack_l4proto_udplite4, 394 - #endif 395 - }; 396 - 397 - static int ipv4_net_init(struct net *net) 398 - { 399 - return nf_ct_l4proto_pernet_register(net, builtin_l4proto4, 400 - ARRAY_SIZE(builtin_l4proto4)); 401 - } 402 - 403 - static void ipv4_net_exit(struct net *net) 404 - { 405 - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, 406 - ARRAY_SIZE(builtin_l4proto4)); 407 - } 408 - 409 - static struct pernet_operations ipv4_net_ops = { 410 - .init = ipv4_net_init, 411 - 
.exit = ipv4_net_exit, 412 - .id = &conntrack4_net_id, 413 - .size = sizeof(struct conntrack4_net), 414 - }; 415 - 416 - static int __init nf_conntrack_l3proto_ipv4_init(void) 417 - { 418 - int ret = 0; 419 - 420 - need_conntrack(); 421 - 422 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 423 - if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) != 424 - nf_conntrack_l3proto_ipv4.nla_size)) 425 - return -EINVAL; 426 - #endif 427 - ret = nf_register_sockopt(&so_getorigdst); 428 - if (ret < 0) { 429 - pr_err("Unable to register netfilter socket option\n"); 430 - return ret; 431 - } 432 - 433 - ret = register_pernet_subsys(&ipv4_net_ops); 434 - if (ret < 0) { 435 - pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); 436 - goto cleanup_sockopt; 437 - } 438 - 439 - ret = nf_ct_l4proto_register(builtin_l4proto4, 440 - ARRAY_SIZE(builtin_l4proto4)); 441 - if (ret < 0) 442 - goto cleanup_pernet; 443 - 444 - ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); 445 - if (ret < 0) { 446 - pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); 447 - goto cleanup_l4proto; 448 - } 449 - 450 - return ret; 451 - cleanup_l4proto: 452 - nf_ct_l4proto_unregister(builtin_l4proto4, 453 - ARRAY_SIZE(builtin_l4proto4)); 454 - cleanup_pernet: 455 - unregister_pernet_subsys(&ipv4_net_ops); 456 - cleanup_sockopt: 457 - nf_unregister_sockopt(&so_getorigdst); 458 - return ret; 459 - } 460 - 461 - static void __exit nf_conntrack_l3proto_ipv4_fini(void) 462 - { 463 - synchronize_net(); 464 - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 465 - nf_ct_l4proto_unregister(builtin_l4proto4, 466 - ARRAY_SIZE(builtin_l4proto4)); 467 - unregister_pernet_subsys(&ipv4_net_ops); 468 - nf_unregister_sockopt(&so_getorigdst); 469 - } 470 - 471 - module_init(nf_conntrack_l3proto_ipv4_init); 472 - module_exit(nf_conntrack_l3proto_ipv4_fini);
+12 -7
net/ipv4/netfilter/nf_conntrack_proto_icmp.c net/netfilter/nf_conntrack_proto_icmp.c
··· 19 19 #include <net/netfilter/nf_conntrack_tuple.h> 20 20 #include <net/netfilter/nf_conntrack_l4proto.h> 21 21 #include <net/netfilter/nf_conntrack_core.h> 22 + #include <net/netfilter/nf_conntrack_timeout.h> 22 23 #include <net/netfilter/nf_conntrack_zones.h> 23 24 #include <net/netfilter/nf_log.h> 24 25 ··· 81 80 static int icmp_packet(struct nf_conn *ct, 82 81 const struct sk_buff *skb, 83 82 unsigned int dataoff, 84 - enum ip_conntrack_info ctinfo, 85 - unsigned int *timeout) 83 + enum ip_conntrack_info ctinfo) 86 84 { 87 85 /* Do not immediately delete the connection after the first 88 86 successful reply to avoid excessive conntrackd traffic 89 87 and also to handle correctly ICMP echo reply duplicates. */ 88 + unsigned int *timeout = nf_ct_timeout_lookup(ct); 89 + 90 + if (!timeout) 91 + timeout = icmp_get_timeouts(nf_ct_net(ct)); 92 + 90 93 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 91 94 92 95 return NF_ACCEPT; ··· 98 93 99 94 /* Called when a new connection for this protocol found. */ 100 95 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, 101 - unsigned int dataoff, unsigned int *timeouts) 96 + unsigned int dataoff) 102 97 { 103 98 static const u_int8_t valid_new[] = { 104 99 [ICMP_ECHO] = 1, ··· 147 142 148 143 /* Ordinarily, we'd expect the inverted tupleproto, but it's 149 144 been preserved inside the ICMP. */ 150 - if (!nf_ct_invert_tuple(&innertuple, &origtuple, 151 - &nf_conntrack_l3proto_ipv4, innerproto)) { 145 + if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { 152 146 pr_debug("icmp_error_message: no match\n"); 153 147 return -NF_ACCEPT; 154 148 } ··· 285 281 struct nf_icmp_net *in = icmp_pernet(net); 286 282 287 283 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { 284 + if (!timeout) 285 + timeout = &in->timeout; 288 286 *timeout = 289 287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; 290 - } else { 288 + } else if (timeout) { 291 289 /* Set default ICMP timeout. 
*/ 292 290 *timeout = in->timeout; 293 291 } ··· 364 358 .pkt_to_tuple = icmp_pkt_to_tuple, 365 359 .invert_tuple = icmp_invert_tuple, 366 360 .packet = icmp_packet, 367 - .get_timeouts = icmp_get_timeouts, 368 361 .new = icmp_new, 369 362 .error = icmp_error, 370 363 .destroy = NULL,
-62
net/ipv6/netfilter.c
··· 15 15 #include <net/ipv6.h> 16 16 #include <net/ip6_route.h> 17 17 #include <net/xfrm.h> 18 - #include <net/ip6_checksum.h> 19 18 #include <net/netfilter/nf_queue.h> 20 19 21 20 int ip6_route_me_harder(struct net *net, struct sk_buff *skb) ··· 105 106 return err; 106 107 } 107 108 108 - __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 109 - unsigned int dataoff, u_int8_t protocol) 110 - { 111 - const struct ipv6hdr *ip6h = ipv6_hdr(skb); 112 - __sum16 csum = 0; 113 - 114 - switch (skb->ip_summed) { 115 - case CHECKSUM_COMPLETE: 116 - if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 117 - break; 118 - if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 119 - skb->len - dataoff, protocol, 120 - csum_sub(skb->csum, 121 - skb_checksum(skb, 0, 122 - dataoff, 0)))) { 123 - skb->ip_summed = CHECKSUM_UNNECESSARY; 124 - break; 125 - } 126 - /* fall through */ 127 - case CHECKSUM_NONE: 128 - skb->csum = ~csum_unfold( 129 - csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 130 - skb->len - dataoff, 131 - protocol, 132 - csum_sub(0, 133 - skb_checksum(skb, 0, 134 - dataoff, 0)))); 135 - csum = __skb_checksum_complete(skb); 136 - } 137 - return csum; 138 - } 139 - EXPORT_SYMBOL(nf_ip6_checksum); 140 - 141 - static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, 142 - unsigned int dataoff, unsigned int len, 143 - u_int8_t protocol) 144 - { 145 - const struct ipv6hdr *ip6h = ipv6_hdr(skb); 146 - __wsum hsum; 147 - __sum16 csum = 0; 148 - 149 - switch (skb->ip_summed) { 150 - case CHECKSUM_COMPLETE: 151 - if (len == skb->len - dataoff) 152 - return nf_ip6_checksum(skb, hook, dataoff, protocol); 153 - /* fall through */ 154 - case CHECKSUM_NONE: 155 - hsum = skb_checksum(skb, 0, dataoff, 0); 156 - skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, 157 - &ip6h->daddr, 158 - skb->len - dataoff, 159 - protocol, 160 - csum_sub(0, hsum))); 161 - skb->ip_summed = CHECKSUM_NONE; 162 - return __skb_checksum_complete_head(skb, dataoff + 
len); 163 - } 164 - return csum; 165 - }; 166 - 167 109 static const struct nf_ipv6_ops ipv6ops = { 168 110 .chk_addr = ipv6_chk_addr, 169 111 .route_input = ip6_route_input, 170 112 .fragment = ip6_fragment, 171 - .checksum = nf_ip6_checksum, 172 - .checksum_partial = nf_ip6_checksum_partial, 173 113 .route = nf_ip6_route, 174 114 .reroute = nf_ip6_reroute, 175 115 };
+4 -23
net/ipv6/netfilter/Kconfig
··· 5 5 menu "IPv6: Netfilter Configuration" 6 6 depends on INET && IPV6 && NETFILTER 7 7 8 - config NF_DEFRAG_IPV6 9 - tristate 10 - default n 11 - 12 - config NF_CONNTRACK_IPV6 13 - tristate "IPv6 connection tracking support" 14 - depends on INET && IPV6 && NF_CONNTRACK 15 - default m if NETFILTER_ADVANCED=n 16 - select NF_DEFRAG_IPV6 17 - ---help--- 18 - Connection tracking keeps a record of what packets have passed 19 - through your machine, in order to figure out how they are related 20 - into connections. 21 - 22 - This is IPv6 support on Layer 3 independent connection tracking. 23 - Layer 3 independent connection tracking is experimental scheme 24 - which generalize ip_conntrack to support other layer 3 protocols. 25 - 26 - To compile it as a module, choose M here. If unsure, say N. 27 - 28 8 config NF_SOCKET_IPV6 29 9 tristate "IPv6 socket lookup support" 30 10 help ··· 108 128 109 129 config NF_NAT_IPV6 110 130 tristate "IPv6 NAT" 111 - depends on NF_CONNTRACK_IPV6 131 + depends on NF_CONNTRACK 112 132 depends on NETFILTER_ADVANCED 113 133 select NF_NAT 114 134 help ··· 308 328 309 329 config IP6_NF_NAT 310 330 tristate "ip6tables NAT support" 311 - depends on NF_CONNTRACK_IPV6 331 + depends on NF_CONNTRACK 312 332 depends on NETFILTER_ADVANCED 313 333 select NF_NAT 314 334 select NF_NAT_IPV6 ··· 345 365 endif # IP6_NF_NAT 346 366 347 367 endif # IP6_NF_IPTABLES 348 - 349 368 endmenu 350 369 370 + config NF_DEFRAG_IPV6 371 + tristate
-6
net/ipv6/netfilter/Makefile
··· 11 11 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 12 12 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o 13 13 14 - # objects for l3 independent conntrack 15 - nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 16 - 17 - # l3 independent conntrack 18 - obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 19 - 20 14 nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o 21 15 nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o 22 16 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-460
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
··· 1 - /* 2 - * Copyright (C)2004 USAGI/WIDE Project 3 - * 4 - * This program is free software; you can redistribute it and/or modify 5 - * it under the terms of the GNU General Public License version 2 as 6 - * published by the Free Software Foundation. 7 - * 8 - * Author: 9 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 10 - */ 11 - 12 - #include <linux/types.h> 13 - #include <linux/ipv6.h> 14 - #include <linux/in6.h> 15 - #include <linux/netfilter.h> 16 - #include <linux/module.h> 17 - #include <linux/skbuff.h> 18 - #include <linux/icmp.h> 19 - #include <net/ipv6.h> 20 - #include <net/inet_frag.h> 21 - 22 - #include <linux/netfilter_bridge.h> 23 - #include <linux/netfilter_ipv6.h> 24 - #include <linux/netfilter_ipv6/ip6_tables.h> 25 - #include <net/netfilter/nf_conntrack.h> 26 - #include <net/netfilter/nf_conntrack_helper.h> 27 - #include <net/netfilter/nf_conntrack_l4proto.h> 28 - #include <net/netfilter/nf_conntrack_l3proto.h> 29 - #include <net/netfilter/nf_conntrack_core.h> 30 - #include <net/netfilter/nf_conntrack_zones.h> 31 - #include <net/netfilter/nf_conntrack_seqadj.h> 32 - #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 33 - #include <net/netfilter/nf_nat_helper.h> 34 - #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 35 - #include <net/netfilter/nf_log.h> 36 - 37 - static int conntrack6_net_id; 38 - static DEFINE_MUTEX(register_ipv6_hooks); 39 - 40 - struct conntrack6_net { 41 - unsigned int users; 42 - }; 43 - 44 - static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 45 - struct nf_conntrack_tuple *tuple) 46 - { 47 - const u_int32_t *ap; 48 - u_int32_t _addrs[8]; 49 - 50 - ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), 51 - sizeof(_addrs), _addrs); 52 - if (ap == NULL) 53 - return false; 54 - 55 - memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); 56 - memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); 57 - 58 - return true; 59 - } 60 - 61 - static bool 
ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, 62 - const struct nf_conntrack_tuple *orig) 63 - { 64 - memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); 65 - memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); 66 - 67 - return true; 68 - } 69 - 70 - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 71 - unsigned int *dataoff, u_int8_t *protonum) 72 - { 73 - unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 74 - __be16 frag_off; 75 - int protoff; 76 - u8 nexthdr; 77 - 78 - if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 79 - &nexthdr, sizeof(nexthdr)) != 0) { 80 - pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 81 - return -NF_ACCEPT; 82 - } 83 - protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 84 - /* 85 - * (protoff == skb->len) means the packet has not data, just 86 - * IPv6 and possibly extensions headers, but it is tracked anyway 87 - */ 88 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 89 - pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 90 - return -NF_ACCEPT; 91 - } 92 - 93 - *dataoff = protoff; 94 - *protonum = nexthdr; 95 - return NF_ACCEPT; 96 - } 97 - 98 - static unsigned int ipv6_helper(void *priv, 99 - struct sk_buff *skb, 100 - const struct nf_hook_state *state) 101 - { 102 - struct nf_conn *ct; 103 - const struct nf_conn_help *help; 104 - const struct nf_conntrack_helper *helper; 105 - enum ip_conntrack_info ctinfo; 106 - __be16 frag_off; 107 - int protoff; 108 - u8 nexthdr; 109 - 110 - /* This is where we call the helper: as the packet goes out. 
*/ 111 - ct = nf_ct_get(skb, &ctinfo); 112 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 113 - return NF_ACCEPT; 114 - 115 - help = nfct_help(ct); 116 - if (!help) 117 - return NF_ACCEPT; 118 - /* rcu_read_lock()ed by nf_hook_thresh */ 119 - helper = rcu_dereference(help->helper); 120 - if (!helper) 121 - return NF_ACCEPT; 122 - 123 - nexthdr = ipv6_hdr(skb)->nexthdr; 124 - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 125 - &frag_off); 126 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 127 - pr_debug("proto header not found\n"); 128 - return NF_ACCEPT; 129 - } 130 - 131 - return helper->help(skb, protoff, ct, ctinfo); 132 - } 133 - 134 - static unsigned int ipv6_confirm(void *priv, 135 - struct sk_buff *skb, 136 - const struct nf_hook_state *state) 137 - { 138 - struct nf_conn *ct; 139 - enum ip_conntrack_info ctinfo; 140 - unsigned char pnum = ipv6_hdr(skb)->nexthdr; 141 - int protoff; 142 - __be16 frag_off; 143 - 144 - ct = nf_ct_get(skb, &ctinfo); 145 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 146 - goto out; 147 - 148 - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 149 - &frag_off); 150 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 151 - pr_debug("proto header not found\n"); 152 - goto out; 153 - } 154 - 155 - /* adjust seqs for loopback traffic only in outgoing direction */ 156 - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 157 - !nf_is_loopback_packet(skb)) { 158 - if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { 159 - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 160 - return NF_DROP; 161 - } 162 - } 163 - out: 164 - /* We've seen it coming out the other side: confirm it */ 165 - return nf_conntrack_confirm(skb); 166 - } 167 - 168 - static unsigned int ipv6_conntrack_in(void *priv, 169 - struct sk_buff *skb, 170 - const struct nf_hook_state *state) 171 - { 172 - return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 173 - } 174 - 175 - static unsigned int ipv6_conntrack_local(void *priv, 176 - 
struct sk_buff *skb, 177 - const struct nf_hook_state *state) 178 - { 179 - return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 180 - } 181 - 182 - static const struct nf_hook_ops ipv6_conntrack_ops[] = { 183 - { 184 - .hook = ipv6_conntrack_in, 185 - .pf = NFPROTO_IPV6, 186 - .hooknum = NF_INET_PRE_ROUTING, 187 - .priority = NF_IP6_PRI_CONNTRACK, 188 - }, 189 - { 190 - .hook = ipv6_conntrack_local, 191 - .pf = NFPROTO_IPV6, 192 - .hooknum = NF_INET_LOCAL_OUT, 193 - .priority = NF_IP6_PRI_CONNTRACK, 194 - }, 195 - { 196 - .hook = ipv6_helper, 197 - .pf = NFPROTO_IPV6, 198 - .hooknum = NF_INET_POST_ROUTING, 199 - .priority = NF_IP6_PRI_CONNTRACK_HELPER, 200 - }, 201 - { 202 - .hook = ipv6_confirm, 203 - .pf = NFPROTO_IPV6, 204 - .hooknum = NF_INET_POST_ROUTING, 205 - .priority = NF_IP6_PRI_LAST, 206 - }, 207 - { 208 - .hook = ipv6_helper, 209 - .pf = NFPROTO_IPV6, 210 - .hooknum = NF_INET_LOCAL_IN, 211 - .priority = NF_IP6_PRI_CONNTRACK_HELPER, 212 - }, 213 - { 214 - .hook = ipv6_confirm, 215 - .pf = NFPROTO_IPV6, 216 - .hooknum = NF_INET_LOCAL_IN, 217 - .priority = NF_IP6_PRI_LAST-1, 218 - }, 219 - }; 220 - 221 - static int 222 - ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 223 - { 224 - struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; 225 - const struct ipv6_pinfo *inet6 = inet6_sk(sk); 226 - const struct inet_sock *inet = inet_sk(sk); 227 - const struct nf_conntrack_tuple_hash *h; 228 - struct sockaddr_in6 sin6; 229 - struct nf_conn *ct; 230 - __be32 flow_label; 231 - int bound_dev_if; 232 - 233 - lock_sock(sk); 234 - tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 235 - tuple.src.u.tcp.port = inet->inet_sport; 236 - tuple.dst.u3.in6 = sk->sk_v6_daddr; 237 - tuple.dst.u.tcp.port = inet->inet_dport; 238 - tuple.dst.protonum = sk->sk_protocol; 239 - bound_dev_if = sk->sk_bound_dev_if; 240 - flow_label = inet6->flow_label; 241 - release_sock(sk); 242 - 243 - if (tuple.dst.protonum != IPPROTO_TCP && 244 - 
tuple.dst.protonum != IPPROTO_SCTP) 245 - return -ENOPROTOOPT; 246 - 247 - if (*len < 0 || (unsigned int) *len < sizeof(sin6)) 248 - return -EINVAL; 249 - 250 - h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 251 - if (!h) { 252 - pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", 253 - &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), 254 - &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); 255 - return -ENOENT; 256 - } 257 - 258 - ct = nf_ct_tuplehash_to_ctrack(h); 259 - 260 - sin6.sin6_family = AF_INET6; 261 - sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 262 - sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; 263 - memcpy(&sin6.sin6_addr, 264 - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 265 - sizeof(sin6.sin6_addr)); 266 - 267 - nf_ct_put(ct); 268 - sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); 269 - return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 270 - } 271 - 272 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 273 - 274 - #include <linux/netfilter/nfnetlink.h> 275 - #include <linux/netfilter/nfnetlink_conntrack.h> 276 - 277 - static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 278 - const struct nf_conntrack_tuple *tuple) 279 - { 280 - if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || 281 - nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) 282 - goto nla_put_failure; 283 - return 0; 284 - 285 - nla_put_failure: 286 - return -1; 287 - } 288 - 289 - static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { 290 - [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, 291 - [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, 292 - }; 293 - 294 - static int ipv6_nlattr_to_tuple(struct nlattr *tb[], 295 - struct nf_conntrack_tuple *t) 296 - { 297 - if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) 298 - return -EINVAL; 299 - 300 - t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); 301 - t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); 302 
- 303 - return 0; 304 - } 305 - #endif 306 - 307 - static int ipv6_hooks_register(struct net *net) 308 - { 309 - struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); 310 - int err = 0; 311 - 312 - mutex_lock(&register_ipv6_hooks); 313 - cnet->users++; 314 - if (cnet->users > 1) 315 - goto out_unlock; 316 - 317 - err = nf_defrag_ipv6_enable(net); 318 - if (err < 0) { 319 - cnet->users = 0; 320 - goto out_unlock; 321 - } 322 - 323 - err = nf_register_net_hooks(net, ipv6_conntrack_ops, 324 - ARRAY_SIZE(ipv6_conntrack_ops)); 325 - if (err) 326 - cnet->users = 0; 327 - out_unlock: 328 - mutex_unlock(&register_ipv6_hooks); 329 - return err; 330 - } 331 - 332 - static void ipv6_hooks_unregister(struct net *net) 333 - { 334 - struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); 335 - 336 - mutex_lock(&register_ipv6_hooks); 337 - if (cnet->users && (--cnet->users == 0)) 338 - nf_unregister_net_hooks(net, ipv6_conntrack_ops, 339 - ARRAY_SIZE(ipv6_conntrack_ops)); 340 - mutex_unlock(&register_ipv6_hooks); 341 - } 342 - 343 - const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { 344 - .l3proto = PF_INET6, 345 - .pkt_to_tuple = ipv6_pkt_to_tuple, 346 - .invert_tuple = ipv6_invert_tuple, 347 - .get_l4proto = ipv6_get_l4proto, 348 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 349 - .tuple_to_nlattr = ipv6_tuple_to_nlattr, 350 - .nlattr_to_tuple = ipv6_nlattr_to_tuple, 351 - .nla_policy = ipv6_nla_policy, 352 - .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) + 353 - NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])), 354 - #endif 355 - .net_ns_get = ipv6_hooks_register, 356 - .net_ns_put = ipv6_hooks_unregister, 357 - .me = THIS_MODULE, 358 - }; 359 - 360 - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 361 - MODULE_LICENSE("GPL"); 362 - MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); 363 - 364 - static struct nf_sockopt_ops so_getorigdst6 = { 365 - .pf = NFPROTO_IPV6, 366 - .get_optmin = IP6T_SO_ORIGINAL_DST, 367 - 
.get_optmax = IP6T_SO_ORIGINAL_DST + 1, 368 - .get = ipv6_getorigdst, 369 - .owner = THIS_MODULE, 370 - }; 371 - 372 - static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = { 373 - &nf_conntrack_l4proto_tcp6, 374 - &nf_conntrack_l4proto_udp6, 375 - &nf_conntrack_l4proto_icmpv6, 376 - #ifdef CONFIG_NF_CT_PROTO_DCCP 377 - &nf_conntrack_l4proto_dccp6, 378 - #endif 379 - #ifdef CONFIG_NF_CT_PROTO_SCTP 380 - &nf_conntrack_l4proto_sctp6, 381 - #endif 382 - #ifdef CONFIG_NF_CT_PROTO_UDPLITE 383 - &nf_conntrack_l4proto_udplite6, 384 - #endif 385 - }; 386 - 387 - static int ipv6_net_init(struct net *net) 388 - { 389 - return nf_ct_l4proto_pernet_register(net, builtin_l4proto6, 390 - ARRAY_SIZE(builtin_l4proto6)); 391 - } 392 - 393 - static void ipv6_net_exit(struct net *net) 394 - { 395 - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, 396 - ARRAY_SIZE(builtin_l4proto6)); 397 - } 398 - 399 - static struct pernet_operations ipv6_net_ops = { 400 - .init = ipv6_net_init, 401 - .exit = ipv6_net_exit, 402 - .id = &conntrack6_net_id, 403 - .size = sizeof(struct conntrack6_net), 404 - }; 405 - 406 - static int __init nf_conntrack_l3proto_ipv6_init(void) 407 - { 408 - int ret = 0; 409 - 410 - need_conntrack(); 411 - 412 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 413 - if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) != 414 - nf_conntrack_l3proto_ipv6.nla_size)) 415 - return -EINVAL; 416 - #endif 417 - 418 - ret = nf_register_sockopt(&so_getorigdst6); 419 - if (ret < 0) { 420 - pr_err("Unable to register netfilter socket option\n"); 421 - return ret; 422 - } 423 - 424 - ret = register_pernet_subsys(&ipv6_net_ops); 425 - if (ret < 0) 426 - goto cleanup_sockopt; 427 - 428 - ret = nf_ct_l4proto_register(builtin_l4proto6, 429 - ARRAY_SIZE(builtin_l4proto6)); 430 - if (ret < 0) 431 - goto cleanup_pernet; 432 - 433 - ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); 434 - if (ret < 0) { 435 - pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); 
436 - goto cleanup_l4proto; 437 - } 438 - return ret; 439 - cleanup_l4proto: 440 - nf_ct_l4proto_unregister(builtin_l4proto6, 441 - ARRAY_SIZE(builtin_l4proto6)); 442 - cleanup_pernet: 443 - unregister_pernet_subsys(&ipv6_net_ops); 444 - cleanup_sockopt: 445 - nf_unregister_sockopt(&so_getorigdst6); 446 - return ret; 447 - } 448 - 449 - static void __exit nf_conntrack_l3proto_ipv6_fini(void) 450 - { 451 - synchronize_net(); 452 - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); 453 - nf_ct_l4proto_unregister(builtin_l4proto6, 454 - ARRAY_SIZE(builtin_l4proto6)); 455 - unregister_pernet_subsys(&ipv6_net_ops); 456 - nf_unregister_sockopt(&so_getorigdst6); 457 - } 458 - 459 - module_init(nf_conntrack_l3proto_ipv6_init); 460 - module_exit(nf_conntrack_l3proto_ipv6_fini);
+11 -6
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c net/netfilter/nf_conntrack_proto_icmpv6.c
··· 23 23 #include <net/netfilter/nf_conntrack_tuple.h> 24 24 #include <net/netfilter/nf_conntrack_l4proto.h> 25 25 #include <net/netfilter/nf_conntrack_core.h> 26 + #include <net/netfilter/nf_conntrack_timeout.h> 26 27 #include <net/netfilter/nf_conntrack_zones.h> 27 28 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 28 29 #include <net/netfilter/nf_log.h> ··· 94 93 static int icmpv6_packet(struct nf_conn *ct, 95 94 const struct sk_buff *skb, 96 95 unsigned int dataoff, 97 - enum ip_conntrack_info ctinfo, 98 - unsigned int *timeout) 96 + enum ip_conntrack_info ctinfo) 99 97 { 98 + unsigned int *timeout = nf_ct_timeout_lookup(ct); 99 + 100 + if (!timeout) 101 + timeout = icmpv6_get_timeouts(nf_ct_net(ct)); 102 + 100 103 /* Do not immediately delete the connection after the first 101 104 successful reply to avoid excessive conntrackd traffic 102 105 and also to handle correctly ICMP echo reply duplicates. */ ··· 111 106 112 107 /* Called when a new connection for this protocol found. */ 113 108 static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, 114 - unsigned int dataoff, unsigned int *timeouts) 109 + unsigned int dataoff) 115 110 { 116 111 static const u_int8_t valid_new[] = { 117 112 [ICMPV6_ECHO_REQUEST - 128] = 1, ··· 157 152 158 153 /* Ordinarily, we'd expect the inverted tupleproto, but it's 159 154 been preserved inside the ICMP. 
*/ 160 - if (!nf_ct_invert_tuple(&intuple, &origtuple, 161 - &nf_conntrack_l3proto_ipv6, inproto)) { 155 + if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { 162 156 pr_debug("icmpv6_error: Can't invert tuple\n"); 163 157 return -NF_ACCEPT; 164 158 } ··· 285 281 unsigned int *timeout = data; 286 282 struct nf_icmp_net *in = icmpv6_pernet(net); 287 283 284 + if (!timeout) 285 + timeout = icmpv6_get_timeouts(net); 288 286 if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { 289 287 *timeout = 290 288 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; ··· 365 359 .pkt_to_tuple = icmpv6_pkt_to_tuple, 366 360 .invert_tuple = icmpv6_invert_tuple, 367 361 .packet = icmpv6_packet, 368 - .get_timeouts = icmpv6_get_timeouts, 369 362 .new = icmpv6_new, 370 363 .error = icmpv6_error, 371 364 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+12 -5
net/ipv6/netfilter/nf_conntrack_reasm.c
··· 33 33 34 34 #include <net/sock.h> 35 35 #include <net/snmp.h> 36 - #include <net/inet_frag.h> 36 + #include <net/ipv6_frag.h> 37 37 38 - #include <net/ipv6.h> 39 38 #include <net/protocol.h> 40 39 #include <net/transp_v6.h> 41 40 #include <net/rawv6.h> ··· 150 151 fq = container_of(frag, struct frag_queue, q); 151 152 net = container_of(fq->q.net, struct net, nf_frag.frags); 152 153 153 - ip6_expire_frag_queue(net, fq); 154 + ip6frag_expire_frag_queue(net, fq); 154 155 } 155 156 156 157 /* Creation primitives. */ ··· 623 624 .exit = nf_ct_net_exit, 624 625 }; 625 626 627 + static const struct rhashtable_params nfct_rhash_params = { 628 + .head_offset = offsetof(struct inet_frag_queue, node), 629 + .hashfn = ip6frag_key_hashfn, 630 + .obj_hashfn = ip6frag_obj_hashfn, 631 + .obj_cmpfn = ip6frag_obj_cmpfn, 632 + .automatic_shrinking = true, 633 + }; 634 + 626 635 int nf_ct_frag6_init(void) 627 636 { 628 637 int ret = 0; 629 638 630 - nf_frags.constructor = ip6_frag_init; 639 + nf_frags.constructor = ip6frag_init; 631 640 nf_frags.destructor = NULL; 632 641 nf_frags.qsize = sizeof(struct frag_queue); 633 642 nf_frags.frag_expire = nf_ct_frag6_expire; 634 643 nf_frags.frags_cache_name = nf_frags_cache_name; 635 - nf_frags.rhash_params = ip6_rhash_params; 644 + nf_frags.rhash_params = nfct_rhash_params; 636 645 ret = inet_frags_init(&nf_frags); 637 646 if (ret) 638 647 goto out;
+1 -3
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
··· 14 14 #include <linux/skbuff.h> 15 15 #include <linux/icmp.h> 16 16 #include <linux/sysctl.h> 17 - #include <net/ipv6.h> 18 - #include <net/inet_frag.h> 17 + #include <net/ipv6_frag.h> 19 18 20 19 #include <linux/netfilter_ipv6.h> 21 20 #include <linux/netfilter_bridge.h> ··· 22 23 #include <net/netfilter/nf_conntrack.h> 23 24 #include <net/netfilter/nf_conntrack_helper.h> 24 25 #include <net/netfilter/nf_conntrack_l4proto.h> 25 - #include <net/netfilter/nf_conntrack_l3proto.h> 26 26 #include <net/netfilter/nf_conntrack_core.h> 27 27 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 28 28 #endif
+7 -85
net/ipv6/reassembly.c
··· 57 57 #include <net/rawv6.h> 58 58 #include <net/ndisc.h> 59 59 #include <net/addrconf.h> 60 - #include <net/inet_frag.h> 60 + #include <net/ipv6_frag.h> 61 61 #include <net/inet_ecn.h> 62 62 63 63 static const char ip6_frag_cache_name[] = "ip6-frags"; ··· 72 72 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 73 73 struct net_device *dev); 74 74 75 - void ip6_frag_init(struct inet_frag_queue *q, const void *a) 76 - { 77 - struct frag_queue *fq = container_of(q, struct frag_queue, q); 78 - const struct frag_v6_compare_key *key = a; 79 - 80 - q->key.v6 = *key; 81 - fq->ecn = 0; 82 - } 83 - EXPORT_SYMBOL(ip6_frag_init); 84 - 85 - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) 86 - { 87 - struct net_device *dev = NULL; 88 - struct sk_buff *head; 89 - 90 - rcu_read_lock(); 91 - spin_lock(&fq->q.lock); 92 - 93 - if (fq->q.flags & INET_FRAG_COMPLETE) 94 - goto out; 95 - 96 - inet_frag_kill(&fq->q); 97 - 98 - dev = dev_get_by_index_rcu(net, fq->iif); 99 - if (!dev) 100 - goto out; 101 - 102 - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 103 - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); 104 - 105 - /* Don't send error if the first segment did not arrive. */ 106 - head = fq->q.fragments; 107 - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) 108 - goto out; 109 - 110 - /* But use as source device on which LAST ARRIVED 111 - * segment was received. And do not use fq->dev 112 - * pointer directly, device might already disappeared. 
113 - */ 114 - head->dev = dev; 115 - skb_get(head); 116 - spin_unlock(&fq->q.lock); 117 - 118 - icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); 119 - kfree_skb(head); 120 - goto out_rcu_unlock; 121 - 122 - out: 123 - spin_unlock(&fq->q.lock); 124 - out_rcu_unlock: 125 - rcu_read_unlock(); 126 - inet_frag_put(&fq->q); 127 - } 128 - EXPORT_SYMBOL(ip6_expire_frag_queue); 129 - 130 75 static void ip6_frag_expire(struct timer_list *t) 131 76 { 132 77 struct inet_frag_queue *frag = from_timer(frag, t, timer); ··· 81 136 fq = container_of(frag, struct frag_queue, q); 82 137 net = container_of(fq->q.net, struct net, ipv6.frags); 83 138 84 - ip6_expire_frag_queue(net, fq); 139 + ip6frag_expire_frag_queue(net, fq); 85 140 } 86 141 87 142 static struct frag_queue * ··· 641 696 .exit = ipv6_frags_exit_net, 642 697 }; 643 698 644 - static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) 645 - { 646 - return jhash2(data, 647 - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 648 - } 649 - 650 - static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) 651 - { 652 - const struct inet_frag_queue *fq = data; 653 - 654 - return jhash2((const u32 *)&fq->key.v6, 655 - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 656 - } 657 - 658 - static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) 659 - { 660 - const struct frag_v6_compare_key *key = arg->key; 661 - const struct inet_frag_queue *fq = ptr; 662 - 663 - return !!memcmp(&fq->key, key, sizeof(*key)); 664 - } 665 - 666 - const struct rhashtable_params ip6_rhash_params = { 699 + static const struct rhashtable_params ip6_rhash_params = { 667 700 .head_offset = offsetof(struct inet_frag_queue, node), 668 - .hashfn = ip6_key_hashfn, 669 - .obj_hashfn = ip6_obj_hashfn, 670 - .obj_cmpfn = ip6_obj_cmpfn, 701 + .hashfn = ip6frag_key_hashfn, 702 + .obj_hashfn = ip6frag_obj_hashfn, 703 + .obj_cmpfn = ip6frag_obj_cmpfn, 671 704 .automatic_shrinking = true, 672 705 }; 673 - 
EXPORT_SYMBOL(ip6_rhash_params); 674 706 675 707 int __init ipv6_frag_init(void) 676 708 { 677 709 int ret; 678 710 679 - ip6_frags.constructor = ip6_frag_init; 711 + ip6_frags.constructor = ip6frag_init; 680 712 ip6_frags.destructor = NULL; 681 713 ip6_frags.qsize = sizeof(struct frag_queue); 682 714 ip6_frags.frag_expire = ip6_frag_expire;
+7 -5
net/netfilter/Kconfig
··· 49 49 config NF_CONNTRACK 50 50 tristate "Netfilter connection tracking support" 51 51 default m if NETFILTER_ADVANCED=n 52 + select NF_DEFRAG_IPV4 53 + select NF_DEFRAG_IPV6 if IPV6 != n 52 54 help 53 55 Connection tracking keeps a record of what packets have passed 54 56 through your machine, in order to figure out how they are related ··· 617 615 tristate "Netfilter nf_tables socket match support" 618 616 depends on IPV6 || IPV6=n 619 617 select NF_SOCKET_IPV4 620 - select NF_SOCKET_IPV6 if IPV6 618 + select NF_SOCKET_IPV6 if NF_TABLES_IPV6 621 619 help 622 620 This option allows matching for the presence or absence of a 623 621 corresponding socket and its attributes. ··· 883 881 tristate "LOG target support" 884 882 select NF_LOG_COMMON 885 883 select NF_LOG_IPV4 886 - select NF_LOG_IPV6 if IPV6 884 + select NF_LOG_IPV6 if IP6_NF_IPTABLES 887 885 default m if NETFILTER_ADVANCED=n 888 886 help 889 887 This option adds a `LOG' target, which allows you to create rules in ··· 975 973 depends on IPV6 || IPV6=n 976 974 depends on !NF_CONNTRACK || NF_CONNTRACK 977 975 select NF_DUP_IPV4 978 - select NF_DUP_IPV6 if IPV6 976 + select NF_DUP_IPV6 if IP6_NF_IPTABLES 979 977 ---help--- 980 978 This option adds a "TEE" target with which a packet can be cloned and 981 979 this clone be rerouted to another nexthop. ··· 1483 1481 depends on NETFILTER_ADVANCED 1484 1482 depends on IPV6 || IPV6=n 1485 1483 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1486 - depends on NF_SOCKET_IPV4 1487 - depends on NF_SOCKET_IPV6 1484 + select NF_SOCKET_IPV4 1485 + select NF_SOCKET_IPV6 if IP6_NF_IPTABLES 1488 1486 select NF_DEFRAG_IPV4 1489 1487 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n 1490 1488 help
+6 -1
net/netfilter/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o 3 3 4 - nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o 4 + nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \ 5 + nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \ 6 + nf_conntrack_proto_icmp.o \ 7 + nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o 8 + 9 + nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o 5 10 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o 6 11 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 7 12 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
+43 -24
net/netfilter/ipvs/ip_vs_conn.c
··· 825 825 826 826 /* Unlink conn if not referenced anymore */ 827 827 if (likely(ip_vs_conn_unlink(cp))) { 828 + struct ip_vs_conn *ct = cp->control; 829 + 828 830 /* delete the timer if it is activated by other users */ 829 831 del_timer(&cp->timer); 830 832 831 833 /* does anybody control me? */ 832 - if (cp->control) 834 + if (ct) { 833 835 ip_vs_control_del(cp); 836 + /* Drop CTL or non-assured TPL if not used anymore */ 837 + if (!cp->timeout && !atomic_read(&ct->n_control) && 838 + (!(ct->flags & IP_VS_CONN_F_TEMPLATE) || 839 + !(ct->state & IP_VS_CTPL_S_ASSURED))) { 840 + IP_VS_DBG(4, "drop controlling connection\n"); 841 + ct->timeout = 0; 842 + ip_vs_conn_expire_now(ct); 843 + } 844 + } 834 845 835 846 if ((cp->flags & IP_VS_CONN_F_NFCT) && 836 847 !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) { ··· 883 872 884 873 /* Modify timer, so that it expires as soon as possible. 885 874 * Can be called without reference only if under RCU lock. 875 + * We can have such chain of conns linked with ->control: DATA->CTL->TPL 876 + * - DATA (eg. 
FTP) and TPL (persistence) can be present depending on setup 877 + * - cp->timeout=0 indicates all conns from chain should be dropped but 878 + * TPL is not dropped if in assured state 886 879 */ 887 880 void ip_vs_conn_expire_now(struct ip_vs_conn *cp) 888 881 { ··· 1122 1107 &cp->caddr.in6, ntohs(cp->cport), 1123 1108 &cp->vaddr.in6, ntohs(cp->vport), 1124 1109 dbuf, ntohs(cp->dport), 1125 - ip_vs_state_name(cp->protocol, cp->state), 1110 + ip_vs_state_name(cp), 1126 1111 (cp->timer.expires-jiffies)/HZ, pe_data); 1127 1112 else 1128 1113 #endif ··· 1133 1118 ntohl(cp->caddr.ip), ntohs(cp->cport), 1134 1119 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1135 1120 dbuf, ntohs(cp->dport), 1136 - ip_vs_state_name(cp->protocol, cp->state), 1121 + ip_vs_state_name(cp), 1137 1122 (cp->timer.expires-jiffies)/HZ, pe_data); 1138 1123 } 1139 1124 return 0; ··· 1184 1169 &cp->caddr.in6, ntohs(cp->cport), 1185 1170 &cp->vaddr.in6, ntohs(cp->vport), 1186 1171 dbuf, ntohs(cp->dport), 1187 - ip_vs_state_name(cp->protocol, cp->state), 1172 + ip_vs_state_name(cp), 1188 1173 ip_vs_origin_name(cp->flags), 1189 1174 (cp->timer.expires-jiffies)/HZ); 1190 1175 else ··· 1196 1181 ntohl(cp->caddr.ip), ntohs(cp->cport), 1197 1182 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1198 1183 dbuf, ntohs(cp->dport), 1199 - ip_vs_state_name(cp->protocol, cp->state), 1184 + ip_vs_state_name(cp), 1200 1185 ip_vs_origin_name(cp->flags), 1201 1186 (cp->timer.expires-jiffies)/HZ); 1202 1187 } ··· 1212 1197 #endif 1213 1198 1214 1199 1215 - /* 1216 - * Randomly drop connection entries before running out of memory 1200 + /* Randomly drop connection entries before running out of memory 1201 + * Can be used for DATA and CTL conns. 
For TPL conns there are exceptions: 1202 + * - traffic for services in OPS mode increases ct->in_pkts, so it is supported 1203 + * - traffic for services not in OPS mode does not increase ct->in_pkts in 1204 + * all cases, so it is not supported 1217 1205 */ 1218 1206 static inline int todrop_entry(struct ip_vs_conn *cp) 1219 1207 { ··· 1260 1242 void ip_vs_random_dropentry(struct netns_ipvs *ipvs) 1261 1243 { 1262 1244 int idx; 1263 - struct ip_vs_conn *cp, *cp_c; 1245 + struct ip_vs_conn *cp; 1264 1246 1265 1247 rcu_read_lock(); 1266 1248 /* ··· 1272 1254 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 1273 1255 if (cp->ipvs != ipvs) 1274 1256 continue; 1257 + if (atomic_read(&cp->n_control)) 1258 + continue; 1275 1259 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 1276 - if (atomic_read(&cp->n_control) || 1277 - !ip_vs_conn_ops_mode(cp)) 1278 - continue; 1279 - else 1280 - /* connection template of OPS */ 1260 + /* connection template of OPS */ 1261 + if (ip_vs_conn_ops_mode(cp)) 1281 1262 goto try_drop; 1263 + if (!(cp->state & IP_VS_CTPL_S_ASSURED)) 1264 + goto drop; 1265 + continue; 1282 1266 } 1283 1267 if (cp->protocol == IPPROTO_TCP) { 1284 1268 switch(cp->state) { ··· 1314 1294 continue; 1315 1295 } 1316 1296 1317 - IP_VS_DBG(4, "del connection\n"); 1297 + drop: 1298 + IP_VS_DBG(4, "drop connection\n"); 1299 + cp->timeout = 0; 1318 1300 ip_vs_conn_expire_now(cp); 1319 - cp_c = cp->control; 1320 - /* cp->control is valid only with reference to cp */ 1321 - if (cp_c && __ip_vs_conn_get(cp)) { 1322 - IP_VS_DBG(4, "del conn template\n"); 1323 - ip_vs_conn_expire_now(cp_c); 1324 - __ip_vs_conn_put(cp); 1325 - } 1326 1301 } 1327 1302 cond_resched_rcu(); 1328 1303 } ··· 1340 1325 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { 1341 1326 if (cp->ipvs != ipvs) 1342 1327 continue; 1343 - IP_VS_DBG(4, "del connection\n"); 1344 - ip_vs_conn_expire_now(cp); 1328 + /* As timers are expired in LIFO order, restart 1329 + * the timer of controlling 
connection first, so 1330 + * that it is expired after us. 1331 + */ 1345 1332 cp_c = cp->control; 1346 1333 /* cp->control is valid only with reference to cp */ 1347 1334 if (cp_c && __ip_vs_conn_get(cp)) { 1348 - IP_VS_DBG(4, "del conn template\n"); 1335 + IP_VS_DBG(4, "del controlling connection\n"); 1349 1336 ip_vs_conn_expire_now(cp_c); 1350 1337 __ip_vs_conn_put(cp); 1351 1338 } 1339 + IP_VS_DBG(4, "del connection\n"); 1340 + ip_vs_conn_expire_now(cp); 1352 1341 } 1353 1342 cond_resched_rcu(); 1354 1343 }
+16 -3
net/netfilter/ipvs/ip_vs_proto.c
··· 42 42 43 43 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 44 44 45 + /* States for conn templates: NONE or words separated with ",", max 15 chars */ 46 + static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 47 + [IP_VS_CTPL_S_NONE] = "NONE", 48 + [IP_VS_CTPL_S_ASSURED] = "ASSURED", 49 + }; 45 50 46 51 /* 47 52 * register an ipvs protocol ··· 198 193 } 199 194 200 195 201 - const char * ip_vs_state_name(__u16 proto, int state) 196 + const char *ip_vs_state_name(const struct ip_vs_conn *cp) 202 197 { 203 - struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 198 + unsigned int state = cp->state; 199 + struct ip_vs_protocol *pp; 204 200 201 + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 202 + 203 + if (state >= IP_VS_CTPL_S_LAST) 204 + return "ERR!"; 205 + return ip_vs_ctpl_state_name_table[state] ? : "?"; 206 + } 207 + pp = ip_vs_proto_get(cp->protocol); 205 208 if (pp == NULL || pp->state_name == NULL) 206 - return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; 209 + return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 207 210 return pp->state_name(state); 208 211 } 209 212
+2
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 461 461 cp->flags &= ~IP_VS_CONN_F_INACTIVE; 462 462 } 463 463 } 464 + if (next_state == IP_VS_SCTP_S_ESTABLISHED) 465 + ip_vs_control_assure_ct(cp); 464 466 } 465 467 if (likely(pd)) 466 468 cp->timeout = pd->timeout_table[cp->state = next_state];
+2
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 569 569 cp->flags &= ~IP_VS_CONN_F_INACTIVE; 570 570 } 571 571 } 572 + if (new_state == IP_VS_TCP_S_ESTABLISHED) 573 + ip_vs_control_assure_ct(cp); 572 574 } 573 575 574 576 if (likely(pd))
+2
net/netfilter/ipvs/ip_vs_proto_udp.c
··· 460 460 } 461 461 462 462 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; 463 + if (direction == IP_VS_DIR_OUTPUT) 464 + ip_vs_control_assure_ct(cp); 463 465 } 464 466 465 467 static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+6 -12
net/netfilter/ipvs/ip_vs_sync.c
··· 1003 1003 continue; 1004 1004 } 1005 1005 } else { 1006 - /* protocol in templates is not used for state/timeout */ 1007 - if (state > 0) { 1008 - IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", 1009 - state); 1010 - state = 0; 1011 - } 1006 + if (state >= IP_VS_CTPL_S_LAST) 1007 + IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n", 1008 + state); 1012 1009 } 1013 1010 1014 1011 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, ··· 1163 1166 goto out; 1164 1167 } 1165 1168 } else { 1166 - /* protocol in templates is not used for state/timeout */ 1167 - if (state > 0) { 1168 - IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", 1169 - state); 1170 - state = 0; 1171 - } 1169 + if (state >= IP_VS_CTPL_S_LAST) 1170 + IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n", 1171 + state); 1172 1172 } 1173 1173 if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data, 1174 1174 pe_data_len, pe_name, pe_name_len)) {
+300 -88
net/netfilter/nf_conncount.c
··· 44 44 45 45 /* we will save the tuples of all connections we care about */ 46 46 struct nf_conncount_tuple { 47 - struct hlist_node node; 47 + struct list_head node; 48 48 struct nf_conntrack_tuple tuple; 49 49 struct nf_conntrack_zone zone; 50 50 int cpu; 51 51 u32 jiffies32; 52 + struct rcu_head rcu_head; 52 53 }; 53 54 54 55 struct nf_conncount_rb { 55 56 struct rb_node node; 56 - struct hlist_head hhead; /* connections/hosts in same subnet */ 57 + struct nf_conncount_list list; 57 58 u32 key[MAX_KEYLEN]; 59 + struct rcu_head rcu_head; 58 60 }; 59 61 60 62 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; ··· 64 62 struct nf_conncount_data { 65 63 unsigned int keylen; 66 64 struct rb_root root[CONNCOUNT_SLOTS]; 65 + struct net *net; 66 + struct work_struct gc_work; 67 + unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)]; 68 + unsigned int gc_tree; 67 69 }; 68 70 69 71 static u_int32_t conncount_rnd __read_mostly; ··· 88 82 return memcmp(a, b, klen * sizeof(u32)); 89 83 } 90 84 91 - bool nf_conncount_add(struct hlist_head *head, 92 - const struct nf_conntrack_tuple *tuple, 93 - const struct nf_conntrack_zone *zone) 85 + enum nf_conncount_list_add 86 + nf_conncount_add(struct nf_conncount_list *list, 87 + const struct nf_conntrack_tuple *tuple, 88 + const struct nf_conntrack_zone *zone) 94 89 { 95 90 struct nf_conncount_tuple *conn; 96 91 92 + if (WARN_ON_ONCE(list->count > INT_MAX)) 93 + return NF_CONNCOUNT_ERR; 94 + 97 95 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 98 96 if (conn == NULL) 99 - return false; 97 + return NF_CONNCOUNT_ERR; 98 + 100 99 conn->tuple = *tuple; 101 100 conn->zone = *zone; 102 101 conn->cpu = raw_smp_processor_id(); 103 102 conn->jiffies32 = (u32)jiffies; 104 - hlist_add_head(&conn->node, head); 105 - return true; 103 + spin_lock(&list->list_lock); 104 + if (list->dead == true) { 105 + kmem_cache_free(conncount_conn_cachep, conn); 106 + spin_unlock(&list->list_lock); 107 + 
return NF_CONNCOUNT_SKIP; 108 + } 109 + list_add_tail(&conn->node, &list->head); 110 + list->count++; 111 + spin_unlock(&list->list_lock); 112 + return NF_CONNCOUNT_ADDED; 106 113 } 107 114 EXPORT_SYMBOL_GPL(nf_conncount_add); 108 115 116 + static void __conn_free(struct rcu_head *h) 117 + { 118 + struct nf_conncount_tuple *conn; 119 + 120 + conn = container_of(h, struct nf_conncount_tuple, rcu_head); 121 + kmem_cache_free(conncount_conn_cachep, conn); 122 + } 123 + 124 + static bool conn_free(struct nf_conncount_list *list, 125 + struct nf_conncount_tuple *conn) 126 + { 127 + bool free_entry = false; 128 + 129 + spin_lock(&list->list_lock); 130 + 131 + if (list->count == 0) { 132 + spin_unlock(&list->list_lock); 133 + return free_entry; 134 + } 135 + 136 + list->count--; 137 + list_del_rcu(&conn->node); 138 + if (list->count == 0) 139 + free_entry = true; 140 + 141 + spin_unlock(&list->list_lock); 142 + call_rcu(&conn->rcu_head, __conn_free); 143 + return free_entry; 144 + } 145 + 109 146 static const struct nf_conntrack_tuple_hash * 110 - find_or_evict(struct net *net, struct nf_conncount_tuple *conn) 147 + find_or_evict(struct net *net, struct nf_conncount_list *list, 148 + struct nf_conncount_tuple *conn, bool *free_entry) 111 149 { 112 150 const struct nf_conntrack_tuple_hash *found; 113 151 unsigned long a, b; ··· 171 121 */ 172 122 age = a - b; 173 123 if (conn->cpu == cpu || age >= 2) { 174 - hlist_del(&conn->node); 175 - kmem_cache_free(conncount_conn_cachep, conn); 124 + *free_entry = conn_free(list, conn); 176 125 return ERR_PTR(-ENOENT); 177 126 } 178 127 179 128 return ERR_PTR(-EAGAIN); 180 129 } 181 130 182 - unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 183 - const struct nf_conntrack_tuple *tuple, 184 - const struct nf_conntrack_zone *zone, 185 - bool *addit) 131 + void nf_conncount_lookup(struct net *net, 132 + struct nf_conncount_list *list, 133 + const struct nf_conntrack_tuple *tuple, 134 + const struct 
nf_conntrack_zone *zone, 135 + bool *addit) 186 136 { 187 137 const struct nf_conntrack_tuple_hash *found; 188 - struct nf_conncount_tuple *conn; 138 + struct nf_conncount_tuple *conn, *conn_n; 189 139 struct nf_conn *found_ct; 190 - struct hlist_node *n; 191 - unsigned int length = 0; 140 + unsigned int collect = 0; 141 + bool free_entry = false; 192 142 143 + /* best effort only */ 193 144 *addit = tuple ? true : false; 194 145 195 146 /* check the saved connections */ 196 - hlist_for_each_entry_safe(conn, n, head, node) { 197 - found = find_or_evict(net, conn); 147 + list_for_each_entry_safe(conn, conn_n, &list->head, node) { 148 + if (collect > CONNCOUNT_GC_MAX_NODES) 149 + break; 150 + 151 + found = find_or_evict(net, list, conn, &free_entry); 198 152 if (IS_ERR(found)) { 199 153 /* Not found, but might be about to be confirmed */ 200 154 if (PTR_ERR(found) == -EAGAIN) { 201 - length++; 202 155 if (!tuple) 203 156 continue; 204 157 ··· 209 156 nf_ct_zone_id(&conn->zone, conn->zone.dir) == 210 157 nf_ct_zone_id(zone, zone->dir)) 211 158 *addit = false; 212 - } 159 + } else if (PTR_ERR(found) == -ENOENT) 160 + collect++; 213 161 continue; 214 162 } 215 163 ··· 219 165 if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && 220 166 nf_ct_zone_equal(found_ct, zone, zone->dir)) { 221 167 /* 222 - * Just to be sure we have it only once in the list. 223 168 * We should not see tuples twice unless someone hooks 224 169 * this into a table without "-p tcp --syn". 170 + * 171 + * Attempt to avoid a re-add in this case. 
225 172 */ 226 173 *addit = false; 227 174 } else if (already_closed(found_ct)) { ··· 231 176 * closed already -> ditch it 232 177 */ 233 178 nf_ct_put(found_ct); 234 - hlist_del(&conn->node); 235 - kmem_cache_free(conncount_conn_cachep, conn); 179 + conn_free(list, conn); 180 + collect++; 236 181 continue; 237 182 } 238 183 239 184 nf_ct_put(found_ct); 240 - length++; 241 185 } 242 - 243 - return length; 244 186 } 245 187 EXPORT_SYMBOL_GPL(nf_conncount_lookup); 188 + 189 + void nf_conncount_list_init(struct nf_conncount_list *list) 190 + { 191 + spin_lock_init(&list->list_lock); 192 + INIT_LIST_HEAD(&list->head); 193 + list->count = 1; 194 + list->dead = false; 195 + } 196 + EXPORT_SYMBOL_GPL(nf_conncount_list_init); 197 + 198 + /* Return true if the list is empty */ 199 + bool nf_conncount_gc_list(struct net *net, 200 + struct nf_conncount_list *list) 201 + { 202 + const struct nf_conntrack_tuple_hash *found; 203 + struct nf_conncount_tuple *conn, *conn_n; 204 + struct nf_conn *found_ct; 205 + unsigned int collected = 0; 206 + bool free_entry = false; 207 + 208 + list_for_each_entry_safe(conn, conn_n, &list->head, node) { 209 + found = find_or_evict(net, list, conn, &free_entry); 210 + if (IS_ERR(found)) { 211 + if (PTR_ERR(found) == -ENOENT) { 212 + if (free_entry) 213 + return true; 214 + collected++; 215 + } 216 + continue; 217 + } 218 + 219 + found_ct = nf_ct_tuplehash_to_ctrack(found); 220 + if (already_closed(found_ct)) { 221 + /* 222 + * we do not care about connections which are 223 + * closed already -> ditch it 224 + */ 225 + nf_ct_put(found_ct); 226 + if (conn_free(list, conn)) 227 + return true; 228 + collected++; 229 + continue; 230 + } 231 + 232 + nf_ct_put(found_ct); 233 + if (collected > CONNCOUNT_GC_MAX_NODES) 234 + return false; 235 + } 236 + return false; 237 + } 238 + EXPORT_SYMBOL_GPL(nf_conncount_gc_list); 239 + 240 + static void __tree_nodes_free(struct rcu_head *h) 241 + { 242 + struct nf_conncount_rb *rbconn; 243 + 244 + rbconn = 
container_of(h, struct nf_conncount_rb, rcu_head); 245 + kmem_cache_free(conncount_rb_cachep, rbconn); 246 + } 246 247 247 248 static void tree_nodes_free(struct rb_root *root, 248 249 struct nf_conncount_rb *gc_nodes[], ··· 308 197 309 198 while (gc_count) { 310 199 rbconn = gc_nodes[--gc_count]; 311 - rb_erase(&rbconn->node, root); 312 - kmem_cache_free(conncount_rb_cachep, rbconn); 200 + spin_lock(&rbconn->list.list_lock); 201 + if (rbconn->list.count == 0 && rbconn->list.dead == false) { 202 + rbconn->list.dead = true; 203 + rb_erase(&rbconn->node, root); 204 + call_rcu(&rbconn->rcu_head, __tree_nodes_free); 205 + } 206 + spin_unlock(&rbconn->list.list_lock); 313 207 } 314 208 } 315 209 316 - static unsigned int 317 - count_tree(struct net *net, struct rb_root *root, 318 - const u32 *key, u8 keylen, 319 - const struct nf_conntrack_tuple *tuple, 320 - const struct nf_conntrack_zone *zone) 210 + static void schedule_gc_worker(struct nf_conncount_data *data, int tree) 321 211 { 212 + set_bit(tree, data->pending_trees); 213 + schedule_work(&data->gc_work); 214 + } 215 + 216 + static unsigned int 217 + insert_tree(struct net *net, 218 + struct nf_conncount_data *data, 219 + struct rb_root *root, 220 + unsigned int hash, 221 + const u32 *key, 222 + u8 keylen, 223 + const struct nf_conntrack_tuple *tuple, 224 + const struct nf_conntrack_zone *zone) 225 + { 226 + enum nf_conncount_list_add ret; 322 227 struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; 323 228 struct rb_node **rbnode, *parent; 324 229 struct nf_conncount_rb *rbconn; 325 230 struct nf_conncount_tuple *conn; 326 - unsigned int gc_count; 327 - bool no_gc = false; 231 + unsigned int count = 0, gc_count = 0; 232 + bool node_found = false; 328 233 329 - restart: 330 - gc_count = 0; 234 + spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 235 + 331 236 parent = NULL; 332 237 rbnode = &(root->rb_node); 333 238 while (*rbnode) { 334 239 int diff; 335 - bool addit; 336 - 337 240 rbconn = 
rb_entry(*rbnode, struct nf_conncount_rb, node); 338 241 339 242 parent = *rbnode; ··· 357 232 } else if (diff > 0) { 358 233 rbnode = &((*rbnode)->rb_right); 359 234 } else { 360 - /* same source network -> be counted! */ 361 - unsigned int count; 362 - 363 - count = nf_conncount_lookup(net, &rbconn->hhead, tuple, 364 - zone, &addit); 365 - 366 - tree_nodes_free(root, gc_nodes, gc_count); 367 - if (!addit) 368 - return count; 369 - 370 - if (!nf_conncount_add(&rbconn->hhead, tuple, zone)) 371 - return 0; /* hotdrop */ 372 - 373 - return count + 1; 235 + /* unlikely: other cpu added node already */ 236 + node_found = true; 237 + ret = nf_conncount_add(&rbconn->list, tuple, zone); 238 + if (ret == NF_CONNCOUNT_ERR) { 239 + count = 0; /* hotdrop */ 240 + } else if (ret == NF_CONNCOUNT_ADDED) { 241 + count = rbconn->list.count; 242 + } else { 243 + /* NF_CONNCOUNT_SKIP, rbconn is already 244 + * reclaimed by gc, insert a new tree node 245 + */ 246 + node_found = false; 247 + } 248 + break; 374 249 } 375 250 376 - if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) 251 + if (gc_count >= ARRAY_SIZE(gc_nodes)) 377 252 continue; 378 253 379 - /* only used for GC on hhead, retval and 'addit' ignored */ 380 - nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit); 381 - if (hlist_empty(&rbconn->hhead)) 254 + if (nf_conncount_gc_list(net, &rbconn->list)) 382 255 gc_nodes[gc_count++] = rbconn; 383 256 } 384 257 385 258 if (gc_count) { 386 - no_gc = true; 387 259 tree_nodes_free(root, gc_nodes, gc_count); 388 260 /* tree_node_free before new allocation permits 389 261 * allocator to re-use newly free'd object. ··· 388 266 * This is a rare event; in most cases we will find 389 267 * existing node to re-use. (or gc_count is 0). 
390 268 */ 391 - goto restart; 269 + 270 + if (gc_count >= ARRAY_SIZE(gc_nodes)) 271 + schedule_gc_worker(data, hash); 392 272 } 393 273 394 - if (!tuple) 395 - return 0; 274 + if (node_found) 275 + goto out_unlock; 396 276 397 - /* no match, need to insert new node */ 277 + /* expected case: match, insert new node */ 398 278 rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); 399 279 if (rbconn == NULL) 400 - return 0; 280 + goto out_unlock; 401 281 402 282 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 403 283 if (conn == NULL) { 404 284 kmem_cache_free(conncount_rb_cachep, rbconn); 405 - return 0; 285 + goto out_unlock; 406 286 } 407 287 408 288 conn->tuple = *tuple; 409 289 conn->zone = *zone; 410 290 memcpy(rbconn->key, key, sizeof(u32) * keylen); 411 291 412 - INIT_HLIST_HEAD(&rbconn->hhead); 413 - hlist_add_head(&conn->node, &rbconn->hhead); 292 + nf_conncount_list_init(&rbconn->list); 293 + list_add(&conn->node, &rbconn->list.head); 294 + count = 1; 414 295 415 296 rb_link_node(&rbconn->node, parent, rbnode); 416 297 rb_insert_color(&rbconn->node, root); 417 - return 1; 298 + out_unlock: 299 + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 300 + return count; 301 + } 302 + 303 + static unsigned int 304 + count_tree(struct net *net, 305 + struct nf_conncount_data *data, 306 + const u32 *key, 307 + const struct nf_conntrack_tuple *tuple, 308 + const struct nf_conntrack_zone *zone) 309 + { 310 + enum nf_conncount_list_add ret; 311 + struct rb_root *root; 312 + struct rb_node *parent; 313 + struct nf_conncount_rb *rbconn; 314 + unsigned int hash; 315 + u8 keylen = data->keylen; 316 + 317 + hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; 318 + root = &data->root[hash]; 319 + 320 + parent = rcu_dereference_raw(root->rb_node); 321 + while (parent) { 322 + int diff; 323 + bool addit; 324 + 325 + rbconn = rb_entry(parent, struct nf_conncount_rb, node); 326 + 327 + diff = key_diff(key, rbconn->key, keylen); 
328 + if (diff < 0) { 329 + parent = rcu_dereference_raw(parent->rb_left); 330 + } else if (diff > 0) { 331 + parent = rcu_dereference_raw(parent->rb_right); 332 + } else { 333 + /* same source network -> be counted! */ 334 + nf_conncount_lookup(net, &rbconn->list, tuple, zone, 335 + &addit); 336 + 337 + if (!addit) 338 + return rbconn->list.count; 339 + 340 + ret = nf_conncount_add(&rbconn->list, tuple, zone); 341 + if (ret == NF_CONNCOUNT_ERR) { 342 + return 0; /* hotdrop */ 343 + } else if (ret == NF_CONNCOUNT_ADDED) { 344 + return rbconn->list.count; 345 + } else { 346 + /* NF_CONNCOUNT_SKIP, rbconn is already 347 + * reclaimed by gc, insert a new tree node 348 + */ 349 + break; 350 + } 351 + } 352 + } 353 + 354 + if (!tuple) 355 + return 0; 356 + 357 + return insert_tree(net, data, root, hash, key, keylen, tuple, zone); 358 + } 359 + 360 + static void tree_gc_worker(struct work_struct *work) 361 + { 362 + struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work); 363 + struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn; 364 + struct rb_root *root; 365 + struct rb_node *node; 366 + unsigned int tree, next_tree, gc_count = 0; 367 + 368 + tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; 369 + root = &data->root[tree]; 370 + 371 + rcu_read_lock(); 372 + for (node = rb_first(root); node != NULL; node = rb_next(node)) { 373 + rbconn = rb_entry(node, struct nf_conncount_rb, node); 374 + if (nf_conncount_gc_list(data->net, &rbconn->list)) 375 + gc_nodes[gc_count++] = rbconn; 376 + } 377 + rcu_read_unlock(); 378 + 379 + spin_lock_bh(&nf_conncount_locks[tree]); 380 + 381 + if (gc_count) { 382 + tree_nodes_free(root, gc_nodes, gc_count); 383 + } 384 + 385 + clear_bit(tree, data->pending_trees); 386 + 387 + next_tree = (tree + 1) % CONNCOUNT_SLOTS; 388 + next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); 389 + 390 + if (next_tree < CONNCOUNT_SLOTS) { 391 + data->gc_tree = next_tree; 392 + 
schedule_work(work); 393 + } 394 + 395 + spin_unlock_bh(&nf_conncount_locks[tree]); 418 396 } 419 397 420 398 /* Count and return number of conntrack entries in 'net' with particular 'key'. 421 399 * If 'tuple' is not null, insert it into the accounting data structure. 400 + * Call with RCU read lock. 422 401 */ 423 402 unsigned int nf_conncount_count(struct net *net, 424 403 struct nf_conncount_data *data, ··· 527 304 const struct nf_conntrack_tuple *tuple, 528 305 const struct nf_conntrack_zone *zone) 529 306 { 530 - struct rb_root *root; 531 - int count; 532 - u32 hash; 533 - 534 - hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; 535 - root = &data->root[hash]; 536 - 537 - spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 538 - 539 - count = count_tree(net, root, key, data->keylen, tuple, zone); 540 - 541 - spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 542 - 543 - return count; 307 + return count_tree(net, data, key, tuple, zone); 544 308 } 545 309 EXPORT_SYMBOL_GPL(nf_conncount_count); 546 310 ··· 558 348 data->root[i] = RB_ROOT; 559 349 560 350 data->keylen = keylen / sizeof(u32); 351 + data->net = net; 352 + INIT_WORK(&data->gc_work, tree_gc_worker); 561 353 562 354 return data; 563 355 } 564 356 EXPORT_SYMBOL_GPL(nf_conncount_init); 565 357 566 - void nf_conncount_cache_free(struct hlist_head *hhead) 358 + void nf_conncount_cache_free(struct nf_conncount_list *list) 567 359 { 568 - struct nf_conncount_tuple *conn; 569 - struct hlist_node *n; 360 + struct nf_conncount_tuple *conn, *conn_n; 570 361 571 - hlist_for_each_entry_safe(conn, n, hhead, node) 362 + list_for_each_entry_safe(conn, conn_n, &list->head, node) 572 363 kmem_cache_free(conncount_conn_cachep, conn); 573 364 } 574 365 EXPORT_SYMBOL_GPL(nf_conncount_cache_free); ··· 584 373 585 374 rb_erase(node, r); 586 375 587 - nf_conncount_cache_free(&rbconn->hhead); 376 + nf_conncount_cache_free(&rbconn->list); 588 377 589 378 
kmem_cache_free(conncount_rb_cachep, rbconn); 590 379 } ··· 595 384 { 596 385 unsigned int i; 597 386 387 + cancel_work_sync(&data->gc_work); 598 388 nf_ct_netns_put(net, family); 599 389 600 390 for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+185 -67
net/netfilter/nf_conntrack_core.c
··· 37 37 #include <linux/rculist_nulls.h> 38 38 39 39 #include <net/netfilter/nf_conntrack.h> 40 - #include <net/netfilter/nf_conntrack_l3proto.h> 41 40 #include <net/netfilter/nf_conntrack_l4proto.h> 42 41 #include <net/netfilter/nf_conntrack_expect.h> 43 42 #include <net/netfilter/nf_conntrack_helper.h> ··· 54 55 #include <net/netfilter/nf_nat_core.h> 55 56 #include <net/netfilter/nf_nat_helper.h> 56 57 #include <net/netns/hash.h> 58 + #include <net/ip.h> 57 59 58 60 #include "nf_internals.h" 59 61 ··· 222 222 return scale_hash(hash_conntrack_raw(tuple, net)); 223 223 } 224 224 225 - bool 225 + static bool 226 226 nf_ct_get_tuple(const struct sk_buff *skb, 227 227 unsigned int nhoff, 228 228 unsigned int dataoff, ··· 230 230 u_int8_t protonum, 231 231 struct net *net, 232 232 struct nf_conntrack_tuple *tuple, 233 - const struct nf_conntrack_l3proto *l3proto, 234 233 const struct nf_conntrack_l4proto *l4proto) 235 234 { 235 + unsigned int size; 236 + const __be32 *ap; 237 + __be32 _addrs[8]; 238 + struct { 239 + __be16 sport; 240 + __be16 dport; 241 + } _inet_hdr, *inet_hdr; 242 + 236 243 memset(tuple, 0, sizeof(*tuple)); 237 244 238 245 tuple->src.l3num = l3num; 239 - if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) 246 + switch (l3num) { 247 + case NFPROTO_IPV4: 248 + nhoff += offsetof(struct iphdr, saddr); 249 + size = 2 * sizeof(__be32); 250 + break; 251 + case NFPROTO_IPV6: 252 + nhoff += offsetof(struct ipv6hdr, saddr); 253 + size = sizeof(_addrs); 254 + break; 255 + default: 256 + return true; 257 + } 258 + 259 + ap = skb_header_pointer(skb, nhoff, size, _addrs); 260 + if (!ap) 240 261 return false; 262 + 263 + switch (l3num) { 264 + case NFPROTO_IPV4: 265 + tuple->src.u3.ip = ap[0]; 266 + tuple->dst.u3.ip = ap[1]; 267 + break; 268 + case NFPROTO_IPV6: 269 + memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); 270 + memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); 271 + break; 272 + } 241 273 242 274 tuple->dst.protonum = protonum; 243 
275 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 244 276 245 - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); 277 + if (unlikely(l4proto->pkt_to_tuple)) 278 + return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); 279 + 280 + /* Actually only need first 4 bytes to get ports. */ 281 + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); 282 + if (!inet_hdr) 283 + return false; 284 + 285 + tuple->src.u.udp.port = inet_hdr->sport; 286 + tuple->dst.u.udp.port = inet_hdr->dport; 287 + return true; 246 288 } 247 - EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 289 + 290 + static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 291 + u_int8_t *protonum) 292 + { 293 + int dataoff = -1; 294 + const struct iphdr *iph; 295 + struct iphdr _iph; 296 + 297 + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 298 + if (!iph) 299 + return -1; 300 + 301 + /* Conntrack defragments packets, we might still see fragments 302 + * inside ICMP packets though. 303 + */ 304 + if (iph->frag_off & htons(IP_OFFSET)) 305 + return -1; 306 + 307 + dataoff = nhoff + (iph->ihl << 2); 308 + *protonum = iph->protocol; 309 + 310 + /* Check bogus IP headers */ 311 + if (dataoff > skb->len) { 312 + pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", 313 + nhoff, iph->ihl << 2, skb->len); 314 + return -1; 315 + } 316 + return dataoff; 317 + } 318 + 319 + #if IS_ENABLED(CONFIG_IPV6) 320 + static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 321 + u8 *protonum) 322 + { 323 + int protoff = -1; 324 + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 325 + __be16 frag_off; 326 + u8 nexthdr; 327 + 328 + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 329 + &nexthdr, sizeof(nexthdr)) != 0) { 330 + pr_debug("can't get nexthdr\n"); 331 + return -1; 332 + } 333 + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 334 + /* 335 + * (protoff == skb->len) means the packet has not data, just 336 + * IPv6 and 
possibly extensions headers, but it is tracked anyway 337 + */ 338 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 339 + pr_debug("can't find proto in pkt\n"); 340 + return -1; 341 + } 342 + 343 + *protonum = nexthdr; 344 + return protoff; 345 + } 346 + #endif 347 + 348 + static int get_l4proto(const struct sk_buff *skb, 349 + unsigned int nhoff, u8 pf, u8 *l4num) 350 + { 351 + switch (pf) { 352 + case NFPROTO_IPV4: 353 + return ipv4_get_l4proto(skb, nhoff, l4num); 354 + #if IS_ENABLED(CONFIG_IPV6) 355 + case NFPROTO_IPV6: 356 + return ipv6_get_l4proto(skb, nhoff, l4num); 357 + #endif 358 + default: 359 + *l4num = 0; 360 + break; 361 + } 362 + return -1; 363 + } 248 364 249 365 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 250 366 u_int16_t l3num, 251 367 struct net *net, struct nf_conntrack_tuple *tuple) 252 368 { 253 - const struct nf_conntrack_l3proto *l3proto; 254 369 const struct nf_conntrack_l4proto *l4proto; 255 - unsigned int protoff; 256 - u_int8_t protonum; 370 + u8 protonum; 371 + int protoff; 257 372 int ret; 258 373 259 374 rcu_read_lock(); 260 375 261 - l3proto = __nf_ct_l3proto_find(l3num); 262 - ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); 263 - if (ret != NF_ACCEPT) { 376 + protoff = get_l4proto(skb, nhoff, l3num, &protonum); 377 + if (protoff <= 0) { 264 378 rcu_read_unlock(); 265 379 return false; 266 380 } ··· 382 268 l4proto = __nf_ct_l4proto_find(l3num, protonum); 383 269 384 270 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, 385 - l3proto, l4proto); 271 + l4proto); 386 272 387 273 rcu_read_unlock(); 388 274 return ret; ··· 392 278 bool 393 279 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 394 280 const struct nf_conntrack_tuple *orig, 395 - const struct nf_conntrack_l3proto *l3proto, 396 281 const struct nf_conntrack_l4proto *l4proto) 397 282 { 398 283 memset(inverse, 0, sizeof(*inverse)); 399 284 400 285 inverse->src.l3num = orig->src.l3num; 401 - if 
(l3proto->invert_tuple(inverse, orig) == 0) 402 - return false; 286 + 287 + switch (orig->src.l3num) { 288 + case NFPROTO_IPV4: 289 + inverse->src.u3.ip = orig->dst.u3.ip; 290 + inverse->dst.u3.ip = orig->src.u3.ip; 291 + break; 292 + case NFPROTO_IPV6: 293 + inverse->src.u3.in6 = orig->dst.u3.in6; 294 + inverse->dst.u3.in6 = orig->src.u3.in6; 295 + break; 296 + default: 297 + break; 298 + } 403 299 404 300 inverse->dst.dir = !orig->dst.dir; 405 301 406 302 inverse->dst.protonum = orig->dst.protonum; 407 - return l4proto->invert_tuple(inverse, orig); 303 + 304 + if (unlikely(l4proto->invert_tuple)) 305 + return l4proto->invert_tuple(inverse, orig); 306 + 307 + inverse->src.u.all = orig->dst.u.all; 308 + inverse->dst.u.all = orig->src.u.all; 309 + return true; 408 310 } 409 311 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); 410 312 ··· 632 502 net_eq(net, nf_ct_net(ct)); 633 503 } 634 504 505 + static inline bool 506 + nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) 507 + { 508 + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 509 + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && 510 + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, 511 + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && 512 + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && 513 + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && 514 + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); 515 + } 516 + 635 517 /* caller must hold rcu readlock and none of the nf_conntrack_locks */ 636 518 static void nf_ct_gc_expired(struct nf_conn *ct) 637 519 { ··· 837 695 /* This is the conntrack entry already in hashes that won race. 
*/ 838 696 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 839 697 const struct nf_conntrack_l4proto *l4proto; 698 + enum ip_conntrack_info oldinfo; 699 + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); 840 700 841 701 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 842 702 if (l4proto->allow_clash && 843 - ((ct->status & IPS_NAT_DONE_MASK) == 0) && 844 703 !nf_ct_is_dying(ct) && 845 704 atomic_inc_not_zero(&ct->ct_general.use)) { 846 - enum ip_conntrack_info oldinfo; 847 - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); 848 - 849 - nf_ct_acct_merge(ct, ctinfo, loser_ct); 850 - nf_conntrack_put(&loser_ct->ct_general); 851 - nf_ct_set(skb, ct, oldinfo); 852 - return NF_ACCEPT; 705 + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || 706 + nf_ct_match(ct, loser_ct)) { 707 + nf_ct_acct_merge(ct, ctinfo, loser_ct); 708 + nf_conntrack_put(&loser_ct->ct_general); 709 + nf_ct_set(skb, ct, oldinfo); 710 + return NF_ACCEPT; 711 + } 712 + nf_ct_put(ct); 853 713 } 854 714 NF_CT_STAT_INC(net, drop); 855 715 return NF_DROP; ··· 1339 1195 static noinline struct nf_conntrack_tuple_hash * 1340 1196 init_conntrack(struct net *net, struct nf_conn *tmpl, 1341 1197 const struct nf_conntrack_tuple *tuple, 1342 - const struct nf_conntrack_l3proto *l3proto, 1343 1198 const struct nf_conntrack_l4proto *l4proto, 1344 1199 struct sk_buff *skb, 1345 1200 unsigned int dataoff, u32 hash) ··· 1351 1208 const struct nf_conntrack_zone *zone; 1352 1209 struct nf_conn_timeout *timeout_ext; 1353 1210 struct nf_conntrack_zone tmp; 1354 - unsigned int *timeouts; 1355 1211 1356 - if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { 1212 + if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { 1357 1213 pr_debug("Can't invert tuple.\n"); 1358 1214 return NULL; 1359 1215 } ··· 1369 1227 } 1370 1228 1371 1229 timeout_ext = tmpl ? 
nf_ct_timeout_find(tmpl) : NULL; 1372 - if (timeout_ext) { 1373 - timeouts = nf_ct_timeout_data(timeout_ext); 1374 - if (unlikely(!timeouts)) 1375 - timeouts = l4proto->get_timeouts(net); 1376 - } else { 1377 - timeouts = l4proto->get_timeouts(net); 1378 - } 1379 1230 1380 - if (!l4proto->new(ct, skb, dataoff, timeouts)) { 1231 + if (!l4proto->new(ct, skb, dataoff)) { 1381 1232 nf_conntrack_free(ct); 1382 1233 pr_debug("can't track with proto module\n"); 1383 1234 return NULL; ··· 1401 1266 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ 1402 1267 ct->master = exp->master; 1403 1268 if (exp->helper) { 1404 - help = nf_ct_helper_ext_add(ct, exp->helper, 1405 - GFP_ATOMIC); 1269 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 1406 1270 if (help) 1407 1271 rcu_assign_pointer(help->helper, exp->helper); 1408 1272 } ··· 1441 1307 unsigned int dataoff, 1442 1308 u_int16_t l3num, 1443 1309 u_int8_t protonum, 1444 - const struct nf_conntrack_l3proto *l3proto, 1445 1310 const struct nf_conntrack_l4proto *l4proto) 1446 1311 { 1447 1312 const struct nf_conntrack_zone *zone; ··· 1452 1319 u32 hash; 1453 1320 1454 1321 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 1455 - dataoff, l3num, protonum, net, &tuple, l3proto, 1456 - l4proto)) { 1322 + dataoff, l3num, protonum, net, &tuple, l4proto)) { 1457 1323 pr_debug("Can't get tuple\n"); 1458 1324 return 0; 1459 1325 } ··· 1462 1330 hash = hash_conntrack_raw(&tuple, net); 1463 1331 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1464 1332 if (!h) { 1465 - h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 1333 + h = init_conntrack(net, tmpl, &tuple, l4proto, 1466 1334 skb, dataoff, hash); 1467 1335 if (!h) 1468 1336 return 0; ··· 1495 1363 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 1496 1364 struct sk_buff *skb) 1497 1365 { 1498 - const struct nf_conntrack_l3proto *l3proto; 1499 1366 const struct nf_conntrack_l4proto *l4proto; 1500 1367 struct nf_conn *ct, *tmpl; 1501 1368 
enum ip_conntrack_info ctinfo; 1502 - unsigned int *timeouts; 1503 - unsigned int dataoff; 1504 1369 u_int8_t protonum; 1505 - int ret; 1370 + int dataoff, ret; 1506 1371 1507 1372 tmpl = nf_ct_get(skb, &ctinfo); 1508 1373 if (tmpl || ctinfo == IP_CT_UNTRACKED) { ··· 1513 1384 } 1514 1385 1515 1386 /* rcu_read_lock()ed by nf_hook_thresh */ 1516 - l3proto = __nf_ct_l3proto_find(pf); 1517 - ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 1518 - &dataoff, &protonum); 1519 - if (ret <= 0) { 1387 + dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum); 1388 + if (dataoff <= 0) { 1520 1389 pr_debug("not prepared to track yet or error occurred\n"); 1521 1390 NF_CT_STAT_INC_ATOMIC(net, error); 1522 1391 NF_CT_STAT_INC_ATOMIC(net, invalid); 1523 - ret = -ret; 1392 + ret = NF_ACCEPT; 1524 1393 goto out; 1525 1394 } 1526 1395 ··· 1540 1413 goto out; 1541 1414 } 1542 1415 repeat: 1543 - ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 1544 - l3proto, l4proto); 1416 + ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto); 1545 1417 if (ret < 0) { 1546 1418 /* Too stressed to deal. */ 1547 1419 NF_CT_STAT_INC_ATOMIC(net, drop); ··· 1556 1430 goto out; 1557 1431 } 1558 1432 1559 - /* Decide what timeout policy we want to apply to this flow. 
*/ 1560 - timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 1561 - 1562 - ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); 1433 + ret = l4proto->packet(ct, skb, dataoff, ctinfo); 1563 1434 if (ret <= 0) { 1564 1435 /* Invalid: inverse of the return code tells 1565 1436 * the netfilter core what to do */ ··· 1594 1471 1595 1472 rcu_read_lock(); 1596 1473 ret = nf_ct_invert_tuple(inverse, orig, 1597 - __nf_ct_l3proto_find(orig->src.l3num), 1598 1474 __nf_ct_l4proto_find(orig->src.l3num, 1599 1475 orig->dst.protonum)); 1600 1476 rcu_read_unlock(); ··· 1731 1609 1732 1610 static int nf_conntrack_update(struct net *net, struct sk_buff *skb) 1733 1611 { 1734 - const struct nf_conntrack_l3proto *l3proto; 1735 1612 const struct nf_conntrack_l4proto *l4proto; 1736 1613 struct nf_conntrack_tuple_hash *h; 1737 1614 struct nf_conntrack_tuple tuple; 1738 1615 enum ip_conntrack_info ctinfo; 1739 1616 struct nf_nat_hook *nat_hook; 1740 - unsigned int dataoff, status; 1617 + unsigned int status; 1741 1618 struct nf_conn *ct; 1619 + int dataoff; 1742 1620 u16 l3num; 1743 1621 u8 l4num; 1744 1622 ··· 1747 1625 return 0; 1748 1626 1749 1627 l3num = nf_ct_l3num(ct); 1750 - l3proto = nf_ct_l3proto_find_get(l3num); 1751 1628 1752 - if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 1753 - &l4num) <= 0) 1629 + dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); 1630 + if (dataoff <= 0) 1754 1631 return -1; 1755 1632 1756 1633 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 1757 1634 1758 1635 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 1759 - l4num, net, &tuple, l3proto, l4proto)) 1636 + l4num, net, &tuple, l4proto)) 1760 1637 return -1; 1761 1638 1762 1639 if (ct->status & IPS_SRC_NAT) { ··· 2209 2088 return nf_conntrack_hash_resize(hashsize); 2210 2089 } 2211 2090 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 2212 - 2213 - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 2214 - 
&nf_conntrack_htable_size, 0600); 2215 2091 2216 2092 static __always_inline unsigned int total_extension_size(void) 2217 2093 {
-1
net/netfilter/nf_conntrack_expect.c
··· 610 610 expect->tuple.src.l3num, 611 611 expect->tuple.dst.protonum); 612 612 print_tuple(s, &expect->tuple, 613 - __nf_ct_l3proto_find(expect->tuple.src.l3num), 614 613 __nf_ct_l4proto_find(expect->tuple.src.l3num, 615 614 expect->tuple.dst.protonum)); 616 615
+2 -4
net/netfilter/nf_conntrack_helper.c
··· 24 24 #include <linux/rtnetlink.h> 25 25 26 26 #include <net/netfilter/nf_conntrack.h> 27 - #include <net/netfilter/nf_conntrack_l3proto.h> 28 27 #include <net/netfilter/nf_conntrack_l4proto.h> 29 28 #include <net/netfilter/nf_conntrack_helper.h> 30 29 #include <net/netfilter/nf_conntrack_core.h> ··· 192 193 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); 193 194 194 195 struct nf_conn_help * 195 - nf_ct_helper_ext_add(struct nf_conn *ct, 196 - struct nf_conntrack_helper *helper, gfp_t gfp) 196 + nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) 197 197 { 198 198 struct nf_conn_help *help; 199 199 ··· 261 263 } 262 264 263 265 if (help == NULL) { 264 - help = nf_ct_helper_ext_add(ct, helper, flags); 266 + help = nf_ct_helper_ext_add(ct, flags); 265 267 if (help == NULL) 266 268 return -ENOMEM; 267 269 } else {
-66
net/netfilter/nf_conntrack_l3proto_generic.c
··· 1 - /* 2 - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 3 - * 4 - * Based largely upon the original ip_conntrack code which 5 - * had the following copyright information: 6 - * 7 - * (C) 1999-2001 Paul `Rusty' Russell 8 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 9 - * 10 - * This program is free software; you can redistribute it and/or modify 11 - * it under the terms of the GNU General Public License version 2 as 12 - * published by the Free Software Foundation. 13 - * 14 - * Author: 15 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 16 - */ 17 - 18 - #include <linux/types.h> 19 - #include <linux/ip.h> 20 - #include <linux/netfilter.h> 21 - #include <linux/module.h> 22 - #include <linux/skbuff.h> 23 - #include <linux/icmp.h> 24 - #include <linux/sysctl.h> 25 - #include <net/ip.h> 26 - 27 - #include <linux/netfilter_ipv4.h> 28 - #include <net/netfilter/nf_conntrack.h> 29 - #include <net/netfilter/nf_conntrack_l4proto.h> 30 - #include <net/netfilter/nf_conntrack_l3proto.h> 31 - #include <net/netfilter/nf_conntrack_core.h> 32 - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 33 - 34 - static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 35 - struct nf_conntrack_tuple *tuple) 36 - { 37 - memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); 38 - memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); 39 - 40 - return true; 41 - } 42 - 43 - static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, 44 - const struct nf_conntrack_tuple *orig) 45 - { 46 - memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); 47 - memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); 48 - 49 - return true; 50 - } 51 - 52 - static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 53 - unsigned int *dataoff, u_int8_t *protonum) 54 - { 55 - /* Never track !!! 
*/ 56 - return -NF_ACCEPT; 57 - } 58 - 59 - 60 - struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { 61 - .l3proto = PF_UNSPEC, 62 - .pkt_to_tuple = generic_pkt_to_tuple, 63 - .invert_tuple = generic_invert_tuple, 64 - .get_l4proto = generic_get_l4proto, 65 - }; 66 - EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
+73 -25
net/netfilter/nf_conntrack_netlink.c
··· 38 38 #include <net/netfilter/nf_conntrack_expect.h> 39 39 #include <net/netfilter/nf_conntrack_helper.h> 40 40 #include <net/netfilter/nf_conntrack_seqadj.h> 41 - #include <net/netfilter/nf_conntrack_l3proto.h> 42 41 #include <net/netfilter/nf_conntrack_l4proto.h> 43 42 #include <net/netfilter/nf_conntrack_tuple.h> 44 43 #include <net/netfilter/nf_conntrack_acct.h> ··· 80 81 return -1; 81 82 } 82 83 84 + static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 85 + const struct nf_conntrack_tuple *tuple) 86 + { 87 + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || 88 + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) 89 + return -EMSGSIZE; 90 + return 0; 91 + } 92 + 93 + static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 94 + const struct nf_conntrack_tuple *tuple) 95 + { 96 + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || 97 + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) 98 + return -EMSGSIZE; 99 + return 0; 100 + } 101 + 83 102 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, 84 - const struct nf_conntrack_tuple *tuple, 85 - const struct nf_conntrack_l3proto *l3proto) 103 + const struct nf_conntrack_tuple *tuple) 86 104 { 87 105 int ret = 0; 88 106 struct nlattr *nest_parms; ··· 108 92 if (!nest_parms) 109 93 goto nla_put_failure; 110 94 111 - if (likely(l3proto->tuple_to_nlattr)) 112 - ret = l3proto->tuple_to_nlattr(skb, tuple); 95 + switch (tuple->src.l3num) { 96 + case NFPROTO_IPV4: 97 + ret = ipv4_tuple_to_nlattr(skb, tuple); 98 + break; 99 + case NFPROTO_IPV6: 100 + ret = ipv6_tuple_to_nlattr(skb, tuple); 101 + break; 102 + } 113 103 114 104 nla_nest_end(skb, nest_parms); 115 105 ··· 128 106 static int ctnetlink_dump_tuples(struct sk_buff *skb, 129 107 const struct nf_conntrack_tuple *tuple) 130 108 { 131 - const struct nf_conntrack_l3proto *l3proto; 132 109 const struct nf_conntrack_l4proto *l4proto; 133 110 int ret; 134 111 135 112 rcu_read_lock(); 136 - l3proto = 
__nf_ct_l3proto_find(tuple->src.l3num); 137 - ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto); 113 + ret = ctnetlink_dump_tuples_ip(skb, tuple); 138 114 139 115 if (ret >= 0) { 140 116 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, ··· 576 556 return -1; 577 557 } 578 558 559 + static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = { 560 + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, 561 + [CTA_IP_V4_DST] = { .type = NLA_U32 }, 562 + [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 }, 563 + [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 }, 564 + }; 565 + 579 566 #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS) 580 567 static size_t ctnetlink_proto_size(const struct nf_conn *ct) 581 568 { 582 - const struct nf_conntrack_l3proto *l3proto; 583 569 const struct nf_conntrack_l4proto *l4proto; 584 570 size_t len, len4 = 0; 585 571 586 - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 587 - len = l3proto->nla_size; 572 + len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1); 588 573 len *= 3u; /* ORIG, REPLY, MASTER */ 589 574 590 575 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); ··· 961 936 return skb->len; 962 937 } 963 938 939 + static int ipv4_nlattr_to_tuple(struct nlattr *tb[], 940 + struct nf_conntrack_tuple *t) 941 + { 942 + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 943 + return -EINVAL; 944 + 945 + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); 946 + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); 947 + 948 + return 0; 949 + } 950 + 951 + static int ipv6_nlattr_to_tuple(struct nlattr *tb[], 952 + struct nf_conntrack_tuple *t) 953 + { 954 + if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) 955 + return -EINVAL; 956 + 957 + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); 958 + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); 959 + 960 + return 0; 961 + } 962 + 964 963 static int ctnetlink_parse_tuple_ip(struct nlattr *attr, 965 964 struct nf_conntrack_tuple *tuple) 966 965 { 967 
966 struct nlattr *tb[CTA_IP_MAX+1]; 968 - struct nf_conntrack_l3proto *l3proto; 969 967 int ret = 0; 970 968 971 969 ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); 972 970 if (ret < 0) 973 971 return ret; 974 972 975 - rcu_read_lock(); 976 - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); 973 + ret = nla_validate_nested(attr, CTA_IP_MAX, 974 + cta_ip_nla_policy, NULL); 975 + if (ret) 976 + return ret; 977 977 978 - if (likely(l3proto->nlattr_to_tuple)) { 979 - ret = nla_validate_nested(attr, CTA_IP_MAX, 980 - l3proto->nla_policy, NULL); 981 - if (ret == 0) 982 - ret = l3proto->nlattr_to_tuple(tb, tuple); 978 + switch (tuple->src.l3num) { 979 + case NFPROTO_IPV4: 980 + ret = ipv4_nlattr_to_tuple(tb, tuple); 981 + break; 982 + case NFPROTO_IPV6: 983 + ret = ipv6_nlattr_to_tuple(tb, tuple); 984 + break; 983 985 } 984 - 985 - rcu_read_unlock(); 986 986 987 987 return ret; 988 988 } ··· 1947 1897 } else { 1948 1898 struct nf_conn_help *help; 1949 1899 1950 - help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC); 1900 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 1951 1901 if (help == NULL) { 1952 1902 err = -ENOMEM; 1953 1903 goto err2; ··· 2631 2581 const struct nf_conntrack_tuple *tuple, 2632 2582 const struct nf_conntrack_tuple_mask *mask) 2633 2583 { 2634 - const struct nf_conntrack_l3proto *l3proto; 2635 2584 const struct nf_conntrack_l4proto *l4proto; 2636 2585 struct nf_conntrack_tuple m; 2637 2586 struct nlattr *nest_parms; ··· 2646 2597 goto nla_put_failure; 2647 2598 2648 2599 rcu_read_lock(); 2649 - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); 2650 - ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); 2600 + ret = ctnetlink_dump_tuples_ip(skb, &m); 2651 2601 if (ret >= 0) { 2652 2602 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, 2653 2603 tuple->dst.protonum);
+617 -228
net/netfilter/nf_conntrack_proto.c
··· 1 - /* L3/L4 protocol support for nf_conntrack. */ 2 - 3 - /* (C) 1999-2001 Paul `Rusty' Russell 4 - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 5 - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 6 - * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 7 - * 8 - * This program is free software; you can redistribute it and/or modify 9 - * it under the terms of the GNU General Public License version 2 as 10 - * published by the Free Software Foundation. 11 - */ 1 + // SPDX-License-Identifier: GPL-2.0 12 2 13 3 #include <linux/types.h> 14 4 #include <linux/netfilter.h> ··· 14 24 #include <linux/netdevice.h> 15 25 16 26 #include <net/netfilter/nf_conntrack.h> 17 - #include <net/netfilter/nf_conntrack_l3proto.h> 18 27 #include <net/netfilter/nf_conntrack_l4proto.h> 19 28 #include <net/netfilter/nf_conntrack_core.h> 20 29 #include <net/netfilter/nf_log.h> 21 30 31 + #include <linux/ip.h> 32 + #include <linux/icmp.h> 33 + #include <linux/sysctl.h> 34 + #include <net/route.h> 35 + #include <net/ip.h> 36 + 37 + #include <linux/netfilter_ipv4.h> 38 + #include <linux/netfilter_ipv6.h> 39 + #include <linux/netfilter_ipv6/ip6_tables.h> 40 + #include <net/netfilter/nf_conntrack_helper.h> 41 + #include <net/netfilter/nf_conntrack_zones.h> 42 + #include <net/netfilter/nf_conntrack_seqadj.h> 43 + #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 44 + #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 45 + #include <net/netfilter/nf_nat_helper.h> 46 + #include <net/netfilter/ipv4/nf_defrag_ipv4.h> 47 + #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 48 + 49 + #include <linux/ipv6.h> 50 + #include <linux/in6.h> 51 + #include <net/ipv6.h> 52 + #include <net/inet_frag.h> 53 + 54 + extern unsigned int nf_conntrack_net_id; 55 + 22 56 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; 23 - struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; 24 - EXPORT_SYMBOL_GPL(nf_ct_l3protos); 25 57 26 
58 static DEFINE_MUTEX(nf_ct_proto_mutex); 27 59 ··· 134 122 } 135 123 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); 136 124 137 - /* this is guaranteed to always return a valid protocol helper, since 138 - * it falls back to generic_protocol */ 139 - const struct nf_conntrack_l3proto * 140 - nf_ct_l3proto_find_get(u_int16_t l3proto) 141 - { 142 - struct nf_conntrack_l3proto *p; 143 - 144 - rcu_read_lock(); 145 - p = __nf_ct_l3proto_find(l3proto); 146 - if (!try_module_get(p->me)) 147 - p = &nf_conntrack_l3proto_generic; 148 - rcu_read_unlock(); 149 - 150 - return p; 151 - } 152 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); 153 - 154 - int 155 - nf_ct_l3proto_try_module_get(unsigned short l3proto) 156 - { 157 - const struct nf_conntrack_l3proto *p; 158 - int ret; 159 - 160 - retry: p = nf_ct_l3proto_find_get(l3proto); 161 - if (p == &nf_conntrack_l3proto_generic) { 162 - ret = request_module("nf_conntrack-%d", l3proto); 163 - if (!ret) 164 - goto retry; 165 - 166 - return -EPROTOTYPE; 167 - } 168 - 169 - return 0; 170 - } 171 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get); 172 - 173 - void nf_ct_l3proto_module_put(unsigned short l3proto) 174 - { 175 - struct nf_conntrack_l3proto *p; 176 - 177 - /* rcu_read_lock not necessary since the caller holds a reference, but 178 - * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find() 179 - */ 180 - rcu_read_lock(); 181 - p = __nf_ct_l3proto_find(l3proto); 182 - module_put(p->me); 183 - rcu_read_unlock(); 184 - } 185 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); 186 - 187 - static int nf_ct_netns_do_get(struct net *net, u8 nfproto) 188 - { 189 - const struct nf_conntrack_l3proto *l3proto; 190 - int ret; 191 - 192 - might_sleep(); 193 - 194 - ret = nf_ct_l3proto_try_module_get(nfproto); 195 - if (ret < 0) 196 - return ret; 197 - 198 - /* we already have a reference, can't fail */ 199 - rcu_read_lock(); 200 - l3proto = __nf_ct_l3proto_find(nfproto); 201 - rcu_read_unlock(); 202 - 203 - if (!l3proto->net_ns_get) 204 - 
return 0; 205 - 206 - ret = l3proto->net_ns_get(net); 207 - if (ret < 0) 208 - nf_ct_l3proto_module_put(nfproto); 209 - 210 - return ret; 211 - } 212 - 213 - int nf_ct_netns_get(struct net *net, u8 nfproto) 214 - { 215 - int err; 216 - 217 - if (nfproto == NFPROTO_INET) { 218 - err = nf_ct_netns_do_get(net, NFPROTO_IPV4); 219 - if (err < 0) 220 - goto err1; 221 - err = nf_ct_netns_do_get(net, NFPROTO_IPV6); 222 - if (err < 0) 223 - goto err2; 224 - } else { 225 - err = nf_ct_netns_do_get(net, nfproto); 226 - if (err < 0) 227 - goto err1; 228 - } 229 - return 0; 230 - 231 - err2: 232 - nf_ct_netns_put(net, NFPROTO_IPV4); 233 - err1: 234 - return err; 235 - } 236 - EXPORT_SYMBOL_GPL(nf_ct_netns_get); 237 - 238 - static void nf_ct_netns_do_put(struct net *net, u8 nfproto) 239 - { 240 - const struct nf_conntrack_l3proto *l3proto; 241 - 242 - might_sleep(); 243 - 244 - /* same as nf_conntrack_netns_get(), reference assumed */ 245 - rcu_read_lock(); 246 - l3proto = __nf_ct_l3proto_find(nfproto); 247 - rcu_read_unlock(); 248 - 249 - if (WARN_ON(!l3proto)) 250 - return; 251 - 252 - if (l3proto->net_ns_put) 253 - l3proto->net_ns_put(net); 254 - 255 - nf_ct_l3proto_module_put(nfproto); 256 - } 257 - 258 - void nf_ct_netns_put(struct net *net, uint8_t nfproto) 259 - { 260 - if (nfproto == NFPROTO_INET) { 261 - nf_ct_netns_do_put(net, NFPROTO_IPV4); 262 - nf_ct_netns_do_put(net, NFPROTO_IPV6); 263 - } else 264 - nf_ct_netns_do_put(net, nfproto); 265 - } 266 - EXPORT_SYMBOL_GPL(nf_ct_netns_put); 267 - 268 125 const struct nf_conntrack_l4proto * 269 126 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) 270 127 { ··· 155 274 } 156 275 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); 157 276 158 - static int kill_l3proto(struct nf_conn *i, void *data) 159 - { 160 - return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto; 161 - } 162 - 163 277 static int kill_l4proto(struct nf_conn *i, void *data) 164 278 { 165 279 const struct nf_conntrack_l4proto *l4proto; ··· 
162 286 return nf_ct_protonum(i) == l4proto->l4proto && 163 287 nf_ct_l3num(i) == l4proto->l3proto; 164 288 } 165 - 166 - int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto) 167 - { 168 - int ret = 0; 169 - struct nf_conntrack_l3proto *old; 170 - 171 - if (proto->l3proto >= NFPROTO_NUMPROTO) 172 - return -EBUSY; 173 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 174 - if (proto->tuple_to_nlattr && proto->nla_size == 0) 175 - return -EINVAL; 176 - #endif 177 - mutex_lock(&nf_ct_proto_mutex); 178 - old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 179 - lockdep_is_held(&nf_ct_proto_mutex)); 180 - if (old != &nf_conntrack_l3proto_generic) { 181 - ret = -EBUSY; 182 - goto out_unlock; 183 - } 184 - 185 - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); 186 - 187 - out_unlock: 188 - mutex_unlock(&nf_ct_proto_mutex); 189 - return ret; 190 - 191 - } 192 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); 193 - 194 - void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto) 195 - { 196 - BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); 197 - 198 - mutex_lock(&nf_ct_proto_mutex); 199 - BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 200 - lockdep_is_held(&nf_ct_proto_mutex) 201 - ) != proto); 202 - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 203 - &nf_conntrack_l3proto_generic); 204 - mutex_unlock(&nf_ct_proto_mutex); 205 - 206 - synchronize_rcu(); 207 - /* Remove all contrack entries for this protocol */ 208 - nf_ct_iterate_destroy(kill_l3proto, (void*)proto); 209 - } 210 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); 211 289 212 290 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, 213 291 const struct nf_conntrack_l4proto *l4proto) ··· 329 499 } 330 500 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); 331 501 332 - int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], 333 - unsigned int num_proto) 502 + static void 503 + nf_ct_l4proto_unregister(const struct 
nf_conntrack_l4proto * const l4proto[], 504 + unsigned int num_proto) 505 + { 506 + mutex_lock(&nf_ct_proto_mutex); 507 + while (num_proto-- != 0) 508 + __nf_ct_l4proto_unregister_one(l4proto[num_proto]); 509 + mutex_unlock(&nf_ct_proto_mutex); 510 + 511 + synchronize_net(); 512 + /* Remove all contrack entries for this protocol */ 513 + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); 514 + } 515 + 516 + static int 517 + nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], 518 + unsigned int num_proto) 334 519 { 335 520 int ret = -EINVAL, ver; 336 521 unsigned int i; ··· 363 518 } 364 519 return ret; 365 520 } 366 - EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); 367 521 368 522 int nf_ct_l4proto_pernet_register(struct net *net, 369 523 const struct nf_conntrack_l4proto *const l4proto[], ··· 386 542 } 387 543 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); 388 544 389 - void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], 390 - unsigned int num_proto) 391 - { 392 - mutex_lock(&nf_ct_proto_mutex); 393 - while (num_proto-- != 0) 394 - __nf_ct_l4proto_unregister_one(l4proto[num_proto]); 395 - mutex_unlock(&nf_ct_proto_mutex); 396 - 397 - synchronize_net(); 398 - /* Remove all contrack entries for this protocol */ 399 - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); 400 - } 401 - EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); 402 - 403 545 void nf_ct_l4proto_pernet_unregister(struct net *net, 404 546 const struct nf_conntrack_l4proto *const l4proto[], 405 547 unsigned int num_proto) ··· 394 564 nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); 395 565 } 396 566 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); 567 + 568 + static unsigned int ipv4_helper(void *priv, 569 + struct sk_buff *skb, 570 + const struct nf_hook_state *state) 571 + { 572 + struct nf_conn *ct; 573 + enum ip_conntrack_info ctinfo; 574 + const struct nf_conn_help *help; 575 + const struct nf_conntrack_helper *helper; 576 + 
577 + /* This is where we call the helper: as the packet goes out. */ 578 + ct = nf_ct_get(skb, &ctinfo); 579 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 580 + return NF_ACCEPT; 581 + 582 + help = nfct_help(ct); 583 + if (!help) 584 + return NF_ACCEPT; 585 + 586 + /* rcu_read_lock()ed by nf_hook_thresh */ 587 + helper = rcu_dereference(help->helper); 588 + if (!helper) 589 + return NF_ACCEPT; 590 + 591 + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 592 + ct, ctinfo); 593 + } 594 + 595 + static unsigned int ipv4_confirm(void *priv, 596 + struct sk_buff *skb, 597 + const struct nf_hook_state *state) 598 + { 599 + struct nf_conn *ct; 600 + enum ip_conntrack_info ctinfo; 601 + 602 + ct = nf_ct_get(skb, &ctinfo); 603 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 604 + goto out; 605 + 606 + /* adjust seqs for loopback traffic only in outgoing direction */ 607 + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 608 + !nf_is_loopback_packet(skb)) { 609 + if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { 610 + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 611 + return NF_DROP; 612 + } 613 + } 614 + out: 615 + /* We've seen it coming out the other side: confirm it */ 616 + return nf_conntrack_confirm(skb); 617 + } 618 + 619 + static unsigned int ipv4_conntrack_in(void *priv, 620 + struct sk_buff *skb, 621 + const struct nf_hook_state *state) 622 + { 623 + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 624 + } 625 + 626 + static unsigned int ipv4_conntrack_local(void *priv, 627 + struct sk_buff *skb, 628 + const struct nf_hook_state *state) 629 + { 630 + if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */ 631 + enum ip_conntrack_info ctinfo; 632 + struct nf_conn *tmpl; 633 + 634 + tmpl = nf_ct_get(skb, &ctinfo); 635 + if (tmpl && nf_ct_is_template(tmpl)) { 636 + /* when skipping ct, clear templates to avoid fooling 637 + * later targets/matches 638 + */ 639 + skb->_nfct = 0; 640 + nf_ct_put(tmpl); 641 + } 642 + return 
NF_ACCEPT; 643 + } 644 + 645 + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 646 + } 647 + 648 + /* Connection tracking may drop packets, but never alters them, so 649 + * make it the first hook. 650 + */ 651 + static const struct nf_hook_ops ipv4_conntrack_ops[] = { 652 + { 653 + .hook = ipv4_conntrack_in, 654 + .pf = NFPROTO_IPV4, 655 + .hooknum = NF_INET_PRE_ROUTING, 656 + .priority = NF_IP_PRI_CONNTRACK, 657 + }, 658 + { 659 + .hook = ipv4_conntrack_local, 660 + .pf = NFPROTO_IPV4, 661 + .hooknum = NF_INET_LOCAL_OUT, 662 + .priority = NF_IP_PRI_CONNTRACK, 663 + }, 664 + { 665 + .hook = ipv4_helper, 666 + .pf = NFPROTO_IPV4, 667 + .hooknum = NF_INET_POST_ROUTING, 668 + .priority = NF_IP_PRI_CONNTRACK_HELPER, 669 + }, 670 + { 671 + .hook = ipv4_confirm, 672 + .pf = NFPROTO_IPV4, 673 + .hooknum = NF_INET_POST_ROUTING, 674 + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 675 + }, 676 + { 677 + .hook = ipv4_helper, 678 + .pf = NFPROTO_IPV4, 679 + .hooknum = NF_INET_LOCAL_IN, 680 + .priority = NF_IP_PRI_CONNTRACK_HELPER, 681 + }, 682 + { 683 + .hook = ipv4_confirm, 684 + .pf = NFPROTO_IPV4, 685 + .hooknum = NF_INET_LOCAL_IN, 686 + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 687 + }, 688 + }; 689 + 690 + /* Fast function for those who don't want to parse /proc (and I don't 691 + * blame them). 692 + * Reversing the socket's dst/src point of view gives us the reply 693 + * mapping. 
694 + */ 695 + static int 696 + getorigdst(struct sock *sk, int optval, void __user *user, int *len) 697 + { 698 + const struct inet_sock *inet = inet_sk(sk); 699 + const struct nf_conntrack_tuple_hash *h; 700 + struct nf_conntrack_tuple tuple; 701 + 702 + memset(&tuple, 0, sizeof(tuple)); 703 + 704 + lock_sock(sk); 705 + tuple.src.u3.ip = inet->inet_rcv_saddr; 706 + tuple.src.u.tcp.port = inet->inet_sport; 707 + tuple.dst.u3.ip = inet->inet_daddr; 708 + tuple.dst.u.tcp.port = inet->inet_dport; 709 + tuple.src.l3num = PF_INET; 710 + tuple.dst.protonum = sk->sk_protocol; 711 + release_sock(sk); 712 + 713 + /* We only do TCP and SCTP at the moment: is there a better way? */ 714 + if (tuple.dst.protonum != IPPROTO_TCP && 715 + tuple.dst.protonum != IPPROTO_SCTP) { 716 + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 717 + return -ENOPROTOOPT; 718 + } 719 + 720 + if ((unsigned int)*len < sizeof(struct sockaddr_in)) { 721 + pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", 722 + *len, sizeof(struct sockaddr_in)); 723 + return -EINVAL; 724 + } 725 + 726 + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 727 + if (h) { 728 + struct sockaddr_in sin; 729 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 730 + 731 + sin.sin_family = AF_INET; 732 + sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 733 + .tuple.dst.u.tcp.port; 734 + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 735 + .tuple.dst.u3.ip; 736 + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 737 + 738 + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 739 + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 740 + nf_ct_put(ct); 741 + if (copy_to_user(user, &sin, sizeof(sin)) != 0) 742 + return -EFAULT; 743 + else 744 + return 0; 745 + } 746 + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 747 + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 748 + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 749 + return -ENOENT; 750 + } 751 + 752 + static struct nf_sockopt_ops so_getorigdst = { 753 + 
.pf = PF_INET, 754 + .get_optmin = SO_ORIGINAL_DST, 755 + .get_optmax = SO_ORIGINAL_DST + 1, 756 + .get = getorigdst, 757 + .owner = THIS_MODULE, 758 + }; 759 + 760 + #if IS_ENABLED(CONFIG_IPV6) 761 + static int 762 + ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 763 + { 764 + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; 765 + const struct ipv6_pinfo *inet6 = inet6_sk(sk); 766 + const struct inet_sock *inet = inet_sk(sk); 767 + const struct nf_conntrack_tuple_hash *h; 768 + struct sockaddr_in6 sin6; 769 + struct nf_conn *ct; 770 + __be32 flow_label; 771 + int bound_dev_if; 772 + 773 + lock_sock(sk); 774 + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 775 + tuple.src.u.tcp.port = inet->inet_sport; 776 + tuple.dst.u3.in6 = sk->sk_v6_daddr; 777 + tuple.dst.u.tcp.port = inet->inet_dport; 778 + tuple.dst.protonum = sk->sk_protocol; 779 + bound_dev_if = sk->sk_bound_dev_if; 780 + flow_label = inet6->flow_label; 781 + release_sock(sk); 782 + 783 + if (tuple.dst.protonum != IPPROTO_TCP && 784 + tuple.dst.protonum != IPPROTO_SCTP) 785 + return -ENOPROTOOPT; 786 + 787 + if (*len < 0 || (unsigned int)*len < sizeof(sin6)) 788 + return -EINVAL; 789 + 790 + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 791 + if (!h) { 792 + pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", 793 + &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), 794 + &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); 795 + return -ENOENT; 796 + } 797 + 798 + ct = nf_ct_tuplehash_to_ctrack(h); 799 + 800 + sin6.sin6_family = AF_INET6; 801 + sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 802 + sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; 803 + memcpy(&sin6.sin6_addr, 804 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 805 + sizeof(sin6.sin6_addr)); 806 + 807 + nf_ct_put(ct); 808 + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); 809 + return copy_to_user(user, &sin6, 
sizeof(sin6)) ? -EFAULT : 0; 810 + } 811 + 812 + static struct nf_sockopt_ops so_getorigdst6 = { 813 + .pf = NFPROTO_IPV6, 814 + .get_optmin = IP6T_SO_ORIGINAL_DST, 815 + .get_optmax = IP6T_SO_ORIGINAL_DST + 1, 816 + .get = ipv6_getorigdst, 817 + .owner = THIS_MODULE, 818 + }; 819 + 820 + static unsigned int ipv6_confirm(void *priv, 821 + struct sk_buff *skb, 822 + const struct nf_hook_state *state) 823 + { 824 + struct nf_conn *ct; 825 + enum ip_conntrack_info ctinfo; 826 + unsigned char pnum = ipv6_hdr(skb)->nexthdr; 827 + int protoff; 828 + __be16 frag_off; 829 + 830 + ct = nf_ct_get(skb, &ctinfo); 831 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 832 + goto out; 833 + 834 + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 835 + &frag_off); 836 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 837 + pr_debug("proto header not found\n"); 838 + goto out; 839 + } 840 + 841 + /* adjust seqs for loopback traffic only in outgoing direction */ 842 + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 843 + !nf_is_loopback_packet(skb)) { 844 + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { 845 + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 846 + return NF_DROP; 847 + } 848 + } 849 + out: 850 + /* We've seen it coming out the other side: confirm it */ 851 + return nf_conntrack_confirm(skb); 852 + } 853 + 854 + static unsigned int ipv6_conntrack_in(void *priv, 855 + struct sk_buff *skb, 856 + const struct nf_hook_state *state) 857 + { 858 + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 859 + } 860 + 861 + static unsigned int ipv6_conntrack_local(void *priv, 862 + struct sk_buff *skb, 863 + const struct nf_hook_state *state) 864 + { 865 + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 866 + } 867 + 868 + static unsigned int ipv6_helper(void *priv, 869 + struct sk_buff *skb, 870 + const struct nf_hook_state *state) 871 + { 872 + struct nf_conn *ct; 873 + const struct nf_conn_help *help; 874 + const struct 
nf_conntrack_helper *helper; 875 + enum ip_conntrack_info ctinfo; 876 + __be16 frag_off; 877 + int protoff; 878 + u8 nexthdr; 879 + 880 + /* This is where we call the helper: as the packet goes out. */ 881 + ct = nf_ct_get(skb, &ctinfo); 882 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 883 + return NF_ACCEPT; 884 + 885 + help = nfct_help(ct); 886 + if (!help) 887 + return NF_ACCEPT; 888 + /* rcu_read_lock()ed by nf_hook_thresh */ 889 + helper = rcu_dereference(help->helper); 890 + if (!helper) 891 + return NF_ACCEPT; 892 + 893 + nexthdr = ipv6_hdr(skb)->nexthdr; 894 + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 895 + &frag_off); 896 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 897 + pr_debug("proto header not found\n"); 898 + return NF_ACCEPT; 899 + } 900 + 901 + return helper->help(skb, protoff, ct, ctinfo); 902 + } 903 + 904 + static const struct nf_hook_ops ipv6_conntrack_ops[] = { 905 + { 906 + .hook = ipv6_conntrack_in, 907 + .pf = NFPROTO_IPV6, 908 + .hooknum = NF_INET_PRE_ROUTING, 909 + .priority = NF_IP6_PRI_CONNTRACK, 910 + }, 911 + { 912 + .hook = ipv6_conntrack_local, 913 + .pf = NFPROTO_IPV6, 914 + .hooknum = NF_INET_LOCAL_OUT, 915 + .priority = NF_IP6_PRI_CONNTRACK, 916 + }, 917 + { 918 + .hook = ipv6_helper, 919 + .pf = NFPROTO_IPV6, 920 + .hooknum = NF_INET_POST_ROUTING, 921 + .priority = NF_IP6_PRI_CONNTRACK_HELPER, 922 + }, 923 + { 924 + .hook = ipv6_confirm, 925 + .pf = NFPROTO_IPV6, 926 + .hooknum = NF_INET_POST_ROUTING, 927 + .priority = NF_IP6_PRI_LAST, 928 + }, 929 + { 930 + .hook = ipv6_helper, 931 + .pf = NFPROTO_IPV6, 932 + .hooknum = NF_INET_LOCAL_IN, 933 + .priority = NF_IP6_PRI_CONNTRACK_HELPER, 934 + }, 935 + { 936 + .hook = ipv6_confirm, 937 + .pf = NFPROTO_IPV6, 938 + .hooknum = NF_INET_LOCAL_IN, 939 + .priority = NF_IP6_PRI_LAST - 1, 940 + }, 941 + }; 942 + #endif 943 + 944 + static int nf_ct_netns_do_get(struct net *net, u8 nfproto) 945 + { 946 + struct nf_conntrack_net *cnet = net_generic(net, 
nf_conntrack_net_id); 947 + int err = 0; 948 + 949 + mutex_lock(&nf_ct_proto_mutex); 950 + 951 + switch (nfproto) { 952 + case NFPROTO_IPV4: 953 + cnet->users4++; 954 + if (cnet->users4 > 1) 955 + goto out_unlock; 956 + err = nf_defrag_ipv4_enable(net); 957 + if (err) { 958 + cnet->users4 = 0; 959 + goto out_unlock; 960 + } 961 + 962 + err = nf_register_net_hooks(net, ipv4_conntrack_ops, 963 + ARRAY_SIZE(ipv4_conntrack_ops)); 964 + if (err) 965 + cnet->users4 = 0; 966 + break; 967 + #if IS_ENABLED(CONFIG_IPV6) 968 + case NFPROTO_IPV6: 969 + cnet->users6++; 970 + if (cnet->users6 > 1) 971 + goto out_unlock; 972 + err = nf_defrag_ipv6_enable(net); 973 + if (err < 0) { 974 + cnet->users6 = 0; 975 + goto out_unlock; 976 + } 977 + 978 + err = nf_register_net_hooks(net, ipv6_conntrack_ops, 979 + ARRAY_SIZE(ipv6_conntrack_ops)); 980 + if (err) 981 + cnet->users6 = 0; 982 + break; 983 + #endif 984 + default: 985 + err = -EPROTO; 986 + break; 987 + } 988 + out_unlock: 989 + mutex_unlock(&nf_ct_proto_mutex); 990 + return err; 991 + } 992 + 993 + static void nf_ct_netns_do_put(struct net *net, u8 nfproto) 994 + { 995 + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); 996 + 997 + mutex_lock(&nf_ct_proto_mutex); 998 + switch (nfproto) { 999 + case NFPROTO_IPV4: 1000 + if (cnet->users4 && (--cnet->users4 == 0)) 1001 + nf_unregister_net_hooks(net, ipv4_conntrack_ops, 1002 + ARRAY_SIZE(ipv4_conntrack_ops)); 1003 + break; 1004 + #if IS_ENABLED(CONFIG_IPV6) 1005 + case NFPROTO_IPV6: 1006 + if (cnet->users6 && (--cnet->users6 == 0)) 1007 + nf_unregister_net_hooks(net, ipv6_conntrack_ops, 1008 + ARRAY_SIZE(ipv6_conntrack_ops)); 1009 + break; 1010 + #endif 1011 + } 1012 + 1013 + mutex_unlock(&nf_ct_proto_mutex); 1014 + } 1015 + 1016 + int nf_ct_netns_get(struct net *net, u8 nfproto) 1017 + { 1018 + int err; 1019 + 1020 + if (nfproto == NFPROTO_INET) { 1021 + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); 1022 + if (err < 0) 1023 + goto err1; 1024 + err = 
nf_ct_netns_do_get(net, NFPROTO_IPV6); 1025 + if (err < 0) 1026 + goto err2; 1027 + } else { 1028 + err = nf_ct_netns_do_get(net, nfproto); 1029 + if (err < 0) 1030 + goto err1; 1031 + } 1032 + return 0; 1033 + 1034 + err2: 1035 + nf_ct_netns_put(net, NFPROTO_IPV4); 1036 + err1: 1037 + return err; 1038 + } 1039 + EXPORT_SYMBOL_GPL(nf_ct_netns_get); 1040 + 1041 + void nf_ct_netns_put(struct net *net, uint8_t nfproto) 1042 + { 1043 + if (nfproto == NFPROTO_INET) { 1044 + nf_ct_netns_do_put(net, NFPROTO_IPV4); 1045 + nf_ct_netns_do_put(net, NFPROTO_IPV6); 1046 + } else { 1047 + nf_ct_netns_do_put(net, nfproto); 1048 + } 1049 + } 1050 + EXPORT_SYMBOL_GPL(nf_ct_netns_put); 1051 + 1052 + static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { 1053 + &nf_conntrack_l4proto_tcp4, 1054 + &nf_conntrack_l4proto_udp4, 1055 + &nf_conntrack_l4proto_icmp, 1056 + #ifdef CONFIG_NF_CT_PROTO_DCCP 1057 + &nf_conntrack_l4proto_dccp4, 1058 + #endif 1059 + #ifdef CONFIG_NF_CT_PROTO_SCTP 1060 + &nf_conntrack_l4proto_sctp4, 1061 + #endif 1062 + #ifdef CONFIG_NF_CT_PROTO_UDPLITE 1063 + &nf_conntrack_l4proto_udplite4, 1064 + #endif 1065 + #if IS_ENABLED(CONFIG_IPV6) 1066 + &nf_conntrack_l4proto_tcp6, 1067 + &nf_conntrack_l4proto_udp6, 1068 + &nf_conntrack_l4proto_icmpv6, 1069 + #ifdef CONFIG_NF_CT_PROTO_DCCP 1070 + &nf_conntrack_l4proto_dccp6, 1071 + #endif 1072 + #ifdef CONFIG_NF_CT_PROTO_SCTP 1073 + &nf_conntrack_l4proto_sctp6, 1074 + #endif 1075 + #ifdef CONFIG_NF_CT_PROTO_UDPLITE 1076 + &nf_conntrack_l4proto_udplite6, 1077 + #endif 1078 + #endif /* CONFIG_IPV6 */ 1079 + }; 1080 + 1081 + int nf_conntrack_proto_init(void) 1082 + { 1083 + int ret = 0; 1084 + 1085 + ret = nf_register_sockopt(&so_getorigdst); 1086 + if (ret < 0) 1087 + return ret; 1088 + 1089 + #if IS_ENABLED(CONFIG_IPV6) 1090 + ret = nf_register_sockopt(&so_getorigdst6); 1091 + if (ret < 0) 1092 + goto cleanup_sockopt; 1093 + #endif 1094 + ret = nf_ct_l4proto_register(builtin_l4proto, 1095 + 
ARRAY_SIZE(builtin_l4proto)); 1096 + if (ret < 0) 1097 + goto cleanup_sockopt2; 1098 + 1099 + return ret; 1100 + cleanup_sockopt2: 1101 + nf_unregister_sockopt(&so_getorigdst); 1102 + #if IS_ENABLED(CONFIG_IPV6) 1103 + cleanup_sockopt: 1104 + nf_unregister_sockopt(&so_getorigdst6); 1105 + #endif 1106 + return ret; 1107 + } 1108 + 1109 + void nf_conntrack_proto_fini(void) 1110 + { 1111 + unsigned int i; 1112 + 1113 + nf_ct_l4proto_unregister(builtin_l4proto, 1114 + ARRAY_SIZE(builtin_l4proto)); 1115 + nf_unregister_sockopt(&so_getorigdst); 1116 + #if IS_ENABLED(CONFIG_IPV6) 1117 + nf_unregister_sockopt(&so_getorigdst6); 1118 + #endif 1119 + 1120 + /* free l3proto protocol tables */ 1121 + for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) 1122 + kfree(nf_ct_protos[i]); 1123 + } 397 1124 398 1125 int nf_conntrack_proto_pernet_init(struct net *net) 399 1126 { ··· 968 581 if (err < 0) 969 582 return err; 970 583 584 + err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, 585 + ARRAY_SIZE(builtin_l4proto)); 586 + if (err < 0) { 587 + nf_ct_l4proto_unregister_sysctl(net, pn, 588 + &nf_conntrack_l4proto_generic); 589 + return err; 590 + } 591 + 971 592 pn->users++; 972 593 return 0; 973 594 } ··· 985 590 struct nf_proto_net *pn = nf_ct_l4proto_net(net, 986 591 &nf_conntrack_l4proto_generic); 987 592 593 + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, 594 + ARRAY_SIZE(builtin_l4proto)); 988 595 pn->users--; 989 596 nf_ct_l4proto_unregister_sysctl(net, 990 597 pn, 991 598 &nf_conntrack_l4proto_generic); 992 599 } 993 600 994 - int nf_conntrack_proto_init(void) 995 - { 996 - unsigned int i; 997 - for (i = 0; i < NFPROTO_NUMPROTO; i++) 998 - rcu_assign_pointer(nf_ct_l3protos[i], 999 - &nf_conntrack_l3proto_generic); 1000 - return 0; 1001 - } 1002 601 1003 - void nf_conntrack_proto_fini(void) 1004 - { 1005 - unsigned int i; 1006 - /* free l3proto protocol tables */ 1007 - for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) 1008 - kfree(nf_ct_protos[i]); 1009 - } 602 + 
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 603 + &nf_conntrack_htable_size, 0600); 604 + 605 + MODULE_ALIAS("ip_conntrack"); 606 + MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 607 + MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 608 + MODULE_LICENSE("GPL");
+7 -37
net/netfilter/nf_conntrack_proto_dccp.c
··· 23 23 #include <net/netfilter/nf_conntrack.h> 24 24 #include <net/netfilter/nf_conntrack_l4proto.h> 25 25 #include <net/netfilter/nf_conntrack_ecache.h> 26 + #include <net/netfilter/nf_conntrack_timeout.h> 26 27 #include <net/netfilter/nf_log.h> 27 28 28 29 /* Timeouts are based on values from RFC4340: ··· 389 388 return &net->ct.nf_ct_proto.dccp; 390 389 } 391 390 392 - static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 393 - struct net *net, struct nf_conntrack_tuple *tuple) 394 - { 395 - struct dccp_hdr _hdr, *dh; 396 - 397 - /* Actually only need first 4 bytes to get ports. */ 398 - dh = skb_header_pointer(skb, dataoff, 4, &_hdr); 399 - if (dh == NULL) 400 - return false; 401 - 402 - tuple->src.u.dccp.port = dh->dccph_sport; 403 - tuple->dst.u.dccp.port = dh->dccph_dport; 404 - return true; 405 - } 406 - 407 - static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, 408 - const struct nf_conntrack_tuple *tuple) 409 - { 410 - inv->src.u.dccp.port = tuple->dst.u.dccp.port; 411 - inv->dst.u.dccp.port = tuple->src.u.dccp.port; 412 - return true; 413 - } 414 - 415 391 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, 416 - unsigned int dataoff, unsigned int *timeouts) 392 + unsigned int dataoff) 417 393 { 418 394 struct net *net = nf_ct_net(ct); 419 395 struct nf_dccp_net *dn; ··· 438 460 ntohl(dhack->dccph_ack_nr_low); 439 461 } 440 462 441 - static unsigned int *dccp_get_timeouts(struct net *net) 442 - { 443 - return dccp_pernet(net)->dccp_timeout; 444 - } 445 - 446 463 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, 447 - unsigned int dataoff, enum ip_conntrack_info ctinfo, 448 - unsigned int *timeouts) 464 + unsigned int dataoff, enum ip_conntrack_info ctinfo) 449 465 { 450 466 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 451 467 struct dccp_hdr _dh, *dh; 452 468 u_int8_t type, old_state, new_state; 453 469 enum ct_dccp_roles role; 470 + unsigned int *timeouts; 454 471 455 472 dh = 
skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); 456 473 BUG_ON(dh == NULL); ··· 519 546 if (new_state != old_state) 520 547 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 521 548 549 + timeouts = nf_ct_timeout_lookup(ct); 550 + if (!timeouts) 551 + timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout; 522 552 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); 523 553 524 554 return NF_ACCEPT; ··· 840 864 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { 841 865 .l3proto = AF_INET, 842 866 .l4proto = IPPROTO_DCCP, 843 - .pkt_to_tuple = dccp_pkt_to_tuple, 844 - .invert_tuple = dccp_invert_tuple, 845 867 .new = dccp_new, 846 868 .packet = dccp_packet, 847 - .get_timeouts = dccp_get_timeouts, 848 869 .error = dccp_error, 849 870 .can_early_drop = dccp_can_early_drop, 850 871 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 873 900 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { 874 901 .l3proto = AF_INET6, 875 902 .l4proto = IPPROTO_DCCP, 876 - .pkt_to_tuple = dccp_pkt_to_tuple, 877 - .invert_tuple = dccp_invert_tuple, 878 903 .new = dccp_new, 879 904 .packet = dccp_packet, 880 - .get_timeouts = dccp_get_timeouts, 881 905 .error = dccp_error, 882 906 .can_early_drop = dccp_can_early_drop, 883 907 #ifdef CONFIG_NF_CONNTRACK_PROCFS
+12 -20
net/netfilter/nf_conntrack_proto_generic.c
··· 11 11 #include <linux/timer.h> 12 12 #include <linux/netfilter.h> 13 13 #include <net/netfilter/nf_conntrack_l4proto.h> 14 + #include <net/netfilter/nf_conntrack_timeout.h> 14 15 15 16 static const unsigned int nf_ct_generic_timeout = 600*HZ; 16 17 ··· 42 41 return true; 43 42 } 44 43 45 - static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, 46 - const struct nf_conntrack_tuple *orig) 47 - { 48 - tuple->src.u.all = 0; 49 - tuple->dst.u.all = 0; 50 - 51 - return true; 52 - } 53 - 54 - static unsigned int *generic_get_timeouts(struct net *net) 55 - { 56 - return &(generic_pernet(net)->timeout); 57 - } 58 - 59 44 /* Returns verdict for packet, or -1 for invalid. */ 60 45 static int generic_packet(struct nf_conn *ct, 61 46 const struct sk_buff *skb, 62 47 unsigned int dataoff, 63 - enum ip_conntrack_info ctinfo, 64 - unsigned int *timeout) 48 + enum ip_conntrack_info ctinfo) 65 49 { 50 + const unsigned int *timeout = nf_ct_timeout_lookup(ct); 51 + 52 + if (!timeout) 53 + timeout = &generic_pernet(nf_ct_net(ct))->timeout; 54 + 66 55 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 67 56 return NF_ACCEPT; 68 57 } 69 58 70 59 /* Called when a new connection for this protocol found. 
*/ 71 60 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, 72 - unsigned int dataoff, unsigned int *timeouts) 61 + unsigned int dataoff) 73 62 { 74 63 bool ret; 75 64 ··· 78 87 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], 79 88 struct net *net, void *data) 80 89 { 81 - unsigned int *timeout = data; 82 90 struct nf_generic_net *gn = generic_pernet(net); 91 + unsigned int *timeout = data; 92 + 93 + if (!timeout) 94 + timeout = &gn->timeout; 83 95 84 96 if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) 85 97 *timeout = ··· 162 168 .l3proto = PF_UNSPEC, 163 169 .l4proto = 255, 164 170 .pkt_to_tuple = generic_pkt_to_tuple, 165 - .invert_tuple = generic_invert_tuple, 166 171 .packet = generic_packet, 167 - .get_timeouts = generic_get_timeouts, 168 172 .new = generic_new, 169 173 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 170 174 .ctnl_timeout = {
+10 -14
net/netfilter/nf_conntrack_proto_gre.c
··· 39 39 #include <net/netfilter/nf_conntrack_l4proto.h> 40 40 #include <net/netfilter/nf_conntrack_helper.h> 41 41 #include <net/netfilter/nf_conntrack_core.h> 42 + #include <net/netfilter/nf_conntrack_timeout.h> 42 43 #include <linux/netfilter/nf_conntrack_proto_gre.h> 43 44 #include <linux/netfilter/nf_conntrack_pptp.h> 44 45 ··· 180 179 181 180 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ 182 181 183 - /* invert gre part of tuple */ 184 - static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, 185 - const struct nf_conntrack_tuple *orig) 186 - { 187 - tuple->dst.u.gre.key = orig->src.u.gre.key; 188 - tuple->src.u.gre.key = orig->dst.u.gre.key; 189 - return true; 190 - } 191 - 192 182 /* gre hdr info to tuple */ 193 183 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 194 184 struct net *net, struct nf_conntrack_tuple *tuple) ··· 235 243 static int gre_packet(struct nf_conn *ct, 236 244 const struct sk_buff *skb, 237 245 unsigned int dataoff, 238 - enum ip_conntrack_info ctinfo, 239 - unsigned int *timeouts) 246 + enum ip_conntrack_info ctinfo) 240 247 { 241 248 /* If we've seen traffic both ways, this is a GRE connection. 242 249 * Extend timeout. */ ··· 254 263 255 264 /* Called when a new connection for this protocol found. */ 256 265 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, 257 - unsigned int dataoff, unsigned int *timeouts) 266 + unsigned int dataoff) 258 267 { 268 + unsigned int *timeouts = nf_ct_timeout_lookup(ct); 269 + 270 + if (!timeouts) 271 + timeouts = gre_get_timeouts(nf_ct_net(ct)); 272 + 259 273 pr_debug(": "); 260 274 nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 261 275 ··· 296 300 unsigned int *timeouts = data; 297 301 struct netns_proto_gre *net_gre = gre_pernet(net); 298 302 303 + if (!timeouts) 304 + timeouts = gre_get_timeouts(net); 299 305 /* set default timeouts for GRE. 
*/ 300 306 timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; 301 307 timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; ··· 354 356 .l3proto = AF_INET, 355 357 .l4proto = IPPROTO_GRE, 356 358 .pkt_to_tuple = gre_pkt_to_tuple, 357 - .invert_tuple = gre_invert_tuple, 358 359 #ifdef CONFIG_NF_CONNTRACK_PROCFS 359 360 .print_conntrack = gre_print_conntrack, 360 361 #endif 361 - .get_timeouts = gre_get_timeouts, 362 362 .packet = gre_packet, 363 363 .new = gre_new, 364 364 .destroy = gre_destroy,
+8 -38
net/netfilter/nf_conntrack_proto_sctp.c
··· 28 28 #include <net/netfilter/nf_conntrack.h> 29 29 #include <net/netfilter/nf_conntrack_l4proto.h> 30 30 #include <net/netfilter/nf_conntrack_ecache.h> 31 + #include <net/netfilter/nf_conntrack_timeout.h> 31 32 32 33 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 33 34 closely. They're more complex. --RR ··· 149 148 static inline struct nf_sctp_net *sctp_pernet(struct net *net) 150 149 { 151 150 return &net->ct.nf_ct_proto.sctp; 152 - } 153 - 154 - static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 155 - struct net *net, struct nf_conntrack_tuple *tuple) 156 - { 157 - const struct sctphdr *hp; 158 - struct sctphdr _hdr; 159 - 160 - /* Actually only need first 4 bytes to get ports. */ 161 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 162 - if (hp == NULL) 163 - return false; 164 - 165 - tuple->src.u.sctp.port = hp->source; 166 - tuple->dst.u.sctp.port = hp->dest; 167 - return true; 168 - } 169 - 170 - static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, 171 - const struct nf_conntrack_tuple *orig) 172 - { 173 - tuple->src.u.sctp.port = orig->dst.u.sctp.port; 174 - tuple->dst.u.sctp.port = orig->src.u.sctp.port; 175 - return true; 176 151 } 177 152 178 153 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 273 296 return sctp_conntracks[dir][i][cur_state]; 274 297 } 275 298 276 - static unsigned int *sctp_get_timeouts(struct net *net) 277 - { 278 - return sctp_pernet(net)->timeouts; 279 - } 280 - 281 299 /* Returns verdict for packet, or -NF_ACCEPT for invalid. 
*/ 282 300 static int sctp_packet(struct nf_conn *ct, 283 301 const struct sk_buff *skb, 284 302 unsigned int dataoff, 285 - enum ip_conntrack_info ctinfo, 286 - unsigned int *timeouts) 303 + enum ip_conntrack_info ctinfo) 287 304 { 288 305 enum sctp_conntrack new_state, old_state; 289 306 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); ··· 286 315 const struct sctp_chunkhdr *sch; 287 316 struct sctp_chunkhdr _sch; 288 317 u_int32_t offset, count; 318 + unsigned int *timeouts; 289 319 unsigned long map[256 / sizeof(unsigned long)] = { 0 }; 290 320 291 321 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); ··· 375 403 } 376 404 spin_unlock_bh(&ct->lock); 377 405 406 + timeouts = nf_ct_timeout_lookup(ct); 407 + if (!timeouts) 408 + timeouts = sctp_pernet(nf_ct_net(ct))->timeouts; 409 + 378 410 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); 379 411 380 412 if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && ··· 399 423 400 424 /* Called when a new connection for this protocol found. 
*/ 401 425 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, 402 - unsigned int dataoff, unsigned int *timeouts) 426 + unsigned int dataoff) 403 427 { 404 428 enum sctp_conntrack new_state; 405 429 const struct sctphdr *sh; ··· 756 780 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { 757 781 .l3proto = PF_INET, 758 782 .l4proto = IPPROTO_SCTP, 759 - .pkt_to_tuple = sctp_pkt_to_tuple, 760 - .invert_tuple = sctp_invert_tuple, 761 783 #ifdef CONFIG_NF_CONNTRACK_PROCFS 762 784 .print_conntrack = sctp_print_conntrack, 763 785 #endif 764 786 .packet = sctp_packet, 765 - .get_timeouts = sctp_get_timeouts, 766 787 .new = sctp_new, 767 788 .error = sctp_error, 768 789 .can_early_drop = sctp_can_early_drop, ··· 790 817 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { 791 818 .l3proto = PF_INET6, 792 819 .l4proto = IPPROTO_SCTP, 793 - .pkt_to_tuple = sctp_pkt_to_tuple, 794 - .invert_tuple = sctp_invert_tuple, 795 820 #ifdef CONFIG_NF_CONNTRACK_PROCFS 796 821 .print_conntrack = sctp_print_conntrack, 797 822 #endif 798 823 .packet = sctp_packet, 799 - .get_timeouts = sctp_get_timeouts, 800 824 .new = sctp_new, 801 825 .error = sctp_error, 802 826 .can_early_drop = sctp_can_early_drop,
+11 -41
net/netfilter/nf_conntrack_proto_tcp.c
··· 29 29 #include <net/netfilter/nf_conntrack_ecache.h> 30 30 #include <net/netfilter/nf_conntrack_seqadj.h> 31 31 #include <net/netfilter/nf_conntrack_synproxy.h> 32 + #include <net/netfilter/nf_conntrack_timeout.h> 32 33 #include <net/netfilter/nf_log.h> 33 34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 34 35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> ··· 275 274 static inline struct nf_tcp_net *tcp_pernet(struct net *net) 276 275 { 277 276 return &net->ct.nf_ct_proto.tcp; 278 - } 279 - 280 - static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 281 - struct net *net, struct nf_conntrack_tuple *tuple) 282 - { 283 - const struct tcphdr *hp; 284 - struct tcphdr _hdr; 285 - 286 - /* Actually only need first 4 bytes to get ports. */ 287 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 288 - if (hp == NULL) 289 - return false; 290 - 291 - tuple->src.u.tcp.port = hp->source; 292 - tuple->dst.u.tcp.port = hp->dest; 293 - 294 - return true; 295 - } 296 - 297 - static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, 298 - const struct nf_conntrack_tuple *orig) 299 - { 300 - tuple->src.u.tcp.port = orig->dst.u.tcp.port; 301 - tuple->dst.u.tcp.port = orig->src.u.tcp.port; 302 - return true; 303 277 } 304 278 305 279 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 769 793 return NF_ACCEPT; 770 794 } 771 795 772 - static unsigned int *tcp_get_timeouts(struct net *net) 773 - { 774 - return tcp_pernet(net)->timeouts; 775 - } 776 - 777 796 /* Returns verdict for packet, or -1 for invalid. 
*/ 778 797 static int tcp_packet(struct nf_conn *ct, 779 798 const struct sk_buff *skb, 780 799 unsigned int dataoff, 781 - enum ip_conntrack_info ctinfo, 782 - unsigned int *timeouts) 800 + enum ip_conntrack_info ctinfo) 783 801 { 784 802 struct net *net = nf_ct_net(ct); 785 803 struct nf_tcp_net *tn = tcp_pernet(net); 786 804 struct nf_conntrack_tuple *tuple; 787 805 enum tcp_conntrack new_state, old_state; 806 + unsigned int index, *timeouts; 788 807 enum ip_conntrack_dir dir; 789 808 const struct tcphdr *th; 790 809 struct tcphdr _tcph; 791 810 unsigned long timeout; 792 - unsigned int index; 793 811 794 812 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 795 813 BUG_ON(th == NULL); ··· 1016 1046 && new_state == TCP_CONNTRACK_FIN_WAIT) 1017 1047 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 1018 1048 1049 + timeouts = nf_ct_timeout_lookup(ct); 1050 + if (!timeouts) 1051 + timeouts = tn->timeouts; 1052 + 1019 1053 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && 1020 1054 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) 1021 1055 timeout = timeouts[TCP_CONNTRACK_RETRANS]; ··· 1069 1095 1070 1096 /* Called when a new connection for this protocol found. */ 1071 1097 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, 1072 - unsigned int dataoff, unsigned int *timeouts) 1098 + unsigned int dataoff) 1073 1099 { 1074 1100 enum tcp_conntrack new_state; 1075 1101 const struct tcphdr *th; ··· 1287 1313 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], 1288 1314 struct net *net, void *data) 1289 1315 { 1290 - unsigned int *timeouts = data; 1291 1316 struct nf_tcp_net *tn = tcp_pernet(net); 1317 + unsigned int *timeouts = data; 1292 1318 int i; 1293 1319 1320 + if (!timeouts) 1321 + timeouts = tn->timeouts; 1294 1322 /* set default TCP timeouts. 
*/ 1295 1323 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) 1296 1324 timeouts[i] = tn->timeouts[i]; ··· 1535 1559 { 1536 1560 .l3proto = PF_INET, 1537 1561 .l4proto = IPPROTO_TCP, 1538 - .pkt_to_tuple = tcp_pkt_to_tuple, 1539 - .invert_tuple = tcp_invert_tuple, 1540 1562 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1541 1563 .print_conntrack = tcp_print_conntrack, 1542 1564 #endif 1543 1565 .packet = tcp_packet, 1544 - .get_timeouts = tcp_get_timeouts, 1545 1566 .new = tcp_new, 1546 1567 .error = tcp_error, 1547 1568 .can_early_drop = tcp_can_early_drop, ··· 1570 1597 { 1571 1598 .l3proto = PF_INET6, 1572 1599 .l4proto = IPPROTO_TCP, 1573 - .pkt_to_tuple = tcp_pkt_to_tuple, 1574 - .invert_tuple = tcp_invert_tuple, 1575 1600 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1576 1601 .print_conntrack = tcp_print_conntrack, 1577 1602 #endif 1578 1603 .packet = tcp_packet, 1579 - .get_timeouts = tcp_get_timeouts, 1580 1604 .new = tcp_new, 1581 1605 .error = tcp_error, 1582 1606 .can_early_drop = tcp_can_early_drop,
+13 -42
net/netfilter/nf_conntrack_proto_udp.c
··· 22 22 #include <linux/netfilter_ipv6.h> 23 23 #include <net/netfilter/nf_conntrack_l4proto.h> 24 24 #include <net/netfilter/nf_conntrack_ecache.h> 25 + #include <net/netfilter/nf_conntrack_timeout.h> 25 26 #include <net/netfilter/nf_log.h> 26 27 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 27 28 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> ··· 37 36 return &net->ct.nf_ct_proto.udp; 38 37 } 39 38 40 - static bool udp_pkt_to_tuple(const struct sk_buff *skb, 41 - unsigned int dataoff, 42 - struct net *net, 43 - struct nf_conntrack_tuple *tuple) 44 - { 45 - const struct udphdr *hp; 46 - struct udphdr _hdr; 47 - 48 - /* Actually only need first 4 bytes to get ports. */ 49 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 50 - if (hp == NULL) 51 - return false; 52 - 53 - tuple->src.u.udp.port = hp->source; 54 - tuple->dst.u.udp.port = hp->dest; 55 - 56 - return true; 57 - } 58 - 59 - static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple, 60 - const struct nf_conntrack_tuple *orig) 61 - { 62 - tuple->src.u.udp.port = orig->dst.u.udp.port; 63 - tuple->dst.u.udp.port = orig->src.u.udp.port; 64 - return true; 65 - } 66 - 67 39 static unsigned int *udp_get_timeouts(struct net *net) 68 40 { 69 41 return udp_pernet(net)->timeouts; ··· 46 72 static int udp_packet(struct nf_conn *ct, 47 73 const struct sk_buff *skb, 48 74 unsigned int dataoff, 49 - enum ip_conntrack_info ctinfo, 50 - unsigned int *timeouts) 75 + enum ip_conntrack_info ctinfo) 51 76 { 77 + unsigned int *timeouts; 78 + 79 + timeouts = nf_ct_timeout_lookup(ct); 80 + if (!timeouts) 81 + timeouts = udp_get_timeouts(nf_ct_net(ct)); 82 + 52 83 /* If we've seen traffic both ways, this is some kind of UDP 53 84 stream. Extend timeout. */ 54 85 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { ··· 71 92 72 93 /* Called when a new connection for this protocol found. 
*/ 73 94 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, 74 - unsigned int dataoff, unsigned int *timeouts) 95 + unsigned int dataoff) 75 96 { 76 97 return true; 77 98 } ··· 182 203 unsigned int *timeouts = data; 183 204 struct nf_udp_net *un = udp_pernet(net); 184 205 206 + if (!timeouts) 207 + timeouts = un->timeouts; 208 + 185 209 /* set default timeouts for UDP. */ 186 210 timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; 187 211 timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; ··· 283 301 .l3proto = PF_INET, 284 302 .l4proto = IPPROTO_UDP, 285 303 .allow_clash = true, 286 - .pkt_to_tuple = udp_pkt_to_tuple, 287 - .invert_tuple = udp_invert_tuple, 288 304 .packet = udp_packet, 289 - .get_timeouts = udp_get_timeouts, 290 305 .new = udp_new, 291 306 .error = udp_error, 292 307 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 312 333 .l3proto = PF_INET, 313 334 .l4proto = IPPROTO_UDPLITE, 314 335 .allow_clash = true, 315 - .pkt_to_tuple = udp_pkt_to_tuple, 316 - .invert_tuple = udp_invert_tuple, 317 336 .packet = udp_packet, 318 - .get_timeouts = udp_get_timeouts, 319 337 .new = udp_new, 320 338 .error = udplite_error, 321 339 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 341 365 .l3proto = PF_INET6, 342 366 .l4proto = IPPROTO_UDP, 343 367 .allow_clash = true, 344 - .pkt_to_tuple = udp_pkt_to_tuple, 345 - .invert_tuple = udp_invert_tuple, 346 368 .packet = udp_packet, 347 - .get_timeouts = udp_get_timeouts, 348 369 .new = udp_new, 349 370 .error = udp_error, 350 371 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 370 397 .l3proto = PF_INET6, 371 398 .l4proto = IPPROTO_UDPLITE, 372 399 .allow_clash = true, 373 - .pkt_to_tuple = udp_pkt_to_tuple, 374 - .invert_tuple = udp_invert_tuple, 375 400 .packet = udp_packet, 376 - .get_timeouts = udp_get_timeouts, 377 401 .new = udp_new, 378 402 .error = udplite_error, 379 403 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 393 423 }; 394 424 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); 395 425 #endif 426 + 
#include <net/netfilter/nf_conntrack_timeout.h>
+8 -20
net/netfilter/nf_conntrack_standalone.c
··· 1 - /* (C) 1999-2001 Paul `Rusty' Russell 2 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 - * (C) 2005-2012 Patrick McHardy <kaber@trash.net> 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License version 2 as 7 - * published by the Free Software Foundation. 8 - */ 9 - 1 + // SPDX-License-Identifier: GPL-2.0 10 2 #include <linux/types.h> 11 3 #include <linux/netfilter.h> 12 4 #include <linux/slab.h> ··· 16 24 17 25 #include <net/netfilter/nf_conntrack.h> 18 26 #include <net/netfilter/nf_conntrack_core.h> 19 - #include <net/netfilter/nf_conntrack_l3proto.h> 20 27 #include <net/netfilter/nf_conntrack_l4proto.h> 21 28 #include <net/netfilter/nf_conntrack_expect.h> 22 29 #include <net/netfilter/nf_conntrack_helper.h> ··· 24 33 #include <net/netfilter/nf_conntrack_timestamp.h> 25 34 #include <linux/rculist_nulls.h> 26 35 27 - MODULE_LICENSE("GPL"); 36 + unsigned int nf_conntrack_net_id __read_mostly; 28 37 29 38 #ifdef CONFIG_NF_CONNTRACK_PROCFS 30 39 void 31 40 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 32 - const struct nf_conntrack_l3proto *l3proto, 33 41 const struct nf_conntrack_l4proto *l4proto) 34 42 { 35 - switch (l3proto->l3proto) { 43 + switch (tuple->src.l3num) { 36 44 case NFPROTO_IPV4: 37 45 seq_printf(s, "src=%pI4 dst=%pI4 ", 38 46 &tuple->src.u3.ip, &tuple->dst.u3.ip); ··· 272 282 { 273 283 struct nf_conntrack_tuple_hash *hash = v; 274 284 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 275 - const struct nf_conntrack_l3proto *l3proto; 276 285 const struct nf_conntrack_l4proto *l4proto; 277 286 struct net *net = seq_file_net(s); 278 287 int ret = 0; ··· 292 303 if (!net_eq(nf_ct_net(ct), net)) 293 304 goto release; 294 305 295 - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 296 - WARN_ON(!l3proto); 297 306 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 298 307 WARN_ON(!l4proto); 299 308 300 
309 ret = -ENOSPC; 301 310 seq_printf(s, "%-8s %u %-8s %u ", 302 - l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), 311 + l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct), 303 312 l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); 304 313 305 314 if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) ··· 307 320 l4proto->print_conntrack(s, ct); 308 321 309 322 print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 310 - l3proto, l4proto); 323 + l4proto); 311 324 312 325 ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); 313 326 ··· 320 333 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 321 334 seq_puts(s, "[UNREPLIED] "); 322 335 323 - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 324 - l3proto, l4proto); 336 + print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto); 325 337 326 338 ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); 327 339 ··· 666 680 static struct pernet_operations nf_conntrack_net_ops = { 667 681 .init = nf_conntrack_pernet_init, 668 682 .exit_batch = nf_conntrack_pernet_exit, 683 + .id = &nf_conntrack_net_id, 684 + .size = sizeof(struct nf_conntrack_net), 669 685 }; 670 686 671 687 static int __init nf_conntrack_standalone_init(void)
+5 -8
net/netfilter/nf_flow_table_core.c
··· 107 107 tcp->seen[1].td_maxwin = 0; 108 108 } 109 109 110 + #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) 111 + #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) 112 + 110 113 static void flow_offload_fixup_ct_state(struct nf_conn *ct) 111 114 { 112 115 const struct nf_conntrack_l4proto *l4proto; 113 - struct net *net = nf_ct_net(ct); 114 - unsigned int *timeouts; 115 116 unsigned int timeout; 116 117 int l4num; 117 118 ··· 124 123 if (!l4proto) 125 124 return; 126 125 127 - timeouts = l4proto->get_timeouts(net); 128 - if (!timeouts) 129 - return; 130 - 131 126 if (l4num == IPPROTO_TCP) 132 - timeout = timeouts[TCP_CONNTRACK_ESTABLISHED]; 127 + timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT; 133 128 else if (l4num == IPPROTO_UDP) 134 - timeout = timeouts[UDP_CT_REPLIED]; 129 + timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT; 135 130 else 136 131 return; 137 132
-8
net/netfilter/nf_nat_core.c
··· 28 28 #include <net/netfilter/nf_nat_helper.h> 29 29 #include <net/netfilter/nf_conntrack_helper.h> 30 30 #include <net/netfilter/nf_conntrack_seqadj.h> 31 - #include <net/netfilter/nf_conntrack_l3proto.h> 32 31 #include <net/netfilter/nf_conntrack_zones.h> 33 32 #include <linux/netfilter/nf_nat.h> 34 33 ··· 742 743 743 744 int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) 744 745 { 745 - int err; 746 - 747 - err = nf_ct_l3proto_try_module_get(l3proto->l3proto); 748 - if (err < 0) 749 - return err; 750 - 751 746 mutex_lock(&nf_nat_proto_mutex); 752 747 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], 753 748 &nf_nat_l4proto_tcp); ··· 774 781 synchronize_rcu(); 775 782 776 783 nf_nat_l3proto_clean(l3proto->l3proto); 777 - nf_ct_l3proto_module_put(l3proto->l3proto); 778 784 } 779 785 EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); 780 786
+148 -114
net/netfilter/nf_osf.c
··· 21 21 #include <linux/netfilter/nf_osf.h> 22 22 23 23 static inline int nf_osf_ttl(const struct sk_buff *skb, 24 - const struct nf_osf_info *info, 25 - unsigned char f_ttl) 24 + int ttl_check, unsigned char f_ttl) 26 25 { 27 26 const struct iphdr *ip = ip_hdr(skb); 28 27 29 - if (info->flags & NF_OSF_TTL) { 30 - if (info->ttl == NF_OSF_TTL_TRUE) 28 + if (ttl_check != -1) { 29 + if (ttl_check == NF_OSF_TTL_TRUE) 31 30 return ip->ttl == f_ttl; 32 - if (info->ttl == NF_OSF_TTL_NOCHECK) 31 + if (ttl_check == NF_OSF_TTL_NOCHECK) 33 32 return 1; 34 33 else if (ip->ttl <= f_ttl) 35 34 return 1; ··· 51 52 return ip->ttl == f_ttl; 52 53 } 53 54 55 + struct nf_osf_hdr_ctx { 56 + bool df; 57 + u16 window; 58 + u16 totlen; 59 + const unsigned char *optp; 60 + unsigned int optsize; 61 + }; 62 + 63 + static bool nf_osf_match_one(const struct sk_buff *skb, 64 + const struct nf_osf_user_finger *f, 65 + int ttl_check, 66 + struct nf_osf_hdr_ctx *ctx) 67 + { 68 + unsigned int check_WSS = 0; 69 + int fmatch = FMATCH_WRONG; 70 + int foptsize, optnum; 71 + u16 mss = 0; 72 + 73 + if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl)) 74 + return false; 75 + 76 + /* 77 + * Should not happen if userspace parser was written correctly. 
78 + */ 79 + if (f->wss.wc >= OSF_WSS_MAX) 80 + return false; 81 + 82 + /* Check options */ 83 + 84 + foptsize = 0; 85 + for (optnum = 0; optnum < f->opt_num; ++optnum) 86 + foptsize += f->opt[optnum].length; 87 + 88 + if (foptsize > MAX_IPOPTLEN || 89 + ctx->optsize > MAX_IPOPTLEN || 90 + ctx->optsize != foptsize) 91 + return false; 92 + 93 + check_WSS = f->wss.wc; 94 + 95 + for (optnum = 0; optnum < f->opt_num; ++optnum) { 96 + if (f->opt[optnum].kind == *ctx->optp) { 97 + __u32 len = f->opt[optnum].length; 98 + const __u8 *optend = ctx->optp + len; 99 + 100 + fmatch = FMATCH_OK; 101 + 102 + switch (*ctx->optp) { 103 + case OSFOPT_MSS: 104 + mss = ctx->optp[3]; 105 + mss <<= 8; 106 + mss |= ctx->optp[2]; 107 + 108 + mss = ntohs((__force __be16)mss); 109 + break; 110 + case OSFOPT_TS: 111 + break; 112 + } 113 + 114 + ctx->optp = optend; 115 + } else 116 + fmatch = FMATCH_OPT_WRONG; 117 + 118 + if (fmatch != FMATCH_OK) 119 + break; 120 + } 121 + 122 + if (fmatch != FMATCH_OPT_WRONG) { 123 + fmatch = FMATCH_WRONG; 124 + 125 + switch (check_WSS) { 126 + case OSF_WSS_PLAIN: 127 + if (f->wss.val == 0 || ctx->window == f->wss.val) 128 + fmatch = FMATCH_OK; 129 + break; 130 + case OSF_WSS_MSS: 131 + /* 132 + * Some smart modems decrease mangle MSS to 133 + * SMART_MSS_2, so we check standard, decreased 134 + * and the one provided in the fingerprint MSS 135 + * values. 
136 + */ 137 + #define SMART_MSS_1 1460 138 + #define SMART_MSS_2 1448 139 + if (ctx->window == f->wss.val * mss || 140 + ctx->window == f->wss.val * SMART_MSS_1 || 141 + ctx->window == f->wss.val * SMART_MSS_2) 142 + fmatch = FMATCH_OK; 143 + break; 144 + case OSF_WSS_MTU: 145 + if (ctx->window == f->wss.val * (mss + 40) || 146 + ctx->window == f->wss.val * (SMART_MSS_1 + 40) || 147 + ctx->window == f->wss.val * (SMART_MSS_2 + 40)) 148 + fmatch = FMATCH_OK; 149 + break; 150 + case OSF_WSS_MODULO: 151 + if ((ctx->window % f->wss.val) == 0) 152 + fmatch = FMATCH_OK; 153 + break; 154 + } 155 + } 156 + 157 + return fmatch == FMATCH_OK; 158 + } 159 + 160 + static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, 161 + const struct sk_buff *skb, 162 + const struct iphdr *ip, 163 + unsigned char *opts) 164 + { 165 + const struct tcphdr *tcp; 166 + struct tcphdr _tcph; 167 + 168 + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); 169 + if (!tcp) 170 + return NULL; 171 + 172 + if (!tcp->syn) 173 + return NULL; 174 + 175 + ctx->totlen = ntohs(ip->tot_len); 176 + ctx->df = ntohs(ip->frag_off) & IP_DF; 177 + ctx->window = ntohs(tcp->window); 178 + 179 + if (tcp->doff * 4 > sizeof(struct tcphdr)) { 180 + ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr); 181 + 182 + ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) + 183 + sizeof(struct tcphdr), ctx->optsize, opts); 184 + } 185 + 186 + return tcp; 187 + } 188 + 54 189 bool 55 190 nf_osf_match(const struct sk_buff *skb, u_int8_t family, 56 191 int hooknum, struct net_device *in, struct net_device *out, 57 192 const struct nf_osf_info *info, struct net *net, 58 193 const struct list_head *nf_osf_fingers) 59 194 { 60 - const unsigned char *optp = NULL, *_optp = NULL; 61 - unsigned int optsize = 0, check_WSS = 0; 62 - int fmatch = FMATCH_WRONG, fcount = 0; 63 195 const struct iphdr *ip = ip_hdr(skb); 64 196 const struct nf_osf_user_finger *f; 65 197 unsigned char 
opts[MAX_IPOPTLEN]; 66 198 const struct nf_osf_finger *kf; 67 - u16 window, totlen, mss = 0; 199 + int fcount = 0, ttl_check; 200 + int fmatch = FMATCH_WRONG; 201 + struct nf_osf_hdr_ctx ctx; 68 202 const struct tcphdr *tcp; 69 - struct tcphdr _tcph; 70 - bool df; 71 203 72 - tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); 204 + memset(&ctx, 0, sizeof(ctx)); 205 + 206 + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); 73 207 if (!tcp) 74 208 return false; 75 209 76 - if (!tcp->syn) 77 - return false; 210 + ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1; 78 211 79 - totlen = ntohs(ip->tot_len); 80 - df = ntohs(ip->frag_off) & IP_DF; 81 - window = ntohs(tcp->window); 82 - 83 - if (tcp->doff * 4 > sizeof(struct tcphdr)) { 84 - optsize = tcp->doff * 4 - sizeof(struct tcphdr); 85 - 86 - _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + 87 - sizeof(struct tcphdr), optsize, opts); 88 - } 89 - 90 - list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) { 91 - int foptsize, optnum; 212 + list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { 92 213 93 214 f = &kf->finger; 94 215 95 216 if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre)) 96 217 continue; 97 218 98 - optp = _optp; 99 - fmatch = FMATCH_WRONG; 100 - 101 - if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl)) 219 + if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) 102 220 continue; 103 221 104 - /* 105 - * Should not happen if userspace parser was written correctly. 
106 - */ 107 - if (f->wss.wc >= OSF_WSS_MAX) 108 - continue; 109 - 110 - /* Check options */ 111 - 112 - foptsize = 0; 113 - for (optnum = 0; optnum < f->opt_num; ++optnum) 114 - foptsize += f->opt[optnum].length; 115 - 116 - if (foptsize > MAX_IPOPTLEN || 117 - optsize > MAX_IPOPTLEN || 118 - optsize != foptsize) 119 - continue; 120 - 121 - check_WSS = f->wss.wc; 122 - 123 - for (optnum = 0; optnum < f->opt_num; ++optnum) { 124 - if (f->opt[optnum].kind == (*optp)) { 125 - __u32 len = f->opt[optnum].length; 126 - const __u8 *optend = optp + len; 127 - 128 - fmatch = FMATCH_OK; 129 - 130 - switch (*optp) { 131 - case OSFOPT_MSS: 132 - mss = optp[3]; 133 - mss <<= 8; 134 - mss |= optp[2]; 135 - 136 - mss = ntohs((__force __be16)mss); 137 - break; 138 - case OSFOPT_TS: 139 - break; 140 - } 141 - 142 - optp = optend; 143 - } else 144 - fmatch = FMATCH_OPT_WRONG; 145 - 146 - if (fmatch != FMATCH_OK) 147 - break; 148 - } 149 - 150 - if (fmatch != FMATCH_OPT_WRONG) { 151 - fmatch = FMATCH_WRONG; 152 - 153 - switch (check_WSS) { 154 - case OSF_WSS_PLAIN: 155 - if (f->wss.val == 0 || window == f->wss.val) 156 - fmatch = FMATCH_OK; 157 - break; 158 - case OSF_WSS_MSS: 159 - /* 160 - * Some smart modems decrease mangle MSS to 161 - * SMART_MSS_2, so we check standard, decreased 162 - * and the one provided in the fingerprint MSS 163 - * values. 
164 - */ 165 - #define SMART_MSS_1 1460 166 - #define SMART_MSS_2 1448 167 - if (window == f->wss.val * mss || 168 - window == f->wss.val * SMART_MSS_1 || 169 - window == f->wss.val * SMART_MSS_2) 170 - fmatch = FMATCH_OK; 171 - break; 172 - case OSF_WSS_MTU: 173 - if (window == f->wss.val * (mss + 40) || 174 - window == f->wss.val * (SMART_MSS_1 + 40) || 175 - window == f->wss.val * (SMART_MSS_2 + 40)) 176 - fmatch = FMATCH_OK; 177 - break; 178 - case OSF_WSS_MODULO: 179 - if ((window % f->wss.val) == 0) 180 - fmatch = FMATCH_OK; 181 - break; 182 - } 183 - } 184 - 185 - if (fmatch != FMATCH_OK) 186 - continue; 222 + fmatch = FMATCH_OK; 187 223 188 224 fcount++; 189 225
+132 -62
net/netfilter/nf_tables_api.c
··· 455 455 return NULL; 456 456 } 457 457 458 + /* 459 + * Loading a module requires dropping mutex that guards the 460 + * transaction. 461 + * We first need to abort any pending transactions as once 462 + * mutex is unlocked a different client could start a new 463 + * transaction. It must not see any 'future generation' 464 + * changes * as these changes will never happen. 465 + */ 466 + #ifdef CONFIG_MODULES 467 + static int __nf_tables_abort(struct net *net); 468 + 469 + static void nft_request_module(struct net *net, const char *fmt, ...) 470 + { 471 + char module_name[MODULE_NAME_LEN]; 472 + va_list args; 473 + int ret; 474 + 475 + __nf_tables_abort(net); 476 + 477 + va_start(args, fmt); 478 + ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); 479 + va_end(args); 480 + if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret)) 481 + return; 482 + 483 + mutex_unlock(&net->nft.commit_mutex); 484 + request_module("%s", module_name); 485 + mutex_lock(&net->nft.commit_mutex); 486 + } 487 + #endif 488 + 489 + static void lockdep_nfnl_nft_mutex_not_held(void) 490 + { 491 + #ifdef CONFIG_PROVE_LOCKING 492 + WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 493 + #endif 494 + } 495 + 458 496 static const struct nft_chain_type * 459 - nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) 497 + nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, 498 + u8 family, bool autoload) 460 499 { 461 500 const struct nft_chain_type *type; 462 501 463 502 type = __nf_tables_chain_type_lookup(nla, family); 464 503 if (type != NULL) 465 504 return type; 505 + 506 + lockdep_nfnl_nft_mutex_not_held(); 466 507 #ifdef CONFIG_MODULES 467 508 if (autoload) { 468 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 469 - request_module("nft-chain-%u-%.*s", family, 470 - nla_len(nla), (const char *)nla_data(nla)); 471 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 509 + nft_request_module(net, "nft-chain-%u-%.*s", family, 510 + 
nla_len(nla), (const char *)nla_data(nla)); 472 511 type = __nf_tables_chain_type_lookup(nla, family); 473 512 if (type != NULL) 474 513 return ERR_PTR(-EAGAIN); ··· 811 772 struct nft_ctx ctx; 812 773 int err; 813 774 775 + lockdep_assert_held(&net->nft.commit_mutex); 814 776 attr = nla[NFTA_TABLE_NAME]; 815 777 table = nft_table_lookup(net, attr, family, genmask); 816 778 if (IS_ERR(table)) { ··· 1052 1012 return ERR_PTR(-ENOENT); 1053 1013 } 1054 1014 1055 - static struct nft_chain *nft_chain_lookup(struct nft_table *table, 1015 + static bool lockdep_commit_lock_is_held(struct net *net) 1016 + { 1017 + #ifdef CONFIG_PROVE_LOCKING 1018 + return lockdep_is_held(&net->nft.commit_mutex); 1019 + #else 1020 + return true; 1021 + #endif 1022 + } 1023 + 1024 + static struct nft_chain *nft_chain_lookup(struct net *net, 1025 + struct nft_table *table, 1056 1026 const struct nlattr *nla, u8 genmask) 1057 1027 { 1058 1028 char search[NFT_CHAIN_MAXNAMELEN + 1]; ··· 1075 1025 nla_strlcpy(search, nla, sizeof(search)); 1076 1026 1077 1027 WARN_ON(!rcu_read_lock_held() && 1078 - !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 1028 + !lockdep_commit_lock_is_held(net)); 1079 1029 1080 1030 chain = ERR_PTR(-ENOENT); 1081 1031 rcu_read_lock(); ··· 1315 1265 return PTR_ERR(table); 1316 1266 } 1317 1267 1318 - chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1268 + chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask); 1319 1269 if (IS_ERR(chain)) { 1320 1270 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); 1321 1271 return PTR_ERR(chain); ··· 1448 1398 struct net_device *dev; 1449 1399 int err; 1450 1400 1401 + lockdep_assert_held(&net->nft.commit_mutex); 1402 + lockdep_nfnl_nft_mutex_not_held(); 1403 + 1451 1404 err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], 1452 1405 nft_hook_policy, NULL); 1453 1406 if (err < 0) ··· 1465 1412 1466 1413 type = chain_type[family][NFT_CHAIN_T_DEFAULT]; 1467 1414 if (nla[NFTA_CHAIN_TYPE]) { 1468 - type = 
nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], 1415 + type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], 1469 1416 family, create); 1470 1417 if (IS_ERR(type)) 1471 1418 return PTR_ERR(type); ··· 1685 1632 nla[NFTA_CHAIN_NAME]) { 1686 1633 struct nft_chain *chain2; 1687 1634 1688 - chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1635 + chain2 = nft_chain_lookup(ctx->net, table, 1636 + nla[NFTA_CHAIN_NAME], genmask); 1689 1637 if (!IS_ERR(chain2)) 1690 1638 return -EEXIST; 1691 1639 } ··· 1748 1694 1749 1695 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 1750 1696 1697 + lockdep_assert_held(&net->nft.commit_mutex); 1698 + 1751 1699 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); 1752 1700 if (IS_ERR(table)) { 1753 1701 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); ··· 1768 1712 } 1769 1713 attr = nla[NFTA_CHAIN_HANDLE]; 1770 1714 } else { 1771 - chain = nft_chain_lookup(table, attr, genmask); 1715 + chain = nft_chain_lookup(net, table, attr, genmask); 1772 1716 if (IS_ERR(chain)) { 1773 1717 if (PTR_ERR(chain) != -ENOENT) { 1774 1718 NL_SET_BAD_ATTR(extack, attr); ··· 1846 1790 chain = nft_chain_lookup_byhandle(table, handle, genmask); 1847 1791 } else { 1848 1792 attr = nla[NFTA_CHAIN_NAME]; 1849 - chain = nft_chain_lookup(table, attr, genmask); 1793 + chain = nft_chain_lookup(net, table, attr, genmask); 1850 1794 } 1851 1795 if (IS_ERR(chain)) { 1852 1796 NL_SET_BAD_ATTR(extack, attr); ··· 1931 1875 return NULL; 1932 1876 } 1933 1877 1934 - static const struct nft_expr_type *nft_expr_type_get(u8 family, 1878 + static const struct nft_expr_type *nft_expr_type_get(struct net *net, 1879 + u8 family, 1935 1880 struct nlattr *nla) 1936 1881 { 1937 1882 const struct nft_expr_type *type; ··· 1944 1887 if (type != NULL && try_module_get(type->owner)) 1945 1888 return type; 1946 1889 1890 + lockdep_nfnl_nft_mutex_not_held(); 1947 1891 #ifdef CONFIG_MODULES 1948 1892 if (type == NULL) { 1949 - 
nfnl_unlock(NFNL_SUBSYS_NFTABLES); 1950 - request_module("nft-expr-%u-%.*s", family, 1951 - nla_len(nla), (char *)nla_data(nla)); 1952 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 1893 + nft_request_module(net, "nft-expr-%u-%.*s", family, 1894 + nla_len(nla), (char *)nla_data(nla)); 1953 1895 if (__nft_expr_type_get(family, nla)) 1954 1896 return ERR_PTR(-EAGAIN); 1955 1897 1956 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 1957 - request_module("nft-expr-%.*s", 1958 - nla_len(nla), (char *)nla_data(nla)); 1959 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 1898 + nft_request_module(net, "nft-expr-%.*s", 1899 + nla_len(nla), (char *)nla_data(nla)); 1960 1900 if (__nft_expr_type_get(family, nla)) 1961 1901 return ERR_PTR(-EAGAIN); 1962 1902 } ··· 2022 1968 if (err < 0) 2023 1969 return err; 2024 1970 2025 - type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); 1971 + type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]); 2026 1972 if (IS_ERR(type)) 2027 1973 return PTR_ERR(type); 2028 1974 ··· 2379 2325 return PTR_ERR(table); 2380 2326 } 2381 2327 2382 - chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2328 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); 2383 2329 if (IS_ERR(chain)) { 2384 2330 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2385 2331 return PTR_ERR(chain); ··· 2413 2359 { 2414 2360 struct nft_expr *expr; 2415 2361 2362 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 2416 2363 /* 2417 2364 * Careful: some expressions might not be initialized in case this 2418 2365 * is called on error from nf_tables_newrule(). 
··· 2482 2427 2483 2428 #define NFT_RULE_MAXEXPRS 128 2484 2429 2485 - static struct nft_expr_info *info; 2486 - 2487 2430 static int nf_tables_newrule(struct net *net, struct sock *nlsk, 2488 2431 struct sk_buff *skb, const struct nlmsghdr *nlh, 2489 2432 const struct nlattr * const nla[], ··· 2489 2436 { 2490 2437 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2491 2438 u8 genmask = nft_genmask_next(net); 2439 + struct nft_expr_info *info = NULL; 2492 2440 int family = nfmsg->nfgen_family; 2493 2441 struct nft_table *table; 2494 2442 struct nft_chain *chain; ··· 2504 2450 bool create; 2505 2451 u64 handle, pos_handle; 2506 2452 2453 + lockdep_assert_held(&net->nft.commit_mutex); 2454 + 2507 2455 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 2508 2456 2509 2457 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); ··· 2514 2458 return PTR_ERR(table); 2515 2459 } 2516 2460 2517 - chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2461 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); 2518 2462 if (IS_ERR(chain)) { 2519 2463 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2520 2464 return PTR_ERR(chain); ··· 2562 2506 n = 0; 2563 2507 size = 0; 2564 2508 if (nla[NFTA_RULE_EXPRESSIONS]) { 2509 + info = kvmalloc_array(NFT_RULE_MAXEXPRS, 2510 + sizeof(struct nft_expr_info), 2511 + GFP_KERNEL); 2512 + if (!info) 2513 + return -ENOMEM; 2514 + 2565 2515 nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { 2566 2516 err = -EINVAL; 2567 2517 if (nla_type(tmp) != NFTA_LIST_ELEM) ··· 2660 2598 list_add_rcu(&rule->list, &chain->rules); 2661 2599 } 2662 2600 } 2601 + kvfree(info); 2663 2602 chain->use++; 2664 2603 2665 2604 if (net->nft.validate_state == NFT_VALIDATE_DO) ··· 2674 2611 if (info[i].ops != NULL) 2675 2612 module_put(info[i].ops->type->owner); 2676 2613 } 2614 + kvfree(info); 2677 2615 return err; 2678 2616 } 2679 2617 ··· 2714 2650 } 2715 2651 2716 2652 if (nla[NFTA_RULE_CHAIN]) { 2717 - chain = 
nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2653 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], 2654 + genmask); 2718 2655 if (IS_ERR(chain)) { 2719 2656 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2720 2657 return PTR_ERR(chain); ··· 2807 2742 const struct nft_set_type *type; 2808 2743 u32 flags = 0; 2809 2744 2745 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 2746 + lockdep_nfnl_nft_mutex_not_held(); 2810 2747 #ifdef CONFIG_MODULES 2811 2748 if (list_empty(&nf_tables_set_types)) { 2812 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 2813 - request_module("nft-set"); 2814 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 2749 + nft_request_module(ctx->net, "nft-set"); 2815 2750 if (!list_empty(&nf_tables_set_types)) 2816 2751 return ERR_PTR(-EAGAIN); 2817 2752 } ··· 4844 4779 return NULL; 4845 4780 } 4846 4781 4847 - static const struct nft_object_type *nft_obj_type_get(u32 objtype) 4782 + static const struct nft_object_type * 4783 + nft_obj_type_get(struct net *net, u32 objtype) 4848 4784 { 4849 4785 const struct nft_object_type *type; 4850 4786 ··· 4853 4787 if (type != NULL && try_module_get(type->owner)) 4854 4788 return type; 4855 4789 4790 + lockdep_nfnl_nft_mutex_not_held(); 4856 4791 #ifdef CONFIG_MODULES 4857 4792 if (type == NULL) { 4858 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 4859 - request_module("nft-obj-%u", objtype); 4860 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 4793 + nft_request_module(net, "nft-obj-%u", objtype); 4861 4794 if (__nft_obj_type_get(objtype)) 4862 4795 return ERR_PTR(-EAGAIN); 4863 4796 } ··· 4908 4843 4909 4844 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); 4910 4845 4911 - type = nft_obj_type_get(objtype); 4846 + type = nft_obj_type_get(net, objtype); 4912 4847 if (IS_ERR(type)) 4913 4848 return PTR_ERR(type); 4914 4849 ··· 5404 5339 return NULL; 5405 5340 } 5406 5341 5407 - static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family) 5342 + static const struct nf_flowtable_type * 5343 + 
nft_flowtable_type_get(struct net *net, u8 family) 5408 5344 { 5409 5345 const struct nf_flowtable_type *type; 5410 5346 ··· 5413 5347 if (type != NULL && try_module_get(type->owner)) 5414 5348 return type; 5415 5349 5350 + lockdep_nfnl_nft_mutex_not_held(); 5416 5351 #ifdef CONFIG_MODULES 5417 5352 if (type == NULL) { 5418 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 5419 - request_module("nf-flowtable-%u", family); 5420 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 5353 + nft_request_module(net, "nf-flowtable-%u", family); 5421 5354 if (__nft_flowtable_type_get(family)) 5422 5355 return ERR_PTR(-EAGAIN); 5423 5356 } ··· 5496 5431 goto err1; 5497 5432 } 5498 5433 5499 - type = nft_flowtable_type_get(family); 5434 + type = nft_flowtable_type_get(net, family); 5500 5435 if (IS_ERR(type)) { 5501 5436 err = PTR_ERR(type); 5502 5437 goto err2; ··· 6267 6202 next_genbit = nft_gencursor_next(net); 6268 6203 6269 6204 g0 = rcu_dereference_protected(chain->rules_gen_0, 6270 - lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 6205 + lockdep_commit_lock_is_held(net)); 6271 6206 g1 = rcu_dereference_protected(chain->rules_gen_1, 6272 - lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 6207 + lockdep_commit_lock_is_held(net)); 6273 6208 6274 6209 /* No changes to this chain? 
*/ 6275 6210 if (chain->rules_next == NULL) { ··· 6477 6412 6478 6413 nf_tables_commit_release(net); 6479 6414 nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); 6415 + mutex_unlock(&net->nft.commit_mutex); 6480 6416 6481 6417 return 0; 6482 6418 } ··· 6629 6563 6630 6564 static int nf_tables_abort(struct net *net, struct sk_buff *skb) 6631 6565 { 6632 - return __nf_tables_abort(net); 6566 + int ret = __nf_tables_abort(net); 6567 + 6568 + mutex_unlock(&net->nft.commit_mutex); 6569 + 6570 + return ret; 6633 6571 } 6634 6572 6635 6573 static bool nf_tables_valid_genid(struct net *net, u32 genid) 6636 6574 { 6637 - return net->nft.base_seq == genid; 6575 + bool genid_ok; 6576 + 6577 + mutex_lock(&net->nft.commit_mutex); 6578 + 6579 + genid_ok = genid == 0 || net->nft.base_seq == genid; 6580 + if (!genid_ok) 6581 + mutex_unlock(&net->nft.commit_mutex); 6582 + 6583 + /* else, commit mutex has to be released by commit or abort function */ 6584 + return genid_ok; 6638 6585 } 6639 6586 6640 6587 static const struct nfnetlink_subsystem nf_tables_subsys = { ··· 6659 6580 .abort = nf_tables_abort, 6660 6581 .cleanup = nf_tables_cleanup, 6661 6582 .valid_genid = nf_tables_valid_genid, 6583 + .owner = THIS_MODULE, 6662 6584 }; 6663 6585 6664 6586 int nft_chain_validate_dependency(const struct nft_chain *chain, ··· 6986 6906 case NFT_GOTO: 6987 6907 if (!tb[NFTA_VERDICT_CHAIN]) 6988 6908 return -EINVAL; 6989 - chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN], 6990 - genmask); 6909 + chain = nft_chain_lookup(ctx->net, ctx->table, 6910 + tb[NFTA_VERDICT_CHAIN], genmask); 6991 6911 if (IS_ERR(chain)) 6992 6912 return PTR_ERR(chain); 6993 6913 if (nft_is_base_chain(chain)) ··· 7232 7152 { 7233 7153 INIT_LIST_HEAD(&net->nft.tables); 7234 7154 INIT_LIST_HEAD(&net->nft.commit_list); 7155 + mutex_init(&net->nft.commit_mutex); 7235 7156 net->nft.base_seq = 1; 7236 7157 net->nft.validate_state = NFT_VALIDATE_SKIP; 7237 7158 ··· 7241 7160 7242 7161 static void __net_exit 
nf_tables_exit_net(struct net *net) 7243 7162 { 7244 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 7163 + mutex_lock(&net->nft.commit_mutex); 7245 7164 if (!list_empty(&net->nft.commit_list)) 7246 7165 __nf_tables_abort(net); 7247 7166 __nft_release_tables(net); 7248 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 7167 + mutex_unlock(&net->nft.commit_mutex); 7249 7168 WARN_ON_ONCE(!list_empty(&net->nft.tables)); 7250 7169 } 7251 7170 ··· 7260 7179 7261 7180 nft_chain_filter_init(); 7262 7181 7263 - info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info), 7264 - GFP_KERNEL); 7265 - if (info == NULL) { 7266 - err = -ENOMEM; 7267 - goto err1; 7268 - } 7269 - 7270 7182 err = nf_tables_core_module_init(); 7271 7183 if (err < 0) 7272 - goto err2; 7184 + return err; 7273 7185 7274 7186 err = nfnetlink_subsys_register(&nf_tables_subsys); 7275 7187 if (err < 0) 7276 - goto err3; 7188 + goto err; 7277 7189 7278 7190 register_netdevice_notifier(&nf_tables_flowtable_notifier); 7279 7191 7280 7192 return register_pernet_subsys(&nf_tables_net_ops); 7281 - err3: 7193 + err: 7282 7194 nf_tables_core_module_exit(); 7283 - err2: 7284 - kfree(info); 7285 - err1: 7286 7195 return err; 7287 7196 } 7288 7197 ··· 7284 7213 unregister_pernet_subsys(&nf_tables_net_ops); 7285 7214 rcu_barrier(); 7286 7215 nf_tables_core_module_exit(); 7287 - kfree(info); 7288 7216 } 7289 7217 7290 7218 module_init(nf_tables_module_init);
+14 -9
net/netfilter/nfnetlink.c
··· 331 331 } 332 332 } 333 333 334 - if (!ss->commit || !ss->abort) { 334 + if (!ss->valid_genid || !ss->commit || !ss->abort) { 335 335 nfnl_unlock(subsys_id); 336 336 netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); 337 337 return kfree_skb(skb); 338 338 } 339 339 340 - if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { 340 + if (!try_module_get(ss->owner)) { 341 + nfnl_unlock(subsys_id); 342 + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); 343 + return kfree_skb(skb); 344 + } 345 + 346 + if (!ss->valid_genid(net, genid)) { 347 + module_put(ss->owner); 341 348 nfnl_unlock(subsys_id); 342 349 netlink_ack(oskb, nlh, -ERESTART, NULL); 343 350 return kfree_skb(skb); 344 351 } 352 + 353 + nfnl_unlock(subsys_id); 345 354 346 355 while (skb->len >= nlmsg_total_size(0)) { 347 356 int msglen, type; ··· 473 464 } 474 465 done: 475 466 if (status & NFNL_BATCH_REPLAY) { 476 - const struct nfnetlink_subsystem *ss2; 477 - 478 - ss2 = nfnl_dereference_protected(subsys_id); 479 - if (ss2 == ss) 480 - ss->abort(net, oskb); 467 + ss->abort(net, oskb); 481 468 nfnl_err_reset(&err_list); 482 - nfnl_unlock(subsys_id); 483 469 kfree_skb(skb); 470 + module_put(ss->owner); 484 471 goto replay; 485 472 } else if (status == NFNL_BATCH_DONE) { 486 473 err = ss->commit(net, oskb); ··· 494 489 ss->cleanup(net); 495 490 496 491 nfnl_err_deliver(&err_list, oskb); 497 - nfnl_unlock(subsys_id); 498 492 kfree_skb(skb); 493 + module_put(ss->owner); 499 494 } 500 495 501 496 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
+2 -2
net/netfilter/nft_chain_filter.c
··· 322 322 if (!ctx.net) 323 323 return NOTIFY_DONE; 324 324 325 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 325 + mutex_lock(&ctx.net->nft.commit_mutex); 326 326 list_for_each_entry(table, &ctx.net->nft.tables, list) { 327 327 if (table->family != NFPROTO_NETDEV) 328 328 continue; ··· 337 337 nft_netdev_event(event, dev, &ctx); 338 338 } 339 339 } 340 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 340 + mutex_unlock(&ctx.net->nft.commit_mutex); 341 341 put_net(ctx.net); 342 342 343 343 return NOTIFY_DONE;
+12 -24
net/netfilter/nft_connlimit.c
··· 14 14 #include <net/netfilter/nf_conntrack_zones.h> 15 15 16 16 struct nft_connlimit { 17 - spinlock_t lock; 18 - struct hlist_head hhead; 19 - u32 limit; 20 - bool invert; 17 + struct nf_conncount_list list; 18 + u32 limit; 19 + bool invert; 21 20 }; 22 21 23 22 static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, ··· 44 45 return; 45 46 } 46 47 47 - spin_lock_bh(&priv->lock); 48 - count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone, 49 - &addit); 48 + nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, 49 + &addit); 50 + count = priv->list.count; 50 51 51 52 if (!addit) 52 53 goto out; 53 54 54 - if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) { 55 + if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { 55 56 regs->verdict.code = NF_DROP; 56 - spin_unlock_bh(&priv->lock); 57 57 return; 58 58 } 59 59 count++; 60 60 out: 61 - spin_unlock_bh(&priv->lock); 62 61 63 62 if ((count > priv->limit) ^ priv->invert) { 64 63 regs->verdict.code = NFT_BREAK; ··· 84 87 invert = true; 85 88 } 86 89 87 - spin_lock_init(&priv->lock); 88 - INIT_HLIST_HEAD(&priv->hhead); 90 + nf_conncount_list_init(&priv->list); 89 91 priv->limit = limit; 90 92 priv->invert = invert; 91 93 ··· 95 99 struct nft_connlimit *priv) 96 100 { 97 101 nf_ct_netns_put(ctx->net, ctx->family); 98 - nf_conncount_cache_free(&priv->hhead); 102 + nf_conncount_cache_free(&priv->list); 99 103 } 100 104 101 105 static int nft_connlimit_do_dump(struct sk_buff *skb, ··· 208 212 struct nft_connlimit *priv_dst = nft_expr_priv(dst); 209 213 struct nft_connlimit *priv_src = nft_expr_priv(src); 210 214 211 - spin_lock_init(&priv_dst->lock); 212 - INIT_HLIST_HEAD(&priv_dst->hhead); 215 + nf_conncount_list_init(&priv_dst->list); 213 216 priv_dst->limit = priv_src->limit; 214 217 priv_dst->invert = priv_src->invert; 215 218 ··· 220 225 { 221 226 struct nft_connlimit *priv = nft_expr_priv(expr); 222 227 223 - nf_conncount_cache_free(&priv->hhead); 228 
+ nf_conncount_cache_free(&priv->list); 224 229 } 225 230 226 231 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) 227 232 { 228 233 struct nft_connlimit *priv = nft_expr_priv(expr); 229 - bool addit, ret; 230 234 231 - spin_lock_bh(&priv->lock); 232 - nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit); 233 - 234 - ret = hlist_empty(&priv->hhead); 235 - spin_unlock_bh(&priv->lock); 236 - 237 - return ret; 235 + return nf_conncount_gc_list(net, &priv->list); 238 236 } 239 237 240 238 static struct nft_expr_type nft_connlimit_type;
+1 -1
net/netfilter/nft_ct.c
··· 870 870 if (test_bit(IPS_HELPER_BIT, &ct->status)) 871 871 return; 872 872 873 - help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC); 873 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 874 874 if (help) { 875 875 rcu_assign_pointer(help->helper, to_assign); 876 876 set_bit(IPS_HELPER_BIT, &ct->status);
+2
net/netfilter/nft_dynset.c
··· 118 118 u64 timeout; 119 119 int err; 120 120 121 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 122 + 121 123 if (tb[NFTA_DYNSET_SET_NAME] == NULL || 122 124 tb[NFTA_DYNSET_OP] == NULL || 123 125 tb[NFTA_DYNSET_SREG_KEY] == NULL)
+14 -3
net/netfilter/nft_socket.c
··· 31 31 case NFPROTO_IPV4: 32 32 sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); 33 33 break; 34 - #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 34 + #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) 35 35 case NFPROTO_IPV6: 36 36 sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); 37 37 break; ··· 43 43 } 44 44 45 45 if (!sk) { 46 - nft_reg_store8(dest, 0); 46 + regs->verdict.code = NFT_BREAK; 47 47 return; 48 48 } 49 49 ··· 53 53 switch(priv->key) { 54 54 case NFT_SOCKET_TRANSPARENT: 55 55 nft_reg_store8(dest, inet_sk_transparent(sk)); 56 + break; 57 + case NFT_SOCKET_MARK: 58 + if (sk_fullsock(sk)) { 59 + *dest = sk->sk_mark; 60 + } else { 61 + regs->verdict.code = NFT_BREAK; 62 + return; 63 + } 56 64 break; 57 65 default: 58 66 WARN_ON(1); ··· 85 77 86 78 switch(ctx->family) { 87 79 case NFPROTO_IPV4: 88 - #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 80 + #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) 89 81 case NFPROTO_IPV6: 90 82 #endif 91 83 case NFPROTO_INET: ··· 98 90 switch(priv->key) { 99 91 case NFT_SOCKET_TRANSPARENT: 100 92 len = sizeof(u8); 93 + break; 94 + case NFT_SOCKET_MARK: 95 + len = sizeof(u32); 101 96 break; 102 97 default: 103 98 return -EOPNOTSUPP;
+120 -11
net/netfilter/utils.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <linux/kernel.h> 2 3 #include <linux/netfilter.h> 3 4 #include <linux/netfilter_ipv4.h> 4 5 #include <linux/netfilter_ipv6.h> 5 6 #include <net/netfilter/nf_queue.h> 7 + #include <net/ip6_checksum.h> 8 + 9 + #ifdef CONFIG_INET 10 + __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 11 + unsigned int dataoff, u8 protocol) 12 + { 13 + const struct iphdr *iph = ip_hdr(skb); 14 + __sum16 csum = 0; 15 + 16 + switch (skb->ip_summed) { 17 + case CHECKSUM_COMPLETE: 18 + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 19 + break; 20 + if ((protocol == 0 && !csum_fold(skb->csum)) || 21 + !csum_tcpudp_magic(iph->saddr, iph->daddr, 22 + skb->len - dataoff, protocol, 23 + skb->csum)) { 24 + skb->ip_summed = CHECKSUM_UNNECESSARY; 25 + break; 26 + } 27 + /* fall through */ 28 + case CHECKSUM_NONE: 29 + if (protocol == 0) 30 + skb->csum = 0; 31 + else 32 + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 33 + skb->len - dataoff, 34 + protocol, 0); 35 + csum = __skb_checksum_complete(skb); 36 + } 37 + return csum; 38 + } 39 + EXPORT_SYMBOL(nf_ip_checksum); 40 + #endif 41 + 42 + static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 43 + unsigned int dataoff, unsigned int len, 44 + u8 protocol) 45 + { 46 + const struct iphdr *iph = ip_hdr(skb); 47 + __sum16 csum = 0; 48 + 49 + switch (skb->ip_summed) { 50 + case CHECKSUM_COMPLETE: 51 + if (len == skb->len - dataoff) 52 + return nf_ip_checksum(skb, hook, dataoff, protocol); 53 + /* fall through */ 54 + case CHECKSUM_NONE: 55 + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 56 + skb->len - dataoff, 0); 57 + skb->ip_summed = CHECKSUM_NONE; 58 + return __skb_checksum_complete_head(skb, dataoff + len); 59 + } 60 + return csum; 61 + } 62 + 63 + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 64 + unsigned int dataoff, u8 protocol) 65 + { 66 + const struct ipv6hdr *ip6h = ipv6_hdr(skb); 67 + 
__sum16 csum = 0; 68 + 69 + switch (skb->ip_summed) { 70 + case CHECKSUM_COMPLETE: 71 + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 72 + break; 73 + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 74 + skb->len - dataoff, protocol, 75 + csum_sub(skb->csum, 76 + skb_checksum(skb, 0, 77 + dataoff, 0)))) { 78 + skb->ip_summed = CHECKSUM_UNNECESSARY; 79 + break; 80 + } 81 + /* fall through */ 82 + case CHECKSUM_NONE: 83 + skb->csum = ~csum_unfold( 84 + csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 85 + skb->len - dataoff, 86 + protocol, 87 + csum_sub(0, 88 + skb_checksum(skb, 0, 89 + dataoff, 0)))); 90 + csum = __skb_checksum_complete(skb); 91 + } 92 + return csum; 93 + } 94 + EXPORT_SYMBOL(nf_ip6_checksum); 95 + 96 + static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, 97 + unsigned int dataoff, unsigned int len, 98 + u8 protocol) 99 + { 100 + const struct ipv6hdr *ip6h = ipv6_hdr(skb); 101 + __wsum hsum; 102 + __sum16 csum = 0; 103 + 104 + switch (skb->ip_summed) { 105 + case CHECKSUM_COMPLETE: 106 + if (len == skb->len - dataoff) 107 + return nf_ip6_checksum(skb, hook, dataoff, protocol); 108 + /* fall through */ 109 + case CHECKSUM_NONE: 110 + hsum = skb_checksum(skb, 0, dataoff, 0); 111 + skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, 112 + &ip6h->daddr, 113 + skb->len - dataoff, 114 + protocol, 115 + csum_sub(0, hsum))); 116 + skb->ip_summed = CHECKSUM_NONE; 117 + return __skb_checksum_complete_head(skb, dataoff + len); 118 + } 119 + return csum; 120 + }; 6 121 7 122 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, 8 - unsigned int dataoff, u_int8_t protocol, 123 + unsigned int dataoff, u8 protocol, 9 124 unsigned short family) 10 125 { 11 - const struct nf_ipv6_ops *v6ops; 12 126 __sum16 csum = 0; 13 127 14 128 switch (family) { ··· 130 16 csum = nf_ip_checksum(skb, hook, dataoff, protocol); 131 17 break; 132 18 case AF_INET6: 133 - v6ops = rcu_dereference(nf_ipv6_ops); 134 - if (v6ops) 135 - csum = 
v6ops->checksum(skb, hook, dataoff, protocol); 19 + csum = nf_ip6_checksum(skb, hook, dataoff, protocol); 136 20 break; 137 21 } 138 22 ··· 140 28 141 29 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, 142 30 unsigned int dataoff, unsigned int len, 143 - u_int8_t protocol, unsigned short family) 31 + u8 protocol, unsigned short family) 144 32 { 145 - const struct nf_ipv6_ops *v6ops; 146 33 __sum16 csum = 0; 147 34 148 35 switch (family) { ··· 150 39 protocol); 151 40 break; 152 41 case AF_INET6: 153 - v6ops = rcu_dereference(nf_ipv6_ops); 154 - if (v6ops) 155 - csum = v6ops->checksum_partial(skb, hook, dataoff, len, 156 - protocol); 42 + csum = nf_ip6_checksum_partial(skb, hook, dataoff, len, 43 + protocol); 157 44 break; 158 45 } 159 46
+1 -1
net/netfilter/xt_CT.c
··· 93 93 return -ENOENT; 94 94 } 95 95 96 - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); 96 + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); 97 97 if (help == NULL) { 98 98 nf_conntrack_helper_put(helper); 99 99 return -ENOMEM;
+2 -2
net/netfilter/xt_TEE.c
··· 38 38 return XT_CONTINUE; 39 39 } 40 40 41 - #if IS_ENABLED(CONFIG_IPV6) 41 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 42 42 static unsigned int 43 43 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) 44 44 { ··· 141 141 .destroy = tee_tg_destroy, 142 142 .me = THIS_MODULE, 143 143 }, 144 - #if IS_ENABLED(CONFIG_IPV6) 144 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 145 145 { 146 146 .name = "TEE", 147 147 .revision = 1,
-9
net/netfilter/xt_TPROXY.c
··· 36 36 #include <net/netfilter/nf_tproxy.h> 37 37 #include <linux/netfilter/xt_TPROXY.h> 38 38 39 - /* assign a socket to the skb -- consumes sk */ 40 - static void 41 - nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 42 - { 43 - skb_orphan(skb); 44 - skb->sk = sk; 45 - skb->destructor = sock_edemux; 46 - } 47 - 48 39 static unsigned int 49 40 tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, 50 41 u_int32_t mark_mask, u_int32_t mark_value)
+5 -15
net/openvswitch/conntrack.c
··· 26 26 #include <net/netfilter/nf_conntrack_seqadj.h> 27 27 #include <net/netfilter/nf_conntrack_zones.h> 28 28 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 29 + #include <net/ipv6_frag.h> 29 30 30 31 #ifdef CONFIG_NF_NAT_NEEDED 31 32 #include <linux/netfilter/nf_nat.h> ··· 608 607 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 609 608 u8 l3num, struct sk_buff *skb, bool natted) 610 609 { 611 - const struct nf_conntrack_l3proto *l3proto; 612 - const struct nf_conntrack_l4proto *l4proto; 613 610 struct nf_conntrack_tuple tuple; 614 611 struct nf_conntrack_tuple_hash *h; 615 612 struct nf_conn *ct; 616 - unsigned int dataoff; 617 - u8 protonum; 618 613 619 - l3proto = __nf_ct_l3proto_find(l3num); 620 - if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 621 - &protonum) <= 0) { 622 - pr_debug("ovs_ct_find_existing: Can't get protonum\n"); 623 - return NULL; 624 - } 625 - l4proto = __nf_ct_l4proto_find(l3num, protonum); 626 - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 627 - protonum, net, &tuple, l3proto, l4proto)) { 614 + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, 615 + net, &tuple)) { 628 616 pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 629 617 return NULL; 630 618 } ··· 622 632 if (natted) { 623 633 struct nf_conntrack_tuple inverse; 624 634 625 - if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { 635 + if (!nf_ct_invert_tuplepr(&inverse, &tuple)) { 626 636 pr_debug("ovs_ct_find_existing: Inversion failed!\n"); 627 637 return NULL; 628 638 } ··· 1304 1314 return -EINVAL; 1305 1315 } 1306 1316 1307 - help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); 1317 + help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL); 1308 1318 if (!help) { 1309 1319 nf_conntrack_helper_put(helper); 1310 1320 return -ENOMEM;