Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree:

1) No need to set ttl from reject action for the bridge family, from
Taehee Yoo.

2) Use a fixed timeout for flows that are passed up from the flowtable
to conntrack, from Florian Westphal.

3) More preparation patches for tproxy support for nf_tables, from Mate
Eckl.

4) Remove unnecessary indirection in core IPv6 checksum function, from
Florian Westphal.

5) Use nf_ct_get_tuplepr() from openvswitch, instead of opencoding it.
From Florian Westphal.

6) socket match now selects socket infrastructure, instead of depending
on it. From Mate Eckl.

7) Patch series to simplify conntrack tuple building/parsing from packet
path and ctnetlink, from Florian Westphal.

8) Fetch timeout policy from protocol helpers, instead of doing it from
core, from Florian Westphal.

9) Merge IPv4 and IPv6 protocol trackers into conntrack core, from
Florian Westphal.

10) Depend on CONFIG_NF_TABLES_IPV6 and CONFIG_IP6_NF_IPTABLES
respectively, instead of IPV6. Patch from Mate Eckl.

11) Add specific function for garbage collection in conncount,
from Yi-Hung Wei.

12) Cache number of elements in the connlimit list, from Yi-Hung Wei.

13) Move locking to nf_conncount, from Yi-Hung Wei.

14) Series of patches to add lockless tree traversal in nf_conncount,
from Yi-Hung Wei.

15) Resolve clash in matching conntracks when race happens, from
Martynas Pumputis.

16) If connection entry times out, remove template entry from the
ip_vs_conn_tab table to improve behaviour under flood, from
Julian Anastasov.

17) Remove useless parameter from nf_ct_helper_ext_add(), from Gao feng.

18) Call abort from 2-phase commit protocol before requesting modules,
make sure this is done under the mutex, from Florian Westphal.

19) Grab module reference when starting transaction, also from Florian.

20) Dynamically allocate expression info array for pre-parsing, from
Florian.

21) Add per netns mutex for nf_tables, from Florian Westphal.

22) A couple of patches to simplify and refactor nf_osf code to prepare
for nft_osf support.

23) Break evaluation on missing socket, from Mate Eckl.

24) Allow to match socket mark from nft_socket, from Mate Eckl.

25) Remove dependency on nf_defrag_ipv6, now that IPv6 tracker is
built-in into nf_conntrack. From Florian Westphal.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+2035 -2414
+1
include/linux/netfilter/nfnetlink.h
··· 29 29 __u8 subsys_id; /* nfnetlink subsystem ID */ 30 30 __u8 cb_count; /* number of callbacks */ 31 31 const struct nfnl_callback *cb; /* callback for individual types */ 32 + struct module *owner; 32 33 int (*commit)(struct net *net, struct sk_buff *skb); 33 34 int (*abort)(struct net *net, struct sk_buff *skb); 34 35 void (*cleanup)(struct net *net);
-11
include/linux/netfilter_ipv4.h
··· 23 23 #ifdef CONFIG_INET 24 24 __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 25 25 unsigned int dataoff, u_int8_t protocol); 26 - __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 27 - unsigned int dataoff, unsigned int len, 28 - u_int8_t protocol); 29 26 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, 30 27 bool strict); 31 28 int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry); 32 29 #else 33 30 static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 34 31 unsigned int dataoff, u_int8_t protocol) 35 - { 36 - return 0; 37 - } 38 - static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb, 39 - unsigned int hook, 40 - unsigned int dataoff, 41 - unsigned int len, 42 - u_int8_t protocol) 43 32 { 44 33 return 0; 45 34 }
-5
include/linux/netfilter_ipv6.h
··· 30 30 void (*route_input)(struct sk_buff *skb); 31 31 int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, 32 32 int (*output)(struct net *, struct sock *, struct sk_buff *)); 33 - __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook, 34 - unsigned int dataoff, u_int8_t protocol); 35 - __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook, 36 - unsigned int dataoff, unsigned int len, 37 - u_int8_t protocol); 38 33 int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, 39 34 bool strict); 40 35 int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+17 -1
include/net/ip_vs.h
··· 335 335 IP_VS_SCTP_S_LAST 336 336 }; 337 337 338 + /* Connection templates use bits from state */ 339 + #define IP_VS_CTPL_S_NONE 0x0000 340 + #define IP_VS_CTPL_S_ASSURED 0x0001 341 + #define IP_VS_CTPL_S_LAST 0x0002 342 + 338 343 /* Delta sequence info structure 339 344 * Each ip_vs_conn has 2 (output AND input seq. changes). 340 345 * Only used in the VS/NAT. ··· 1226 1221 struct ip_vs_dest *dest, __u32 fwmark); 1227 1222 void ip_vs_conn_expire_now(struct ip_vs_conn *cp); 1228 1223 1229 - const char *ip_vs_state_name(__u16 proto, int state); 1224 + const char *ip_vs_state_name(const struct ip_vs_conn *cp); 1230 1225 1231 1226 void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); 1232 1227 int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); ··· 1292 1287 1293 1288 cp->control = ctl_cp; 1294 1289 atomic_inc(&ctl_cp->n_control); 1290 + } 1291 + 1292 + /* Mark our template as assured */ 1293 + static inline void 1294 + ip_vs_control_assure_ct(struct ip_vs_conn *cp) 1295 + { 1296 + struct ip_vs_conn *ct = cp->control; 1297 + 1298 + if (ct && !(ct->state & IP_VS_CTPL_S_ASSURED) && 1299 + (ct->flags & IP_VS_CONN_F_TEMPLATE)) 1300 + ct->state |= IP_VS_CTPL_S_ASSURED; 1295 1301 } 1296 1302 1297 1303 /* IPVS netns init & cleanup functions */
-28
include/net/ipv6.h
··· 574 574 } 575 575 #endif 576 576 577 - struct inet_frag_queue; 578 - 579 - enum ip6_defrag_users { 580 - IP6_DEFRAG_LOCAL_DELIVER, 581 - IP6_DEFRAG_CONNTRACK_IN, 582 - __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, 583 - IP6_DEFRAG_CONNTRACK_OUT, 584 - __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, 585 - IP6_DEFRAG_CONNTRACK_BRIDGE_IN, 586 - __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, 587 - }; 588 - 589 - void ip6_frag_init(struct inet_frag_queue *q, const void *a); 590 - extern const struct rhashtable_params ip6_rhash_params; 591 - 592 - /* 593 - * Equivalent of ipv4 struct ip 594 - */ 595 - struct frag_queue { 596 - struct inet_frag_queue q; 597 - 598 - int iif; 599 - __u16 nhoffset; 600 - u8 ecn; 601 - }; 602 - 603 - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq); 604 - 605 577 static inline bool ipv6_addr_any(const struct in6_addr *a) 606 578 { 607 579 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+104
include/net/ipv6_frag.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _IPV6_FRAG_H 3 + #define _IPV6_FRAG_H 4 + #include <linux/kernel.h> 5 + #include <net/addrconf.h> 6 + #include <net/ipv6.h> 7 + #include <net/inet_frag.h> 8 + 9 + enum ip6_defrag_users { 10 + IP6_DEFRAG_LOCAL_DELIVER, 11 + IP6_DEFRAG_CONNTRACK_IN, 12 + __IP6_DEFRAG_CONNTRACK_IN = IP6_DEFRAG_CONNTRACK_IN + USHRT_MAX, 13 + IP6_DEFRAG_CONNTRACK_OUT, 14 + __IP6_DEFRAG_CONNTRACK_OUT = IP6_DEFRAG_CONNTRACK_OUT + USHRT_MAX, 15 + IP6_DEFRAG_CONNTRACK_BRIDGE_IN, 16 + __IP6_DEFRAG_CONNTRACK_BRIDGE_IN = IP6_DEFRAG_CONNTRACK_BRIDGE_IN + USHRT_MAX, 17 + }; 18 + 19 + /* 20 + * Equivalent of ipv4 struct ip 21 + */ 22 + struct frag_queue { 23 + struct inet_frag_queue q; 24 + 25 + int iif; 26 + __u16 nhoffset; 27 + u8 ecn; 28 + }; 29 + 30 + #if IS_ENABLED(CONFIG_IPV6) 31 + static inline void ip6frag_init(struct inet_frag_queue *q, const void *a) 32 + { 33 + struct frag_queue *fq = container_of(q, struct frag_queue, q); 34 + const struct frag_v6_compare_key *key = a; 35 + 36 + q->key.v6 = *key; 37 + fq->ecn = 0; 38 + } 39 + 40 + static inline u32 ip6frag_key_hashfn(const void *data, u32 len, u32 seed) 41 + { 42 + return jhash2(data, 43 + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 44 + } 45 + 46 + static inline u32 ip6frag_obj_hashfn(const void *data, u32 len, u32 seed) 47 + { 48 + const struct inet_frag_queue *fq = data; 49 + 50 + return jhash2((const u32 *)&fq->key.v6, 51 + sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 52 + } 53 + 54 + static inline int 55 + ip6frag_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) 56 + { 57 + const struct frag_v6_compare_key *key = arg->key; 58 + const struct inet_frag_queue *fq = ptr; 59 + 60 + return !!memcmp(&fq->key, key, sizeof(*key)); 61 + } 62 + 63 + static inline void 64 + ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) 65 + { 66 + struct net_device *dev = NULL; 67 + struct sk_buff *head; 68 + 69 + rcu_read_lock(); 70 + 
spin_lock(&fq->q.lock); 71 + 72 + if (fq->q.flags & INET_FRAG_COMPLETE) 73 + goto out; 74 + 75 + inet_frag_kill(&fq->q); 76 + 77 + dev = dev_get_by_index_rcu(net, fq->iif); 78 + if (!dev) 79 + goto out; 80 + 81 + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 82 + __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); 83 + 84 + /* Don't send error if the first segment did not arrive. */ 85 + head = fq->q.fragments; 86 + if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) 87 + goto out; 88 + 89 + head->dev = dev; 90 + skb_get(head); 91 + spin_unlock(&fq->q.lock); 92 + 93 + icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); 94 + kfree_skb(head); 95 + goto out_rcu_unlock; 96 + 97 + out: 98 + spin_unlock(&fq->q.lock); 99 + out_rcu_unlock: 100 + rcu_read_unlock(); 101 + inet_frag_put(&fq->q); 102 + } 103 + #endif 104 + #endif
-3
include/net/netfilter/ipv4/nf_conntrack_ipv4.h
··· 10 10 #ifndef _NF_CONNTRACK_IPV4_H 11 11 #define _NF_CONNTRACK_IPV4_H 12 12 13 - 14 - const extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; 15 - 16 13 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4; 17 14 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4; 18 15 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
+5
include/net/netfilter/nf_conntrack.h
··· 41 41 /* insert expect proto private data here */ 42 42 }; 43 43 44 + struct nf_conntrack_net { 45 + unsigned int users4; 46 + unsigned int users6; 47 + }; 48 + 44 49 #include <linux/types.h> 45 50 #include <linux/skbuff.h> 46 51
+2 -13
include/net/netfilter/nf_conntrack_core.h
··· 14 14 #define _NF_CONNTRACK_CORE_H 15 15 16 16 #include <linux/netfilter.h> 17 - #include <net/netfilter/nf_conntrack_l3proto.h> 18 17 #include <net/netfilter/nf_conntrack_l4proto.h> 19 18 #include <net/netfilter/nf_conntrack_ecache.h> 20 19 ··· 39 40 void nf_conntrack_init_end(void); 40 41 void nf_conntrack_cleanup_end(void); 41 42 42 - bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, 43 - unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, 44 - struct net *net, 45 - struct nf_conntrack_tuple *tuple, 46 - const struct nf_conntrack_l3proto *l3proto, 47 - const struct nf_conntrack_l4proto *l4proto); 48 - 49 43 bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 50 44 const struct nf_conntrack_tuple *orig, 51 - const struct nf_conntrack_l3proto *l3proto, 52 45 const struct nf_conntrack_l4proto *l4proto); 53 46 54 47 /* Find a connection corresponding to a tuple. */ ··· 66 75 return ret; 67 76 } 68 77 69 - void 70 - print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 71 - const struct nf_conntrack_l3proto *l3proto, 72 - const struct nf_conntrack_l4proto *proto); 78 + void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 79 + const struct nf_conntrack_l4proto *proto); 73 80 74 81 #define CONNTRACK_LOCKS 1024 75 82
+29 -8
include/net/netfilter/nf_conntrack_count.h
··· 1 1 #ifndef _NF_CONNTRACK_COUNT_H 2 2 #define _NF_CONNTRACK_COUNT_H 3 3 4 + #include <linux/list.h> 5 + 4 6 struct nf_conncount_data; 7 + 8 + enum nf_conncount_list_add { 9 + NF_CONNCOUNT_ADDED, /* list add was ok */ 10 + NF_CONNCOUNT_ERR, /* -ENOMEM, must drop skb */ 11 + NF_CONNCOUNT_SKIP, /* list is already reclaimed by gc */ 12 + }; 13 + 14 + struct nf_conncount_list { 15 + spinlock_t list_lock; 16 + struct list_head head; /* connections with the same filtering key */ 17 + unsigned int count; /* length of list */ 18 + bool dead; 19 + }; 5 20 6 21 struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, 7 22 unsigned int keylen); ··· 29 14 const struct nf_conntrack_tuple *tuple, 30 15 const struct nf_conntrack_zone *zone); 31 16 32 - unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 33 - const struct nf_conntrack_tuple *tuple, 34 - const struct nf_conntrack_zone *zone, 35 - bool *addit); 17 + void nf_conncount_lookup(struct net *net, struct nf_conncount_list *list, 18 + const struct nf_conntrack_tuple *tuple, 19 + const struct nf_conntrack_zone *zone, 20 + bool *addit); 36 21 37 - bool nf_conncount_add(struct hlist_head *head, 38 - const struct nf_conntrack_tuple *tuple, 39 - const struct nf_conntrack_zone *zone); 22 + void nf_conncount_list_init(struct nf_conncount_list *list); 40 23 41 - void nf_conncount_cache_free(struct hlist_head *hhead); 24 + enum nf_conncount_list_add 25 + nf_conncount_add(struct nf_conncount_list *list, 26 + const struct nf_conntrack_tuple *tuple, 27 + const struct nf_conntrack_zone *zone); 28 + 29 + bool nf_conncount_gc_list(struct net *net, 30 + struct nf_conncount_list *list); 31 + 32 + void nf_conncount_cache_free(struct nf_conncount_list *list); 42 33 43 34 #endif
+1 -3
include/net/netfilter/nf_conntrack_helper.h
··· 103 103 void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, 104 104 unsigned int); 105 105 106 - struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, 107 - struct nf_conntrack_helper *helper, 108 - gfp_t gfp); 106 + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); 109 107 110 108 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, 111 109 gfp_t flags);
-84
include/net/netfilter/nf_conntrack_l3proto.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - /* 3 - * Copyright (C)2003,2004 USAGI/WIDE Project 4 - * 5 - * Header for use in defining a given L3 protocol for connection tracking. 6 - * 7 - * Author: 8 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 9 - * 10 - * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h 11 - */ 12 - 13 - #ifndef _NF_CONNTRACK_L3PROTO_H 14 - #define _NF_CONNTRACK_L3PROTO_H 15 - #include <linux/netlink.h> 16 - #include <net/netlink.h> 17 - #include <linux/seq_file.h> 18 - #include <net/netfilter/nf_conntrack.h> 19 - 20 - struct nf_conntrack_l3proto { 21 - /* L3 Protocol Family number. ex) PF_INET */ 22 - u_int16_t l3proto; 23 - 24 - /* size of tuple nlattr, fills a hole */ 25 - u16 nla_size; 26 - 27 - /* 28 - * Try to fill in the third arg: nhoff is offset of l3 proto 29 - * hdr. Return true if possible. 30 - */ 31 - bool (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff, 32 - struct nf_conntrack_tuple *tuple); 33 - 34 - /* 35 - * Invert the per-proto part of the tuple: ie. turn xmit into reply. 36 - * Some packets can't be inverted: return 0 in that case. 37 - */ 38 - bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, 39 - const struct nf_conntrack_tuple *orig); 40 - 41 - /* 42 - * Called before tracking. 43 - * *dataoff: offset of protocol header (TCP, UDP,...) 
in skb 44 - * *protonum: protocol number 45 - */ 46 - int (*get_l4proto)(const struct sk_buff *skb, unsigned int nhoff, 47 - unsigned int *dataoff, u_int8_t *protonum); 48 - 49 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 50 - int (*tuple_to_nlattr)(struct sk_buff *skb, 51 - const struct nf_conntrack_tuple *t); 52 - int (*nlattr_to_tuple)(struct nlattr *tb[], 53 - struct nf_conntrack_tuple *t); 54 - const struct nla_policy *nla_policy; 55 - #endif 56 - 57 - /* Called when netns wants to use connection tracking */ 58 - int (*net_ns_get)(struct net *); 59 - void (*net_ns_put)(struct net *); 60 - 61 - /* Module (if any) which this is connected to. */ 62 - struct module *me; 63 - }; 64 - 65 - extern struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO]; 66 - 67 - /* Protocol global registration. */ 68 - int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto); 69 - void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto); 70 - 71 - const struct nf_conntrack_l3proto *nf_ct_l3proto_find_get(u_int16_t l3proto); 72 - 73 - /* Existing built-in protocols */ 74 - extern struct nf_conntrack_l3proto nf_conntrack_l3proto_generic; 75 - 76 - static inline struct nf_conntrack_l3proto * 77 - __nf_ct_l3proto_find(u_int16_t l3proto) 78 - { 79 - if (unlikely(l3proto >= NFPROTO_NUMPROTO)) 80 - return &nf_conntrack_l3proto_generic; 81 - return rcu_dereference(nf_ct_l3protos[l3proto]); 82 - } 83 - 84 - #endif /*_NF_CONNTRACK_L3PROTO_H*/
+3 -11
include/net/netfilter/nf_conntrack_l4proto.h
··· 36 36 struct net *net, struct nf_conntrack_tuple *tuple); 37 37 38 38 /* Invert the per-proto part of the tuple: ie. turn xmit into reply. 39 - * Some packets can't be inverted: return 0 in that case. 39 + * Only used by icmp, most protocols use a generic version. 40 40 */ 41 41 bool (*invert_tuple)(struct nf_conntrack_tuple *inverse, 42 42 const struct nf_conntrack_tuple *orig); ··· 45 45 int (*packet)(struct nf_conn *ct, 46 46 const struct sk_buff *skb, 47 47 unsigned int dataoff, 48 - enum ip_conntrack_info ctinfo, 49 - unsigned int *timeouts); 48 + enum ip_conntrack_info ctinfo); 50 49 51 50 /* Called when a new connection for this protocol found; 52 51 * returns TRUE if it's OK. If so, packet() called next. */ 53 52 bool (*new)(struct nf_conn *ct, const struct sk_buff *skb, 54 - unsigned int dataoff, unsigned int *timeouts); 53 + unsigned int dataoff); 55 54 56 55 /* Called when a conntrack entry is destroyed */ 57 56 void (*destroy)(struct nf_conn *ct); ··· 61 62 62 63 /* called by gc worker if table is full */ 63 64 bool (*can_early_drop)(const struct nf_conn *ct); 64 - 65 - /* Return the array of timeouts for this protocol. */ 66 - unsigned int *(*get_timeouts)(struct net *net); 67 65 68 66 /* convert protoinfo to nfnetink attributes */ 69 67 int (*to_nlattr)(struct sk_buff *skb, struct nlattr *nla, ··· 130 134 /* Protocol global registration. */ 131 135 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *proto); 132 136 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *proto); 133 - int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const proto[], 134 - unsigned int num_proto); 135 - void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const proto[], 136 - unsigned int num_proto); 137 137 138 138 /* Generic netlink helpers */ 139 139 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
+4 -14
include/net/netfilter/nf_conntrack_timeout.h
··· 67 67 #endif 68 68 }; 69 69 70 - static inline unsigned int * 71 - nf_ct_timeout_lookup(struct net *net, struct nf_conn *ct, 72 - const struct nf_conntrack_l4proto *l4proto) 70 + static inline unsigned int *nf_ct_timeout_lookup(const struct nf_conn *ct) 73 71 { 72 + unsigned int *timeouts = NULL; 74 73 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT 75 74 struct nf_conn_timeout *timeout_ext; 76 - unsigned int *timeouts; 77 75 78 76 timeout_ext = nf_ct_timeout_find(ct); 79 - if (timeout_ext) { 77 + if (timeout_ext) 80 78 timeouts = nf_ct_timeout_data(timeout_ext); 81 - if (unlikely(!timeouts)) 82 - timeouts = l4proto->get_timeouts(net); 83 - } else { 84 - timeouts = l4proto->get_timeouts(net); 85 - } 86 - 87 - return timeouts; 88 - #else 89 - return l4proto->get_timeouts(net); 90 79 #endif 80 + return timeouts; 91 81 } 92 82 93 83 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
+8
include/net/netfilter/nf_tproxy.h
··· 17 17 return false; 18 18 } 19 19 20 + /* assign a socket to the skb -- consumes sk */ 21 + static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 22 + { 23 + skb_orphan(skb); 24 + skb->sk = sk; 25 + skb->destructor = sock_edemux; 26 + } 27 + 20 28 __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); 21 29 22 30 /**
+1
include/net/netns/nftables.h
··· 7 7 struct netns_nftables { 8 8 struct list_head tables; 9 9 struct list_head commit_list; 10 + struct mutex commit_mutex; 10 11 unsigned int base_seq; 11 12 u8 gencursor; 12 13 u8 validate_state;
+11
include/uapi/linux/netfilter/nf_osf.h
··· 16 16 17 17 #define NF_OSF_TTL_TRUE 0 /* True ip and fingerprint TTL comparison */ 18 18 19 + /* Check if ip TTL is less than fingerprint one */ 20 + #define NF_OSF_TTL_LESS 1 21 + 19 22 /* Do not compare ip and fingerprint TTL at all */ 20 23 #define NF_OSF_TTL_NOCHECK 2 21 24 25 + #define NF_OSF_FLAGMASK (NF_OSF_GENRE | NF_OSF_TTL | \ 26 + NF_OSF_LOG | NF_OSF_INVERT) 22 27 /* Wildcard MSS (kind of). 23 28 * It is used to implement a state machine for the different wildcard values 24 29 * of the MSS and window sizes. ··· 86 81 87 82 /* Others are not used in the current OSF */ 88 83 OSFOPT_EMPTY = 255, 84 + }; 85 + 86 + enum nf_osf_attr_type { 87 + OSF_ATTR_UNSPEC, 88 + OSF_ATTR_FINGER, 89 + OSF_ATTR_MAX, 89 90 }; 90 91 91 92 #endif /* _NF_OSF_H */
+3 -1
include/uapi/linux/netfilter/nf_tables.h
··· 921 921 /* 922 922 * enum nft_socket_keys - nf_tables socket expression keys 923 923 * 924 - * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_ 924 + * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option 925 + * @NFT_SOCKET_MARK: Value of the socket mark 925 926 */ 926 927 enum nft_socket_keys { 927 928 NFT_SOCKET_TRANSPARENT, 929 + NFT_SOCKET_MARK, 928 930 __NFT_SOCKET_MAX 929 931 }; 930 932 #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1)
+2 -8
include/uapi/linux/netfilter/xt_osf.h
··· 37 37 38 38 #define XT_OSF_TTL_TRUE NF_OSF_TTL_TRUE 39 39 #define XT_OSF_TTL_NOCHECK NF_OSF_TTL_NOCHECK 40 - 41 - #define XT_OSF_TTL_LESS 1 /* Check if ip TTL is less than fingerprint one */ 40 + #define XT_OSF_TTL_LESS NF_OSF_TTL_LESS 42 41 43 42 #define xt_osf_wc nf_osf_wc 44 43 #define xt_osf_opt nf_osf_opt ··· 46 47 #define xt_osf_finger nf_osf_finger 47 48 #define xt_osf_nlmsg nf_osf_nlmsg 48 49 50 + #define xt_osf_attr_type nf_osf_attr_type 49 51 /* 50 52 * Add/remove fingerprint from the kernel. 51 53 */ ··· 54 54 OSF_MSG_ADD, 55 55 OSF_MSG_REMOVE, 56 56 OSF_MSG_MAX, 57 - }; 58 - 59 - enum xt_osf_attr_type { 60 - OSF_ATTR_UNSPEC, 61 - OSF_ATTR_FINGER, 62 - OSF_ATTR_MAX, 63 57 }; 64 58 65 59 #endif /* _XT_OSF_H */
+1 -2
net/bridge/netfilter/nft_reject_bridge.c
··· 89 89 niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, 90 90 net->ipv4.sysctl_ip_default_ttl); 91 91 nf_reject_ip_tcphdr_put(nskb, oldskb, oth); 92 - niph->ttl = net->ipv4.sysctl_ip_default_ttl; 93 - niph->tot_len = htons(nskb->len); 92 + niph->tot_len = htons(nskb->len); 94 93 ip_send_check(niph); 95 94 96 95 nft_reject_br_push_etherhdr(oldskb, nskb);
+1 -1
net/ieee802154/6lowpan/reassembly.c
··· 25 25 26 26 #include <net/ieee802154_netdev.h> 27 27 #include <net/6lowpan.h> 28 - #include <net/ipv6.h> 28 + #include <net/ipv6_frag.h> 29 29 #include <net/inet_frag.h> 30 30 31 31 #include "6lowpan_i.h"
-53
net/ipv4/netfilter.c
··· 98 98 } 99 99 EXPORT_SYMBOL_GPL(nf_ip_reroute); 100 100 101 - __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 102 - unsigned int dataoff, u_int8_t protocol) 103 - { 104 - const struct iphdr *iph = ip_hdr(skb); 105 - __sum16 csum = 0; 106 - 107 - switch (skb->ip_summed) { 108 - case CHECKSUM_COMPLETE: 109 - if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 110 - break; 111 - if ((protocol == 0 && !csum_fold(skb->csum)) || 112 - !csum_tcpudp_magic(iph->saddr, iph->daddr, 113 - skb->len - dataoff, protocol, 114 - skb->csum)) { 115 - skb->ip_summed = CHECKSUM_UNNECESSARY; 116 - break; 117 - } 118 - /* fall through */ 119 - case CHECKSUM_NONE: 120 - if (protocol == 0) 121 - skb->csum = 0; 122 - else 123 - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 124 - skb->len - dataoff, 125 - protocol, 0); 126 - csum = __skb_checksum_complete(skb); 127 - } 128 - return csum; 129 - } 130 - EXPORT_SYMBOL(nf_ip_checksum); 131 - 132 - __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 133 - unsigned int dataoff, unsigned int len, 134 - u_int8_t protocol) 135 - { 136 - const struct iphdr *iph = ip_hdr(skb); 137 - __sum16 csum = 0; 138 - 139 - switch (skb->ip_summed) { 140 - case CHECKSUM_COMPLETE: 141 - if (len == skb->len - dataoff) 142 - return nf_ip_checksum(skb, hook, dataoff, protocol); 143 - /* fall through */ 144 - case CHECKSUM_NONE: 145 - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 146 - skb->len - dataoff, 0); 147 - skb->ip_summed = CHECKSUM_NONE; 148 - return __skb_checksum_complete_head(skb, dataoff + len); 149 - } 150 - return csum; 151 - } 152 - EXPORT_SYMBOL_GPL(nf_ip_checksum_partial); 153 - 154 101 int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl, 155 102 bool strict __always_unused) 156 103 {
+3 -19
net/ipv4/netfilter/Kconfig
··· 9 9 tristate 10 10 default n 11 11 12 - config NF_CONNTRACK_IPV4 13 - tristate "IPv4 connection tracking support (required for NAT)" 14 - depends on NF_CONNTRACK 15 - default m if NETFILTER_ADVANCED=n 16 - select NF_DEFRAG_IPV4 17 - ---help--- 18 - Connection tracking keeps a record of what packets have passed 19 - through your machine, in order to figure out how they are related 20 - into connections. 21 - 22 - This is IPv4 support on Layer 3 independent connection tracking. 23 - Layer 3 independent connection tracking is experimental scheme 24 - which generalize ip_conntrack to support other layer 3 protocols. 25 - 26 - To compile it as a module, choose M here. If unsure, say N. 27 - 28 12 config NF_SOCKET_IPV4 29 13 tristate "IPv4 socket lookup support" 30 14 help ··· 96 112 97 113 config NF_NAT_IPV4 98 114 tristate "IPv4 NAT" 99 - depends on NF_CONNTRACK_IPV4 115 + depends on NF_CONNTRACK 100 116 default m if NETFILTER_ADVANCED=n 101 117 select NF_NAT 102 118 help ··· 263 279 # NAT + specific targets: nf_conntrack 264 280 config IP_NF_NAT 265 281 tristate "iptables NAT support" 266 - depends on NF_CONNTRACK_IPV4 282 + depends on NF_CONNTRACK 267 283 default m if NETFILTER_ADVANCED=n 268 284 select NF_NAT 269 285 select NF_NAT_IPV4 ··· 324 340 config IP_NF_TARGET_CLUSTERIP 325 341 tristate "CLUSTERIP target support" 326 342 depends on IP_NF_MANGLE 327 - depends on NF_CONNTRACK_IPV4 343 + depends on NF_CONNTRACK 328 344 depends on NETFILTER_ADVANCED 329 345 select NF_CONNTRACK_MARK 330 346 select NETFILTER_FAMILY_ARP
-6
net/ipv4/netfilter/Makefile
··· 3 3 # Makefile for the netfilter modules on top of IPv4. 4 4 # 5 5 6 - # objects for l3 independent conntrack 7 - nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o 8 - 9 - # connection tracking 10 - obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o 11 - 12 6 nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o 13 7 nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o 14 8 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
-472
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
··· 1 - 2 - /* (C) 1999-2001 Paul `Rusty' Russell 3 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 4 - * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 5 - * 6 - * This program is free software; you can redistribute it and/or modify 7 - * it under the terms of the GNU General Public License version 2 as 8 - * published by the Free Software Foundation. 9 - */ 10 - 11 - #include <linux/types.h> 12 - #include <linux/ip.h> 13 - #include <linux/netfilter.h> 14 - #include <linux/module.h> 15 - #include <linux/skbuff.h> 16 - #include <linux/icmp.h> 17 - #include <linux/sysctl.h> 18 - #include <net/route.h> 19 - #include <net/ip.h> 20 - 21 - #include <linux/netfilter_ipv4.h> 22 - #include <net/netfilter/nf_conntrack.h> 23 - #include <net/netfilter/nf_conntrack_helper.h> 24 - #include <net/netfilter/nf_conntrack_l4proto.h> 25 - #include <net/netfilter/nf_conntrack_l3proto.h> 26 - #include <net/netfilter/nf_conntrack_zones.h> 27 - #include <net/netfilter/nf_conntrack_core.h> 28 - #include <net/netfilter/nf_conntrack_seqadj.h> 29 - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 30 - #include <net/netfilter/nf_nat_helper.h> 31 - #include <net/netfilter/ipv4/nf_defrag_ipv4.h> 32 - #include <net/netfilter/nf_log.h> 33 - 34 - static int conntrack4_net_id __read_mostly; 35 - static DEFINE_MUTEX(register_ipv4_hooks); 36 - 37 - struct conntrack4_net { 38 - unsigned int users; 39 - }; 40 - 41 - static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 42 - struct nf_conntrack_tuple *tuple) 43 - { 44 - const __be32 *ap; 45 - __be32 _addrs[2]; 46 - ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), 47 - sizeof(u_int32_t) * 2, _addrs); 48 - if (ap == NULL) 49 - return false; 50 - 51 - tuple->src.u3.ip = ap[0]; 52 - tuple->dst.u3.ip = ap[1]; 53 - 54 - return true; 55 - } 56 - 57 - static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, 58 - const struct nf_conntrack_tuple *orig) 59 - { 60 - tuple->src.u3.ip = 
orig->dst.u3.ip; 61 - tuple->dst.u3.ip = orig->src.u3.ip; 62 - 63 - return true; 64 - } 65 - 66 - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 67 - unsigned int *dataoff, u_int8_t *protonum) 68 - { 69 - const struct iphdr *iph; 70 - struct iphdr _iph; 71 - 72 - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 73 - if (iph == NULL) 74 - return -NF_ACCEPT; 75 - 76 - /* Conntrack defragments packets, we might still see fragments 77 - * inside ICMP packets though. */ 78 - if (iph->frag_off & htons(IP_OFFSET)) 79 - return -NF_ACCEPT; 80 - 81 - *dataoff = nhoff + (iph->ihl << 2); 82 - *protonum = iph->protocol; 83 - 84 - /* Check bogus IP headers */ 85 - if (*dataoff > skb->len) { 86 - pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " 87 - "nhoff %u, ihl %u, skblen %u\n", 88 - nhoff, iph->ihl << 2, skb->len); 89 - return -NF_ACCEPT; 90 - } 91 - 92 - return NF_ACCEPT; 93 - } 94 - 95 - static unsigned int ipv4_helper(void *priv, 96 - struct sk_buff *skb, 97 - const struct nf_hook_state *state) 98 - { 99 - struct nf_conn *ct; 100 - enum ip_conntrack_info ctinfo; 101 - const struct nf_conn_help *help; 102 - const struct nf_conntrack_helper *helper; 103 - 104 - /* This is where we call the helper: as the packet goes out. 
*/ 105 - ct = nf_ct_get(skb, &ctinfo); 106 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 107 - return NF_ACCEPT; 108 - 109 - help = nfct_help(ct); 110 - if (!help) 111 - return NF_ACCEPT; 112 - 113 - /* rcu_read_lock()ed by nf_hook_thresh */ 114 - helper = rcu_dereference(help->helper); 115 - if (!helper) 116 - return NF_ACCEPT; 117 - 118 - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 119 - ct, ctinfo); 120 - } 121 - 122 - static unsigned int ipv4_confirm(void *priv, 123 - struct sk_buff *skb, 124 - const struct nf_hook_state *state) 125 - { 126 - struct nf_conn *ct; 127 - enum ip_conntrack_info ctinfo; 128 - 129 - ct = nf_ct_get(skb, &ctinfo); 130 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 131 - goto out; 132 - 133 - /* adjust seqs for loopback traffic only in outgoing direction */ 134 - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 135 - !nf_is_loopback_packet(skb)) { 136 - if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { 137 - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 138 - return NF_DROP; 139 - } 140 - } 141 - out: 142 - /* We've seen it coming out the other side: confirm it */ 143 - return nf_conntrack_confirm(skb); 144 - } 145 - 146 - static unsigned int ipv4_conntrack_in(void *priv, 147 - struct sk_buff *skb, 148 - const struct nf_hook_state *state) 149 - { 150 - return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 151 - } 152 - 153 - static unsigned int ipv4_conntrack_local(void *priv, 154 - struct sk_buff *skb, 155 - const struct nf_hook_state *state) 156 - { 157 - if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */ 158 - enum ip_conntrack_info ctinfo; 159 - struct nf_conn *tmpl; 160 - 161 - tmpl = nf_ct_get(skb, &ctinfo); 162 - if (tmpl && nf_ct_is_template(tmpl)) { 163 - /* when skipping ct, clear templates to avoid fooling 164 - * later targets/matches 165 - */ 166 - skb->_nfct = 0; 167 - nf_ct_put(tmpl); 168 - } 169 - return NF_ACCEPT; 170 - } 171 - 172 - return nf_conntrack_in(state->net, 
PF_INET, state->hook, skb); 173 - } 174 - 175 - /* Connection tracking may drop packets, but never alters them, so 176 - make it the first hook. */ 177 - static const struct nf_hook_ops ipv4_conntrack_ops[] = { 178 - { 179 - .hook = ipv4_conntrack_in, 180 - .pf = NFPROTO_IPV4, 181 - .hooknum = NF_INET_PRE_ROUTING, 182 - .priority = NF_IP_PRI_CONNTRACK, 183 - }, 184 - { 185 - .hook = ipv4_conntrack_local, 186 - .pf = NFPROTO_IPV4, 187 - .hooknum = NF_INET_LOCAL_OUT, 188 - .priority = NF_IP_PRI_CONNTRACK, 189 - }, 190 - { 191 - .hook = ipv4_helper, 192 - .pf = NFPROTO_IPV4, 193 - .hooknum = NF_INET_POST_ROUTING, 194 - .priority = NF_IP_PRI_CONNTRACK_HELPER, 195 - }, 196 - { 197 - .hook = ipv4_confirm, 198 - .pf = NFPROTO_IPV4, 199 - .hooknum = NF_INET_POST_ROUTING, 200 - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 201 - }, 202 - { 203 - .hook = ipv4_helper, 204 - .pf = NFPROTO_IPV4, 205 - .hooknum = NF_INET_LOCAL_IN, 206 - .priority = NF_IP_PRI_CONNTRACK_HELPER, 207 - }, 208 - { 209 - .hook = ipv4_confirm, 210 - .pf = NFPROTO_IPV4, 211 - .hooknum = NF_INET_LOCAL_IN, 212 - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 213 - }, 214 - }; 215 - 216 - /* Fast function for those who don't want to parse /proc (and I don't 217 - blame them). */ 218 - /* Reversing the socket's dst/src point of view gives us the reply 219 - mapping. 
*/ 220 - static int 221 - getorigdst(struct sock *sk, int optval, void __user *user, int *len) 222 - { 223 - const struct inet_sock *inet = inet_sk(sk); 224 - const struct nf_conntrack_tuple_hash *h; 225 - struct nf_conntrack_tuple tuple; 226 - 227 - memset(&tuple, 0, sizeof(tuple)); 228 - 229 - lock_sock(sk); 230 - tuple.src.u3.ip = inet->inet_rcv_saddr; 231 - tuple.src.u.tcp.port = inet->inet_sport; 232 - tuple.dst.u3.ip = inet->inet_daddr; 233 - tuple.dst.u.tcp.port = inet->inet_dport; 234 - tuple.src.l3num = PF_INET; 235 - tuple.dst.protonum = sk->sk_protocol; 236 - release_sock(sk); 237 - 238 - /* We only do TCP and SCTP at the moment: is there a better way? */ 239 - if (tuple.dst.protonum != IPPROTO_TCP && 240 - tuple.dst.protonum != IPPROTO_SCTP) { 241 - pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 242 - return -ENOPROTOOPT; 243 - } 244 - 245 - if ((unsigned int) *len < sizeof(struct sockaddr_in)) { 246 - pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", 247 - *len, sizeof(struct sockaddr_in)); 248 - return -EINVAL; 249 - } 250 - 251 - h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 252 - if (h) { 253 - struct sockaddr_in sin; 254 - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 255 - 256 - sin.sin_family = AF_INET; 257 - sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 258 - .tuple.dst.u.tcp.port; 259 - sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 260 - .tuple.dst.u3.ip; 261 - memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 262 - 263 - pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 264 - &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 265 - nf_ct_put(ct); 266 - if (copy_to_user(user, &sin, sizeof(sin)) != 0) 267 - return -EFAULT; 268 - else 269 - return 0; 270 - } 271 - pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 272 - &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 273 - &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 274 - return -ENOENT; 275 - } 276 - 277 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 278 - 279 - #include 
<linux/netfilter/nfnetlink.h> 280 - #include <linux/netfilter/nfnetlink_conntrack.h> 281 - 282 - static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 283 - const struct nf_conntrack_tuple *tuple) 284 - { 285 - if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || 286 - nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) 287 - goto nla_put_failure; 288 - return 0; 289 - 290 - nla_put_failure: 291 - return -1; 292 - } 293 - 294 - static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { 295 - [CTA_IP_V4_SRC] = { .type = NLA_U32 }, 296 - [CTA_IP_V4_DST] = { .type = NLA_U32 }, 297 - }; 298 - 299 - static int ipv4_nlattr_to_tuple(struct nlattr *tb[], 300 - struct nf_conntrack_tuple *t) 301 - { 302 - if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 303 - return -EINVAL; 304 - 305 - t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); 306 - t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); 307 - 308 - return 0; 309 - } 310 - #endif 311 - 312 - static struct nf_sockopt_ops so_getorigdst = { 313 - .pf = PF_INET, 314 - .get_optmin = SO_ORIGINAL_DST, 315 - .get_optmax = SO_ORIGINAL_DST+1, 316 - .get = getorigdst, 317 - .owner = THIS_MODULE, 318 - }; 319 - 320 - static int ipv4_hooks_register(struct net *net) 321 - { 322 - struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); 323 - int err = 0; 324 - 325 - mutex_lock(&register_ipv4_hooks); 326 - 327 - cnet->users++; 328 - if (cnet->users > 1) 329 - goto out_unlock; 330 - 331 - err = nf_defrag_ipv4_enable(net); 332 - if (err) { 333 - cnet->users = 0; 334 - goto out_unlock; 335 - } 336 - 337 - err = nf_register_net_hooks(net, ipv4_conntrack_ops, 338 - ARRAY_SIZE(ipv4_conntrack_ops)); 339 - 340 - if (err) 341 - cnet->users = 0; 342 - out_unlock: 343 - mutex_unlock(&register_ipv4_hooks); 344 - return err; 345 - } 346 - 347 - static void ipv4_hooks_unregister(struct net *net) 348 - { 349 - struct conntrack4_net *cnet = net_generic(net, conntrack4_net_id); 350 - 351 - mutex_lock(&register_ipv4_hooks); 352 - 
if (cnet->users && (--cnet->users == 0)) 353 - nf_unregister_net_hooks(net, ipv4_conntrack_ops, 354 - ARRAY_SIZE(ipv4_conntrack_ops)); 355 - mutex_unlock(&register_ipv4_hooks); 356 - } 357 - 358 - const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { 359 - .l3proto = PF_INET, 360 - .pkt_to_tuple = ipv4_pkt_to_tuple, 361 - .invert_tuple = ipv4_invert_tuple, 362 - .get_l4proto = ipv4_get_l4proto, 363 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 364 - .tuple_to_nlattr = ipv4_tuple_to_nlattr, 365 - .nlattr_to_tuple = ipv4_nlattr_to_tuple, 366 - .nla_policy = ipv4_nla_policy, 367 - .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */ 368 - NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */ 369 - #endif 370 - .net_ns_get = ipv4_hooks_register, 371 - .net_ns_put = ipv4_hooks_unregister, 372 - .me = THIS_MODULE, 373 - }; 374 - 375 - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 376 - &nf_conntrack_htable_size, 0600); 377 - 378 - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 379 - MODULE_ALIAS("ip_conntrack"); 380 - MODULE_LICENSE("GPL"); 381 - 382 - static const struct nf_conntrack_l4proto * const builtin_l4proto4[] = { 383 - &nf_conntrack_l4proto_tcp4, 384 - &nf_conntrack_l4proto_udp4, 385 - &nf_conntrack_l4proto_icmp, 386 - #ifdef CONFIG_NF_CT_PROTO_DCCP 387 - &nf_conntrack_l4proto_dccp4, 388 - #endif 389 - #ifdef CONFIG_NF_CT_PROTO_SCTP 390 - &nf_conntrack_l4proto_sctp4, 391 - #endif 392 - #ifdef CONFIG_NF_CT_PROTO_UDPLITE 393 - &nf_conntrack_l4proto_udplite4, 394 - #endif 395 - }; 396 - 397 - static int ipv4_net_init(struct net *net) 398 - { 399 - return nf_ct_l4proto_pernet_register(net, builtin_l4proto4, 400 - ARRAY_SIZE(builtin_l4proto4)); 401 - } 402 - 403 - static void ipv4_net_exit(struct net *net) 404 - { 405 - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, 406 - ARRAY_SIZE(builtin_l4proto4)); 407 - } 408 - 409 - static struct pernet_operations ipv4_net_ops = { 410 - .init = ipv4_net_init, 411 - 
.exit = ipv4_net_exit, 412 - .id = &conntrack4_net_id, 413 - .size = sizeof(struct conntrack4_net), 414 - }; 415 - 416 - static int __init nf_conntrack_l3proto_ipv4_init(void) 417 - { 418 - int ret = 0; 419 - 420 - need_conntrack(); 421 - 422 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 423 - if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) != 424 - nf_conntrack_l3proto_ipv4.nla_size)) 425 - return -EINVAL; 426 - #endif 427 - ret = nf_register_sockopt(&so_getorigdst); 428 - if (ret < 0) { 429 - pr_err("Unable to register netfilter socket option\n"); 430 - return ret; 431 - } 432 - 433 - ret = register_pernet_subsys(&ipv4_net_ops); 434 - if (ret < 0) { 435 - pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); 436 - goto cleanup_sockopt; 437 - } 438 - 439 - ret = nf_ct_l4proto_register(builtin_l4proto4, 440 - ARRAY_SIZE(builtin_l4proto4)); 441 - if (ret < 0) 442 - goto cleanup_pernet; 443 - 444 - ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); 445 - if (ret < 0) { 446 - pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); 447 - goto cleanup_l4proto; 448 - } 449 - 450 - return ret; 451 - cleanup_l4proto: 452 - nf_ct_l4proto_unregister(builtin_l4proto4, 453 - ARRAY_SIZE(builtin_l4proto4)); 454 - cleanup_pernet: 455 - unregister_pernet_subsys(&ipv4_net_ops); 456 - cleanup_sockopt: 457 - nf_unregister_sockopt(&so_getorigdst); 458 - return ret; 459 - } 460 - 461 - static void __exit nf_conntrack_l3proto_ipv4_fini(void) 462 - { 463 - synchronize_net(); 464 - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); 465 - nf_ct_l4proto_unregister(builtin_l4proto4, 466 - ARRAY_SIZE(builtin_l4proto4)); 467 - unregister_pernet_subsys(&ipv4_net_ops); 468 - nf_unregister_sockopt(&so_getorigdst); 469 - } 470 - 471 - module_init(nf_conntrack_l3proto_ipv4_init); 472 - module_exit(nf_conntrack_l3proto_ipv4_fini);
+12 -7
net/ipv4/netfilter/nf_conntrack_proto_icmp.c net/netfilter/nf_conntrack_proto_icmp.c
··· 19 19 #include <net/netfilter/nf_conntrack_tuple.h> 20 20 #include <net/netfilter/nf_conntrack_l4proto.h> 21 21 #include <net/netfilter/nf_conntrack_core.h> 22 + #include <net/netfilter/nf_conntrack_timeout.h> 22 23 #include <net/netfilter/nf_conntrack_zones.h> 23 24 #include <net/netfilter/nf_log.h> 24 25 ··· 81 80 static int icmp_packet(struct nf_conn *ct, 82 81 const struct sk_buff *skb, 83 82 unsigned int dataoff, 84 - enum ip_conntrack_info ctinfo, 85 - unsigned int *timeout) 83 + enum ip_conntrack_info ctinfo) 86 84 { 87 85 /* Do not immediately delete the connection after the first 88 86 successful reply to avoid excessive conntrackd traffic 89 87 and also to handle correctly ICMP echo reply duplicates. */ 88 + unsigned int *timeout = nf_ct_timeout_lookup(ct); 89 + 90 + if (!timeout) 91 + timeout = icmp_get_timeouts(nf_ct_net(ct)); 92 + 90 93 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 91 94 92 95 return NF_ACCEPT; ··· 98 93 99 94 /* Called when a new connection for this protocol found. */ 100 95 static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, 101 - unsigned int dataoff, unsigned int *timeouts) 96 + unsigned int dataoff) 102 97 { 103 98 static const u_int8_t valid_new[] = { 104 99 [ICMP_ECHO] = 1, ··· 147 142 148 143 /* Ordinarily, we'd expect the inverted tupleproto, but it's 149 144 been preserved inside the ICMP. */ 150 - if (!nf_ct_invert_tuple(&innertuple, &origtuple, 151 - &nf_conntrack_l3proto_ipv4, innerproto)) { 145 + if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { 152 146 pr_debug("icmp_error_message: no match\n"); 153 147 return -NF_ACCEPT; 154 148 } ··· 285 281 struct nf_icmp_net *in = icmp_pernet(net); 286 282 287 283 if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { 284 + if (!timeout) 285 + timeout = &in->timeout; 288 286 *timeout = 289 287 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; 290 - } else { 288 + } else if (timeout) { 291 289 /* Set default ICMP timeout. 
*/ 292 290 *timeout = in->timeout; 293 291 } ··· 364 358 .pkt_to_tuple = icmp_pkt_to_tuple, 365 359 .invert_tuple = icmp_invert_tuple, 366 360 .packet = icmp_packet, 367 - .get_timeouts = icmp_get_timeouts, 368 361 .new = icmp_new, 369 362 .error = icmp_error, 370 363 .destroy = NULL,
-62
net/ipv6/netfilter.c
··· 15 15 #include <net/ipv6.h> 16 16 #include <net/ip6_route.h> 17 17 #include <net/xfrm.h> 18 - #include <net/ip6_checksum.h> 19 18 #include <net/netfilter/nf_queue.h> 20 19 21 20 int ip6_route_me_harder(struct net *net, struct sk_buff *skb) ··· 105 106 return err; 106 107 } 107 108 108 - __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 109 - unsigned int dataoff, u_int8_t protocol) 110 - { 111 - const struct ipv6hdr *ip6h = ipv6_hdr(skb); 112 - __sum16 csum = 0; 113 - 114 - switch (skb->ip_summed) { 115 - case CHECKSUM_COMPLETE: 116 - if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 117 - break; 118 - if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 119 - skb->len - dataoff, protocol, 120 - csum_sub(skb->csum, 121 - skb_checksum(skb, 0, 122 - dataoff, 0)))) { 123 - skb->ip_summed = CHECKSUM_UNNECESSARY; 124 - break; 125 - } 126 - /* fall through */ 127 - case CHECKSUM_NONE: 128 - skb->csum = ~csum_unfold( 129 - csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 130 - skb->len - dataoff, 131 - protocol, 132 - csum_sub(0, 133 - skb_checksum(skb, 0, 134 - dataoff, 0)))); 135 - csum = __skb_checksum_complete(skb); 136 - } 137 - return csum; 138 - } 139 - EXPORT_SYMBOL(nf_ip6_checksum); 140 - 141 - static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, 142 - unsigned int dataoff, unsigned int len, 143 - u_int8_t protocol) 144 - { 145 - const struct ipv6hdr *ip6h = ipv6_hdr(skb); 146 - __wsum hsum; 147 - __sum16 csum = 0; 148 - 149 - switch (skb->ip_summed) { 150 - case CHECKSUM_COMPLETE: 151 - if (len == skb->len - dataoff) 152 - return nf_ip6_checksum(skb, hook, dataoff, protocol); 153 - /* fall through */ 154 - case CHECKSUM_NONE: 155 - hsum = skb_checksum(skb, 0, dataoff, 0); 156 - skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, 157 - &ip6h->daddr, 158 - skb->len - dataoff, 159 - protocol, 160 - csum_sub(0, hsum))); 161 - skb->ip_summed = CHECKSUM_NONE; 162 - return __skb_checksum_complete_head(skb, dataoff + 
len); 163 - } 164 - return csum; 165 - }; 166 - 167 109 static const struct nf_ipv6_ops ipv6ops = { 168 110 .chk_addr = ipv6_chk_addr, 169 111 .route_input = ip6_route_input, 170 112 .fragment = ip6_fragment, 171 - .checksum = nf_ip6_checksum, 172 - .checksum_partial = nf_ip6_checksum_partial, 173 113 .route = nf_ip6_route, 174 114 .reroute = nf_ip6_reroute, 175 115 };
+4 -23
net/ipv6/netfilter/Kconfig
··· 5 5 menu "IPv6: Netfilter Configuration" 6 6 depends on INET && IPV6 && NETFILTER 7 7 8 - config NF_DEFRAG_IPV6 9 - tristate 10 - default n 11 - 12 - config NF_CONNTRACK_IPV6 13 - tristate "IPv6 connection tracking support" 14 - depends on INET && IPV6 && NF_CONNTRACK 15 - default m if NETFILTER_ADVANCED=n 16 - select NF_DEFRAG_IPV6 17 - ---help--- 18 - Connection tracking keeps a record of what packets have passed 19 - through your machine, in order to figure out how they are related 20 - into connections. 21 - 22 - This is IPv6 support on Layer 3 independent connection tracking. 23 - Layer 3 independent connection tracking is experimental scheme 24 - which generalize ip_conntrack to support other layer 3 protocols. 25 - 26 - To compile it as a module, choose M here. If unsure, say N. 27 - 28 8 config NF_SOCKET_IPV6 29 9 tristate "IPv6 socket lookup support" 30 10 help ··· 108 128 109 129 config NF_NAT_IPV6 110 130 tristate "IPv6 NAT" 111 - depends on NF_CONNTRACK_IPV6 131 + depends on NF_CONNTRACK 112 132 depends on NETFILTER_ADVANCED 113 133 select NF_NAT 114 134 help ··· 308 328 309 329 config IP6_NF_NAT 310 330 tristate "ip6tables NAT support" 311 - depends on NF_CONNTRACK_IPV6 331 + depends on NF_CONNTRACK 312 332 depends on NETFILTER_ADVANCED 313 333 select NF_NAT 314 334 select NF_NAT_IPV6 ··· 345 365 endif # IP6_NF_NAT 346 366 347 367 endif # IP6_NF_IPTABLES 348 - 349 368 endmenu 350 369 370 + config NF_DEFRAG_IPV6 371 + tristate
-6
net/ipv6/netfilter/Makefile
··· 11 11 obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 12 12 obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o 13 13 14 - # objects for l3 independent conntrack 15 - nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o 16 - 17 - # l3 independent conntrack 18 - obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 19 - 20 14 nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o 21 15 nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o 22 16 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-460
net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
··· 1 - /* 2 - * Copyright (C)2004 USAGI/WIDE Project 3 - * 4 - * This program is free software; you can redistribute it and/or modify 5 - * it under the terms of the GNU General Public License version 2 as 6 - * published by the Free Software Foundation. 7 - * 8 - * Author: 9 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 10 - */ 11 - 12 - #include <linux/types.h> 13 - #include <linux/ipv6.h> 14 - #include <linux/in6.h> 15 - #include <linux/netfilter.h> 16 - #include <linux/module.h> 17 - #include <linux/skbuff.h> 18 - #include <linux/icmp.h> 19 - #include <net/ipv6.h> 20 - #include <net/inet_frag.h> 21 - 22 - #include <linux/netfilter_bridge.h> 23 - #include <linux/netfilter_ipv6.h> 24 - #include <linux/netfilter_ipv6/ip6_tables.h> 25 - #include <net/netfilter/nf_conntrack.h> 26 - #include <net/netfilter/nf_conntrack_helper.h> 27 - #include <net/netfilter/nf_conntrack_l4proto.h> 28 - #include <net/netfilter/nf_conntrack_l3proto.h> 29 - #include <net/netfilter/nf_conntrack_core.h> 30 - #include <net/netfilter/nf_conntrack_zones.h> 31 - #include <net/netfilter/nf_conntrack_seqadj.h> 32 - #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 33 - #include <net/netfilter/nf_nat_helper.h> 34 - #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 35 - #include <net/netfilter/nf_log.h> 36 - 37 - static int conntrack6_net_id; 38 - static DEFINE_MUTEX(register_ipv6_hooks); 39 - 40 - struct conntrack6_net { 41 - unsigned int users; 42 - }; 43 - 44 - static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 45 - struct nf_conntrack_tuple *tuple) 46 - { 47 - const u_int32_t *ap; 48 - u_int32_t _addrs[8]; 49 - 50 - ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), 51 - sizeof(_addrs), _addrs); 52 - if (ap == NULL) 53 - return false; 54 - 55 - memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); 56 - memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); 57 - 58 - return true; 59 - } 60 - 61 - static bool 
ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, 62 - const struct nf_conntrack_tuple *orig) 63 - { 64 - memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); 65 - memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); 66 - 67 - return true; 68 - } 69 - 70 - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 71 - unsigned int *dataoff, u_int8_t *protonum) 72 - { 73 - unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 74 - __be16 frag_off; 75 - int protoff; 76 - u8 nexthdr; 77 - 78 - if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 79 - &nexthdr, sizeof(nexthdr)) != 0) { 80 - pr_debug("ip6_conntrack_core: can't get nexthdr\n"); 81 - return -NF_ACCEPT; 82 - } 83 - protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 84 - /* 85 - * (protoff == skb->len) means the packet has not data, just 86 - * IPv6 and possibly extensions headers, but it is tracked anyway 87 - */ 88 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 89 - pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); 90 - return -NF_ACCEPT; 91 - } 92 - 93 - *dataoff = protoff; 94 - *protonum = nexthdr; 95 - return NF_ACCEPT; 96 - } 97 - 98 - static unsigned int ipv6_helper(void *priv, 99 - struct sk_buff *skb, 100 - const struct nf_hook_state *state) 101 - { 102 - struct nf_conn *ct; 103 - const struct nf_conn_help *help; 104 - const struct nf_conntrack_helper *helper; 105 - enum ip_conntrack_info ctinfo; 106 - __be16 frag_off; 107 - int protoff; 108 - u8 nexthdr; 109 - 110 - /* This is where we call the helper: as the packet goes out. 
*/ 111 - ct = nf_ct_get(skb, &ctinfo); 112 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 113 - return NF_ACCEPT; 114 - 115 - help = nfct_help(ct); 116 - if (!help) 117 - return NF_ACCEPT; 118 - /* rcu_read_lock()ed by nf_hook_thresh */ 119 - helper = rcu_dereference(help->helper); 120 - if (!helper) 121 - return NF_ACCEPT; 122 - 123 - nexthdr = ipv6_hdr(skb)->nexthdr; 124 - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 125 - &frag_off); 126 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 127 - pr_debug("proto header not found\n"); 128 - return NF_ACCEPT; 129 - } 130 - 131 - return helper->help(skb, protoff, ct, ctinfo); 132 - } 133 - 134 - static unsigned int ipv6_confirm(void *priv, 135 - struct sk_buff *skb, 136 - const struct nf_hook_state *state) 137 - { 138 - struct nf_conn *ct; 139 - enum ip_conntrack_info ctinfo; 140 - unsigned char pnum = ipv6_hdr(skb)->nexthdr; 141 - int protoff; 142 - __be16 frag_off; 143 - 144 - ct = nf_ct_get(skb, &ctinfo); 145 - if (!ct || ctinfo == IP_CT_RELATED_REPLY) 146 - goto out; 147 - 148 - protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 149 - &frag_off); 150 - if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 151 - pr_debug("proto header not found\n"); 152 - goto out; 153 - } 154 - 155 - /* adjust seqs for loopback traffic only in outgoing direction */ 156 - if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 157 - !nf_is_loopback_packet(skb)) { 158 - if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { 159 - NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 160 - return NF_DROP; 161 - } 162 - } 163 - out: 164 - /* We've seen it coming out the other side: confirm it */ 165 - return nf_conntrack_confirm(skb); 166 - } 167 - 168 - static unsigned int ipv6_conntrack_in(void *priv, 169 - struct sk_buff *skb, 170 - const struct nf_hook_state *state) 171 - { 172 - return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 173 - } 174 - 175 - static unsigned int ipv6_conntrack_local(void *priv, 176 - 
struct sk_buff *skb, 177 - const struct nf_hook_state *state) 178 - { 179 - return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 180 - } 181 - 182 - static const struct nf_hook_ops ipv6_conntrack_ops[] = { 183 - { 184 - .hook = ipv6_conntrack_in, 185 - .pf = NFPROTO_IPV6, 186 - .hooknum = NF_INET_PRE_ROUTING, 187 - .priority = NF_IP6_PRI_CONNTRACK, 188 - }, 189 - { 190 - .hook = ipv6_conntrack_local, 191 - .pf = NFPROTO_IPV6, 192 - .hooknum = NF_INET_LOCAL_OUT, 193 - .priority = NF_IP6_PRI_CONNTRACK, 194 - }, 195 - { 196 - .hook = ipv6_helper, 197 - .pf = NFPROTO_IPV6, 198 - .hooknum = NF_INET_POST_ROUTING, 199 - .priority = NF_IP6_PRI_CONNTRACK_HELPER, 200 - }, 201 - { 202 - .hook = ipv6_confirm, 203 - .pf = NFPROTO_IPV6, 204 - .hooknum = NF_INET_POST_ROUTING, 205 - .priority = NF_IP6_PRI_LAST, 206 - }, 207 - { 208 - .hook = ipv6_helper, 209 - .pf = NFPROTO_IPV6, 210 - .hooknum = NF_INET_LOCAL_IN, 211 - .priority = NF_IP6_PRI_CONNTRACK_HELPER, 212 - }, 213 - { 214 - .hook = ipv6_confirm, 215 - .pf = NFPROTO_IPV6, 216 - .hooknum = NF_INET_LOCAL_IN, 217 - .priority = NF_IP6_PRI_LAST-1, 218 - }, 219 - }; 220 - 221 - static int 222 - ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 223 - { 224 - struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; 225 - const struct ipv6_pinfo *inet6 = inet6_sk(sk); 226 - const struct inet_sock *inet = inet_sk(sk); 227 - const struct nf_conntrack_tuple_hash *h; 228 - struct sockaddr_in6 sin6; 229 - struct nf_conn *ct; 230 - __be32 flow_label; 231 - int bound_dev_if; 232 - 233 - lock_sock(sk); 234 - tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 235 - tuple.src.u.tcp.port = inet->inet_sport; 236 - tuple.dst.u3.in6 = sk->sk_v6_daddr; 237 - tuple.dst.u.tcp.port = inet->inet_dport; 238 - tuple.dst.protonum = sk->sk_protocol; 239 - bound_dev_if = sk->sk_bound_dev_if; 240 - flow_label = inet6->flow_label; 241 - release_sock(sk); 242 - 243 - if (tuple.dst.protonum != IPPROTO_TCP && 244 - 
tuple.dst.protonum != IPPROTO_SCTP) 245 - return -ENOPROTOOPT; 246 - 247 - if (*len < 0 || (unsigned int) *len < sizeof(sin6)) 248 - return -EINVAL; 249 - 250 - h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 251 - if (!h) { 252 - pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", 253 - &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), 254 - &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); 255 - return -ENOENT; 256 - } 257 - 258 - ct = nf_ct_tuplehash_to_ctrack(h); 259 - 260 - sin6.sin6_family = AF_INET6; 261 - sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 262 - sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; 263 - memcpy(&sin6.sin6_addr, 264 - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 265 - sizeof(sin6.sin6_addr)); 266 - 267 - nf_ct_put(ct); 268 - sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); 269 - return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 270 - } 271 - 272 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 273 - 274 - #include <linux/netfilter/nfnetlink.h> 275 - #include <linux/netfilter/nfnetlink_conntrack.h> 276 - 277 - static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 278 - const struct nf_conntrack_tuple *tuple) 279 - { 280 - if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || 281 - nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) 282 - goto nla_put_failure; 283 - return 0; 284 - 285 - nla_put_failure: 286 - return -1; 287 - } 288 - 289 - static const struct nla_policy ipv6_nla_policy[CTA_IP_MAX+1] = { 290 - [CTA_IP_V6_SRC] = { .len = sizeof(u_int32_t)*4 }, 291 - [CTA_IP_V6_DST] = { .len = sizeof(u_int32_t)*4 }, 292 - }; 293 - 294 - static int ipv6_nlattr_to_tuple(struct nlattr *tb[], 295 - struct nf_conntrack_tuple *t) 296 - { 297 - if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) 298 - return -EINVAL; 299 - 300 - t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); 301 - t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); 302 
- 303 - return 0; 304 - } 305 - #endif 306 - 307 - static int ipv6_hooks_register(struct net *net) 308 - { 309 - struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); 310 - int err = 0; 311 - 312 - mutex_lock(&register_ipv6_hooks); 313 - cnet->users++; 314 - if (cnet->users > 1) 315 - goto out_unlock; 316 - 317 - err = nf_defrag_ipv6_enable(net); 318 - if (err < 0) { 319 - cnet->users = 0; 320 - goto out_unlock; 321 - } 322 - 323 - err = nf_register_net_hooks(net, ipv6_conntrack_ops, 324 - ARRAY_SIZE(ipv6_conntrack_ops)); 325 - if (err) 326 - cnet->users = 0; 327 - out_unlock: 328 - mutex_unlock(&register_ipv6_hooks); 329 - return err; 330 - } 331 - 332 - static void ipv6_hooks_unregister(struct net *net) 333 - { 334 - struct conntrack6_net *cnet = net_generic(net, conntrack6_net_id); 335 - 336 - mutex_lock(&register_ipv6_hooks); 337 - if (cnet->users && (--cnet->users == 0)) 338 - nf_unregister_net_hooks(net, ipv6_conntrack_ops, 339 - ARRAY_SIZE(ipv6_conntrack_ops)); 340 - mutex_unlock(&register_ipv6_hooks); 341 - } 342 - 343 - const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { 344 - .l3proto = PF_INET6, 345 - .pkt_to_tuple = ipv6_pkt_to_tuple, 346 - .invert_tuple = ipv6_invert_tuple, 347 - .get_l4proto = ipv6_get_l4proto, 348 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 349 - .tuple_to_nlattr = ipv6_tuple_to_nlattr, 350 - .nlattr_to_tuple = ipv6_nlattr_to_tuple, 351 - .nla_policy = ipv6_nla_policy, 352 - .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) + 353 - NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])), 354 - #endif 355 - .net_ns_get = ipv6_hooks_register, 356 - .net_ns_put = ipv6_hooks_unregister, 357 - .me = THIS_MODULE, 358 - }; 359 - 360 - MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 361 - MODULE_LICENSE("GPL"); 362 - MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI <yasuyuki.kozakai@toshiba.co.jp>"); 363 - 364 - static struct nf_sockopt_ops so_getorigdst6 = { 365 - .pf = NFPROTO_IPV6, 366 - .get_optmin = IP6T_SO_ORIGINAL_DST, 367 - 
.get_optmax = IP6T_SO_ORIGINAL_DST + 1, 368 - .get = ipv6_getorigdst, 369 - .owner = THIS_MODULE, 370 - }; 371 - 372 - static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = { 373 - &nf_conntrack_l4proto_tcp6, 374 - &nf_conntrack_l4proto_udp6, 375 - &nf_conntrack_l4proto_icmpv6, 376 - #ifdef CONFIG_NF_CT_PROTO_DCCP 377 - &nf_conntrack_l4proto_dccp6, 378 - #endif 379 - #ifdef CONFIG_NF_CT_PROTO_SCTP 380 - &nf_conntrack_l4proto_sctp6, 381 - #endif 382 - #ifdef CONFIG_NF_CT_PROTO_UDPLITE 383 - &nf_conntrack_l4proto_udplite6, 384 - #endif 385 - }; 386 - 387 - static int ipv6_net_init(struct net *net) 388 - { 389 - return nf_ct_l4proto_pernet_register(net, builtin_l4proto6, 390 - ARRAY_SIZE(builtin_l4proto6)); 391 - } 392 - 393 - static void ipv6_net_exit(struct net *net) 394 - { 395 - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, 396 - ARRAY_SIZE(builtin_l4proto6)); 397 - } 398 - 399 - static struct pernet_operations ipv6_net_ops = { 400 - .init = ipv6_net_init, 401 - .exit = ipv6_net_exit, 402 - .id = &conntrack6_net_id, 403 - .size = sizeof(struct conntrack6_net), 404 - }; 405 - 406 - static int __init nf_conntrack_l3proto_ipv6_init(void) 407 - { 408 - int ret = 0; 409 - 410 - need_conntrack(); 411 - 412 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 413 - if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) != 414 - nf_conntrack_l3proto_ipv6.nla_size)) 415 - return -EINVAL; 416 - #endif 417 - 418 - ret = nf_register_sockopt(&so_getorigdst6); 419 - if (ret < 0) { 420 - pr_err("Unable to register netfilter socket option\n"); 421 - return ret; 422 - } 423 - 424 - ret = register_pernet_subsys(&ipv6_net_ops); 425 - if (ret < 0) 426 - goto cleanup_sockopt; 427 - 428 - ret = nf_ct_l4proto_register(builtin_l4proto6, 429 - ARRAY_SIZE(builtin_l4proto6)); 430 - if (ret < 0) 431 - goto cleanup_pernet; 432 - 433 - ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv6); 434 - if (ret < 0) { 435 - pr_err("nf_conntrack_ipv6: can't register ipv6 proto.\n"); 
436 - goto cleanup_l4proto; 437 - } 438 - return ret; 439 - cleanup_l4proto: 440 - nf_ct_l4proto_unregister(builtin_l4proto6, 441 - ARRAY_SIZE(builtin_l4proto6)); 442 - cleanup_pernet: 443 - unregister_pernet_subsys(&ipv6_net_ops); 444 - cleanup_sockopt: 445 - nf_unregister_sockopt(&so_getorigdst6); 446 - return ret; 447 - } 448 - 449 - static void __exit nf_conntrack_l3proto_ipv6_fini(void) 450 - { 451 - synchronize_net(); 452 - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv6); 453 - nf_ct_l4proto_unregister(builtin_l4proto6, 454 - ARRAY_SIZE(builtin_l4proto6)); 455 - unregister_pernet_subsys(&ipv6_net_ops); 456 - nf_unregister_sockopt(&so_getorigdst6); 457 - } 458 - 459 - module_init(nf_conntrack_l3proto_ipv6_init); 460 - module_exit(nf_conntrack_l3proto_ipv6_fini);
+11 -6
net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c net/netfilter/nf_conntrack_proto_icmpv6.c
··· 23 23 #include <net/netfilter/nf_conntrack_tuple.h> 24 24 #include <net/netfilter/nf_conntrack_l4proto.h> 25 25 #include <net/netfilter/nf_conntrack_core.h> 26 + #include <net/netfilter/nf_conntrack_timeout.h> 26 27 #include <net/netfilter/nf_conntrack_zones.h> 27 28 #include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> 28 29 #include <net/netfilter/nf_log.h> ··· 94 93 static int icmpv6_packet(struct nf_conn *ct, 95 94 const struct sk_buff *skb, 96 95 unsigned int dataoff, 97 - enum ip_conntrack_info ctinfo, 98 - unsigned int *timeout) 96 + enum ip_conntrack_info ctinfo) 99 97 { 98 + unsigned int *timeout = nf_ct_timeout_lookup(ct); 99 + 100 + if (!timeout) 101 + timeout = icmpv6_get_timeouts(nf_ct_net(ct)); 102 + 100 103 /* Do not immediately delete the connection after the first 101 104 successful reply to avoid excessive conntrackd traffic 102 105 and also to handle correctly ICMP echo reply duplicates. */ ··· 111 106 112 107 /* Called when a new connection for this protocol found. */ 113 108 static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, 114 - unsigned int dataoff, unsigned int *timeouts) 109 + unsigned int dataoff) 115 110 { 116 111 static const u_int8_t valid_new[] = { 117 112 [ICMPV6_ECHO_REQUEST - 128] = 1, ··· 157 152 158 153 /* Ordinarily, we'd expect the inverted tupleproto, but it's 159 154 been preserved inside the ICMP. 
*/ 160 - if (!nf_ct_invert_tuple(&intuple, &origtuple, 161 - &nf_conntrack_l3proto_ipv6, inproto)) { 155 + if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) { 162 156 pr_debug("icmpv6_error: Can't invert tuple\n"); 163 157 return -NF_ACCEPT; 164 158 } ··· 285 281 unsigned int *timeout = data; 286 282 struct nf_icmp_net *in = icmpv6_pernet(net); 287 283 284 + if (!timeout) 285 + timeout = icmpv6_get_timeouts(net); 288 286 if (tb[CTA_TIMEOUT_ICMPV6_TIMEOUT]) { 289 287 *timeout = 290 288 ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMPV6_TIMEOUT])) * HZ; ··· 365 359 .pkt_to_tuple = icmpv6_pkt_to_tuple, 366 360 .invert_tuple = icmpv6_invert_tuple, 367 361 .packet = icmpv6_packet, 368 - .get_timeouts = icmpv6_get_timeouts, 369 362 .new = icmpv6_new, 370 363 .error = icmpv6_error, 371 364 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
+12 -5
net/ipv6/netfilter/nf_conntrack_reasm.c
··· 33 33 34 34 #include <net/sock.h> 35 35 #include <net/snmp.h> 36 - #include <net/inet_frag.h> 36 + #include <net/ipv6_frag.h> 37 37 38 - #include <net/ipv6.h> 39 38 #include <net/protocol.h> 40 39 #include <net/transp_v6.h> 41 40 #include <net/rawv6.h> ··· 150 151 fq = container_of(frag, struct frag_queue, q); 151 152 net = container_of(fq->q.net, struct net, nf_frag.frags); 152 153 153 - ip6_expire_frag_queue(net, fq); 154 + ip6frag_expire_frag_queue(net, fq); 154 155 } 155 156 156 157 /* Creation primitives. */ ··· 623 624 .exit = nf_ct_net_exit, 624 625 }; 625 626 627 + static const struct rhashtable_params nfct_rhash_params = { 628 + .head_offset = offsetof(struct inet_frag_queue, node), 629 + .hashfn = ip6frag_key_hashfn, 630 + .obj_hashfn = ip6frag_obj_hashfn, 631 + .obj_cmpfn = ip6frag_obj_cmpfn, 632 + .automatic_shrinking = true, 633 + }; 634 + 626 635 int nf_ct_frag6_init(void) 627 636 { 628 637 int ret = 0; 629 638 630 - nf_frags.constructor = ip6_frag_init; 639 + nf_frags.constructor = ip6frag_init; 631 640 nf_frags.destructor = NULL; 632 641 nf_frags.qsize = sizeof(struct frag_queue); 633 642 nf_frags.frag_expire = nf_ct_frag6_expire; 634 643 nf_frags.frags_cache_name = nf_frags_cache_name; 635 - nf_frags.rhash_params = ip6_rhash_params; 644 + nf_frags.rhash_params = nfct_rhash_params; 636 645 ret = inet_frags_init(&nf_frags); 637 646 if (ret) 638 647 goto out;
+1 -3
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
··· 14 14 #include <linux/skbuff.h> 15 15 #include <linux/icmp.h> 16 16 #include <linux/sysctl.h> 17 - #include <net/ipv6.h> 18 - #include <net/inet_frag.h> 17 + #include <net/ipv6_frag.h> 19 18 20 19 #include <linux/netfilter_ipv6.h> 21 20 #include <linux/netfilter_bridge.h> ··· 22 23 #include <net/netfilter/nf_conntrack.h> 23 24 #include <net/netfilter/nf_conntrack_helper.h> 24 25 #include <net/netfilter/nf_conntrack_l4proto.h> 25 - #include <net/netfilter/nf_conntrack_l3proto.h> 26 26 #include <net/netfilter/nf_conntrack_core.h> 27 27 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 28 28 #endif
+7 -85
net/ipv6/reassembly.c
··· 57 57 #include <net/rawv6.h> 58 58 #include <net/ndisc.h> 59 59 #include <net/addrconf.h> 60 - #include <net/inet_frag.h> 60 + #include <net/ipv6_frag.h> 61 61 #include <net/inet_ecn.h> 62 62 63 63 static const char ip6_frag_cache_name[] = "ip6-frags"; ··· 72 72 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 73 73 struct net_device *dev); 74 74 75 - void ip6_frag_init(struct inet_frag_queue *q, const void *a) 76 - { 77 - struct frag_queue *fq = container_of(q, struct frag_queue, q); 78 - const struct frag_v6_compare_key *key = a; 79 - 80 - q->key.v6 = *key; 81 - fq->ecn = 0; 82 - } 83 - EXPORT_SYMBOL(ip6_frag_init); 84 - 85 - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq) 86 - { 87 - struct net_device *dev = NULL; 88 - struct sk_buff *head; 89 - 90 - rcu_read_lock(); 91 - spin_lock(&fq->q.lock); 92 - 93 - if (fq->q.flags & INET_FRAG_COMPLETE) 94 - goto out; 95 - 96 - inet_frag_kill(&fq->q); 97 - 98 - dev = dev_get_by_index_rcu(net, fq->iif); 99 - if (!dev) 100 - goto out; 101 - 102 - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); 103 - __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); 104 - 105 - /* Don't send error if the first segment did not arrive. */ 106 - head = fq->q.fragments; 107 - if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head) 108 - goto out; 109 - 110 - /* But use as source device on which LAST ARRIVED 111 - * segment was received. And do not use fq->dev 112 - * pointer directly, device might already disappeared. 
113 - */ 114 - head->dev = dev; 115 - skb_get(head); 116 - spin_unlock(&fq->q.lock); 117 - 118 - icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); 119 - kfree_skb(head); 120 - goto out_rcu_unlock; 121 - 122 - out: 123 - spin_unlock(&fq->q.lock); 124 - out_rcu_unlock: 125 - rcu_read_unlock(); 126 - inet_frag_put(&fq->q); 127 - } 128 - EXPORT_SYMBOL(ip6_expire_frag_queue); 129 - 130 75 static void ip6_frag_expire(struct timer_list *t) 131 76 { 132 77 struct inet_frag_queue *frag = from_timer(frag, t, timer); ··· 81 136 fq = container_of(frag, struct frag_queue, q); 82 137 net = container_of(fq->q.net, struct net, ipv6.frags); 83 138 84 - ip6_expire_frag_queue(net, fq); 139 + ip6frag_expire_frag_queue(net, fq); 85 140 } 86 141 87 142 static struct frag_queue * ··· 641 696 .exit = ipv6_frags_exit_net, 642 697 }; 643 698 644 - static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed) 645 - { 646 - return jhash2(data, 647 - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 648 - } 649 - 650 - static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed) 651 - { 652 - const struct inet_frag_queue *fq = data; 653 - 654 - return jhash2((const u32 *)&fq->key.v6, 655 - sizeof(struct frag_v6_compare_key) / sizeof(u32), seed); 656 - } 657 - 658 - static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) 659 - { 660 - const struct frag_v6_compare_key *key = arg->key; 661 - const struct inet_frag_queue *fq = ptr; 662 - 663 - return !!memcmp(&fq->key, key, sizeof(*key)); 664 - } 665 - 666 - const struct rhashtable_params ip6_rhash_params = { 699 + static const struct rhashtable_params ip6_rhash_params = { 667 700 .head_offset = offsetof(struct inet_frag_queue, node), 668 - .hashfn = ip6_key_hashfn, 669 - .obj_hashfn = ip6_obj_hashfn, 670 - .obj_cmpfn = ip6_obj_cmpfn, 701 + .hashfn = ip6frag_key_hashfn, 702 + .obj_hashfn = ip6frag_obj_hashfn, 703 + .obj_cmpfn = ip6frag_obj_cmpfn, 671 704 .automatic_shrinking = true, 672 705 }; 673 - 
EXPORT_SYMBOL(ip6_rhash_params); 674 706 675 707 int __init ipv6_frag_init(void) 676 708 { 677 709 int ret; 678 710 679 - ip6_frags.constructor = ip6_frag_init; 711 + ip6_frags.constructor = ip6frag_init; 680 712 ip6_frags.destructor = NULL; 681 713 ip6_frags.qsize = sizeof(struct frag_queue); 682 714 ip6_frags.frag_expire = ip6_frag_expire;
+7 -5
net/netfilter/Kconfig
··· 49 49 config NF_CONNTRACK 50 50 tristate "Netfilter connection tracking support" 51 51 default m if NETFILTER_ADVANCED=n 52 + select NF_DEFRAG_IPV4 53 + select NF_DEFRAG_IPV6 if IPV6 != n 52 54 help 53 55 Connection tracking keeps a record of what packets have passed 54 56 through your machine, in order to figure out how they are related ··· 617 615 tristate "Netfilter nf_tables socket match support" 618 616 depends on IPV6 || IPV6=n 619 617 select NF_SOCKET_IPV4 620 - select NF_SOCKET_IPV6 if IPV6 618 + select NF_SOCKET_IPV6 if NF_TABLES_IPV6 621 619 help 622 620 This option allows matching for the presence or absence of a 623 621 corresponding socket and its attributes. ··· 883 881 tristate "LOG target support" 884 882 select NF_LOG_COMMON 885 883 select NF_LOG_IPV4 886 - select NF_LOG_IPV6 if IPV6 884 + select NF_LOG_IPV6 if IP6_NF_IPTABLES 887 885 default m if NETFILTER_ADVANCED=n 888 886 help 889 887 This option adds a `LOG' target, which allows you to create rules in ··· 975 973 depends on IPV6 || IPV6=n 976 974 depends on !NF_CONNTRACK || NF_CONNTRACK 977 975 select NF_DUP_IPV4 978 - select NF_DUP_IPV6 if IPV6 976 + select NF_DUP_IPV6 if IP6_NF_IPTABLES 979 977 ---help--- 980 978 This option adds a "TEE" target with which a packet can be cloned and 981 979 this clone be rerouted to another nexthop. ··· 1483 1481 depends on NETFILTER_ADVANCED 1484 1482 depends on IPV6 || IPV6=n 1485 1483 depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n 1486 - depends on NF_SOCKET_IPV4 1487 - depends on NF_SOCKET_IPV6 1484 + select NF_SOCKET_IPV4 1485 + select NF_SOCKET_IPV6 if IP6_NF_IPTABLES 1488 1486 select NF_DEFRAG_IPV4 1489 1487 select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n 1490 1488 help
+6 -1
net/netfilter/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o 3 3 4 - nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o 4 + nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o \ 5 + nf_conntrack_proto.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o \ 6 + nf_conntrack_proto_icmp.o \ 7 + nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o 8 + 9 + nf_conntrack-$(subst m,y,$(CONFIG_IPV6)) += nf_conntrack_proto_icmpv6.o 5 10 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o 6 11 nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o 7 12 nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
+43 -24
net/netfilter/ipvs/ip_vs_conn.c
··· 825 825 826 826 /* Unlink conn if not referenced anymore */ 827 827 if (likely(ip_vs_conn_unlink(cp))) { 828 + struct ip_vs_conn *ct = cp->control; 829 + 828 830 /* delete the timer if it is activated by other users */ 829 831 del_timer(&cp->timer); 830 832 831 833 /* does anybody control me? */ 832 - if (cp->control) 834 + if (ct) { 833 835 ip_vs_control_del(cp); 836 + /* Drop CTL or non-assured TPL if not used anymore */ 837 + if (!cp->timeout && !atomic_read(&ct->n_control) && 838 + (!(ct->flags & IP_VS_CONN_F_TEMPLATE) || 839 + !(ct->state & IP_VS_CTPL_S_ASSURED))) { 840 + IP_VS_DBG(4, "drop controlling connection\n"); 841 + ct->timeout = 0; 842 + ip_vs_conn_expire_now(ct); 843 + } 844 + } 834 845 835 846 if ((cp->flags & IP_VS_CONN_F_NFCT) && 836 847 !(cp->flags & IP_VS_CONN_F_ONE_PACKET)) { ··· 883 872 884 873 /* Modify timer, so that it expires as soon as possible. 885 874 * Can be called without reference only if under RCU lock. 875 + * We can have such chain of conns linked with ->control: DATA->CTL->TPL 876 + * - DATA (eg. 
FTP) and TPL (persistence) can be present depending on setup 877 + * - cp->timeout=0 indicates all conns from chain should be dropped but 878 + * TPL is not dropped if in assured state 886 879 */ 887 880 void ip_vs_conn_expire_now(struct ip_vs_conn *cp) 888 881 { ··· 1122 1107 &cp->caddr.in6, ntohs(cp->cport), 1123 1108 &cp->vaddr.in6, ntohs(cp->vport), 1124 1109 dbuf, ntohs(cp->dport), 1125 - ip_vs_state_name(cp->protocol, cp->state), 1110 + ip_vs_state_name(cp), 1126 1111 (cp->timer.expires-jiffies)/HZ, pe_data); 1127 1112 else 1128 1113 #endif ··· 1133 1118 ntohl(cp->caddr.ip), ntohs(cp->cport), 1134 1119 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1135 1120 dbuf, ntohs(cp->dport), 1136 - ip_vs_state_name(cp->protocol, cp->state), 1121 + ip_vs_state_name(cp), 1137 1122 (cp->timer.expires-jiffies)/HZ, pe_data); 1138 1123 } 1139 1124 return 0; ··· 1184 1169 &cp->caddr.in6, ntohs(cp->cport), 1185 1170 &cp->vaddr.in6, ntohs(cp->vport), 1186 1171 dbuf, ntohs(cp->dport), 1187 - ip_vs_state_name(cp->protocol, cp->state), 1172 + ip_vs_state_name(cp), 1188 1173 ip_vs_origin_name(cp->flags), 1189 1174 (cp->timer.expires-jiffies)/HZ); 1190 1175 else ··· 1196 1181 ntohl(cp->caddr.ip), ntohs(cp->cport), 1197 1182 ntohl(cp->vaddr.ip), ntohs(cp->vport), 1198 1183 dbuf, ntohs(cp->dport), 1199 - ip_vs_state_name(cp->protocol, cp->state), 1184 + ip_vs_state_name(cp), 1200 1185 ip_vs_origin_name(cp->flags), 1201 1186 (cp->timer.expires-jiffies)/HZ); 1202 1187 } ··· 1212 1197 #endif 1213 1198 1214 1199 1215 - /* 1216 - * Randomly drop connection entries before running out of memory 1200 + /* Randomly drop connection entries before running out of memory 1201 + * Can be used for DATA and CTL conns. 
For TPL conns there are exceptions: 1202 + * - traffic for services in OPS mode increases ct->in_pkts, so it is supported 1203 + * - traffic for services not in OPS mode does not increase ct->in_pkts in 1204 + * all cases, so it is not supported 1217 1205 */ 1218 1206 static inline int todrop_entry(struct ip_vs_conn *cp) 1219 1207 { ··· 1260 1242 void ip_vs_random_dropentry(struct netns_ipvs *ipvs) 1261 1243 { 1262 1244 int idx; 1263 - struct ip_vs_conn *cp, *cp_c; 1245 + struct ip_vs_conn *cp; 1264 1246 1265 1247 rcu_read_lock(); 1266 1248 /* ··· 1272 1254 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { 1273 1255 if (cp->ipvs != ipvs) 1274 1256 continue; 1257 + if (atomic_read(&cp->n_control)) 1258 + continue; 1275 1259 if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 1276 - if (atomic_read(&cp->n_control) || 1277 - !ip_vs_conn_ops_mode(cp)) 1278 - continue; 1279 - else 1280 - /* connection template of OPS */ 1260 + /* connection template of OPS */ 1261 + if (ip_vs_conn_ops_mode(cp)) 1281 1262 goto try_drop; 1263 + if (!(cp->state & IP_VS_CTPL_S_ASSURED)) 1264 + goto drop; 1265 + continue; 1282 1266 } 1283 1267 if (cp->protocol == IPPROTO_TCP) { 1284 1268 switch(cp->state) { ··· 1314 1294 continue; 1315 1295 } 1316 1296 1317 - IP_VS_DBG(4, "del connection\n"); 1297 + drop: 1298 + IP_VS_DBG(4, "drop connection\n"); 1299 + cp->timeout = 0; 1318 1300 ip_vs_conn_expire_now(cp); 1319 - cp_c = cp->control; 1320 - /* cp->control is valid only with reference to cp */ 1321 - if (cp_c && __ip_vs_conn_get(cp)) { 1322 - IP_VS_DBG(4, "del conn template\n"); 1323 - ip_vs_conn_expire_now(cp_c); 1324 - __ip_vs_conn_put(cp); 1325 - } 1326 1301 } 1327 1302 cond_resched_rcu(); 1328 1303 } ··· 1340 1325 hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { 1341 1326 if (cp->ipvs != ipvs) 1342 1327 continue; 1343 - IP_VS_DBG(4, "del connection\n"); 1344 - ip_vs_conn_expire_now(cp); 1328 + /* As timers are expired in LIFO order, restart 1329 + * the timer of controlling 
connection first, so 1330 + * that it is expired after us. 1331 + */ 1345 1332 cp_c = cp->control; 1346 1333 /* cp->control is valid only with reference to cp */ 1347 1334 if (cp_c && __ip_vs_conn_get(cp)) { 1348 - IP_VS_DBG(4, "del conn template\n"); 1335 + IP_VS_DBG(4, "del controlling connection\n"); 1349 1336 ip_vs_conn_expire_now(cp_c); 1350 1337 __ip_vs_conn_put(cp); 1351 1338 } 1339 + IP_VS_DBG(4, "del connection\n"); 1340 + ip_vs_conn_expire_now(cp); 1352 1341 } 1353 1342 cond_resched_rcu(); 1354 1343 }
+16 -3
net/netfilter/ipvs/ip_vs_proto.c
··· 42 42 43 43 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 44 44 45 + /* States for conn templates: NONE or words separated with ",", max 15 chars */ 46 + static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { 47 + [IP_VS_CTPL_S_NONE] = "NONE", 48 + [IP_VS_CTPL_S_ASSURED] = "ASSURED", 49 + }; 45 50 46 51 /* 47 52 * register an ipvs protocol ··· 198 193 } 199 194 200 195 201 - const char * ip_vs_state_name(__u16 proto, int state) 196 + const char *ip_vs_state_name(const struct ip_vs_conn *cp) 202 197 { 203 - struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 198 + unsigned int state = cp->state; 199 + struct ip_vs_protocol *pp; 204 200 201 + if (cp->flags & IP_VS_CONN_F_TEMPLATE) { 202 + 203 + if (state >= IP_VS_CTPL_S_LAST) 204 + return "ERR!"; 205 + return ip_vs_ctpl_state_name_table[state] ? : "?"; 206 + } 207 + pp = ip_vs_proto_get(cp->protocol); 205 208 if (pp == NULL || pp->state_name == NULL) 206 - return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; 209 + return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!"; 207 210 return pp->state_name(state); 208 211 } 209 212
+2
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 461 461 cp->flags &= ~IP_VS_CONN_F_INACTIVE; 462 462 } 463 463 } 464 + if (next_state == IP_VS_SCTP_S_ESTABLISHED) 465 + ip_vs_control_assure_ct(cp); 464 466 } 465 467 if (likely(pd)) 466 468 cp->timeout = pd->timeout_table[cp->state = next_state];
+2
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 569 569 cp->flags &= ~IP_VS_CONN_F_INACTIVE; 570 570 } 571 571 } 572 + if (new_state == IP_VS_TCP_S_ESTABLISHED) 573 + ip_vs_control_assure_ct(cp); 572 574 } 573 575 574 576 if (likely(pd))
+2
net/netfilter/ipvs/ip_vs_proto_udp.c
··· 460 460 } 461 461 462 462 cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; 463 + if (direction == IP_VS_DIR_OUTPUT) 464 + ip_vs_control_assure_ct(cp); 463 465 } 464 466 465 467 static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
+6 -12
net/netfilter/ipvs/ip_vs_sync.c
··· 1003 1003 continue; 1004 1004 } 1005 1005 } else { 1006 - /* protocol in templates is not used for state/timeout */ 1007 - if (state > 0) { 1008 - IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", 1009 - state); 1010 - state = 0; 1011 - } 1006 + if (state >= IP_VS_CTPL_S_LAST) 1007 + IP_VS_DBG(7, "BACKUP v0, Invalid tpl state %u\n", 1008 + state); 1012 1009 } 1013 1010 1014 1011 ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol, ··· 1163 1166 goto out; 1164 1167 } 1165 1168 } else { 1166 - /* protocol in templates is not used for state/timeout */ 1167 - if (state > 0) { 1168 - IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", 1169 - state); 1170 - state = 0; 1171 - } 1169 + if (state >= IP_VS_CTPL_S_LAST) 1170 + IP_VS_DBG(7, "BACKUP, Invalid tpl state %u\n", 1171 + state); 1172 1172 } 1173 1173 if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data, 1174 1174 pe_data_len, pe_name, pe_name_len)) {
+300 -88
net/netfilter/nf_conncount.c
··· 44 44 45 45 /* we will save the tuples of all connections we care about */ 46 46 struct nf_conncount_tuple { 47 - struct hlist_node node; 47 + struct list_head node; 48 48 struct nf_conntrack_tuple tuple; 49 49 struct nf_conntrack_zone zone; 50 50 int cpu; 51 51 u32 jiffies32; 52 + struct rcu_head rcu_head; 52 53 }; 53 54 54 55 struct nf_conncount_rb { 55 56 struct rb_node node; 56 - struct hlist_head hhead; /* connections/hosts in same subnet */ 57 + struct nf_conncount_list list; 57 58 u32 key[MAX_KEYLEN]; 59 + struct rcu_head rcu_head; 58 60 }; 59 61 60 62 static spinlock_t nf_conncount_locks[CONNCOUNT_LOCK_SLOTS] __cacheline_aligned_in_smp; ··· 64 62 struct nf_conncount_data { 65 63 unsigned int keylen; 66 64 struct rb_root root[CONNCOUNT_SLOTS]; 65 + struct net *net; 66 + struct work_struct gc_work; 67 + unsigned long pending_trees[BITS_TO_LONGS(CONNCOUNT_SLOTS)]; 68 + unsigned int gc_tree; 67 69 }; 68 70 69 71 static u_int32_t conncount_rnd __read_mostly; ··· 88 82 return memcmp(a, b, klen * sizeof(u32)); 89 83 } 90 84 91 - bool nf_conncount_add(struct hlist_head *head, 92 - const struct nf_conntrack_tuple *tuple, 93 - const struct nf_conntrack_zone *zone) 85 + enum nf_conncount_list_add 86 + nf_conncount_add(struct nf_conncount_list *list, 87 + const struct nf_conntrack_tuple *tuple, 88 + const struct nf_conntrack_zone *zone) 94 89 { 95 90 struct nf_conncount_tuple *conn; 96 91 92 + if (WARN_ON_ONCE(list->count > INT_MAX)) 93 + return NF_CONNCOUNT_ERR; 94 + 97 95 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 98 96 if (conn == NULL) 99 - return false; 97 + return NF_CONNCOUNT_ERR; 98 + 100 99 conn->tuple = *tuple; 101 100 conn->zone = *zone; 102 101 conn->cpu = raw_smp_processor_id(); 103 102 conn->jiffies32 = (u32)jiffies; 104 - hlist_add_head(&conn->node, head); 105 - return true; 103 + spin_lock(&list->list_lock); 104 + if (list->dead == true) { 105 + kmem_cache_free(conncount_conn_cachep, conn); 106 + spin_unlock(&list->list_lock); 107 + 
return NF_CONNCOUNT_SKIP; 108 + } 109 + list_add_tail(&conn->node, &list->head); 110 + list->count++; 111 + spin_unlock(&list->list_lock); 112 + return NF_CONNCOUNT_ADDED; 106 113 } 107 114 EXPORT_SYMBOL_GPL(nf_conncount_add); 108 115 116 + static void __conn_free(struct rcu_head *h) 117 + { 118 + struct nf_conncount_tuple *conn; 119 + 120 + conn = container_of(h, struct nf_conncount_tuple, rcu_head); 121 + kmem_cache_free(conncount_conn_cachep, conn); 122 + } 123 + 124 + static bool conn_free(struct nf_conncount_list *list, 125 + struct nf_conncount_tuple *conn) 126 + { 127 + bool free_entry = false; 128 + 129 + spin_lock(&list->list_lock); 130 + 131 + if (list->count == 0) { 132 + spin_unlock(&list->list_lock); 133 + return free_entry; 134 + } 135 + 136 + list->count--; 137 + list_del_rcu(&conn->node); 138 + if (list->count == 0) 139 + free_entry = true; 140 + 141 + spin_unlock(&list->list_lock); 142 + call_rcu(&conn->rcu_head, __conn_free); 143 + return free_entry; 144 + } 145 + 109 146 static const struct nf_conntrack_tuple_hash * 110 - find_or_evict(struct net *net, struct nf_conncount_tuple *conn) 147 + find_or_evict(struct net *net, struct nf_conncount_list *list, 148 + struct nf_conncount_tuple *conn, bool *free_entry) 111 149 { 112 150 const struct nf_conntrack_tuple_hash *found; 113 151 unsigned long a, b; ··· 171 121 */ 172 122 age = a - b; 173 123 if (conn->cpu == cpu || age >= 2) { 174 - hlist_del(&conn->node); 175 - kmem_cache_free(conncount_conn_cachep, conn); 124 + *free_entry = conn_free(list, conn); 176 125 return ERR_PTR(-ENOENT); 177 126 } 178 127 179 128 return ERR_PTR(-EAGAIN); 180 129 } 181 130 182 - unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, 183 - const struct nf_conntrack_tuple *tuple, 184 - const struct nf_conntrack_zone *zone, 185 - bool *addit) 131 + void nf_conncount_lookup(struct net *net, 132 + struct nf_conncount_list *list, 133 + const struct nf_conntrack_tuple *tuple, 134 + const struct 
nf_conntrack_zone *zone, 135 + bool *addit) 186 136 { 187 137 const struct nf_conntrack_tuple_hash *found; 188 - struct nf_conncount_tuple *conn; 138 + struct nf_conncount_tuple *conn, *conn_n; 189 139 struct nf_conn *found_ct; 190 - struct hlist_node *n; 191 - unsigned int length = 0; 140 + unsigned int collect = 0; 141 + bool free_entry = false; 192 142 143 + /* best effort only */ 193 144 *addit = tuple ? true : false; 194 145 195 146 /* check the saved connections */ 196 - hlist_for_each_entry_safe(conn, n, head, node) { 197 - found = find_or_evict(net, conn); 147 + list_for_each_entry_safe(conn, conn_n, &list->head, node) { 148 + if (collect > CONNCOUNT_GC_MAX_NODES) 149 + break; 150 + 151 + found = find_or_evict(net, list, conn, &free_entry); 198 152 if (IS_ERR(found)) { 199 153 /* Not found, but might be about to be confirmed */ 200 154 if (PTR_ERR(found) == -EAGAIN) { 201 - length++; 202 155 if (!tuple) 203 156 continue; 204 157 ··· 209 156 nf_ct_zone_id(&conn->zone, conn->zone.dir) == 210 157 nf_ct_zone_id(zone, zone->dir)) 211 158 *addit = false; 212 - } 159 + } else if (PTR_ERR(found) == -ENOENT) 160 + collect++; 213 161 continue; 214 162 } 215 163 ··· 219 165 if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && 220 166 nf_ct_zone_equal(found_ct, zone, zone->dir)) { 221 167 /* 222 - * Just to be sure we have it only once in the list. 223 168 * We should not see tuples twice unless someone hooks 224 169 * this into a table without "-p tcp --syn". 170 + * 171 + * Attempt to avoid a re-add in this case. 
225 172 */ 226 173 *addit = false; 227 174 } else if (already_closed(found_ct)) { ··· 231 176 * closed already -> ditch it 232 177 */ 233 178 nf_ct_put(found_ct); 234 - hlist_del(&conn->node); 235 - kmem_cache_free(conncount_conn_cachep, conn); 179 + conn_free(list, conn); 180 + collect++; 236 181 continue; 237 182 } 238 183 239 184 nf_ct_put(found_ct); 240 - length++; 241 185 } 242 - 243 - return length; 244 186 } 245 187 EXPORT_SYMBOL_GPL(nf_conncount_lookup); 188 + 189 + void nf_conncount_list_init(struct nf_conncount_list *list) 190 + { 191 + spin_lock_init(&list->list_lock); 192 + INIT_LIST_HEAD(&list->head); 193 + list->count = 1; 194 + list->dead = false; 195 + } 196 + EXPORT_SYMBOL_GPL(nf_conncount_list_init); 197 + 198 + /* Return true if the list is empty */ 199 + bool nf_conncount_gc_list(struct net *net, 200 + struct nf_conncount_list *list) 201 + { 202 + const struct nf_conntrack_tuple_hash *found; 203 + struct nf_conncount_tuple *conn, *conn_n; 204 + struct nf_conn *found_ct; 205 + unsigned int collected = 0; 206 + bool free_entry = false; 207 + 208 + list_for_each_entry_safe(conn, conn_n, &list->head, node) { 209 + found = find_or_evict(net, list, conn, &free_entry); 210 + if (IS_ERR(found)) { 211 + if (PTR_ERR(found) == -ENOENT) { 212 + if (free_entry) 213 + return true; 214 + collected++; 215 + } 216 + continue; 217 + } 218 + 219 + found_ct = nf_ct_tuplehash_to_ctrack(found); 220 + if (already_closed(found_ct)) { 221 + /* 222 + * we do not care about connections which are 223 + * closed already -> ditch it 224 + */ 225 + nf_ct_put(found_ct); 226 + if (conn_free(list, conn)) 227 + return true; 228 + collected++; 229 + continue; 230 + } 231 + 232 + nf_ct_put(found_ct); 233 + if (collected > CONNCOUNT_GC_MAX_NODES) 234 + return false; 235 + } 236 + return false; 237 + } 238 + EXPORT_SYMBOL_GPL(nf_conncount_gc_list); 239 + 240 + static void __tree_nodes_free(struct rcu_head *h) 241 + { 242 + struct nf_conncount_rb *rbconn; 243 + 244 + rbconn = 
container_of(h, struct nf_conncount_rb, rcu_head); 245 + kmem_cache_free(conncount_rb_cachep, rbconn); 246 + } 246 247 247 248 static void tree_nodes_free(struct rb_root *root, 248 249 struct nf_conncount_rb *gc_nodes[], ··· 308 197 309 198 while (gc_count) { 310 199 rbconn = gc_nodes[--gc_count]; 311 - rb_erase(&rbconn->node, root); 312 - kmem_cache_free(conncount_rb_cachep, rbconn); 200 + spin_lock(&rbconn->list.list_lock); 201 + if (rbconn->list.count == 0 && rbconn->list.dead == false) { 202 + rbconn->list.dead = true; 203 + rb_erase(&rbconn->node, root); 204 + call_rcu(&rbconn->rcu_head, __tree_nodes_free); 205 + } 206 + spin_unlock(&rbconn->list.list_lock); 313 207 } 314 208 } 315 209 316 - static unsigned int 317 - count_tree(struct net *net, struct rb_root *root, 318 - const u32 *key, u8 keylen, 319 - const struct nf_conntrack_tuple *tuple, 320 - const struct nf_conntrack_zone *zone) 210 + static void schedule_gc_worker(struct nf_conncount_data *data, int tree) 321 211 { 212 + set_bit(tree, data->pending_trees); 213 + schedule_work(&data->gc_work); 214 + } 215 + 216 + static unsigned int 217 + insert_tree(struct net *net, 218 + struct nf_conncount_data *data, 219 + struct rb_root *root, 220 + unsigned int hash, 221 + const u32 *key, 222 + u8 keylen, 223 + const struct nf_conntrack_tuple *tuple, 224 + const struct nf_conntrack_zone *zone) 225 + { 226 + enum nf_conncount_list_add ret; 322 227 struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; 323 228 struct rb_node **rbnode, *parent; 324 229 struct nf_conncount_rb *rbconn; 325 230 struct nf_conncount_tuple *conn; 326 - unsigned int gc_count; 327 - bool no_gc = false; 231 + unsigned int count = 0, gc_count = 0; 232 + bool node_found = false; 328 233 329 - restart: 330 - gc_count = 0; 234 + spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 235 + 331 236 parent = NULL; 332 237 rbnode = &(root->rb_node); 333 238 while (*rbnode) { 334 239 int diff; 335 - bool addit; 336 - 337 240 rbconn = 
rb_entry(*rbnode, struct nf_conncount_rb, node); 338 241 339 242 parent = *rbnode; ··· 357 232 } else if (diff > 0) { 358 233 rbnode = &((*rbnode)->rb_right); 359 234 } else { 360 - /* same source network -> be counted! */ 361 - unsigned int count; 362 - 363 - count = nf_conncount_lookup(net, &rbconn->hhead, tuple, 364 - zone, &addit); 365 - 366 - tree_nodes_free(root, gc_nodes, gc_count); 367 - if (!addit) 368 - return count; 369 - 370 - if (!nf_conncount_add(&rbconn->hhead, tuple, zone)) 371 - return 0; /* hotdrop */ 372 - 373 - return count + 1; 235 + /* unlikely: other cpu added node already */ 236 + node_found = true; 237 + ret = nf_conncount_add(&rbconn->list, tuple, zone); 238 + if (ret == NF_CONNCOUNT_ERR) { 239 + count = 0; /* hotdrop */ 240 + } else if (ret == NF_CONNCOUNT_ADDED) { 241 + count = rbconn->list.count; 242 + } else { 243 + /* NF_CONNCOUNT_SKIP, rbconn is already 244 + * reclaimed by gc, insert a new tree node 245 + */ 246 + node_found = false; 247 + } 248 + break; 374 249 } 375 250 376 - if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes)) 251 + if (gc_count >= ARRAY_SIZE(gc_nodes)) 377 252 continue; 378 253 379 - /* only used for GC on hhead, retval and 'addit' ignored */ 380 - nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit); 381 - if (hlist_empty(&rbconn->hhead)) 254 + if (nf_conncount_gc_list(net, &rbconn->list)) 382 255 gc_nodes[gc_count++] = rbconn; 383 256 } 384 257 385 258 if (gc_count) { 386 - no_gc = true; 387 259 tree_nodes_free(root, gc_nodes, gc_count); 388 260 /* tree_node_free before new allocation permits 389 261 * allocator to re-use newly free'd object. ··· 388 266 * This is a rare event; in most cases we will find 389 267 * existing node to re-use. (or gc_count is 0). 
390 268 */ 391 - goto restart; 269 + 270 + if (gc_count >= ARRAY_SIZE(gc_nodes)) 271 + schedule_gc_worker(data, hash); 392 272 } 393 273 394 - if (!tuple) 395 - return 0; 274 + if (node_found) 275 + goto out_unlock; 396 276 397 - /* no match, need to insert new node */ 277 + /* expected case: match, insert new node */ 398 278 rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); 399 279 if (rbconn == NULL) 400 - return 0; 280 + goto out_unlock; 401 281 402 282 conn = kmem_cache_alloc(conncount_conn_cachep, GFP_ATOMIC); 403 283 if (conn == NULL) { 404 284 kmem_cache_free(conncount_rb_cachep, rbconn); 405 - return 0; 285 + goto out_unlock; 406 286 } 407 287 408 288 conn->tuple = *tuple; 409 289 conn->zone = *zone; 410 290 memcpy(rbconn->key, key, sizeof(u32) * keylen); 411 291 412 - INIT_HLIST_HEAD(&rbconn->hhead); 413 - hlist_add_head(&conn->node, &rbconn->hhead); 292 + nf_conncount_list_init(&rbconn->list); 293 + list_add(&conn->node, &rbconn->list.head); 294 + count = 1; 414 295 415 296 rb_link_node(&rbconn->node, parent, rbnode); 416 297 rb_insert_color(&rbconn->node, root); 417 - return 1; 298 + out_unlock: 299 + spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 300 + return count; 301 + } 302 + 303 + static unsigned int 304 + count_tree(struct net *net, 305 + struct nf_conncount_data *data, 306 + const u32 *key, 307 + const struct nf_conntrack_tuple *tuple, 308 + const struct nf_conntrack_zone *zone) 309 + { 310 + enum nf_conncount_list_add ret; 311 + struct rb_root *root; 312 + struct rb_node *parent; 313 + struct nf_conncount_rb *rbconn; 314 + unsigned int hash; 315 + u8 keylen = data->keylen; 316 + 317 + hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; 318 + root = &data->root[hash]; 319 + 320 + parent = rcu_dereference_raw(root->rb_node); 321 + while (parent) { 322 + int diff; 323 + bool addit; 324 + 325 + rbconn = rb_entry(parent, struct nf_conncount_rb, node); 326 + 327 + diff = key_diff(key, rbconn->key, keylen); 
328 + if (diff < 0) { 329 + parent = rcu_dereference_raw(parent->rb_left); 330 + } else if (diff > 0) { 331 + parent = rcu_dereference_raw(parent->rb_right); 332 + } else { 333 + /* same source network -> be counted! */ 334 + nf_conncount_lookup(net, &rbconn->list, tuple, zone, 335 + &addit); 336 + 337 + if (!addit) 338 + return rbconn->list.count; 339 + 340 + ret = nf_conncount_add(&rbconn->list, tuple, zone); 341 + if (ret == NF_CONNCOUNT_ERR) { 342 + return 0; /* hotdrop */ 343 + } else if (ret == NF_CONNCOUNT_ADDED) { 344 + return rbconn->list.count; 345 + } else { 346 + /* NF_CONNCOUNT_SKIP, rbconn is already 347 + * reclaimed by gc, insert a new tree node 348 + */ 349 + break; 350 + } 351 + } 352 + } 353 + 354 + if (!tuple) 355 + return 0; 356 + 357 + return insert_tree(net, data, root, hash, key, keylen, tuple, zone); 358 + } 359 + 360 + static void tree_gc_worker(struct work_struct *work) 361 + { 362 + struct nf_conncount_data *data = container_of(work, struct nf_conncount_data, gc_work); 363 + struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES], *rbconn; 364 + struct rb_root *root; 365 + struct rb_node *node; 366 + unsigned int tree, next_tree, gc_count = 0; 367 + 368 + tree = data->gc_tree % CONNCOUNT_LOCK_SLOTS; 369 + root = &data->root[tree]; 370 + 371 + rcu_read_lock(); 372 + for (node = rb_first(root); node != NULL; node = rb_next(node)) { 373 + rbconn = rb_entry(node, struct nf_conncount_rb, node); 374 + if (nf_conncount_gc_list(data->net, &rbconn->list)) 375 + gc_nodes[gc_count++] = rbconn; 376 + } 377 + rcu_read_unlock(); 378 + 379 + spin_lock_bh(&nf_conncount_locks[tree]); 380 + 381 + if (gc_count) { 382 + tree_nodes_free(root, gc_nodes, gc_count); 383 + } 384 + 385 + clear_bit(tree, data->pending_trees); 386 + 387 + next_tree = (tree + 1) % CONNCOUNT_SLOTS; 388 + next_tree = find_next_bit(data->pending_trees, next_tree, CONNCOUNT_SLOTS); 389 + 390 + if (next_tree < CONNCOUNT_SLOTS) { 391 + data->gc_tree = next_tree; 392 + 
schedule_work(work); 393 + } 394 + 395 + spin_unlock_bh(&nf_conncount_locks[tree]); 418 396 } 419 397 420 398 /* Count and return number of conntrack entries in 'net' with particular 'key'. 421 399 * If 'tuple' is not null, insert it into the accounting data structure. 400 + * Call with RCU read lock. 422 401 */ 423 402 unsigned int nf_conncount_count(struct net *net, 424 403 struct nf_conncount_data *data, ··· 527 304 const struct nf_conntrack_tuple *tuple, 528 305 const struct nf_conntrack_zone *zone) 529 306 { 530 - struct rb_root *root; 531 - int count; 532 - u32 hash; 533 - 534 - hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; 535 - root = &data->root[hash]; 536 - 537 - spin_lock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 538 - 539 - count = count_tree(net, root, key, data->keylen, tuple, zone); 540 - 541 - spin_unlock_bh(&nf_conncount_locks[hash % CONNCOUNT_LOCK_SLOTS]); 542 - 543 - return count; 307 + return count_tree(net, data, key, tuple, zone); 544 308 } 545 309 EXPORT_SYMBOL_GPL(nf_conncount_count); 546 310 ··· 558 348 data->root[i] = RB_ROOT; 559 349 560 350 data->keylen = keylen / sizeof(u32); 351 + data->net = net; 352 + INIT_WORK(&data->gc_work, tree_gc_worker); 561 353 562 354 return data; 563 355 } 564 356 EXPORT_SYMBOL_GPL(nf_conncount_init); 565 357 566 - void nf_conncount_cache_free(struct hlist_head *hhead) 358 + void nf_conncount_cache_free(struct nf_conncount_list *list) 567 359 { 568 - struct nf_conncount_tuple *conn; 569 - struct hlist_node *n; 360 + struct nf_conncount_tuple *conn, *conn_n; 570 361 571 - hlist_for_each_entry_safe(conn, n, hhead, node) 362 + list_for_each_entry_safe(conn, conn_n, &list->head, node) 572 363 kmem_cache_free(conncount_conn_cachep, conn); 573 364 } 574 365 EXPORT_SYMBOL_GPL(nf_conncount_cache_free); ··· 584 373 585 374 rb_erase(node, r); 586 375 587 - nf_conncount_cache_free(&rbconn->hhead); 376 + nf_conncount_cache_free(&rbconn->list); 588 377 589 378 
kmem_cache_free(conncount_rb_cachep, rbconn); 590 379 } ··· 595 384 { 596 385 unsigned int i; 597 386 387 + cancel_work_sync(&data->gc_work); 598 388 nf_ct_netns_put(net, family); 599 389 600 390 for (i = 0; i < ARRAY_SIZE(data->root); ++i)
+185 -67
net/netfilter/nf_conntrack_core.c
··· 37 37 #include <linux/rculist_nulls.h> 38 38 39 39 #include <net/netfilter/nf_conntrack.h> 40 - #include <net/netfilter/nf_conntrack_l3proto.h> 41 40 #include <net/netfilter/nf_conntrack_l4proto.h> 42 41 #include <net/netfilter/nf_conntrack_expect.h> 43 42 #include <net/netfilter/nf_conntrack_helper.h> ··· 54 55 #include <net/netfilter/nf_nat_core.h> 55 56 #include <net/netfilter/nf_nat_helper.h> 56 57 #include <net/netns/hash.h> 58 + #include <net/ip.h> 57 59 58 60 #include "nf_internals.h" 59 61 ··· 222 222 return scale_hash(hash_conntrack_raw(tuple, net)); 223 223 } 224 224 225 - bool 225 + static bool 226 226 nf_ct_get_tuple(const struct sk_buff *skb, 227 227 unsigned int nhoff, 228 228 unsigned int dataoff, ··· 230 230 u_int8_t protonum, 231 231 struct net *net, 232 232 struct nf_conntrack_tuple *tuple, 233 - const struct nf_conntrack_l3proto *l3proto, 234 233 const struct nf_conntrack_l4proto *l4proto) 235 234 { 235 + unsigned int size; 236 + const __be32 *ap; 237 + __be32 _addrs[8]; 238 + struct { 239 + __be16 sport; 240 + __be16 dport; 241 + } _inet_hdr, *inet_hdr; 242 + 236 243 memset(tuple, 0, sizeof(*tuple)); 237 244 238 245 tuple->src.l3num = l3num; 239 - if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) 246 + switch (l3num) { 247 + case NFPROTO_IPV4: 248 + nhoff += offsetof(struct iphdr, saddr); 249 + size = 2 * sizeof(__be32); 250 + break; 251 + case NFPROTO_IPV6: 252 + nhoff += offsetof(struct ipv6hdr, saddr); 253 + size = sizeof(_addrs); 254 + break; 255 + default: 256 + return true; 257 + } 258 + 259 + ap = skb_header_pointer(skb, nhoff, size, _addrs); 260 + if (!ap) 240 261 return false; 262 + 263 + switch (l3num) { 264 + case NFPROTO_IPV4: 265 + tuple->src.u3.ip = ap[0]; 266 + tuple->dst.u3.ip = ap[1]; 267 + break; 268 + case NFPROTO_IPV6: 269 + memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); 270 + memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); 271 + break; 272 + } 241 273 242 274 tuple->dst.protonum = protonum; 243 
275 tuple->dst.dir = IP_CT_DIR_ORIGINAL; 244 276 245 - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); 277 + if (unlikely(l4proto->pkt_to_tuple)) 278 + return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); 279 + 280 + /* Actually only need first 4 bytes to get ports. */ 281 + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); 282 + if (!inet_hdr) 283 + return false; 284 + 285 + tuple->src.u.udp.port = inet_hdr->sport; 286 + tuple->dst.u.udp.port = inet_hdr->dport; 287 + return true; 246 288 } 247 - EXPORT_SYMBOL_GPL(nf_ct_get_tuple); 289 + 290 + static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 291 + u_int8_t *protonum) 292 + { 293 + int dataoff = -1; 294 + const struct iphdr *iph; 295 + struct iphdr _iph; 296 + 297 + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); 298 + if (!iph) 299 + return -1; 300 + 301 + /* Conntrack defragments packets, we might still see fragments 302 + * inside ICMP packets though. 303 + */ 304 + if (iph->frag_off & htons(IP_OFFSET)) 305 + return -1; 306 + 307 + dataoff = nhoff + (iph->ihl << 2); 308 + *protonum = iph->protocol; 309 + 310 + /* Check bogus IP headers */ 311 + if (dataoff > skb->len) { 312 + pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n", 313 + nhoff, iph->ihl << 2, skb->len); 314 + return -1; 315 + } 316 + return dataoff; 317 + } 318 + 319 + #if IS_ENABLED(CONFIG_IPV6) 320 + static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 321 + u8 *protonum) 322 + { 323 + int protoff = -1; 324 + unsigned int extoff = nhoff + sizeof(struct ipv6hdr); 325 + __be16 frag_off; 326 + u8 nexthdr; 327 + 328 + if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), 329 + &nexthdr, sizeof(nexthdr)) != 0) { 330 + pr_debug("can't get nexthdr\n"); 331 + return -1; 332 + } 333 + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); 334 + /* 335 + * (protoff == skb->len) means the packet has not data, just 336 + * IPv6 and 
possibly extensions headers, but it is tracked anyway 337 + */ 338 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 339 + pr_debug("can't find proto in pkt\n"); 340 + return -1; 341 + } 342 + 343 + *protonum = nexthdr; 344 + return protoff; 345 + } 346 + #endif 347 + 348 + static int get_l4proto(const struct sk_buff *skb, 349 + unsigned int nhoff, u8 pf, u8 *l4num) 350 + { 351 + switch (pf) { 352 + case NFPROTO_IPV4: 353 + return ipv4_get_l4proto(skb, nhoff, l4num); 354 + #if IS_ENABLED(CONFIG_IPV6) 355 + case NFPROTO_IPV6: 356 + return ipv6_get_l4proto(skb, nhoff, l4num); 357 + #endif 358 + default: 359 + *l4num = 0; 360 + break; 361 + } 362 + return -1; 363 + } 248 364 249 365 bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, 250 366 u_int16_t l3num, 251 367 struct net *net, struct nf_conntrack_tuple *tuple) 252 368 { 253 - const struct nf_conntrack_l3proto *l3proto; 254 369 const struct nf_conntrack_l4proto *l4proto; 255 - unsigned int protoff; 256 - u_int8_t protonum; 370 + u8 protonum; 371 + int protoff; 257 372 int ret; 258 373 259 374 rcu_read_lock(); 260 375 261 - l3proto = __nf_ct_l3proto_find(l3num); 262 - ret = l3proto->get_l4proto(skb, nhoff, &protoff, &protonum); 263 - if (ret != NF_ACCEPT) { 376 + protoff = get_l4proto(skb, nhoff, l3num, &protonum); 377 + if (protoff <= 0) { 264 378 rcu_read_unlock(); 265 379 return false; 266 380 } ··· 382 268 l4proto = __nf_ct_l4proto_find(l3num, protonum); 383 269 384 270 ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, 385 - l3proto, l4proto); 271 + l4proto); 386 272 387 273 rcu_read_unlock(); 388 274 return ret; ··· 392 278 bool 393 279 nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, 394 280 const struct nf_conntrack_tuple *orig, 395 - const struct nf_conntrack_l3proto *l3proto, 396 281 const struct nf_conntrack_l4proto *l4proto) 397 282 { 398 283 memset(inverse, 0, sizeof(*inverse)); 399 284 400 285 inverse->src.l3num = orig->src.l3num; 401 - if 
(l3proto->invert_tuple(inverse, orig) == 0) 402 - return false; 286 + 287 + switch (orig->src.l3num) { 288 + case NFPROTO_IPV4: 289 + inverse->src.u3.ip = orig->dst.u3.ip; 290 + inverse->dst.u3.ip = orig->src.u3.ip; 291 + break; 292 + case NFPROTO_IPV6: 293 + inverse->src.u3.in6 = orig->dst.u3.in6; 294 + inverse->dst.u3.in6 = orig->src.u3.in6; 295 + break; 296 + default: 297 + break; 298 + } 403 299 404 300 inverse->dst.dir = !orig->dst.dir; 405 301 406 302 inverse->dst.protonum = orig->dst.protonum; 407 - return l4proto->invert_tuple(inverse, orig); 303 + 304 + if (unlikely(l4proto->invert_tuple)) 305 + return l4proto->invert_tuple(inverse, orig); 306 + 307 + inverse->src.u.all = orig->dst.u.all; 308 + inverse->dst.u.all = orig->src.u.all; 309 + return true; 408 310 } 409 311 EXPORT_SYMBOL_GPL(nf_ct_invert_tuple); 410 312 ··· 632 502 net_eq(net, nf_ct_net(ct)); 633 503 } 634 504 505 + static inline bool 506 + nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) 507 + { 508 + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 509 + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && 510 + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, 511 + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && 512 + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && 513 + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && 514 + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); 515 + } 516 + 635 517 /* caller must hold rcu readlock and none of the nf_conntrack_locks */ 636 518 static void nf_ct_gc_expired(struct nf_conn *ct) 637 519 { ··· 837 695 /* This is the conntrack entry already in hashes that won race. 
*/ 838 696 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 839 697 const struct nf_conntrack_l4proto *l4proto; 698 + enum ip_conntrack_info oldinfo; 699 + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); 840 700 841 701 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 842 702 if (l4proto->allow_clash && 843 - ((ct->status & IPS_NAT_DONE_MASK) == 0) && 844 703 !nf_ct_is_dying(ct) && 845 704 atomic_inc_not_zero(&ct->ct_general.use)) { 846 - enum ip_conntrack_info oldinfo; 847 - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); 848 - 849 - nf_ct_acct_merge(ct, ctinfo, loser_ct); 850 - nf_conntrack_put(&loser_ct->ct_general); 851 - nf_ct_set(skb, ct, oldinfo); 852 - return NF_ACCEPT; 705 + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || 706 + nf_ct_match(ct, loser_ct)) { 707 + nf_ct_acct_merge(ct, ctinfo, loser_ct); 708 + nf_conntrack_put(&loser_ct->ct_general); 709 + nf_ct_set(skb, ct, oldinfo); 710 + return NF_ACCEPT; 711 + } 712 + nf_ct_put(ct); 853 713 } 854 714 NF_CT_STAT_INC(net, drop); 855 715 return NF_DROP; ··· 1339 1195 static noinline struct nf_conntrack_tuple_hash * 1340 1196 init_conntrack(struct net *net, struct nf_conn *tmpl, 1341 1197 const struct nf_conntrack_tuple *tuple, 1342 - const struct nf_conntrack_l3proto *l3proto, 1343 1198 const struct nf_conntrack_l4proto *l4proto, 1344 1199 struct sk_buff *skb, 1345 1200 unsigned int dataoff, u32 hash) ··· 1351 1208 const struct nf_conntrack_zone *zone; 1352 1209 struct nf_conn_timeout *timeout_ext; 1353 1210 struct nf_conntrack_zone tmp; 1354 - unsigned int *timeouts; 1355 1211 1356 - if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) { 1212 + if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { 1357 1213 pr_debug("Can't invert tuple.\n"); 1358 1214 return NULL; 1359 1215 } ··· 1369 1227 } 1370 1228 1371 1229 timeout_ext = tmpl ? 
nf_ct_timeout_find(tmpl) : NULL; 1372 - if (timeout_ext) { 1373 - timeouts = nf_ct_timeout_data(timeout_ext); 1374 - if (unlikely(!timeouts)) 1375 - timeouts = l4proto->get_timeouts(net); 1376 - } else { 1377 - timeouts = l4proto->get_timeouts(net); 1378 - } 1379 1230 1380 - if (!l4proto->new(ct, skb, dataoff, timeouts)) { 1231 + if (!l4proto->new(ct, skb, dataoff)) { 1381 1232 nf_conntrack_free(ct); 1382 1233 pr_debug("can't track with proto module\n"); 1383 1234 return NULL; ··· 1401 1266 /* exp->master safe, refcnt bumped in nf_ct_find_expectation */ 1402 1267 ct->master = exp->master; 1403 1268 if (exp->helper) { 1404 - help = nf_ct_helper_ext_add(ct, exp->helper, 1405 - GFP_ATOMIC); 1269 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 1406 1270 if (help) 1407 1271 rcu_assign_pointer(help->helper, exp->helper); 1408 1272 } ··· 1441 1307 unsigned int dataoff, 1442 1308 u_int16_t l3num, 1443 1309 u_int8_t protonum, 1444 - const struct nf_conntrack_l3proto *l3proto, 1445 1310 const struct nf_conntrack_l4proto *l4proto) 1446 1311 { 1447 1312 const struct nf_conntrack_zone *zone; ··· 1452 1319 u32 hash; 1453 1320 1454 1321 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 1455 - dataoff, l3num, protonum, net, &tuple, l3proto, 1456 - l4proto)) { 1322 + dataoff, l3num, protonum, net, &tuple, l4proto)) { 1457 1323 pr_debug("Can't get tuple\n"); 1458 1324 return 0; 1459 1325 } ··· 1462 1330 hash = hash_conntrack_raw(&tuple, net); 1463 1331 h = __nf_conntrack_find_get(net, zone, &tuple, hash); 1464 1332 if (!h) { 1465 - h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 1333 + h = init_conntrack(net, tmpl, &tuple, l4proto, 1466 1334 skb, dataoff, hash); 1467 1335 if (!h) 1468 1336 return 0; ··· 1495 1363 nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, 1496 1364 struct sk_buff *skb) 1497 1365 { 1498 - const struct nf_conntrack_l3proto *l3proto; 1499 1366 const struct nf_conntrack_l4proto *l4proto; 1500 1367 struct nf_conn *ct, *tmpl; 1501 1368 
enum ip_conntrack_info ctinfo; 1502 - unsigned int *timeouts; 1503 - unsigned int dataoff; 1504 1369 u_int8_t protonum; 1505 - int ret; 1370 + int dataoff, ret; 1506 1371 1507 1372 tmpl = nf_ct_get(skb, &ctinfo); 1508 1373 if (tmpl || ctinfo == IP_CT_UNTRACKED) { ··· 1513 1384 } 1514 1385 1515 1386 /* rcu_read_lock()ed by nf_hook_thresh */ 1516 - l3proto = __nf_ct_l3proto_find(pf); 1517 - ret = l3proto->get_l4proto(skb, skb_network_offset(skb), 1518 - &dataoff, &protonum); 1519 - if (ret <= 0) { 1387 + dataoff = get_l4proto(skb, skb_network_offset(skb), pf, &protonum); 1388 + if (dataoff <= 0) { 1520 1389 pr_debug("not prepared to track yet or error occurred\n"); 1521 1390 NF_CT_STAT_INC_ATOMIC(net, error); 1522 1391 NF_CT_STAT_INC_ATOMIC(net, invalid); 1523 - ret = -ret; 1392 + ret = NF_ACCEPT; 1524 1393 goto out; 1525 1394 } 1526 1395 ··· 1540 1413 goto out; 1541 1414 } 1542 1415 repeat: 1543 - ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, 1544 - l3proto, l4proto); 1416 + ret = resolve_normal_ct(net, tmpl, skb, dataoff, pf, protonum, l4proto); 1545 1417 if (ret < 0) { 1546 1418 /* Too stressed to deal. */ 1547 1419 NF_CT_STAT_INC_ATOMIC(net, drop); ··· 1556 1430 goto out; 1557 1431 } 1558 1432 1559 - /* Decide what timeout policy we want to apply to this flow. 
*/ 1560 - timeouts = nf_ct_timeout_lookup(net, ct, l4proto); 1561 - 1562 - ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); 1433 + ret = l4proto->packet(ct, skb, dataoff, ctinfo); 1563 1434 if (ret <= 0) { 1564 1435 /* Invalid: inverse of the return code tells 1565 1436 * the netfilter core what to do */ ··· 1594 1471 1595 1472 rcu_read_lock(); 1596 1473 ret = nf_ct_invert_tuple(inverse, orig, 1597 - __nf_ct_l3proto_find(orig->src.l3num), 1598 1474 __nf_ct_l4proto_find(orig->src.l3num, 1599 1475 orig->dst.protonum)); 1600 1476 rcu_read_unlock(); ··· 1731 1609 1732 1610 static int nf_conntrack_update(struct net *net, struct sk_buff *skb) 1733 1611 { 1734 - const struct nf_conntrack_l3proto *l3proto; 1735 1612 const struct nf_conntrack_l4proto *l4proto; 1736 1613 struct nf_conntrack_tuple_hash *h; 1737 1614 struct nf_conntrack_tuple tuple; 1738 1615 enum ip_conntrack_info ctinfo; 1739 1616 struct nf_nat_hook *nat_hook; 1740 - unsigned int dataoff, status; 1617 + unsigned int status; 1741 1618 struct nf_conn *ct; 1619 + int dataoff; 1742 1620 u16 l3num; 1743 1621 u8 l4num; 1744 1622 ··· 1747 1625 return 0; 1748 1626 1749 1627 l3num = nf_ct_l3num(ct); 1750 - l3proto = nf_ct_l3proto_find_get(l3num); 1751 1628 1752 - if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 1753 - &l4num) <= 0) 1629 + dataoff = get_l4proto(skb, skb_network_offset(skb), l3num, &l4num); 1630 + if (dataoff <= 0) 1754 1631 return -1; 1755 1632 1756 1633 l4proto = nf_ct_l4proto_find_get(l3num, l4num); 1757 1634 1758 1635 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 1759 - l4num, net, &tuple, l3proto, l4proto)) 1636 + l4num, net, &tuple, l4proto)) 1760 1637 return -1; 1761 1638 1762 1639 if (ct->status & IPS_SRC_NAT) { ··· 2209 2088 return nf_conntrack_hash_resize(hashsize); 2210 2089 } 2211 2090 EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); 2212 - 2213 - module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 2214 - 
&nf_conntrack_htable_size, 0600); 2215 2091 2216 2092 static __always_inline unsigned int total_extension_size(void) 2217 2093 {
-1
net/netfilter/nf_conntrack_expect.c
··· 610 610 expect->tuple.src.l3num, 611 611 expect->tuple.dst.protonum); 612 612 print_tuple(s, &expect->tuple, 613 - __nf_ct_l3proto_find(expect->tuple.src.l3num), 614 613 __nf_ct_l4proto_find(expect->tuple.src.l3num, 615 614 expect->tuple.dst.protonum)); 616 615
+2 -4
net/netfilter/nf_conntrack_helper.c
··· 24 24 #include <linux/rtnetlink.h> 25 25 26 26 #include <net/netfilter/nf_conntrack.h> 27 - #include <net/netfilter/nf_conntrack_l3proto.h> 28 27 #include <net/netfilter/nf_conntrack_l4proto.h> 29 28 #include <net/netfilter/nf_conntrack_helper.h> 30 29 #include <net/netfilter/nf_conntrack_core.h> ··· 192 193 EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); 193 194 194 195 struct nf_conn_help * 195 - nf_ct_helper_ext_add(struct nf_conn *ct, 196 - struct nf_conntrack_helper *helper, gfp_t gfp) 196 + nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) 197 197 { 198 198 struct nf_conn_help *help; 199 199 ··· 261 263 } 262 264 263 265 if (help == NULL) { 264 - help = nf_ct_helper_ext_add(ct, helper, flags); 266 + help = nf_ct_helper_ext_add(ct, flags); 265 267 if (help == NULL) 266 268 return -ENOMEM; 267 269 } else {
-66
net/netfilter/nf_conntrack_l3proto_generic.c
··· 1 - /* 2 - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 3 - * 4 - * Based largely upon the original ip_conntrack code which 5 - * had the following copyright information: 6 - * 7 - * (C) 1999-2001 Paul `Rusty' Russell 8 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 9 - * 10 - * This program is free software; you can redistribute it and/or modify 11 - * it under the terms of the GNU General Public License version 2 as 12 - * published by the Free Software Foundation. 13 - * 14 - * Author: 15 - * Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> 16 - */ 17 - 18 - #include <linux/types.h> 19 - #include <linux/ip.h> 20 - #include <linux/netfilter.h> 21 - #include <linux/module.h> 22 - #include <linux/skbuff.h> 23 - #include <linux/icmp.h> 24 - #include <linux/sysctl.h> 25 - #include <net/ip.h> 26 - 27 - #include <linux/netfilter_ipv4.h> 28 - #include <net/netfilter/nf_conntrack.h> 29 - #include <net/netfilter/nf_conntrack_l4proto.h> 30 - #include <net/netfilter/nf_conntrack_l3proto.h> 31 - #include <net/netfilter/nf_conntrack_core.h> 32 - #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 33 - 34 - static bool generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 35 - struct nf_conntrack_tuple *tuple) 36 - { 37 - memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); 38 - memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); 39 - 40 - return true; 41 - } 42 - 43 - static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, 44 - const struct nf_conntrack_tuple *orig) 45 - { 46 - memset(&tuple->src.u3, 0, sizeof(tuple->src.u3)); 47 - memset(&tuple->dst.u3, 0, sizeof(tuple->dst.u3)); 48 - 49 - return true; 50 - } 51 - 52 - static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, 53 - unsigned int *dataoff, u_int8_t *protonum) 54 - { 55 - /* Never track !!! 
*/ 56 - return -NF_ACCEPT; 57 - } 58 - 59 - 60 - struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { 61 - .l3proto = PF_UNSPEC, 62 - .pkt_to_tuple = generic_pkt_to_tuple, 63 - .invert_tuple = generic_invert_tuple, 64 - .get_l4proto = generic_get_l4proto, 65 - }; 66 - EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic);
+73 -25
net/netfilter/nf_conntrack_netlink.c
··· 38 38 #include <net/netfilter/nf_conntrack_expect.h> 39 39 #include <net/netfilter/nf_conntrack_helper.h> 40 40 #include <net/netfilter/nf_conntrack_seqadj.h> 41 - #include <net/netfilter/nf_conntrack_l3proto.h> 42 41 #include <net/netfilter/nf_conntrack_l4proto.h> 43 42 #include <net/netfilter/nf_conntrack_tuple.h> 44 43 #include <net/netfilter/nf_conntrack_acct.h> ··· 80 81 return -1; 81 82 } 82 83 84 + static int ipv4_tuple_to_nlattr(struct sk_buff *skb, 85 + const struct nf_conntrack_tuple *tuple) 86 + { 87 + if (nla_put_in_addr(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || 88 + nla_put_in_addr(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) 89 + return -EMSGSIZE; 90 + return 0; 91 + } 92 + 93 + static int ipv6_tuple_to_nlattr(struct sk_buff *skb, 94 + const struct nf_conntrack_tuple *tuple) 95 + { 96 + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || 97 + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) 98 + return -EMSGSIZE; 99 + return 0; 100 + } 101 + 83 102 static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, 84 - const struct nf_conntrack_tuple *tuple, 85 - const struct nf_conntrack_l3proto *l3proto) 103 + const struct nf_conntrack_tuple *tuple) 86 104 { 87 105 int ret = 0; 88 106 struct nlattr *nest_parms; ··· 108 92 if (!nest_parms) 109 93 goto nla_put_failure; 110 94 111 - if (likely(l3proto->tuple_to_nlattr)) 112 - ret = l3proto->tuple_to_nlattr(skb, tuple); 95 + switch (tuple->src.l3num) { 96 + case NFPROTO_IPV4: 97 + ret = ipv4_tuple_to_nlattr(skb, tuple); 98 + break; 99 + case NFPROTO_IPV6: 100 + ret = ipv6_tuple_to_nlattr(skb, tuple); 101 + break; 102 + } 113 103 114 104 nla_nest_end(skb, nest_parms); 115 105 ··· 128 106 static int ctnetlink_dump_tuples(struct sk_buff *skb, 129 107 const struct nf_conntrack_tuple *tuple) 130 108 { 131 - const struct nf_conntrack_l3proto *l3proto; 132 109 const struct nf_conntrack_l4proto *l4proto; 133 110 int ret; 134 111 135 112 rcu_read_lock(); 136 - l3proto = 
__nf_ct_l3proto_find(tuple->src.l3num); 137 - ret = ctnetlink_dump_tuples_ip(skb, tuple, l3proto); 113 + ret = ctnetlink_dump_tuples_ip(skb, tuple); 138 114 139 115 if (ret >= 0) { 140 116 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, ··· 576 556 return -1; 577 557 } 578 558 559 + static const struct nla_policy cta_ip_nla_policy[CTA_IP_MAX + 1] = { 560 + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, 561 + [CTA_IP_V4_DST] = { .type = NLA_U32 }, 562 + [CTA_IP_V6_SRC] = { .len = sizeof(__be32) * 4 }, 563 + [CTA_IP_V6_DST] = { .len = sizeof(__be32) * 4 }, 564 + }; 565 + 579 566 #if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS) 580 567 static size_t ctnetlink_proto_size(const struct nf_conn *ct) 581 568 { 582 - const struct nf_conntrack_l3proto *l3proto; 583 569 const struct nf_conntrack_l4proto *l4proto; 584 570 size_t len, len4 = 0; 585 571 586 - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 587 - len = l3proto->nla_size; 572 + len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1); 588 573 len *= 3u; /* ORIG, REPLY, MASTER */ 589 574 590 575 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); ··· 961 936 return skb->len; 962 937 } 963 938 939 + static int ipv4_nlattr_to_tuple(struct nlattr *tb[], 940 + struct nf_conntrack_tuple *t) 941 + { 942 + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) 943 + return -EINVAL; 944 + 945 + t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]); 946 + t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]); 947 + 948 + return 0; 949 + } 950 + 951 + static int ipv6_nlattr_to_tuple(struct nlattr *tb[], 952 + struct nf_conntrack_tuple *t) 953 + { 954 + if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) 955 + return -EINVAL; 956 + 957 + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); 958 + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); 959 + 960 + return 0; 961 + } 962 + 964 963 static int ctnetlink_parse_tuple_ip(struct nlattr *attr, 965 964 struct nf_conntrack_tuple *tuple) 966 965 { 967 
966 struct nlattr *tb[CTA_IP_MAX+1]; 968 - struct nf_conntrack_l3proto *l3proto; 969 967 int ret = 0; 970 968 971 969 ret = nla_parse_nested(tb, CTA_IP_MAX, attr, NULL, NULL); 972 970 if (ret < 0) 973 971 return ret; 974 972 975 - rcu_read_lock(); 976 - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); 973 + ret = nla_validate_nested(attr, CTA_IP_MAX, 974 + cta_ip_nla_policy, NULL); 975 + if (ret) 976 + return ret; 977 977 978 - if (likely(l3proto->nlattr_to_tuple)) { 979 - ret = nla_validate_nested(attr, CTA_IP_MAX, 980 - l3proto->nla_policy, NULL); 981 - if (ret == 0) 982 - ret = l3proto->nlattr_to_tuple(tb, tuple); 978 + switch (tuple->src.l3num) { 979 + case NFPROTO_IPV4: 980 + ret = ipv4_nlattr_to_tuple(tb, tuple); 981 + break; 982 + case NFPROTO_IPV6: 983 + ret = ipv6_nlattr_to_tuple(tb, tuple); 984 + break; 983 985 } 984 - 985 - rcu_read_unlock(); 986 986 987 987 return ret; 988 988 } ··· 1947 1897 } else { 1948 1898 struct nf_conn_help *help; 1949 1899 1950 - help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC); 1900 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 1951 1901 if (help == NULL) { 1952 1902 err = -ENOMEM; 1953 1903 goto err2; ··· 2631 2581 const struct nf_conntrack_tuple *tuple, 2632 2582 const struct nf_conntrack_tuple_mask *mask) 2633 2583 { 2634 - const struct nf_conntrack_l3proto *l3proto; 2635 2584 const struct nf_conntrack_l4proto *l4proto; 2636 2585 struct nf_conntrack_tuple m; 2637 2586 struct nlattr *nest_parms; ··· 2646 2597 goto nla_put_failure; 2647 2598 2648 2599 rcu_read_lock(); 2649 - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); 2650 - ret = ctnetlink_dump_tuples_ip(skb, &m, l3proto); 2600 + ret = ctnetlink_dump_tuples_ip(skb, &m); 2651 2601 if (ret >= 0) { 2652 2602 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, 2653 2603 tuple->dst.protonum);
+617 -228
net/netfilter/nf_conntrack_proto.c
··· 1 - /* L3/L4 protocol support for nf_conntrack. */ 2 - 3 - /* (C) 1999-2001 Paul `Rusty' Russell 4 - * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> 5 - * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> 6 - * (C) 2006-2012 Patrick McHardy <kaber@trash.net> 7 - * 8 - * This program is free software; you can redistribute it and/or modify 9 - * it under the terms of the GNU General Public License version 2 as 10 - * published by the Free Software Foundation. 11 - */ 1 + // SPDX-License-Identifier: GPL-2.0 12 2 13 3 #include <linux/types.h> 14 4 #include <linux/netfilter.h> ··· 14 24 #include <linux/netdevice.h> 15 25 16 26 #include <net/netfilter/nf_conntrack.h> 17 - #include <net/netfilter/nf_conntrack_l3proto.h> 18 27 #include <net/netfilter/nf_conntrack_l4proto.h> 19 28 #include <net/netfilter/nf_conntrack_core.h> 20 29 #include <net/netfilter/nf_log.h> 21 30 31 + #include <linux/ip.h> 32 + #include <linux/icmp.h> 33 + #include <linux/sysctl.h> 34 + #include <net/route.h> 35 + #include <net/ip.h> 36 + 37 + #include <linux/netfilter_ipv4.h> 38 + #include <linux/netfilter_ipv6.h> 39 + #include <linux/netfilter_ipv6/ip6_tables.h> 40 + #include <net/netfilter/nf_conntrack_helper.h> 41 + #include <net/netfilter/nf_conntrack_zones.h> 42 + #include <net/netfilter/nf_conntrack_seqadj.h> 43 + #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 44 + #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 45 + #include <net/netfilter/nf_nat_helper.h> 46 + #include <net/netfilter/ipv4/nf_defrag_ipv4.h> 47 + #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 48 + 49 + #include <linux/ipv6.h> 50 + #include <linux/in6.h> 51 + #include <net/ipv6.h> 52 + #include <net/inet_frag.h> 53 + 54 + extern unsigned int nf_conntrack_net_id; 55 + 22 56 static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; 23 - struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; 24 - EXPORT_SYMBOL_GPL(nf_ct_l3protos); 25 57 26 
58 static DEFINE_MUTEX(nf_ct_proto_mutex); 27 59 ··· 134 122 } 135 123 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); 136 124 137 - /* this is guaranteed to always return a valid protocol helper, since 138 - * it falls back to generic_protocol */ 139 - const struct nf_conntrack_l3proto * 140 - nf_ct_l3proto_find_get(u_int16_t l3proto) 141 - { 142 - struct nf_conntrack_l3proto *p; 143 - 144 - rcu_read_lock(); 145 - p = __nf_ct_l3proto_find(l3proto); 146 - if (!try_module_get(p->me)) 147 - p = &nf_conntrack_l3proto_generic; 148 - rcu_read_unlock(); 149 - 150 - return p; 151 - } 152 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); 153 - 154 - int 155 - nf_ct_l3proto_try_module_get(unsigned short l3proto) 156 - { 157 - const struct nf_conntrack_l3proto *p; 158 - int ret; 159 - 160 - retry: p = nf_ct_l3proto_find_get(l3proto); 161 - if (p == &nf_conntrack_l3proto_generic) { 162 - ret = request_module("nf_conntrack-%d", l3proto); 163 - if (!ret) 164 - goto retry; 165 - 166 - return -EPROTOTYPE; 167 - } 168 - 169 - return 0; 170 - } 171 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_try_module_get); 172 - 173 - void nf_ct_l3proto_module_put(unsigned short l3proto) 174 - { 175 - struct nf_conntrack_l3proto *p; 176 - 177 - /* rcu_read_lock not necessary since the caller holds a reference, but 178 - * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find() 179 - */ 180 - rcu_read_lock(); 181 - p = __nf_ct_l3proto_find(l3proto); 182 - module_put(p->me); 183 - rcu_read_unlock(); 184 - } 185 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); 186 - 187 - static int nf_ct_netns_do_get(struct net *net, u8 nfproto) 188 - { 189 - const struct nf_conntrack_l3proto *l3proto; 190 - int ret; 191 - 192 - might_sleep(); 193 - 194 - ret = nf_ct_l3proto_try_module_get(nfproto); 195 - if (ret < 0) 196 - return ret; 197 - 198 - /* we already have a reference, can't fail */ 199 - rcu_read_lock(); 200 - l3proto = __nf_ct_l3proto_find(nfproto); 201 - rcu_read_unlock(); 202 - 203 - if (!l3proto->net_ns_get) 204 - 
return 0; 205 - 206 - ret = l3proto->net_ns_get(net); 207 - if (ret < 0) 208 - nf_ct_l3proto_module_put(nfproto); 209 - 210 - return ret; 211 - } 212 - 213 - int nf_ct_netns_get(struct net *net, u8 nfproto) 214 - { 215 - int err; 216 - 217 - if (nfproto == NFPROTO_INET) { 218 - err = nf_ct_netns_do_get(net, NFPROTO_IPV4); 219 - if (err < 0) 220 - goto err1; 221 - err = nf_ct_netns_do_get(net, NFPROTO_IPV6); 222 - if (err < 0) 223 - goto err2; 224 - } else { 225 - err = nf_ct_netns_do_get(net, nfproto); 226 - if (err < 0) 227 - goto err1; 228 - } 229 - return 0; 230 - 231 - err2: 232 - nf_ct_netns_put(net, NFPROTO_IPV4); 233 - err1: 234 - return err; 235 - } 236 - EXPORT_SYMBOL_GPL(nf_ct_netns_get); 237 - 238 - static void nf_ct_netns_do_put(struct net *net, u8 nfproto) 239 - { 240 - const struct nf_conntrack_l3proto *l3proto; 241 - 242 - might_sleep(); 243 - 244 - /* same as nf_conntrack_netns_get(), reference assumed */ 245 - rcu_read_lock(); 246 - l3proto = __nf_ct_l3proto_find(nfproto); 247 - rcu_read_unlock(); 248 - 249 - if (WARN_ON(!l3proto)) 250 - return; 251 - 252 - if (l3proto->net_ns_put) 253 - l3proto->net_ns_put(net); 254 - 255 - nf_ct_l3proto_module_put(nfproto); 256 - } 257 - 258 - void nf_ct_netns_put(struct net *net, uint8_t nfproto) 259 - { 260 - if (nfproto == NFPROTO_INET) { 261 - nf_ct_netns_do_put(net, NFPROTO_IPV4); 262 - nf_ct_netns_do_put(net, NFPROTO_IPV6); 263 - } else 264 - nf_ct_netns_do_put(net, nfproto); 265 - } 266 - EXPORT_SYMBOL_GPL(nf_ct_netns_put); 267 - 268 125 const struct nf_conntrack_l4proto * 269 126 nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) 270 127 { ··· 155 274 } 156 275 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); 157 276 158 - static int kill_l3proto(struct nf_conn *i, void *data) 159 - { 160 - return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto; 161 - } 162 - 163 277 static int kill_l4proto(struct nf_conn *i, void *data) 164 278 { 165 279 const struct nf_conntrack_l4proto *l4proto; ··· 
162 286 return nf_ct_protonum(i) == l4proto->l4proto && 163 287 nf_ct_l3num(i) == l4proto->l3proto; 164 288 } 165 - 166 - int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto) 167 - { 168 - int ret = 0; 169 - struct nf_conntrack_l3proto *old; 170 - 171 - if (proto->l3proto >= NFPROTO_NUMPROTO) 172 - return -EBUSY; 173 - #if IS_ENABLED(CONFIG_NF_CT_NETLINK) 174 - if (proto->tuple_to_nlattr && proto->nla_size == 0) 175 - return -EINVAL; 176 - #endif 177 - mutex_lock(&nf_ct_proto_mutex); 178 - old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 179 - lockdep_is_held(&nf_ct_proto_mutex)); 180 - if (old != &nf_conntrack_l3proto_generic) { 181 - ret = -EBUSY; 182 - goto out_unlock; 183 - } 184 - 185 - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); 186 - 187 - out_unlock: 188 - mutex_unlock(&nf_ct_proto_mutex); 189 - return ret; 190 - 191 - } 192 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); 193 - 194 - void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto) 195 - { 196 - BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); 197 - 198 - mutex_lock(&nf_ct_proto_mutex); 199 - BUG_ON(rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], 200 - lockdep_is_held(&nf_ct_proto_mutex) 201 - ) != proto); 202 - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], 203 - &nf_conntrack_l3proto_generic); 204 - mutex_unlock(&nf_ct_proto_mutex); 205 - 206 - synchronize_rcu(); 207 - /* Remove all contrack entries for this protocol */ 208 - nf_ct_iterate_destroy(kill_l3proto, (void*)proto); 209 - } 210 - EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); 211 289 212 290 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, 213 291 const struct nf_conntrack_l4proto *l4proto) ··· 329 499 } 330 500 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one); 331 501 332 - int nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], 333 - unsigned int num_proto) 502 + static void 503 + nf_ct_l4proto_unregister(const struct 
nf_conntrack_l4proto * const l4proto[], 504 + unsigned int num_proto) 505 + { 506 + mutex_lock(&nf_ct_proto_mutex); 507 + while (num_proto-- != 0) 508 + __nf_ct_l4proto_unregister_one(l4proto[num_proto]); 509 + mutex_unlock(&nf_ct_proto_mutex); 510 + 511 + synchronize_net(); 512 + /* Remove all contrack entries for this protocol */ 513 + nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); 514 + } 515 + 516 + static int 517 + nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[], 518 + unsigned int num_proto) 334 519 { 335 520 int ret = -EINVAL, ver; 336 521 unsigned int i; ··· 363 518 } 364 519 return ret; 365 520 } 366 - EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); 367 521 368 522 int nf_ct_l4proto_pernet_register(struct net *net, 369 523 const struct nf_conntrack_l4proto *const l4proto[], ··· 386 542 } 387 543 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register); 388 544 389 - void nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[], 390 - unsigned int num_proto) 391 - { 392 - mutex_lock(&nf_ct_proto_mutex); 393 - while (num_proto-- != 0) 394 - __nf_ct_l4proto_unregister_one(l4proto[num_proto]); 395 - mutex_unlock(&nf_ct_proto_mutex); 396 - 397 - synchronize_net(); 398 - /* Remove all contrack entries for this protocol */ 399 - nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto); 400 - } 401 - EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); 402 - 403 545 void nf_ct_l4proto_pernet_unregister(struct net *net, 404 546 const struct nf_conntrack_l4proto *const l4proto[], 405 547 unsigned int num_proto) ··· 394 564 nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); 395 565 } 396 566 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister); 567 + 568 + static unsigned int ipv4_helper(void *priv, 569 + struct sk_buff *skb, 570 + const struct nf_hook_state *state) 571 + { 572 + struct nf_conn *ct; 573 + enum ip_conntrack_info ctinfo; 574 + const struct nf_conn_help *help; 575 + const struct nf_conntrack_helper *helper; 576 + 
577 + /* This is where we call the helper: as the packet goes out. */ 578 + ct = nf_ct_get(skb, &ctinfo); 579 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 580 + return NF_ACCEPT; 581 + 582 + help = nfct_help(ct); 583 + if (!help) 584 + return NF_ACCEPT; 585 + 586 + /* rcu_read_lock()ed by nf_hook_thresh */ 587 + helper = rcu_dereference(help->helper); 588 + if (!helper) 589 + return NF_ACCEPT; 590 + 591 + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), 592 + ct, ctinfo); 593 + } 594 + 595 + static unsigned int ipv4_confirm(void *priv, 596 + struct sk_buff *skb, 597 + const struct nf_hook_state *state) 598 + { 599 + struct nf_conn *ct; 600 + enum ip_conntrack_info ctinfo; 601 + 602 + ct = nf_ct_get(skb, &ctinfo); 603 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 604 + goto out; 605 + 606 + /* adjust seqs for loopback traffic only in outgoing direction */ 607 + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 608 + !nf_is_loopback_packet(skb)) { 609 + if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { 610 + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 611 + return NF_DROP; 612 + } 613 + } 614 + out: 615 + /* We've seen it coming out the other side: confirm it */ 616 + return nf_conntrack_confirm(skb); 617 + } 618 + 619 + static unsigned int ipv4_conntrack_in(void *priv, 620 + struct sk_buff *skb, 621 + const struct nf_hook_state *state) 622 + { 623 + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 624 + } 625 + 626 + static unsigned int ipv4_conntrack_local(void *priv, 627 + struct sk_buff *skb, 628 + const struct nf_hook_state *state) 629 + { 630 + if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */ 631 + enum ip_conntrack_info ctinfo; 632 + struct nf_conn *tmpl; 633 + 634 + tmpl = nf_ct_get(skb, &ctinfo); 635 + if (tmpl && nf_ct_is_template(tmpl)) { 636 + /* when skipping ct, clear templates to avoid fooling 637 + * later targets/matches 638 + */ 639 + skb->_nfct = 0; 640 + nf_ct_put(tmpl); 641 + } 642 + return 
NF_ACCEPT; 643 + } 644 + 645 + return nf_conntrack_in(state->net, PF_INET, state->hook, skb); 646 + } 647 + 648 + /* Connection tracking may drop packets, but never alters them, so 649 + * make it the first hook. 650 + */ 651 + static const struct nf_hook_ops ipv4_conntrack_ops[] = { 652 + { 653 + .hook = ipv4_conntrack_in, 654 + .pf = NFPROTO_IPV4, 655 + .hooknum = NF_INET_PRE_ROUTING, 656 + .priority = NF_IP_PRI_CONNTRACK, 657 + }, 658 + { 659 + .hook = ipv4_conntrack_local, 660 + .pf = NFPROTO_IPV4, 661 + .hooknum = NF_INET_LOCAL_OUT, 662 + .priority = NF_IP_PRI_CONNTRACK, 663 + }, 664 + { 665 + .hook = ipv4_helper, 666 + .pf = NFPROTO_IPV4, 667 + .hooknum = NF_INET_POST_ROUTING, 668 + .priority = NF_IP_PRI_CONNTRACK_HELPER, 669 + }, 670 + { 671 + .hook = ipv4_confirm, 672 + .pf = NFPROTO_IPV4, 673 + .hooknum = NF_INET_POST_ROUTING, 674 + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 675 + }, 676 + { 677 + .hook = ipv4_helper, 678 + .pf = NFPROTO_IPV4, 679 + .hooknum = NF_INET_LOCAL_IN, 680 + .priority = NF_IP_PRI_CONNTRACK_HELPER, 681 + }, 682 + { 683 + .hook = ipv4_confirm, 684 + .pf = NFPROTO_IPV4, 685 + .hooknum = NF_INET_LOCAL_IN, 686 + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 687 + }, 688 + }; 689 + 690 + /* Fast function for those who don't want to parse /proc (and I don't 691 + * blame them). 692 + * Reversing the socket's dst/src point of view gives us the reply 693 + * mapping. 
694 + */ 695 + static int 696 + getorigdst(struct sock *sk, int optval, void __user *user, int *len) 697 + { 698 + const struct inet_sock *inet = inet_sk(sk); 699 + const struct nf_conntrack_tuple_hash *h; 700 + struct nf_conntrack_tuple tuple; 701 + 702 + memset(&tuple, 0, sizeof(tuple)); 703 + 704 + lock_sock(sk); 705 + tuple.src.u3.ip = inet->inet_rcv_saddr; 706 + tuple.src.u.tcp.port = inet->inet_sport; 707 + tuple.dst.u3.ip = inet->inet_daddr; 708 + tuple.dst.u.tcp.port = inet->inet_dport; 709 + tuple.src.l3num = PF_INET; 710 + tuple.dst.protonum = sk->sk_protocol; 711 + release_sock(sk); 712 + 713 + /* We only do TCP and SCTP at the moment: is there a better way? */ 714 + if (tuple.dst.protonum != IPPROTO_TCP && 715 + tuple.dst.protonum != IPPROTO_SCTP) { 716 + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 717 + return -ENOPROTOOPT; 718 + } 719 + 720 + if ((unsigned int)*len < sizeof(struct sockaddr_in)) { 721 + pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", 722 + *len, sizeof(struct sockaddr_in)); 723 + return -EINVAL; 724 + } 725 + 726 + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 727 + if (h) { 728 + struct sockaddr_in sin; 729 + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 730 + 731 + sin.sin_family = AF_INET; 732 + sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 733 + .tuple.dst.u.tcp.port; 734 + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 735 + .tuple.dst.u3.ip; 736 + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 737 + 738 + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 739 + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 740 + nf_ct_put(ct); 741 + if (copy_to_user(user, &sin, sizeof(sin)) != 0) 742 + return -EFAULT; 743 + else 744 + return 0; 745 + } 746 + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 747 + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 748 + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 749 + return -ENOENT; 750 + } 751 + 752 + static struct nf_sockopt_ops so_getorigdst = { 753 + 
.pf = PF_INET, 754 + .get_optmin = SO_ORIGINAL_DST, 755 + .get_optmax = SO_ORIGINAL_DST + 1, 756 + .get = getorigdst, 757 + .owner = THIS_MODULE, 758 + }; 759 + 760 + #if IS_ENABLED(CONFIG_IPV6) 761 + static int 762 + ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 763 + { 764 + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; 765 + const struct ipv6_pinfo *inet6 = inet6_sk(sk); 766 + const struct inet_sock *inet = inet_sk(sk); 767 + const struct nf_conntrack_tuple_hash *h; 768 + struct sockaddr_in6 sin6; 769 + struct nf_conn *ct; 770 + __be32 flow_label; 771 + int bound_dev_if; 772 + 773 + lock_sock(sk); 774 + tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 775 + tuple.src.u.tcp.port = inet->inet_sport; 776 + tuple.dst.u3.in6 = sk->sk_v6_daddr; 777 + tuple.dst.u.tcp.port = inet->inet_dport; 778 + tuple.dst.protonum = sk->sk_protocol; 779 + bound_dev_if = sk->sk_bound_dev_if; 780 + flow_label = inet6->flow_label; 781 + release_sock(sk); 782 + 783 + if (tuple.dst.protonum != IPPROTO_TCP && 784 + tuple.dst.protonum != IPPROTO_SCTP) 785 + return -ENOPROTOOPT; 786 + 787 + if (*len < 0 || (unsigned int)*len < sizeof(sin6)) 788 + return -EINVAL; 789 + 790 + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 791 + if (!h) { 792 + pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", 793 + &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), 794 + &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); 795 + return -ENOENT; 796 + } 797 + 798 + ct = nf_ct_tuplehash_to_ctrack(h); 799 + 800 + sin6.sin6_family = AF_INET6; 801 + sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 802 + sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; 803 + memcpy(&sin6.sin6_addr, 804 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 805 + sizeof(sin6.sin6_addr)); 806 + 807 + nf_ct_put(ct); 808 + sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); 809 + return copy_to_user(user, &sin6, 
sizeof(sin6)) ? -EFAULT : 0; 810 + } 811 + 812 + static struct nf_sockopt_ops so_getorigdst6 = { 813 + .pf = NFPROTO_IPV6, 814 + .get_optmin = IP6T_SO_ORIGINAL_DST, 815 + .get_optmax = IP6T_SO_ORIGINAL_DST + 1, 816 + .get = ipv6_getorigdst, 817 + .owner = THIS_MODULE, 818 + }; 819 + 820 + static unsigned int ipv6_confirm(void *priv, 821 + struct sk_buff *skb, 822 + const struct nf_hook_state *state) 823 + { 824 + struct nf_conn *ct; 825 + enum ip_conntrack_info ctinfo; 826 + unsigned char pnum = ipv6_hdr(skb)->nexthdr; 827 + int protoff; 828 + __be16 frag_off; 829 + 830 + ct = nf_ct_get(skb, &ctinfo); 831 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 832 + goto out; 833 + 834 + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 835 + &frag_off); 836 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 837 + pr_debug("proto header not found\n"); 838 + goto out; 839 + } 840 + 841 + /* adjust seqs for loopback traffic only in outgoing direction */ 842 + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 843 + !nf_is_loopback_packet(skb)) { 844 + if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { 845 + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 846 + return NF_DROP; 847 + } 848 + } 849 + out: 850 + /* We've seen it coming out the other side: confirm it */ 851 + return nf_conntrack_confirm(skb); 852 + } 853 + 854 + static unsigned int ipv6_conntrack_in(void *priv, 855 + struct sk_buff *skb, 856 + const struct nf_hook_state *state) 857 + { 858 + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 859 + } 860 + 861 + static unsigned int ipv6_conntrack_local(void *priv, 862 + struct sk_buff *skb, 863 + const struct nf_hook_state *state) 864 + { 865 + return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); 866 + } 867 + 868 + static unsigned int ipv6_helper(void *priv, 869 + struct sk_buff *skb, 870 + const struct nf_hook_state *state) 871 + { 872 + struct nf_conn *ct; 873 + const struct nf_conn_help *help; 874 + const struct 
nf_conntrack_helper *helper; 875 + enum ip_conntrack_info ctinfo; 876 + __be16 frag_off; 877 + int protoff; 878 + u8 nexthdr; 879 + 880 + /* This is where we call the helper: as the packet goes out. */ 881 + ct = nf_ct_get(skb, &ctinfo); 882 + if (!ct || ctinfo == IP_CT_RELATED_REPLY) 883 + return NF_ACCEPT; 884 + 885 + help = nfct_help(ct); 886 + if (!help) 887 + return NF_ACCEPT; 888 + /* rcu_read_lock()ed by nf_hook_thresh */ 889 + helper = rcu_dereference(help->helper); 890 + if (!helper) 891 + return NF_ACCEPT; 892 + 893 + nexthdr = ipv6_hdr(skb)->nexthdr; 894 + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, 895 + &frag_off); 896 + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 897 + pr_debug("proto header not found\n"); 898 + return NF_ACCEPT; 899 + } 900 + 901 + return helper->help(skb, protoff, ct, ctinfo); 902 + } 903 + 904 + static const struct nf_hook_ops ipv6_conntrack_ops[] = { 905 + { 906 + .hook = ipv6_conntrack_in, 907 + .pf = NFPROTO_IPV6, 908 + .hooknum = NF_INET_PRE_ROUTING, 909 + .priority = NF_IP6_PRI_CONNTRACK, 910 + }, 911 + { 912 + .hook = ipv6_conntrack_local, 913 + .pf = NFPROTO_IPV6, 914 + .hooknum = NF_INET_LOCAL_OUT, 915 + .priority = NF_IP6_PRI_CONNTRACK, 916 + }, 917 + { 918 + .hook = ipv6_helper, 919 + .pf = NFPROTO_IPV6, 920 + .hooknum = NF_INET_POST_ROUTING, 921 + .priority = NF_IP6_PRI_CONNTRACK_HELPER, 922 + }, 923 + { 924 + .hook = ipv6_confirm, 925 + .pf = NFPROTO_IPV6, 926 + .hooknum = NF_INET_POST_ROUTING, 927 + .priority = NF_IP6_PRI_LAST, 928 + }, 929 + { 930 + .hook = ipv6_helper, 931 + .pf = NFPROTO_IPV6, 932 + .hooknum = NF_INET_LOCAL_IN, 933 + .priority = NF_IP6_PRI_CONNTRACK_HELPER, 934 + }, 935 + { 936 + .hook = ipv6_confirm, 937 + .pf = NFPROTO_IPV6, 938 + .hooknum = NF_INET_LOCAL_IN, 939 + .priority = NF_IP6_PRI_LAST - 1, 940 + }, 941 + }; 942 + #endif 943 + 944 + static int nf_ct_netns_do_get(struct net *net, u8 nfproto) 945 + { 946 + struct nf_conntrack_net *cnet = net_generic(net, 
nf_conntrack_net_id); 947 + int err = 0; 948 + 949 + mutex_lock(&nf_ct_proto_mutex); 950 + 951 + switch (nfproto) { 952 + case NFPROTO_IPV4: 953 + cnet->users4++; 954 + if (cnet->users4 > 1) 955 + goto out_unlock; 956 + err = nf_defrag_ipv4_enable(net); 957 + if (err) { 958 + cnet->users4 = 0; 959 + goto out_unlock; 960 + } 961 + 962 + err = nf_register_net_hooks(net, ipv4_conntrack_ops, 963 + ARRAY_SIZE(ipv4_conntrack_ops)); 964 + if (err) 965 + cnet->users4 = 0; 966 + break; 967 + #if IS_ENABLED(CONFIG_IPV6) 968 + case NFPROTO_IPV6: 969 + cnet->users6++; 970 + if (cnet->users6 > 1) 971 + goto out_unlock; 972 + err = nf_defrag_ipv6_enable(net); 973 + if (err < 0) { 974 + cnet->users6 = 0; 975 + goto out_unlock; 976 + } 977 + 978 + err = nf_register_net_hooks(net, ipv6_conntrack_ops, 979 + ARRAY_SIZE(ipv6_conntrack_ops)); 980 + if (err) 981 + cnet->users6 = 0; 982 + break; 983 + #endif 984 + default: 985 + err = -EPROTO; 986 + break; 987 + } 988 + out_unlock: 989 + mutex_unlock(&nf_ct_proto_mutex); 990 + return err; 991 + } 992 + 993 + static void nf_ct_netns_do_put(struct net *net, u8 nfproto) 994 + { 995 + struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); 996 + 997 + mutex_lock(&nf_ct_proto_mutex); 998 + switch (nfproto) { 999 + case NFPROTO_IPV4: 1000 + if (cnet->users4 && (--cnet->users4 == 0)) 1001 + nf_unregister_net_hooks(net, ipv4_conntrack_ops, 1002 + ARRAY_SIZE(ipv4_conntrack_ops)); 1003 + break; 1004 + #if IS_ENABLED(CONFIG_IPV6) 1005 + case NFPROTO_IPV6: 1006 + if (cnet->users6 && (--cnet->users6 == 0)) 1007 + nf_unregister_net_hooks(net, ipv6_conntrack_ops, 1008 + ARRAY_SIZE(ipv6_conntrack_ops)); 1009 + break; 1010 + #endif 1011 + } 1012 + 1013 + mutex_unlock(&nf_ct_proto_mutex); 1014 + } 1015 + 1016 + int nf_ct_netns_get(struct net *net, u8 nfproto) 1017 + { 1018 + int err; 1019 + 1020 + if (nfproto == NFPROTO_INET) { 1021 + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); 1022 + if (err < 0) 1023 + goto err1; 1024 + err = 
nf_ct_netns_do_get(net, NFPROTO_IPV6); 1025 + if (err < 0) 1026 + goto err2; 1027 + } else { 1028 + err = nf_ct_netns_do_get(net, nfproto); 1029 + if (err < 0) 1030 + goto err1; 1031 + } 1032 + return 0; 1033 + 1034 + err2: 1035 + nf_ct_netns_put(net, NFPROTO_IPV4); 1036 + err1: 1037 + return err; 1038 + } 1039 + EXPORT_SYMBOL_GPL(nf_ct_netns_get); 1040 + 1041 + void nf_ct_netns_put(struct net *net, uint8_t nfproto) 1042 + { 1043 + if (nfproto == NFPROTO_INET) { 1044 + nf_ct_netns_do_put(net, NFPROTO_IPV4); 1045 + nf_ct_netns_do_put(net, NFPROTO_IPV6); 1046 + } else { 1047 + nf_ct_netns_do_put(net, nfproto); 1048 + } 1049 + } 1050 + EXPORT_SYMBOL_GPL(nf_ct_netns_put); 1051 + 1052 + static const struct nf_conntrack_l4proto * const builtin_l4proto[] = { 1053 + &nf_conntrack_l4proto_tcp4, 1054 + &nf_conntrack_l4proto_udp4, 1055 + &nf_conntrack_l4proto_icmp, 1056 + #ifdef CONFIG_NF_CT_PROTO_DCCP 1057 + &nf_conntrack_l4proto_dccp4, 1058 + #endif 1059 + #ifdef CONFIG_NF_CT_PROTO_SCTP 1060 + &nf_conntrack_l4proto_sctp4, 1061 + #endif 1062 + #ifdef CONFIG_NF_CT_PROTO_UDPLITE 1063 + &nf_conntrack_l4proto_udplite4, 1064 + #endif 1065 + #if IS_ENABLED(CONFIG_IPV6) 1066 + &nf_conntrack_l4proto_tcp6, 1067 + &nf_conntrack_l4proto_udp6, 1068 + &nf_conntrack_l4proto_icmpv6, 1069 + #ifdef CONFIG_NF_CT_PROTO_DCCP 1070 + &nf_conntrack_l4proto_dccp6, 1071 + #endif 1072 + #ifdef CONFIG_NF_CT_PROTO_SCTP 1073 + &nf_conntrack_l4proto_sctp6, 1074 + #endif 1075 + #ifdef CONFIG_NF_CT_PROTO_UDPLITE 1076 + &nf_conntrack_l4proto_udplite6, 1077 + #endif 1078 + #endif /* CONFIG_IPV6 */ 1079 + }; 1080 + 1081 + int nf_conntrack_proto_init(void) 1082 + { 1083 + int ret = 0; 1084 + 1085 + ret = nf_register_sockopt(&so_getorigdst); 1086 + if (ret < 0) 1087 + return ret; 1088 + 1089 + #if IS_ENABLED(CONFIG_IPV6) 1090 + ret = nf_register_sockopt(&so_getorigdst6); 1091 + if (ret < 0) 1092 + goto cleanup_sockopt; 1093 + #endif 1094 + ret = nf_ct_l4proto_register(builtin_l4proto, 1095 + 
ARRAY_SIZE(builtin_l4proto)); 1096 + if (ret < 0) 1097 + goto cleanup_sockopt2; 1098 + 1099 + return ret; 1100 + cleanup_sockopt2: 1101 + nf_unregister_sockopt(&so_getorigdst); 1102 + #if IS_ENABLED(CONFIG_IPV6) 1103 + cleanup_sockopt: 1104 + nf_unregister_sockopt(&so_getorigdst6); 1105 + #endif 1106 + return ret; 1107 + } 1108 + 1109 + void nf_conntrack_proto_fini(void) 1110 + { 1111 + unsigned int i; 1112 + 1113 + nf_ct_l4proto_unregister(builtin_l4proto, 1114 + ARRAY_SIZE(builtin_l4proto)); 1115 + nf_unregister_sockopt(&so_getorigdst); 1116 + #if IS_ENABLED(CONFIG_IPV6) 1117 + nf_unregister_sockopt(&so_getorigdst6); 1118 + #endif 1119 + 1120 + /* free l3proto protocol tables */ 1121 + for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) 1122 + kfree(nf_ct_protos[i]); 1123 + } 397 1124 398 1125 int nf_conntrack_proto_pernet_init(struct net *net) 399 1126 { ··· 968 581 if (err < 0) 969 582 return err; 970 583 584 + err = nf_ct_l4proto_pernet_register(net, builtin_l4proto, 585 + ARRAY_SIZE(builtin_l4proto)); 586 + if (err < 0) { 587 + nf_ct_l4proto_unregister_sysctl(net, pn, 588 + &nf_conntrack_l4proto_generic); 589 + return err; 590 + } 591 + 971 592 pn->users++; 972 593 return 0; 973 594 } ··· 985 590 struct nf_proto_net *pn = nf_ct_l4proto_net(net, 986 591 &nf_conntrack_l4proto_generic); 987 592 593 + nf_ct_l4proto_pernet_unregister(net, builtin_l4proto, 594 + ARRAY_SIZE(builtin_l4proto)); 988 595 pn->users--; 989 596 nf_ct_l4proto_unregister_sysctl(net, 990 597 pn, 991 598 &nf_conntrack_l4proto_generic); 992 599 } 993 600 994 - int nf_conntrack_proto_init(void) 995 - { 996 - unsigned int i; 997 - for (i = 0; i < NFPROTO_NUMPROTO; i++) 998 - rcu_assign_pointer(nf_ct_l3protos[i], 999 - &nf_conntrack_l3proto_generic); 1000 - return 0; 1001 - } 1002 601 1003 - void nf_conntrack_proto_fini(void) 1004 - { 1005 - unsigned int i; 1006 - /* free l3proto protocol tables */ 1007 - for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++) 1008 - kfree(nf_ct_protos[i]); 1009 - } 602 + 
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 603 + &nf_conntrack_htable_size, 0600); 604 + 605 + MODULE_ALIAS("ip_conntrack"); 606 + MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 607 + MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 608 + MODULE_LICENSE("GPL");
+7 -37
net/netfilter/nf_conntrack_proto_dccp.c
··· 23 23 #include <net/netfilter/nf_conntrack.h> 24 24 #include <net/netfilter/nf_conntrack_l4proto.h> 25 25 #include <net/netfilter/nf_conntrack_ecache.h> 26 + #include <net/netfilter/nf_conntrack_timeout.h> 26 27 #include <net/netfilter/nf_log.h> 27 28 28 29 /* Timeouts are based on values from RFC4340: ··· 389 388 return &net->ct.nf_ct_proto.dccp; 390 389 } 391 390 392 - static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 393 - struct net *net, struct nf_conntrack_tuple *tuple) 394 - { 395 - struct dccp_hdr _hdr, *dh; 396 - 397 - /* Actually only need first 4 bytes to get ports. */ 398 - dh = skb_header_pointer(skb, dataoff, 4, &_hdr); 399 - if (dh == NULL) 400 - return false; 401 - 402 - tuple->src.u.dccp.port = dh->dccph_sport; 403 - tuple->dst.u.dccp.port = dh->dccph_dport; 404 - return true; 405 - } 406 - 407 - static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, 408 - const struct nf_conntrack_tuple *tuple) 409 - { 410 - inv->src.u.dccp.port = tuple->dst.u.dccp.port; 411 - inv->dst.u.dccp.port = tuple->src.u.dccp.port; 412 - return true; 413 - } 414 - 415 391 static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, 416 - unsigned int dataoff, unsigned int *timeouts) 392 + unsigned int dataoff) 417 393 { 418 394 struct net *net = nf_ct_net(ct); 419 395 struct nf_dccp_net *dn; ··· 438 460 ntohl(dhack->dccph_ack_nr_low); 439 461 } 440 462 441 - static unsigned int *dccp_get_timeouts(struct net *net) 442 - { 443 - return dccp_pernet(net)->dccp_timeout; 444 - } 445 - 446 463 static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, 447 - unsigned int dataoff, enum ip_conntrack_info ctinfo, 448 - unsigned int *timeouts) 464 + unsigned int dataoff, enum ip_conntrack_info ctinfo) 449 465 { 450 466 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 451 467 struct dccp_hdr _dh, *dh; 452 468 u_int8_t type, old_state, new_state; 453 469 enum ct_dccp_roles role; 470 + unsigned int *timeouts; 454 471 455 472 dh = 
skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh); 456 473 BUG_ON(dh == NULL); ··· 519 546 if (new_state != old_state) 520 547 nf_conntrack_event_cache(IPCT_PROTOINFO, ct); 521 548 549 + timeouts = nf_ct_timeout_lookup(ct); 550 + if (!timeouts) 551 + timeouts = dccp_pernet(nf_ct_net(ct))->dccp_timeout; 522 552 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); 523 553 524 554 return NF_ACCEPT; ··· 840 864 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 = { 841 865 .l3proto = AF_INET, 842 866 .l4proto = IPPROTO_DCCP, 843 - .pkt_to_tuple = dccp_pkt_to_tuple, 844 - .invert_tuple = dccp_invert_tuple, 845 867 .new = dccp_new, 846 868 .packet = dccp_packet, 847 - .get_timeouts = dccp_get_timeouts, 848 869 .error = dccp_error, 849 870 .can_early_drop = dccp_can_early_drop, 850 871 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 873 900 const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 = { 874 901 .l3proto = AF_INET6, 875 902 .l4proto = IPPROTO_DCCP, 876 - .pkt_to_tuple = dccp_pkt_to_tuple, 877 - .invert_tuple = dccp_invert_tuple, 878 903 .new = dccp_new, 879 904 .packet = dccp_packet, 880 - .get_timeouts = dccp_get_timeouts, 881 905 .error = dccp_error, 882 906 .can_early_drop = dccp_can_early_drop, 883 907 #ifdef CONFIG_NF_CONNTRACK_PROCFS
+12 -20
net/netfilter/nf_conntrack_proto_generic.c
··· 11 11 #include <linux/timer.h> 12 12 #include <linux/netfilter.h> 13 13 #include <net/netfilter/nf_conntrack_l4proto.h> 14 + #include <net/netfilter/nf_conntrack_timeout.h> 14 15 15 16 static const unsigned int nf_ct_generic_timeout = 600*HZ; 16 17 ··· 42 41 return true; 43 42 } 44 43 45 - static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, 46 - const struct nf_conntrack_tuple *orig) 47 - { 48 - tuple->src.u.all = 0; 49 - tuple->dst.u.all = 0; 50 - 51 - return true; 52 - } 53 - 54 - static unsigned int *generic_get_timeouts(struct net *net) 55 - { 56 - return &(generic_pernet(net)->timeout); 57 - } 58 - 59 44 /* Returns verdict for packet, or -1 for invalid. */ 60 45 static int generic_packet(struct nf_conn *ct, 61 46 const struct sk_buff *skb, 62 47 unsigned int dataoff, 63 - enum ip_conntrack_info ctinfo, 64 - unsigned int *timeout) 48 + enum ip_conntrack_info ctinfo) 65 49 { 50 + const unsigned int *timeout = nf_ct_timeout_lookup(ct); 51 + 52 + if (!timeout) 53 + timeout = &generic_pernet(nf_ct_net(ct))->timeout; 54 + 66 55 nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); 67 56 return NF_ACCEPT; 68 57 } 69 58 70 59 /* Called when a new connection for this protocol found. 
*/ 71 60 static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, 72 - unsigned int dataoff, unsigned int *timeouts) 61 + unsigned int dataoff) 73 62 { 74 63 bool ret; 75 64 ··· 78 87 static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], 79 88 struct net *net, void *data) 80 89 { 81 - unsigned int *timeout = data; 82 90 struct nf_generic_net *gn = generic_pernet(net); 91 + unsigned int *timeout = data; 92 + 93 + if (!timeout) 94 + timeout = &gn->timeout; 83 95 84 96 if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) 85 97 *timeout = ··· 162 168 .l3proto = PF_UNSPEC, 163 169 .l4proto = 255, 164 170 .pkt_to_tuple = generic_pkt_to_tuple, 165 - .invert_tuple = generic_invert_tuple, 166 171 .packet = generic_packet, 167 - .get_timeouts = generic_get_timeouts, 168 172 .new = generic_new, 169 173 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 170 174 .ctnl_timeout = {
+10 -14
net/netfilter/nf_conntrack_proto_gre.c
··· 39 39 #include <net/netfilter/nf_conntrack_l4proto.h> 40 40 #include <net/netfilter/nf_conntrack_helper.h> 41 41 #include <net/netfilter/nf_conntrack_core.h> 42 + #include <net/netfilter/nf_conntrack_timeout.h> 42 43 #include <linux/netfilter/nf_conntrack_proto_gre.h> 43 44 #include <linux/netfilter/nf_conntrack_pptp.h> 44 45 ··· 180 179 181 180 /* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ 182 181 183 - /* invert gre part of tuple */ 184 - static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, 185 - const struct nf_conntrack_tuple *orig) 186 - { 187 - tuple->dst.u.gre.key = orig->src.u.gre.key; 188 - tuple->src.u.gre.key = orig->dst.u.gre.key; 189 - return true; 190 - } 191 - 192 182 /* gre hdr info to tuple */ 193 183 static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 194 184 struct net *net, struct nf_conntrack_tuple *tuple) ··· 235 243 static int gre_packet(struct nf_conn *ct, 236 244 const struct sk_buff *skb, 237 245 unsigned int dataoff, 238 - enum ip_conntrack_info ctinfo, 239 - unsigned int *timeouts) 246 + enum ip_conntrack_info ctinfo) 240 247 { 241 248 /* If we've seen traffic both ways, this is a GRE connection. 242 249 * Extend timeout. */ ··· 254 263 255 264 /* Called when a new connection for this protocol found. */ 256 265 static bool gre_new(struct nf_conn *ct, const struct sk_buff *skb, 257 - unsigned int dataoff, unsigned int *timeouts) 266 + unsigned int dataoff) 258 267 { 268 + unsigned int *timeouts = nf_ct_timeout_lookup(ct); 269 + 270 + if (!timeouts) 271 + timeouts = gre_get_timeouts(nf_ct_net(ct)); 272 + 259 273 pr_debug(": "); 260 274 nf_ct_dump_tuple(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 261 275 ··· 296 300 unsigned int *timeouts = data; 297 301 struct netns_proto_gre *net_gre = gre_pernet(net); 298 302 303 + if (!timeouts) 304 + timeouts = gre_get_timeouts(net); 299 305 /* set default timeouts for GRE. 
*/ 300 306 timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED]; 301 307 timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED]; ··· 354 356 .l3proto = AF_INET, 355 357 .l4proto = IPPROTO_GRE, 356 358 .pkt_to_tuple = gre_pkt_to_tuple, 357 - .invert_tuple = gre_invert_tuple, 358 359 #ifdef CONFIG_NF_CONNTRACK_PROCFS 359 360 .print_conntrack = gre_print_conntrack, 360 361 #endif 361 - .get_timeouts = gre_get_timeouts, 362 362 .packet = gre_packet, 363 363 .new = gre_new, 364 364 .destroy = gre_destroy,
+8 -38
net/netfilter/nf_conntrack_proto_sctp.c
··· 28 28 #include <net/netfilter/nf_conntrack.h> 29 29 #include <net/netfilter/nf_conntrack_l4proto.h> 30 30 #include <net/netfilter/nf_conntrack_ecache.h> 31 + #include <net/netfilter/nf_conntrack_timeout.h> 31 32 32 33 /* FIXME: Examine ipfilter's timeouts and conntrack transitions more 33 34 closely. They're more complex. --RR ··· 149 148 static inline struct nf_sctp_net *sctp_pernet(struct net *net) 150 149 { 151 150 return &net->ct.nf_ct_proto.sctp; 152 - } 153 - 154 - static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 155 - struct net *net, struct nf_conntrack_tuple *tuple) 156 - { 157 - const struct sctphdr *hp; 158 - struct sctphdr _hdr; 159 - 160 - /* Actually only need first 4 bytes to get ports. */ 161 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 162 - if (hp == NULL) 163 - return false; 164 - 165 - tuple->src.u.sctp.port = hp->source; 166 - tuple->dst.u.sctp.port = hp->dest; 167 - return true; 168 - } 169 - 170 - static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, 171 - const struct nf_conntrack_tuple *orig) 172 - { 173 - tuple->src.u.sctp.port = orig->dst.u.sctp.port; 174 - tuple->dst.u.sctp.port = orig->src.u.sctp.port; 175 - return true; 176 151 } 177 152 178 153 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 273 296 return sctp_conntracks[dir][i][cur_state]; 274 297 } 275 298 276 - static unsigned int *sctp_get_timeouts(struct net *net) 277 - { 278 - return sctp_pernet(net)->timeouts; 279 - } 280 - 281 299 /* Returns verdict for packet, or -NF_ACCEPT for invalid. 
*/ 282 300 static int sctp_packet(struct nf_conn *ct, 283 301 const struct sk_buff *skb, 284 302 unsigned int dataoff, 285 - enum ip_conntrack_info ctinfo, 286 - unsigned int *timeouts) 303 + enum ip_conntrack_info ctinfo) 287 304 { 288 305 enum sctp_conntrack new_state, old_state; 289 306 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); ··· 286 315 const struct sctp_chunkhdr *sch; 287 316 struct sctp_chunkhdr _sch; 288 317 u_int32_t offset, count; 318 + unsigned int *timeouts; 289 319 unsigned long map[256 / sizeof(unsigned long)] = { 0 }; 290 320 291 321 sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); ··· 375 403 } 376 404 spin_unlock_bh(&ct->lock); 377 405 406 + timeouts = nf_ct_timeout_lookup(ct); 407 + if (!timeouts) 408 + timeouts = sctp_pernet(nf_ct_net(ct))->timeouts; 409 + 378 410 nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); 379 411 380 412 if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && ··· 399 423 400 424 /* Called when a new connection for this protocol found. 
*/ 401 425 static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, 402 - unsigned int dataoff, unsigned int *timeouts) 426 + unsigned int dataoff) 403 427 { 404 428 enum sctp_conntrack new_state; 405 429 const struct sctphdr *sh; ··· 756 780 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 = { 757 781 .l3proto = PF_INET, 758 782 .l4proto = IPPROTO_SCTP, 759 - .pkt_to_tuple = sctp_pkt_to_tuple, 760 - .invert_tuple = sctp_invert_tuple, 761 783 #ifdef CONFIG_NF_CONNTRACK_PROCFS 762 784 .print_conntrack = sctp_print_conntrack, 763 785 #endif 764 786 .packet = sctp_packet, 765 - .get_timeouts = sctp_get_timeouts, 766 787 .new = sctp_new, 767 788 .error = sctp_error, 768 789 .can_early_drop = sctp_can_early_drop, ··· 790 817 const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 = { 791 818 .l3proto = PF_INET6, 792 819 .l4proto = IPPROTO_SCTP, 793 - .pkt_to_tuple = sctp_pkt_to_tuple, 794 - .invert_tuple = sctp_invert_tuple, 795 820 #ifdef CONFIG_NF_CONNTRACK_PROCFS 796 821 .print_conntrack = sctp_print_conntrack, 797 822 #endif 798 823 .packet = sctp_packet, 799 - .get_timeouts = sctp_get_timeouts, 800 824 .new = sctp_new, 801 825 .error = sctp_error, 802 826 .can_early_drop = sctp_can_early_drop,
+11 -41
net/netfilter/nf_conntrack_proto_tcp.c
··· 29 29 #include <net/netfilter/nf_conntrack_ecache.h> 30 30 #include <net/netfilter/nf_conntrack_seqadj.h> 31 31 #include <net/netfilter/nf_conntrack_synproxy.h> 32 + #include <net/netfilter/nf_conntrack_timeout.h> 32 33 #include <net/netfilter/nf_log.h> 33 34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 34 35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> ··· 275 274 static inline struct nf_tcp_net *tcp_pernet(struct net *net) 276 275 { 277 276 return &net->ct.nf_ct_proto.tcp; 278 - } 279 - 280 - static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 281 - struct net *net, struct nf_conntrack_tuple *tuple) 282 - { 283 - const struct tcphdr *hp; 284 - struct tcphdr _hdr; 285 - 286 - /* Actually only need first 4 bytes to get ports. */ 287 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 288 - if (hp == NULL) 289 - return false; 290 - 291 - tuple->src.u.tcp.port = hp->source; 292 - tuple->dst.u.tcp.port = hp->dest; 293 - 294 - return true; 295 - } 296 - 297 - static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, 298 - const struct nf_conntrack_tuple *orig) 299 - { 300 - tuple->src.u.tcp.port = orig->dst.u.tcp.port; 301 - tuple->dst.u.tcp.port = orig->src.u.tcp.port; 302 - return true; 303 277 } 304 278 305 279 #ifdef CONFIG_NF_CONNTRACK_PROCFS ··· 769 793 return NF_ACCEPT; 770 794 } 771 795 772 - static unsigned int *tcp_get_timeouts(struct net *net) 773 - { 774 - return tcp_pernet(net)->timeouts; 775 - } 776 - 777 796 /* Returns verdict for packet, or -1 for invalid. 
*/ 778 797 static int tcp_packet(struct nf_conn *ct, 779 798 const struct sk_buff *skb, 780 799 unsigned int dataoff, 781 - enum ip_conntrack_info ctinfo, 782 - unsigned int *timeouts) 800 + enum ip_conntrack_info ctinfo) 783 801 { 784 802 struct net *net = nf_ct_net(ct); 785 803 struct nf_tcp_net *tn = tcp_pernet(net); 786 804 struct nf_conntrack_tuple *tuple; 787 805 enum tcp_conntrack new_state, old_state; 806 + unsigned int index, *timeouts; 788 807 enum ip_conntrack_dir dir; 789 808 const struct tcphdr *th; 790 809 struct tcphdr _tcph; 791 810 unsigned long timeout; 792 - unsigned int index; 793 811 794 812 th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); 795 813 BUG_ON(th == NULL); ··· 1016 1046 && new_state == TCP_CONNTRACK_FIN_WAIT) 1017 1047 ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; 1018 1048 1049 + timeouts = nf_ct_timeout_lookup(ct); 1050 + if (!timeouts) 1051 + timeouts = tn->timeouts; 1052 + 1019 1053 if (ct->proto.tcp.retrans >= tn->tcp_max_retrans && 1020 1054 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS]) 1021 1055 timeout = timeouts[TCP_CONNTRACK_RETRANS]; ··· 1069 1095 1070 1096 /* Called when a new connection for this protocol found. */ 1071 1097 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, 1072 - unsigned int dataoff, unsigned int *timeouts) 1098 + unsigned int dataoff) 1073 1099 { 1074 1100 enum tcp_conntrack new_state; 1075 1101 const struct tcphdr *th; ··· 1287 1313 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[], 1288 1314 struct net *net, void *data) 1289 1315 { 1290 - unsigned int *timeouts = data; 1291 1316 struct nf_tcp_net *tn = tcp_pernet(net); 1317 + unsigned int *timeouts = data; 1292 1318 int i; 1293 1319 1320 + if (!timeouts) 1321 + timeouts = tn->timeouts; 1294 1322 /* set default TCP timeouts. 
*/ 1295 1323 for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++) 1296 1324 timeouts[i] = tn->timeouts[i]; ··· 1535 1559 { 1536 1560 .l3proto = PF_INET, 1537 1561 .l4proto = IPPROTO_TCP, 1538 - .pkt_to_tuple = tcp_pkt_to_tuple, 1539 - .invert_tuple = tcp_invert_tuple, 1540 1562 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1541 1563 .print_conntrack = tcp_print_conntrack, 1542 1564 #endif 1543 1565 .packet = tcp_packet, 1544 - .get_timeouts = tcp_get_timeouts, 1545 1566 .new = tcp_new, 1546 1567 .error = tcp_error, 1547 1568 .can_early_drop = tcp_can_early_drop, ··· 1570 1597 { 1571 1598 .l3proto = PF_INET6, 1572 1599 .l4proto = IPPROTO_TCP, 1573 - .pkt_to_tuple = tcp_pkt_to_tuple, 1574 - .invert_tuple = tcp_invert_tuple, 1575 1600 #ifdef CONFIG_NF_CONNTRACK_PROCFS 1576 1601 .print_conntrack = tcp_print_conntrack, 1577 1602 #endif 1578 1603 .packet = tcp_packet, 1579 - .get_timeouts = tcp_get_timeouts, 1580 1604 .new = tcp_new, 1581 1605 .error = tcp_error, 1582 1606 .can_early_drop = tcp_can_early_drop,
+13 -42
net/netfilter/nf_conntrack_proto_udp.c
··· 22 22 #include <linux/netfilter_ipv6.h> 23 23 #include <net/netfilter/nf_conntrack_l4proto.h> 24 24 #include <net/netfilter/nf_conntrack_ecache.h> 25 + #include <net/netfilter/nf_conntrack_timeout.h> 25 26 #include <net/netfilter/nf_log.h> 26 27 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 27 28 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> ··· 37 36 return &net->ct.nf_ct_proto.udp; 38 37 } 39 38 40 - static bool udp_pkt_to_tuple(const struct sk_buff *skb, 41 - unsigned int dataoff, 42 - struct net *net, 43 - struct nf_conntrack_tuple *tuple) 44 - { 45 - const struct udphdr *hp; 46 - struct udphdr _hdr; 47 - 48 - /* Actually only need first 4 bytes to get ports. */ 49 - hp = skb_header_pointer(skb, dataoff, 4, &_hdr); 50 - if (hp == NULL) 51 - return false; 52 - 53 - tuple->src.u.udp.port = hp->source; 54 - tuple->dst.u.udp.port = hp->dest; 55 - 56 - return true; 57 - } 58 - 59 - static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple, 60 - const struct nf_conntrack_tuple *orig) 61 - { 62 - tuple->src.u.udp.port = orig->dst.u.udp.port; 63 - tuple->dst.u.udp.port = orig->src.u.udp.port; 64 - return true; 65 - } 66 - 67 39 static unsigned int *udp_get_timeouts(struct net *net) 68 40 { 69 41 return udp_pernet(net)->timeouts; ··· 46 72 static int udp_packet(struct nf_conn *ct, 47 73 const struct sk_buff *skb, 48 74 unsigned int dataoff, 49 - enum ip_conntrack_info ctinfo, 50 - unsigned int *timeouts) 75 + enum ip_conntrack_info ctinfo) 51 76 { 77 + unsigned int *timeouts; 78 + 79 + timeouts = nf_ct_timeout_lookup(ct); 80 + if (!timeouts) 81 + timeouts = udp_get_timeouts(nf_ct_net(ct)); 82 + 52 83 /* If we've seen traffic both ways, this is some kind of UDP 53 84 stream. Extend timeout. */ 54 85 if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { ··· 71 92 72 93 /* Called when a new connection for this protocol found. 
*/ 73 94 static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, 74 - unsigned int dataoff, unsigned int *timeouts) 95 + unsigned int dataoff) 75 96 { 76 97 return true; 77 98 } ··· 182 203 unsigned int *timeouts = data; 183 204 struct nf_udp_net *un = udp_pernet(net); 184 205 206 + if (!timeouts) 207 + timeouts = un->timeouts; 208 + 185 209 /* set default timeouts for UDP. */ 186 210 timeouts[UDP_CT_UNREPLIED] = un->timeouts[UDP_CT_UNREPLIED]; 187 211 timeouts[UDP_CT_REPLIED] = un->timeouts[UDP_CT_REPLIED]; ··· 283 301 .l3proto = PF_INET, 284 302 .l4proto = IPPROTO_UDP, 285 303 .allow_clash = true, 286 - .pkt_to_tuple = udp_pkt_to_tuple, 287 - .invert_tuple = udp_invert_tuple, 288 304 .packet = udp_packet, 289 - .get_timeouts = udp_get_timeouts, 290 305 .new = udp_new, 291 306 .error = udp_error, 292 307 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 312 333 .l3proto = PF_INET, 313 334 .l4proto = IPPROTO_UDPLITE, 314 335 .allow_clash = true, 315 - .pkt_to_tuple = udp_pkt_to_tuple, 316 - .invert_tuple = udp_invert_tuple, 317 336 .packet = udp_packet, 318 - .get_timeouts = udp_get_timeouts, 319 337 .new = udp_new, 320 338 .error = udplite_error, 321 339 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 341 365 .l3proto = PF_INET6, 342 366 .l4proto = IPPROTO_UDP, 343 367 .allow_clash = true, 344 - .pkt_to_tuple = udp_pkt_to_tuple, 345 - .invert_tuple = udp_invert_tuple, 346 368 .packet = udp_packet, 347 - .get_timeouts = udp_get_timeouts, 348 369 .new = udp_new, 349 370 .error = udp_error, 350 371 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 370 397 .l3proto = PF_INET6, 371 398 .l4proto = IPPROTO_UDPLITE, 372 399 .allow_clash = true, 373 - .pkt_to_tuple = udp_pkt_to_tuple, 374 - .invert_tuple = udp_invert_tuple, 375 400 .packet = udp_packet, 376 - .get_timeouts = udp_get_timeouts, 377 401 .new = udp_new, 378 402 .error = udplite_error, 379 403 #if IS_ENABLED(CONFIG_NF_CT_NETLINK) ··· 393 423 }; 394 424 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udplite6); 395 425 #endif 426 + 
#include <net/netfilter/nf_conntrack_timeout.h>
+8 -20
net/netfilter/nf_conntrack_standalone.c
··· 1 - /* (C) 1999-2001 Paul `Rusty' Russell 2 - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> 3 - * (C) 2005-2012 Patrick McHardy <kaber@trash.net> 4 - * 5 - * This program is free software; you can redistribute it and/or modify 6 - * it under the terms of the GNU General Public License version 2 as 7 - * published by the Free Software Foundation. 8 - */ 9 - 1 + // SPDX-License-Identifier: GPL-2.0 10 2 #include <linux/types.h> 11 3 #include <linux/netfilter.h> 12 4 #include <linux/slab.h> ··· 16 24 17 25 #include <net/netfilter/nf_conntrack.h> 18 26 #include <net/netfilter/nf_conntrack_core.h> 19 - #include <net/netfilter/nf_conntrack_l3proto.h> 20 27 #include <net/netfilter/nf_conntrack_l4proto.h> 21 28 #include <net/netfilter/nf_conntrack_expect.h> 22 29 #include <net/netfilter/nf_conntrack_helper.h> ··· 24 33 #include <net/netfilter/nf_conntrack_timestamp.h> 25 34 #include <linux/rculist_nulls.h> 26 35 27 - MODULE_LICENSE("GPL"); 36 + unsigned int nf_conntrack_net_id __read_mostly; 28 37 29 38 #ifdef CONFIG_NF_CONNTRACK_PROCFS 30 39 void 31 40 print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, 32 - const struct nf_conntrack_l3proto *l3proto, 33 41 const struct nf_conntrack_l4proto *l4proto) 34 42 { 35 - switch (l3proto->l3proto) { 43 + switch (tuple->src.l3num) { 36 44 case NFPROTO_IPV4: 37 45 seq_printf(s, "src=%pI4 dst=%pI4 ", 38 46 &tuple->src.u3.ip, &tuple->dst.u3.ip); ··· 272 282 { 273 283 struct nf_conntrack_tuple_hash *hash = v; 274 284 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 275 - const struct nf_conntrack_l3proto *l3proto; 276 285 const struct nf_conntrack_l4proto *l4proto; 277 286 struct net *net = seq_file_net(s); 278 287 int ret = 0; ··· 292 303 if (!net_eq(nf_ct_net(ct), net)) 293 304 goto release; 294 305 295 - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 296 - WARN_ON(!l3proto); 297 306 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 298 307 WARN_ON(!l4proto); 299 308 300 
309 ret = -ENOSPC; 301 310 seq_printf(s, "%-8s %u %-8s %u ", 302 - l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), 311 + l3proto_name(nf_ct_l3num(ct)), nf_ct_l3num(ct), 303 312 l4proto_name(l4proto->l4proto), nf_ct_protonum(ct)); 304 313 305 314 if (!test_bit(IPS_OFFLOAD_BIT, &ct->status)) ··· 307 320 l4proto->print_conntrack(s, ct); 308 321 309 322 print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 310 - l3proto, l4proto); 323 + l4proto); 311 324 312 325 ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG); 313 326 ··· 320 333 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 321 334 seq_puts(s, "[UNREPLIED] "); 322 335 323 - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 324 - l3proto, l4proto); 336 + print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l4proto); 325 337 326 338 ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL); 327 339 ··· 666 680 static struct pernet_operations nf_conntrack_net_ops = { 667 681 .init = nf_conntrack_pernet_init, 668 682 .exit_batch = nf_conntrack_pernet_exit, 683 + .id = &nf_conntrack_net_id, 684 + .size = sizeof(struct nf_conntrack_net), 669 685 }; 670 686 671 687 static int __init nf_conntrack_standalone_init(void)
+5 -8
net/netfilter/nf_flow_table_core.c
··· 107 107 tcp->seen[1].td_maxwin = 0; 108 108 } 109 109 110 + #define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ) 111 + #define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ) 112 + 110 113 static void flow_offload_fixup_ct_state(struct nf_conn *ct) 111 114 { 112 115 const struct nf_conntrack_l4proto *l4proto; 113 - struct net *net = nf_ct_net(ct); 114 - unsigned int *timeouts; 115 116 unsigned int timeout; 116 117 int l4num; 117 118 ··· 124 123 if (!l4proto) 125 124 return; 126 125 127 - timeouts = l4proto->get_timeouts(net); 128 - if (!timeouts) 129 - return; 130 - 131 126 if (l4num == IPPROTO_TCP) 132 - timeout = timeouts[TCP_CONNTRACK_ESTABLISHED]; 127 + timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT; 133 128 else if (l4num == IPPROTO_UDP) 134 - timeout = timeouts[UDP_CT_REPLIED]; 129 + timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT; 135 130 else 136 131 return; 137 132
-8
net/netfilter/nf_nat_core.c
··· 28 28 #include <net/netfilter/nf_nat_helper.h> 29 29 #include <net/netfilter/nf_conntrack_helper.h> 30 30 #include <net/netfilter/nf_conntrack_seqadj.h> 31 - #include <net/netfilter/nf_conntrack_l3proto.h> 32 31 #include <net/netfilter/nf_conntrack_zones.h> 33 32 #include <linux/netfilter/nf_nat.h> 34 33 ··· 742 743 743 744 int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto) 744 745 { 745 - int err; 746 - 747 - err = nf_ct_l3proto_try_module_get(l3proto->l3proto); 748 - if (err < 0) 749 - return err; 750 - 751 746 mutex_lock(&nf_nat_proto_mutex); 752 747 RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP], 753 748 &nf_nat_l4proto_tcp); ··· 774 781 synchronize_rcu(); 775 782 776 783 nf_nat_l3proto_clean(l3proto->l3proto); 777 - nf_ct_l3proto_module_put(l3proto->l3proto); 778 784 } 779 785 EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); 780 786
+148 -114
net/netfilter/nf_osf.c
··· 21 21 #include <linux/netfilter/nf_osf.h> 22 22 23 23 static inline int nf_osf_ttl(const struct sk_buff *skb, 24 - const struct nf_osf_info *info, 25 - unsigned char f_ttl) 24 + int ttl_check, unsigned char f_ttl) 26 25 { 27 26 const struct iphdr *ip = ip_hdr(skb); 28 27 29 - if (info->flags & NF_OSF_TTL) { 30 - if (info->ttl == NF_OSF_TTL_TRUE) 28 + if (ttl_check != -1) { 29 + if (ttl_check == NF_OSF_TTL_TRUE) 31 30 return ip->ttl == f_ttl; 32 - if (info->ttl == NF_OSF_TTL_NOCHECK) 31 + if (ttl_check == NF_OSF_TTL_NOCHECK) 33 32 return 1; 34 33 else if (ip->ttl <= f_ttl) 35 34 return 1; ··· 51 52 return ip->ttl == f_ttl; 52 53 } 53 54 55 + struct nf_osf_hdr_ctx { 56 + bool df; 57 + u16 window; 58 + u16 totlen; 59 + const unsigned char *optp; 60 + unsigned int optsize; 61 + }; 62 + 63 + static bool nf_osf_match_one(const struct sk_buff *skb, 64 + const struct nf_osf_user_finger *f, 65 + int ttl_check, 66 + struct nf_osf_hdr_ctx *ctx) 67 + { 68 + unsigned int check_WSS = 0; 69 + int fmatch = FMATCH_WRONG; 70 + int foptsize, optnum; 71 + u16 mss = 0; 72 + 73 + if (ctx->totlen != f->ss || !nf_osf_ttl(skb, ttl_check, f->ttl)) 74 + return false; 75 + 76 + /* 77 + * Should not happen if userspace parser was written correctly. 
78 + */ 79 + if (f->wss.wc >= OSF_WSS_MAX) 80 + return false; 81 + 82 + /* Check options */ 83 + 84 + foptsize = 0; 85 + for (optnum = 0; optnum < f->opt_num; ++optnum) 86 + foptsize += f->opt[optnum].length; 87 + 88 + if (foptsize > MAX_IPOPTLEN || 89 + ctx->optsize > MAX_IPOPTLEN || 90 + ctx->optsize != foptsize) 91 + return false; 92 + 93 + check_WSS = f->wss.wc; 94 + 95 + for (optnum = 0; optnum < f->opt_num; ++optnum) { 96 + if (f->opt[optnum].kind == *ctx->optp) { 97 + __u32 len = f->opt[optnum].length; 98 + const __u8 *optend = ctx->optp + len; 99 + 100 + fmatch = FMATCH_OK; 101 + 102 + switch (*ctx->optp) { 103 + case OSFOPT_MSS: 104 + mss = ctx->optp[3]; 105 + mss <<= 8; 106 + mss |= ctx->optp[2]; 107 + 108 + mss = ntohs((__force __be16)mss); 109 + break; 110 + case OSFOPT_TS: 111 + break; 112 + } 113 + 114 + ctx->optp = optend; 115 + } else 116 + fmatch = FMATCH_OPT_WRONG; 117 + 118 + if (fmatch != FMATCH_OK) 119 + break; 120 + } 121 + 122 + if (fmatch != FMATCH_OPT_WRONG) { 123 + fmatch = FMATCH_WRONG; 124 + 125 + switch (check_WSS) { 126 + case OSF_WSS_PLAIN: 127 + if (f->wss.val == 0 || ctx->window == f->wss.val) 128 + fmatch = FMATCH_OK; 129 + break; 130 + case OSF_WSS_MSS: 131 + /* 132 + * Some smart modems decrease mangle MSS to 133 + * SMART_MSS_2, so we check standard, decreased 134 + * and the one provided in the fingerprint MSS 135 + * values. 
136 + */ 137 + #define SMART_MSS_1 1460 138 + #define SMART_MSS_2 1448 139 + if (ctx->window == f->wss.val * mss || 140 + ctx->window == f->wss.val * SMART_MSS_1 || 141 + ctx->window == f->wss.val * SMART_MSS_2) 142 + fmatch = FMATCH_OK; 143 + break; 144 + case OSF_WSS_MTU: 145 + if (ctx->window == f->wss.val * (mss + 40) || 146 + ctx->window == f->wss.val * (SMART_MSS_1 + 40) || 147 + ctx->window == f->wss.val * (SMART_MSS_2 + 40)) 148 + fmatch = FMATCH_OK; 149 + break; 150 + case OSF_WSS_MODULO: 151 + if ((ctx->window % f->wss.val) == 0) 152 + fmatch = FMATCH_OK; 153 + break; 154 + } 155 + } 156 + 157 + return fmatch == FMATCH_OK; 158 + } 159 + 160 + static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, 161 + const struct sk_buff *skb, 162 + const struct iphdr *ip, 163 + unsigned char *opts) 164 + { 165 + const struct tcphdr *tcp; 166 + struct tcphdr _tcph; 167 + 168 + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); 169 + if (!tcp) 170 + return NULL; 171 + 172 + if (!tcp->syn) 173 + return NULL; 174 + 175 + ctx->totlen = ntohs(ip->tot_len); 176 + ctx->df = ntohs(ip->frag_off) & IP_DF; 177 + ctx->window = ntohs(tcp->window); 178 + 179 + if (tcp->doff * 4 > sizeof(struct tcphdr)) { 180 + ctx->optsize = tcp->doff * 4 - sizeof(struct tcphdr); 181 + 182 + ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) + 183 + sizeof(struct tcphdr), ctx->optsize, opts); 184 + } 185 + 186 + return tcp; 187 + } 188 + 54 189 bool 55 190 nf_osf_match(const struct sk_buff *skb, u_int8_t family, 56 191 int hooknum, struct net_device *in, struct net_device *out, 57 192 const struct nf_osf_info *info, struct net *net, 58 193 const struct list_head *nf_osf_fingers) 59 194 { 60 - const unsigned char *optp = NULL, *_optp = NULL; 61 - unsigned int optsize = 0, check_WSS = 0; 62 - int fmatch = FMATCH_WRONG, fcount = 0; 63 195 const struct iphdr *ip = ip_hdr(skb); 64 196 const struct nf_osf_user_finger *f; 65 197 unsigned char 
opts[MAX_IPOPTLEN]; 66 198 const struct nf_osf_finger *kf; 67 - u16 window, totlen, mss = 0; 199 + int fcount = 0, ttl_check; 200 + int fmatch = FMATCH_WRONG; 201 + struct nf_osf_hdr_ctx ctx; 68 202 const struct tcphdr *tcp; 69 - struct tcphdr _tcph; 70 - bool df; 71 203 72 - tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); 204 + memset(&ctx, 0, sizeof(ctx)); 205 + 206 + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); 73 207 if (!tcp) 74 208 return false; 75 209 76 - if (!tcp->syn) 77 - return false; 210 + ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1; 78 211 79 - totlen = ntohs(ip->tot_len); 80 - df = ntohs(ip->frag_off) & IP_DF; 81 - window = ntohs(tcp->window); 82 - 83 - if (tcp->doff * 4 > sizeof(struct tcphdr)) { 84 - optsize = tcp->doff * 4 - sizeof(struct tcphdr); 85 - 86 - _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + 87 - sizeof(struct tcphdr), optsize, opts); 88 - } 89 - 90 - list_for_each_entry_rcu(kf, &nf_osf_fingers[df], finger_entry) { 91 - int foptsize, optnum; 212 + list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) { 92 213 93 214 f = &kf->finger; 94 215 95 216 if (!(info->flags & NF_OSF_LOG) && strcmp(info->genre, f->genre)) 96 217 continue; 97 218 98 - optp = _optp; 99 - fmatch = FMATCH_WRONG; 100 - 101 - if (totlen != f->ss || !nf_osf_ttl(skb, info, f->ttl)) 219 + if (!nf_osf_match_one(skb, f, ttl_check, &ctx)) 102 220 continue; 103 221 104 - /* 105 - * Should not happen if userspace parser was written correctly. 
106 - */ 107 - if (f->wss.wc >= OSF_WSS_MAX) 108 - continue; 109 - 110 - /* Check options */ 111 - 112 - foptsize = 0; 113 - for (optnum = 0; optnum < f->opt_num; ++optnum) 114 - foptsize += f->opt[optnum].length; 115 - 116 - if (foptsize > MAX_IPOPTLEN || 117 - optsize > MAX_IPOPTLEN || 118 - optsize != foptsize) 119 - continue; 120 - 121 - check_WSS = f->wss.wc; 122 - 123 - for (optnum = 0; optnum < f->opt_num; ++optnum) { 124 - if (f->opt[optnum].kind == (*optp)) { 125 - __u32 len = f->opt[optnum].length; 126 - const __u8 *optend = optp + len; 127 - 128 - fmatch = FMATCH_OK; 129 - 130 - switch (*optp) { 131 - case OSFOPT_MSS: 132 - mss = optp[3]; 133 - mss <<= 8; 134 - mss |= optp[2]; 135 - 136 - mss = ntohs((__force __be16)mss); 137 - break; 138 - case OSFOPT_TS: 139 - break; 140 - } 141 - 142 - optp = optend; 143 - } else 144 - fmatch = FMATCH_OPT_WRONG; 145 - 146 - if (fmatch != FMATCH_OK) 147 - break; 148 - } 149 - 150 - if (fmatch != FMATCH_OPT_WRONG) { 151 - fmatch = FMATCH_WRONG; 152 - 153 - switch (check_WSS) { 154 - case OSF_WSS_PLAIN: 155 - if (f->wss.val == 0 || window == f->wss.val) 156 - fmatch = FMATCH_OK; 157 - break; 158 - case OSF_WSS_MSS: 159 - /* 160 - * Some smart modems decrease mangle MSS to 161 - * SMART_MSS_2, so we check standard, decreased 162 - * and the one provided in the fingerprint MSS 163 - * values. 
164 - */ 165 - #define SMART_MSS_1 1460 166 - #define SMART_MSS_2 1448 167 - if (window == f->wss.val * mss || 168 - window == f->wss.val * SMART_MSS_1 || 169 - window == f->wss.val * SMART_MSS_2) 170 - fmatch = FMATCH_OK; 171 - break; 172 - case OSF_WSS_MTU: 173 - if (window == f->wss.val * (mss + 40) || 174 - window == f->wss.val * (SMART_MSS_1 + 40) || 175 - window == f->wss.val * (SMART_MSS_2 + 40)) 176 - fmatch = FMATCH_OK; 177 - break; 178 - case OSF_WSS_MODULO: 179 - if ((window % f->wss.val) == 0) 180 - fmatch = FMATCH_OK; 181 - break; 182 - } 183 - } 184 - 185 - if (fmatch != FMATCH_OK) 186 - continue; 222 + fmatch = FMATCH_OK; 187 223 188 224 fcount++; 189 225
+132 -62
net/netfilter/nf_tables_api.c
··· 455 455 return NULL; 456 456 } 457 457 458 + /* 459 + * Loading a module requires dropping mutex that guards the 460 + * transaction. 461 + * We first need to abort any pending transactions as once 462 + * mutex is unlocked a different client could start a new 463 + * transaction. It must not see any 'future generation' 464 + * changes * as these changes will never happen. 465 + */ 466 + #ifdef CONFIG_MODULES 467 + static int __nf_tables_abort(struct net *net); 468 + 469 + static void nft_request_module(struct net *net, const char *fmt, ...) 470 + { 471 + char module_name[MODULE_NAME_LEN]; 472 + va_list args; 473 + int ret; 474 + 475 + __nf_tables_abort(net); 476 + 477 + va_start(args, fmt); 478 + ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); 479 + va_end(args); 480 + if (WARN(ret >= MODULE_NAME_LEN, "truncated: '%s' (len %d)", module_name, ret)) 481 + return; 482 + 483 + mutex_unlock(&net->nft.commit_mutex); 484 + request_module("%s", module_name); 485 + mutex_lock(&net->nft.commit_mutex); 486 + } 487 + #endif 488 + 489 + static void lockdep_nfnl_nft_mutex_not_held(void) 490 + { 491 + #ifdef CONFIG_PROVE_LOCKING 492 + WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 493 + #endif 494 + } 495 + 458 496 static const struct nft_chain_type * 459 - nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family, bool autoload) 497 + nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla, 498 + u8 family, bool autoload) 460 499 { 461 500 const struct nft_chain_type *type; 462 501 463 502 type = __nf_tables_chain_type_lookup(nla, family); 464 503 if (type != NULL) 465 504 return type; 505 + 506 + lockdep_nfnl_nft_mutex_not_held(); 466 507 #ifdef CONFIG_MODULES 467 508 if (autoload) { 468 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 469 - request_module("nft-chain-%u-%.*s", family, 470 - nla_len(nla), (const char *)nla_data(nla)); 471 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 509 + nft_request_module(net, "nft-chain-%u-%.*s", family, 510 + 
nla_len(nla), (const char *)nla_data(nla)); 472 511 type = __nf_tables_chain_type_lookup(nla, family); 473 512 if (type != NULL) 474 513 return ERR_PTR(-EAGAIN); ··· 811 772 struct nft_ctx ctx; 812 773 int err; 813 774 775 + lockdep_assert_held(&net->nft.commit_mutex); 814 776 attr = nla[NFTA_TABLE_NAME]; 815 777 table = nft_table_lookup(net, attr, family, genmask); 816 778 if (IS_ERR(table)) { ··· 1052 1012 return ERR_PTR(-ENOENT); 1053 1013 } 1054 1014 1055 - static struct nft_chain *nft_chain_lookup(struct nft_table *table, 1015 + static bool lockdep_commit_lock_is_held(struct net *net) 1016 + { 1017 + #ifdef CONFIG_PROVE_LOCKING 1018 + return lockdep_is_held(&net->nft.commit_mutex); 1019 + #else 1020 + return true; 1021 + #endif 1022 + } 1023 + 1024 + static struct nft_chain *nft_chain_lookup(struct net *net, 1025 + struct nft_table *table, 1056 1026 const struct nlattr *nla, u8 genmask) 1057 1027 { 1058 1028 char search[NFT_CHAIN_MAXNAMELEN + 1]; ··· 1075 1025 nla_strlcpy(search, nla, sizeof(search)); 1076 1026 1077 1027 WARN_ON(!rcu_read_lock_held() && 1078 - !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 1028 + !lockdep_commit_lock_is_held(net)); 1079 1029 1080 1030 chain = ERR_PTR(-ENOENT); 1081 1031 rcu_read_lock(); ··· 1315 1265 return PTR_ERR(table); 1316 1266 } 1317 1267 1318 - chain = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1268 + chain = nft_chain_lookup(net, table, nla[NFTA_CHAIN_NAME], genmask); 1319 1269 if (IS_ERR(chain)) { 1320 1270 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]); 1321 1271 return PTR_ERR(chain); ··· 1448 1398 struct net_device *dev; 1449 1399 int err; 1450 1400 1401 + lockdep_assert_held(&net->nft.commit_mutex); 1402 + lockdep_nfnl_nft_mutex_not_held(); 1403 + 1451 1404 err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], 1452 1405 nft_hook_policy, NULL); 1453 1406 if (err < 0) ··· 1465 1412 1466 1413 type = chain_type[family][NFT_CHAIN_T_DEFAULT]; 1467 1414 if (nla[NFTA_CHAIN_TYPE]) { 1468 - type = 
nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE], 1415 + type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], 1469 1416 family, create); 1470 1417 if (IS_ERR(type)) 1471 1418 return PTR_ERR(type); ··· 1685 1632 nla[NFTA_CHAIN_NAME]) { 1686 1633 struct nft_chain *chain2; 1687 1634 1688 - chain2 = nft_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); 1635 + chain2 = nft_chain_lookup(ctx->net, table, 1636 + nla[NFTA_CHAIN_NAME], genmask); 1689 1637 if (!IS_ERR(chain2)) 1690 1638 return -EEXIST; 1691 1639 } ··· 1748 1694 1749 1695 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 1750 1696 1697 + lockdep_assert_held(&net->nft.commit_mutex); 1698 + 1751 1699 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); 1752 1700 if (IS_ERR(table)) { 1753 1701 NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TABLE]); ··· 1768 1712 } 1769 1713 attr = nla[NFTA_CHAIN_HANDLE]; 1770 1714 } else { 1771 - chain = nft_chain_lookup(table, attr, genmask); 1715 + chain = nft_chain_lookup(net, table, attr, genmask); 1772 1716 if (IS_ERR(chain)) { 1773 1717 if (PTR_ERR(chain) != -ENOENT) { 1774 1718 NL_SET_BAD_ATTR(extack, attr); ··· 1846 1790 chain = nft_chain_lookup_byhandle(table, handle, genmask); 1847 1791 } else { 1848 1792 attr = nla[NFTA_CHAIN_NAME]; 1849 - chain = nft_chain_lookup(table, attr, genmask); 1793 + chain = nft_chain_lookup(net, table, attr, genmask); 1850 1794 } 1851 1795 if (IS_ERR(chain)) { 1852 1796 NL_SET_BAD_ATTR(extack, attr); ··· 1931 1875 return NULL; 1932 1876 } 1933 1877 1934 - static const struct nft_expr_type *nft_expr_type_get(u8 family, 1878 + static const struct nft_expr_type *nft_expr_type_get(struct net *net, 1879 + u8 family, 1935 1880 struct nlattr *nla) 1936 1881 { 1937 1882 const struct nft_expr_type *type; ··· 1944 1887 if (type != NULL && try_module_get(type->owner)) 1945 1888 return type; 1946 1889 1890 + lockdep_nfnl_nft_mutex_not_held(); 1947 1891 #ifdef CONFIG_MODULES 1948 1892 if (type == NULL) { 1949 - 
nfnl_unlock(NFNL_SUBSYS_NFTABLES); 1950 - request_module("nft-expr-%u-%.*s", family, 1951 - nla_len(nla), (char *)nla_data(nla)); 1952 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 1893 + nft_request_module(net, "nft-expr-%u-%.*s", family, 1894 + nla_len(nla), (char *)nla_data(nla)); 1953 1895 if (__nft_expr_type_get(family, nla)) 1954 1896 return ERR_PTR(-EAGAIN); 1955 1897 1956 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 1957 - request_module("nft-expr-%.*s", 1958 - nla_len(nla), (char *)nla_data(nla)); 1959 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 1898 + nft_request_module(net, "nft-expr-%.*s", 1899 + nla_len(nla), (char *)nla_data(nla)); 1960 1900 if (__nft_expr_type_get(family, nla)) 1961 1901 return ERR_PTR(-EAGAIN); 1962 1902 } ··· 2022 1968 if (err < 0) 2023 1969 return err; 2024 1970 2025 - type = nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]); 1971 + type = nft_expr_type_get(ctx->net, ctx->family, tb[NFTA_EXPR_NAME]); 2026 1972 if (IS_ERR(type)) 2027 1973 return PTR_ERR(type); 2028 1974 ··· 2379 2325 return PTR_ERR(table); 2380 2326 } 2381 2327 2382 - chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2328 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); 2383 2329 if (IS_ERR(chain)) { 2384 2330 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2385 2331 return PTR_ERR(chain); ··· 2413 2359 { 2414 2360 struct nft_expr *expr; 2415 2361 2362 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 2416 2363 /* 2417 2364 * Careful: some expressions might not be initialized in case this 2418 2365 * is called on error from nf_tables_newrule(). 
··· 2482 2427 2483 2428 #define NFT_RULE_MAXEXPRS 128 2484 2429 2485 - static struct nft_expr_info *info; 2486 - 2487 2430 static int nf_tables_newrule(struct net *net, struct sock *nlsk, 2488 2431 struct sk_buff *skb, const struct nlmsghdr *nlh, 2489 2432 const struct nlattr * const nla[], ··· 2489 2436 { 2490 2437 const struct nfgenmsg *nfmsg = nlmsg_data(nlh); 2491 2438 u8 genmask = nft_genmask_next(net); 2439 + struct nft_expr_info *info = NULL; 2492 2440 int family = nfmsg->nfgen_family; 2493 2441 struct nft_table *table; 2494 2442 struct nft_chain *chain; ··· 2504 2450 bool create; 2505 2451 u64 handle, pos_handle; 2506 2452 2453 + lockdep_assert_held(&net->nft.commit_mutex); 2454 + 2507 2455 create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; 2508 2456 2509 2457 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); ··· 2514 2458 return PTR_ERR(table); 2515 2459 } 2516 2460 2517 - chain = nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2461 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], genmask); 2518 2462 if (IS_ERR(chain)) { 2519 2463 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2520 2464 return PTR_ERR(chain); ··· 2562 2506 n = 0; 2563 2507 size = 0; 2564 2508 if (nla[NFTA_RULE_EXPRESSIONS]) { 2509 + info = kvmalloc_array(NFT_RULE_MAXEXPRS, 2510 + sizeof(struct nft_expr_info), 2511 + GFP_KERNEL); 2512 + if (!info) 2513 + return -ENOMEM; 2514 + 2565 2515 nla_for_each_nested(tmp, nla[NFTA_RULE_EXPRESSIONS], rem) { 2566 2516 err = -EINVAL; 2567 2517 if (nla_type(tmp) != NFTA_LIST_ELEM) ··· 2660 2598 list_add_rcu(&rule->list, &chain->rules); 2661 2599 } 2662 2600 } 2601 + kvfree(info); 2663 2602 chain->use++; 2664 2603 2665 2604 if (net->nft.validate_state == NFT_VALIDATE_DO) ··· 2674 2611 if (info[i].ops != NULL) 2675 2612 module_put(info[i].ops->type->owner); 2676 2613 } 2614 + kvfree(info); 2677 2615 return err; 2678 2616 } 2679 2617 ··· 2714 2650 } 2715 2651 2716 2652 if (nla[NFTA_RULE_CHAIN]) { 2717 - chain = 
nft_chain_lookup(table, nla[NFTA_RULE_CHAIN], genmask); 2653 + chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN], 2654 + genmask); 2718 2655 if (IS_ERR(chain)) { 2719 2656 NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]); 2720 2657 return PTR_ERR(chain); ··· 2807 2742 const struct nft_set_type *type; 2808 2743 u32 flags = 0; 2809 2744 2745 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 2746 + lockdep_nfnl_nft_mutex_not_held(); 2810 2747 #ifdef CONFIG_MODULES 2811 2748 if (list_empty(&nf_tables_set_types)) { 2812 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 2813 - request_module("nft-set"); 2814 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 2749 + nft_request_module(ctx->net, "nft-set"); 2815 2750 if (!list_empty(&nf_tables_set_types)) 2816 2751 return ERR_PTR(-EAGAIN); 2817 2752 } ··· 4844 4779 return NULL; 4845 4780 } 4846 4781 4847 - static const struct nft_object_type *nft_obj_type_get(u32 objtype) 4782 + static const struct nft_object_type * 4783 + nft_obj_type_get(struct net *net, u32 objtype) 4848 4784 { 4849 4785 const struct nft_object_type *type; 4850 4786 ··· 4853 4787 if (type != NULL && try_module_get(type->owner)) 4854 4788 return type; 4855 4789 4790 + lockdep_nfnl_nft_mutex_not_held(); 4856 4791 #ifdef CONFIG_MODULES 4857 4792 if (type == NULL) { 4858 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 4859 - request_module("nft-obj-%u", objtype); 4860 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 4793 + nft_request_module(net, "nft-obj-%u", objtype); 4861 4794 if (__nft_obj_type_get(objtype)) 4862 4795 return ERR_PTR(-EAGAIN); 4863 4796 } ··· 4908 4843 4909 4844 nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); 4910 4845 4911 - type = nft_obj_type_get(objtype); 4846 + type = nft_obj_type_get(net, objtype); 4912 4847 if (IS_ERR(type)) 4913 4848 return PTR_ERR(type); 4914 4849 ··· 5404 5339 return NULL; 5405 5340 } 5406 5341 5407 - static const struct nf_flowtable_type *nft_flowtable_type_get(u8 family) 5342 + static const struct nf_flowtable_type * 5343 + 
nft_flowtable_type_get(struct net *net, u8 family) 5408 5344 { 5409 5345 const struct nf_flowtable_type *type; 5410 5346 ··· 5413 5347 if (type != NULL && try_module_get(type->owner)) 5414 5348 return type; 5415 5349 5350 + lockdep_nfnl_nft_mutex_not_held(); 5416 5351 #ifdef CONFIG_MODULES 5417 5352 if (type == NULL) { 5418 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 5419 - request_module("nf-flowtable-%u", family); 5420 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 5353 + nft_request_module(net, "nf-flowtable-%u", family); 5421 5354 if (__nft_flowtable_type_get(family)) 5422 5355 return ERR_PTR(-EAGAIN); 5423 5356 } ··· 5496 5431 goto err1; 5497 5432 } 5498 5433 5499 - type = nft_flowtable_type_get(family); 5434 + type = nft_flowtable_type_get(net, family); 5500 5435 if (IS_ERR(type)) { 5501 5436 err = PTR_ERR(type); 5502 5437 goto err2; ··· 6267 6202 next_genbit = nft_gencursor_next(net); 6268 6203 6269 6204 g0 = rcu_dereference_protected(chain->rules_gen_0, 6270 - lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 6205 + lockdep_commit_lock_is_held(net)); 6271 6206 g1 = rcu_dereference_protected(chain->rules_gen_1, 6272 - lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 6207 + lockdep_commit_lock_is_held(net)); 6273 6208 6274 6209 /* No changes to this chain? 
*/ 6275 6210 if (chain->rules_next == NULL) { ··· 6477 6412 6478 6413 nf_tables_commit_release(net); 6479 6414 nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); 6415 + mutex_unlock(&net->nft.commit_mutex); 6480 6416 6481 6417 return 0; 6482 6418 } ··· 6629 6563 6630 6564 static int nf_tables_abort(struct net *net, struct sk_buff *skb) 6631 6565 { 6632 - return __nf_tables_abort(net); 6566 + int ret = __nf_tables_abort(net); 6567 + 6568 + mutex_unlock(&net->nft.commit_mutex); 6569 + 6570 + return ret; 6633 6571 } 6634 6572 6635 6573 static bool nf_tables_valid_genid(struct net *net, u32 genid) 6636 6574 { 6637 - return net->nft.base_seq == genid; 6575 + bool genid_ok; 6576 + 6577 + mutex_lock(&net->nft.commit_mutex); 6578 + 6579 + genid_ok = genid == 0 || net->nft.base_seq == genid; 6580 + if (!genid_ok) 6581 + mutex_unlock(&net->nft.commit_mutex); 6582 + 6583 + /* else, commit mutex has to be released by commit or abort function */ 6584 + return genid_ok; 6638 6585 } 6639 6586 6640 6587 static const struct nfnetlink_subsystem nf_tables_subsys = { ··· 6659 6580 .abort = nf_tables_abort, 6660 6581 .cleanup = nf_tables_cleanup, 6661 6582 .valid_genid = nf_tables_valid_genid, 6583 + .owner = THIS_MODULE, 6662 6584 }; 6663 6585 6664 6586 int nft_chain_validate_dependency(const struct nft_chain *chain, ··· 6986 6906 case NFT_GOTO: 6987 6907 if (!tb[NFTA_VERDICT_CHAIN]) 6988 6908 return -EINVAL; 6989 - chain = nft_chain_lookup(ctx->table, tb[NFTA_VERDICT_CHAIN], 6990 - genmask); 6909 + chain = nft_chain_lookup(ctx->net, ctx->table, 6910 + tb[NFTA_VERDICT_CHAIN], genmask); 6991 6911 if (IS_ERR(chain)) 6992 6912 return PTR_ERR(chain); 6993 6913 if (nft_is_base_chain(chain)) ··· 7232 7152 { 7233 7153 INIT_LIST_HEAD(&net->nft.tables); 7234 7154 INIT_LIST_HEAD(&net->nft.commit_list); 7155 + mutex_init(&net->nft.commit_mutex); 7235 7156 net->nft.base_seq = 1; 7236 7157 net->nft.validate_state = NFT_VALIDATE_SKIP; 7237 7158 ··· 7241 7160 7242 7161 static void __net_exit 
nf_tables_exit_net(struct net *net) 7243 7162 { 7244 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 7163 + mutex_lock(&net->nft.commit_mutex); 7245 7164 if (!list_empty(&net->nft.commit_list)) 7246 7165 __nf_tables_abort(net); 7247 7166 __nft_release_tables(net); 7248 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 7167 + mutex_unlock(&net->nft.commit_mutex); 7249 7168 WARN_ON_ONCE(!list_empty(&net->nft.tables)); 7250 7169 } 7251 7170 ··· 7260 7179 7261 7180 nft_chain_filter_init(); 7262 7181 7263 - info = kmalloc_array(NFT_RULE_MAXEXPRS, sizeof(struct nft_expr_info), 7264 - GFP_KERNEL); 7265 - if (info == NULL) { 7266 - err = -ENOMEM; 7267 - goto err1; 7268 - } 7269 - 7270 7182 err = nf_tables_core_module_init(); 7271 7183 if (err < 0) 7272 - goto err2; 7184 + return err; 7273 7185 7274 7186 err = nfnetlink_subsys_register(&nf_tables_subsys); 7275 7187 if (err < 0) 7276 - goto err3; 7188 + goto err; 7277 7189 7278 7190 register_netdevice_notifier(&nf_tables_flowtable_notifier); 7279 7191 7280 7192 return register_pernet_subsys(&nf_tables_net_ops); 7281 - err3: 7193 + err: 7282 7194 nf_tables_core_module_exit(); 7283 - err2: 7284 - kfree(info); 7285 - err1: 7286 7195 return err; 7287 7196 } 7288 7197 ··· 7284 7213 unregister_pernet_subsys(&nf_tables_net_ops); 7285 7214 rcu_barrier(); 7286 7215 nf_tables_core_module_exit(); 7287 - kfree(info); 7288 7216 } 7289 7217 7290 7218 module_init(nf_tables_module_init);
+14 -9
net/netfilter/nfnetlink.c
··· 331 331 } 332 332 } 333 333 334 - if (!ss->commit || !ss->abort) { 334 + if (!ss->valid_genid || !ss->commit || !ss->abort) { 335 335 nfnl_unlock(subsys_id); 336 336 netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); 337 337 return kfree_skb(skb); 338 338 } 339 339 340 - if (genid && ss->valid_genid && !ss->valid_genid(net, genid)) { 340 + if (!try_module_get(ss->owner)) { 341 + nfnl_unlock(subsys_id); 342 + netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); 343 + return kfree_skb(skb); 344 + } 345 + 346 + if (!ss->valid_genid(net, genid)) { 347 + module_put(ss->owner); 341 348 nfnl_unlock(subsys_id); 342 349 netlink_ack(oskb, nlh, -ERESTART, NULL); 343 350 return kfree_skb(skb); 344 351 } 352 + 353 + nfnl_unlock(subsys_id); 345 354 346 355 while (skb->len >= nlmsg_total_size(0)) { 347 356 int msglen, type; ··· 473 464 } 474 465 done: 475 466 if (status & NFNL_BATCH_REPLAY) { 476 - const struct nfnetlink_subsystem *ss2; 477 - 478 - ss2 = nfnl_dereference_protected(subsys_id); 479 - if (ss2 == ss) 480 - ss->abort(net, oskb); 467 + ss->abort(net, oskb); 481 468 nfnl_err_reset(&err_list); 482 - nfnl_unlock(subsys_id); 483 469 kfree_skb(skb); 470 + module_put(ss->owner); 484 471 goto replay; 485 472 } else if (status == NFNL_BATCH_DONE) { 486 473 err = ss->commit(net, oskb); ··· 494 489 ss->cleanup(net); 495 490 496 491 nfnl_err_deliver(&err_list, oskb); 497 - nfnl_unlock(subsys_id); 498 492 kfree_skb(skb); 493 + module_put(ss->owner); 499 494 } 500 495 501 496 static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = {
+2 -2
net/netfilter/nft_chain_filter.c
··· 322 322 if (!ctx.net) 323 323 return NOTIFY_DONE; 324 324 325 - nfnl_lock(NFNL_SUBSYS_NFTABLES); 325 + mutex_lock(&ctx.net->nft.commit_mutex); 326 326 list_for_each_entry(table, &ctx.net->nft.tables, list) { 327 327 if (table->family != NFPROTO_NETDEV) 328 328 continue; ··· 337 337 nft_netdev_event(event, dev, &ctx); 338 338 } 339 339 } 340 - nfnl_unlock(NFNL_SUBSYS_NFTABLES); 340 + mutex_unlock(&ctx.net->nft.commit_mutex); 341 341 put_net(ctx.net); 342 342 343 343 return NOTIFY_DONE;
+12 -24
net/netfilter/nft_connlimit.c
··· 14 14 #include <net/netfilter/nf_conntrack_zones.h> 15 15 16 16 struct nft_connlimit { 17 - spinlock_t lock; 18 - struct hlist_head hhead; 19 - u32 limit; 20 - bool invert; 17 + struct nf_conncount_list list; 18 + u32 limit; 19 + bool invert; 21 20 }; 22 21 23 22 static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, ··· 44 45 return; 45 46 } 46 47 47 - spin_lock_bh(&priv->lock); 48 - count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone, 49 - &addit); 48 + nf_conncount_lookup(nft_net(pkt), &priv->list, tuple_ptr, zone, 49 + &addit); 50 + count = priv->list.count; 50 51 51 52 if (!addit) 52 53 goto out; 53 54 54 - if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) { 55 + if (nf_conncount_add(&priv->list, tuple_ptr, zone) == NF_CONNCOUNT_ERR) { 55 56 regs->verdict.code = NF_DROP; 56 - spin_unlock_bh(&priv->lock); 57 57 return; 58 58 } 59 59 count++; 60 60 out: 61 - spin_unlock_bh(&priv->lock); 62 61 63 62 if ((count > priv->limit) ^ priv->invert) { 64 63 regs->verdict.code = NFT_BREAK; ··· 84 87 invert = true; 85 88 } 86 89 87 - spin_lock_init(&priv->lock); 88 - INIT_HLIST_HEAD(&priv->hhead); 90 + nf_conncount_list_init(&priv->list); 89 91 priv->limit = limit; 90 92 priv->invert = invert; 91 93 ··· 95 99 struct nft_connlimit *priv) 96 100 { 97 101 nf_ct_netns_put(ctx->net, ctx->family); 98 - nf_conncount_cache_free(&priv->hhead); 102 + nf_conncount_cache_free(&priv->list); 99 103 } 100 104 101 105 static int nft_connlimit_do_dump(struct sk_buff *skb, ··· 208 212 struct nft_connlimit *priv_dst = nft_expr_priv(dst); 209 213 struct nft_connlimit *priv_src = nft_expr_priv(src); 210 214 211 - spin_lock_init(&priv_dst->lock); 212 - INIT_HLIST_HEAD(&priv_dst->hhead); 215 + nf_conncount_list_init(&priv_dst->list); 213 216 priv_dst->limit = priv_src->limit; 214 217 priv_dst->invert = priv_src->invert; 215 218 ··· 220 225 { 221 226 struct nft_connlimit *priv = nft_expr_priv(expr); 222 227 223 - nf_conncount_cache_free(&priv->hhead); 228 
+ nf_conncount_cache_free(&priv->list); 224 229 } 225 230 226 231 static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr) 227 232 { 228 233 struct nft_connlimit *priv = nft_expr_priv(expr); 229 - bool addit, ret; 230 234 231 - spin_lock_bh(&priv->lock); 232 - nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit); 233 - 234 - ret = hlist_empty(&priv->hhead); 235 - spin_unlock_bh(&priv->lock); 236 - 237 - return ret; 235 + return nf_conncount_gc_list(net, &priv->list); 238 236 } 239 237 240 238 static struct nft_expr_type nft_connlimit_type;
+1 -1
net/netfilter/nft_ct.c
··· 870 870 if (test_bit(IPS_HELPER_BIT, &ct->status)) 871 871 return; 872 872 873 - help = nf_ct_helper_ext_add(ct, to_assign, GFP_ATOMIC); 873 + help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 874 874 if (help) { 875 875 rcu_assign_pointer(help->helper, to_assign); 876 876 set_bit(IPS_HELPER_BIT, &ct->status);
+2
net/netfilter/nft_dynset.c
··· 118 118 u64 timeout; 119 119 int err; 120 120 121 + lockdep_assert_held(&ctx->net->nft.commit_mutex); 122 + 121 123 if (tb[NFTA_DYNSET_SET_NAME] == NULL || 122 124 tb[NFTA_DYNSET_OP] == NULL || 123 125 tb[NFTA_DYNSET_SREG_KEY] == NULL)
+14 -3
net/netfilter/nft_socket.c
··· 31 31 case NFPROTO_IPV4: 32 32 sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); 33 33 break; 34 - #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 34 + #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) 35 35 case NFPROTO_IPV6: 36 36 sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); 37 37 break; ··· 43 43 } 44 44 45 45 if (!sk) { 46 - nft_reg_store8(dest, 0); 46 + regs->verdict.code = NFT_BREAK; 47 47 return; 48 48 } 49 49 ··· 53 53 switch(priv->key) { 54 54 case NFT_SOCKET_TRANSPARENT: 55 55 nft_reg_store8(dest, inet_sk_transparent(sk)); 56 + break; 57 + case NFT_SOCKET_MARK: 58 + if (sk_fullsock(sk)) { 59 + *dest = sk->sk_mark; 60 + } else { 61 + regs->verdict.code = NFT_BREAK; 62 + return; 63 + } 56 64 break; 57 65 default: 58 66 WARN_ON(1); ··· 85 77 86 78 switch(ctx->family) { 87 79 case NFPROTO_IPV4: 88 - #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 80 + #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) 89 81 case NFPROTO_IPV6: 90 82 #endif 91 83 case NFPROTO_INET: ··· 98 90 switch(priv->key) { 99 91 case NFT_SOCKET_TRANSPARENT: 100 92 len = sizeof(u8); 93 + break; 94 + case NFT_SOCKET_MARK: 95 + len = sizeof(u32); 101 96 break; 102 97 default: 103 98 return -EOPNOTSUPP;
+120 -11
net/netfilter/utils.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 1 2 #include <linux/kernel.h> 2 3 #include <linux/netfilter.h> 3 4 #include <linux/netfilter_ipv4.h> 4 5 #include <linux/netfilter_ipv6.h> 5 6 #include <net/netfilter/nf_queue.h> 7 + #include <net/ip6_checksum.h> 8 + 9 + #ifdef CONFIG_INET 10 + __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, 11 + unsigned int dataoff, u8 protocol) 12 + { 13 + const struct iphdr *iph = ip_hdr(skb); 14 + __sum16 csum = 0; 15 + 16 + switch (skb->ip_summed) { 17 + case CHECKSUM_COMPLETE: 18 + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 19 + break; 20 + if ((protocol == 0 && !csum_fold(skb->csum)) || 21 + !csum_tcpudp_magic(iph->saddr, iph->daddr, 22 + skb->len - dataoff, protocol, 23 + skb->csum)) { 24 + skb->ip_summed = CHECKSUM_UNNECESSARY; 25 + break; 26 + } 27 + /* fall through */ 28 + case CHECKSUM_NONE: 29 + if (protocol == 0) 30 + skb->csum = 0; 31 + else 32 + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, 33 + skb->len - dataoff, 34 + protocol, 0); 35 + csum = __skb_checksum_complete(skb); 36 + } 37 + return csum; 38 + } 39 + EXPORT_SYMBOL(nf_ip_checksum); 40 + #endif 41 + 42 + static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook, 43 + unsigned int dataoff, unsigned int len, 44 + u8 protocol) 45 + { 46 + const struct iphdr *iph = ip_hdr(skb); 47 + __sum16 csum = 0; 48 + 49 + switch (skb->ip_summed) { 50 + case CHECKSUM_COMPLETE: 51 + if (len == skb->len - dataoff) 52 + return nf_ip_checksum(skb, hook, dataoff, protocol); 53 + /* fall through */ 54 + case CHECKSUM_NONE: 55 + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, protocol, 56 + skb->len - dataoff, 0); 57 + skb->ip_summed = CHECKSUM_NONE; 58 + return __skb_checksum_complete_head(skb, dataoff + len); 59 + } 60 + return csum; 61 + } 62 + 63 + __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, 64 + unsigned int dataoff, u8 protocol) 65 + { 66 + const struct ipv6hdr *ip6h = ipv6_hdr(skb); 67 + 
__sum16 csum = 0; 68 + 69 + switch (skb->ip_summed) { 70 + case CHECKSUM_COMPLETE: 71 + if (hook != NF_INET_PRE_ROUTING && hook != NF_INET_LOCAL_IN) 72 + break; 73 + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 74 + skb->len - dataoff, protocol, 75 + csum_sub(skb->csum, 76 + skb_checksum(skb, 0, 77 + dataoff, 0)))) { 78 + skb->ip_summed = CHECKSUM_UNNECESSARY; 79 + break; 80 + } 81 + /* fall through */ 82 + case CHECKSUM_NONE: 83 + skb->csum = ~csum_unfold( 84 + csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, 85 + skb->len - dataoff, 86 + protocol, 87 + csum_sub(0, 88 + skb_checksum(skb, 0, 89 + dataoff, 0)))); 90 + csum = __skb_checksum_complete(skb); 91 + } 92 + return csum; 93 + } 94 + EXPORT_SYMBOL(nf_ip6_checksum); 95 + 96 + static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, 97 + unsigned int dataoff, unsigned int len, 98 + u8 protocol) 99 + { 100 + const struct ipv6hdr *ip6h = ipv6_hdr(skb); 101 + __wsum hsum; 102 + __sum16 csum = 0; 103 + 104 + switch (skb->ip_summed) { 105 + case CHECKSUM_COMPLETE: 106 + if (len == skb->len - dataoff) 107 + return nf_ip6_checksum(skb, hook, dataoff, protocol); 108 + /* fall through */ 109 + case CHECKSUM_NONE: 110 + hsum = skb_checksum(skb, 0, dataoff, 0); 111 + skb->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, 112 + &ip6h->daddr, 113 + skb->len - dataoff, 114 + protocol, 115 + csum_sub(0, hsum))); 116 + skb->ip_summed = CHECKSUM_NONE; 117 + return __skb_checksum_complete_head(skb, dataoff + len); 118 + } 119 + return csum; 120 + }; 6 121 7 122 __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, 8 - unsigned int dataoff, u_int8_t protocol, 123 + unsigned int dataoff, u8 protocol, 9 124 unsigned short family) 10 125 { 11 - const struct nf_ipv6_ops *v6ops; 12 126 __sum16 csum = 0; 13 127 14 128 switch (family) { ··· 130 16 csum = nf_ip_checksum(skb, hook, dataoff, protocol); 131 17 break; 132 18 case AF_INET6: 133 - v6ops = rcu_dereference(nf_ipv6_ops); 134 - if (v6ops) 135 - csum = 
v6ops->checksum(skb, hook, dataoff, protocol); 19 + csum = nf_ip6_checksum(skb, hook, dataoff, protocol); 136 20 break; 137 21 } 138 22 ··· 140 28 141 29 __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, 142 30 unsigned int dataoff, unsigned int len, 143 - u_int8_t protocol, unsigned short family) 31 + u8 protocol, unsigned short family) 144 32 { 145 - const struct nf_ipv6_ops *v6ops; 146 33 __sum16 csum = 0; 147 34 148 35 switch (family) { ··· 150 39 protocol); 151 40 break; 152 41 case AF_INET6: 153 - v6ops = rcu_dereference(nf_ipv6_ops); 154 - if (v6ops) 155 - csum = v6ops->checksum_partial(skb, hook, dataoff, len, 156 - protocol); 42 + csum = nf_ip6_checksum_partial(skb, hook, dataoff, len, 43 + protocol); 157 44 break; 158 45 } 159 46
+1 -1
net/netfilter/xt_CT.c
··· 93 93 return -ENOENT; 94 94 } 95 95 96 - help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); 96 + help = nf_ct_helper_ext_add(ct, GFP_KERNEL); 97 97 if (help == NULL) { 98 98 nf_conntrack_helper_put(helper); 99 99 return -ENOMEM;
+2 -2
net/netfilter/xt_TEE.c
··· 38 38 return XT_CONTINUE; 39 39 } 40 40 41 - #if IS_ENABLED(CONFIG_IPV6) 41 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 42 42 static unsigned int 43 43 tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) 44 44 { ··· 141 141 .destroy = tee_tg_destroy, 142 142 .me = THIS_MODULE, 143 143 }, 144 - #if IS_ENABLED(CONFIG_IPV6) 144 + #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) 145 145 { 146 146 .name = "TEE", 147 147 .revision = 1,
-9
net/netfilter/xt_TPROXY.c
··· 36 36 #include <net/netfilter/nf_tproxy.h> 37 37 #include <linux/netfilter/xt_TPROXY.h> 38 38 39 - /* assign a socket to the skb -- consumes sk */ 40 - static void 41 - nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 42 - { 43 - skb_orphan(skb); 44 - skb->sk = sk; 45 - skb->destructor = sock_edemux; 46 - } 47 - 48 39 static unsigned int 49 40 tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport, 50 41 u_int32_t mark_mask, u_int32_t mark_value)
+5 -15
net/openvswitch/conntrack.c
··· 26 26 #include <net/netfilter/nf_conntrack_seqadj.h> 27 27 #include <net/netfilter/nf_conntrack_zones.h> 28 28 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 29 + #include <net/ipv6_frag.h> 29 30 30 31 #ifdef CONFIG_NF_NAT_NEEDED 31 32 #include <linux/netfilter/nf_nat.h> ··· 608 607 ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, 609 608 u8 l3num, struct sk_buff *skb, bool natted) 610 609 { 611 - const struct nf_conntrack_l3proto *l3proto; 612 - const struct nf_conntrack_l4proto *l4proto; 613 610 struct nf_conntrack_tuple tuple; 614 611 struct nf_conntrack_tuple_hash *h; 615 612 struct nf_conn *ct; 616 - unsigned int dataoff; 617 - u8 protonum; 618 613 619 - l3proto = __nf_ct_l3proto_find(l3num); 620 - if (l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, 621 - &protonum) <= 0) { 622 - pr_debug("ovs_ct_find_existing: Can't get protonum\n"); 623 - return NULL; 624 - } 625 - l4proto = __nf_ct_l4proto_find(l3num, protonum); 626 - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, 627 - protonum, net, &tuple, l3proto, l4proto)) { 614 + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, 615 + net, &tuple)) { 628 616 pr_debug("ovs_ct_find_existing: Can't get tuple\n"); 629 617 return NULL; 630 618 } ··· 622 632 if (natted) { 623 633 struct nf_conntrack_tuple inverse; 624 634 625 - if (!nf_ct_invert_tuple(&inverse, &tuple, l3proto, l4proto)) { 635 + if (!nf_ct_invert_tuplepr(&inverse, &tuple)) { 626 636 pr_debug("ovs_ct_find_existing: Inversion failed!\n"); 627 637 return NULL; 628 638 } ··· 1304 1314 return -EINVAL; 1305 1315 } 1306 1316 1307 - help = nf_ct_helper_ext_add(info->ct, helper, GFP_KERNEL); 1317 + help = nf_ct_helper_ext_add(info->ct, GFP_KERNEL); 1308 1318 if (!help) { 1309 1319 nf_conntrack_helper_put(helper); 1310 1320 return -ENOMEM;