Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for your net-next
tree, the most relevant things in this batch are:

1) Compile masquerade infrastructure into NAT module, from Florian Westphal.
Same thing with the redirection support.

2) Abort transaction if early initialization of the commit phase fails.
Also from Florian.

3) Get rid of synchronize_rcu() by using rule array in nf_tables, from
Florian.

4) Abort nf_tables batch if fatal signal is pending, from Florian.

5) Use .call_rcu in nfnetlink from nf_tables to make dumps fully lockless.
From Florian Westphal.

6) Support to match transparent sockets from nf_tables, from Máté Eckl.

7) Audit support for nf_tables, from Phil Sutter.

8) Validate chain dependencies from the commit phase, fall back to fine-grained
validation only in case of errors.

9) Attach dst to skbuff from netfilter flowtable packet path, from
Jason A. Donenfeld.

10) Use artificial maximum attribute cap to remove VLA from nfnetlink.
Patch from Kees Cook.

11) Add extension to allow forwarding packets through the neighbour layer.

12) Add IPv6 conntrack helper support to IPVS, from Julian Anastasov.

13) Add IPv6 FTP conntrack support to IPVS, from Julian Anastasov.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+1379 -412
+1
include/linux/netfilter/nfnetlink.h
··· 31 31 const struct nfnl_callback *cb; /* callback for individual types */ 32 32 int (*commit)(struct net *net, struct sk_buff *skb); 33 33 int (*abort)(struct net *net, struct sk_buff *skb); 34 + void (*cleanup)(struct net *net); 34 35 bool (*valid_genid)(struct net *net, u32 genid); 35 36 }; 36 37
+6 -4
include/net/ip_vs.h
··· 763 763 * 2=Mangled but checksum was not updated 764 764 */ 765 765 int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, 766 - struct sk_buff *, int *diff); 766 + struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh); 767 767 768 768 /* input hook: Process packet in outin direction, diff set for TCP. 769 769 * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, 770 770 * 2=Mangled but checksum was not updated 771 771 */ 772 772 int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, 773 - struct sk_buff *, int *diff); 773 + struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh); 774 774 775 775 /* ip_vs_app initializer */ 776 776 int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); ··· 1328 1328 int ip_vs_app_inc_get(struct ip_vs_app *inc); 1329 1329 void ip_vs_app_inc_put(struct ip_vs_app *inc); 1330 1330 1331 - int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); 1332 - int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); 1331 + int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb, 1332 + struct ip_vs_iphdr *ipvsh); 1333 + int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb, 1334 + struct ip_vs_iphdr *ipvsh); 1333 1335 1334 1336 int register_ip_vs_pe(struct ip_vs_pe *pe); 1335 1337 int unregister_ip_vs_pe(struct ip_vs_pe *pe);
+2 -4
include/net/netfilter/nf_socket.h
··· 2 2 #ifndef _NF_SOCK_H_ 3 3 #define _NF_SOCK_H_ 4 4 5 - struct net_device; 6 - struct sk_buff; 7 - struct sock; 8 - struct net; 5 + #include <net/sock.h> 6 + #include <net/inet_timewait_sock.h> 9 7 10 8 static inline bool nf_sk_is_transparent(struct sock *sk) 11 9 {
+7
include/net/netfilter/nf_tables.h
··· 858 858 * @name: name of the chain 859 859 */ 860 860 struct nft_chain { 861 + struct nft_rule *__rcu *rules_gen_0; 862 + struct nft_rule *__rcu *rules_gen_1; 861 863 struct list_head rules; 862 864 struct list_head list; 863 865 struct nft_table *table; ··· 869 867 u8 flags:6, 870 868 genmask:2; 871 869 char *name; 870 + 871 + /* Only used during control plane commit phase: */ 872 + struct nft_rule **rules_next; 872 873 }; 874 + 875 + int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); 873 876 874 877 enum nft_chain_types { 875 878 NFT_CHAIN_T_DEFAULT = 0,
+8
include/net/netfilter/nf_tables_core.h
··· 2 2 #ifndef _NET_NF_TABLES_CORE_H 3 3 #define _NET_NF_TABLES_CORE_H 4 4 5 + #include <net/netfilter/nf_tables.h> 6 + 5 7 extern struct nft_expr_type nft_imm_type; 6 8 extern struct nft_expr_type nft_cmp_type; 7 9 extern struct nft_expr_type nft_lookup_type; ··· 23 21 u32 data; 24 22 enum nft_registers sreg:8; 25 23 u8 len; 24 + }; 25 + 26 + struct nft_immediate_expr { 27 + struct nft_data data; 28 + enum nft_registers dreg:8; 29 + u8 dlen; 26 30 }; 27 31 28 32 /* Calculate the mask for the nft_cmp_fast expression. On big endian the
+1
include/net/netns/nftables.h
··· 9 9 struct list_head commit_list; 10 10 unsigned int base_seq; 11 11 u8 gencursor; 12 + u8 validate_state; 12 13 }; 13 14 14 15 #endif
+34
include/uapi/linux/netfilter/nf_tables.h
··· 905 905 #define NFTA_RT_MAX (__NFTA_RT_MAX - 1) 906 906 907 907 /** 908 + * enum nft_socket_attributes - nf_tables socket expression netlink attributes 909 + * 910 + * @NFTA_SOCKET_KEY: socket key to match 911 + * @NFTA_SOCKET_DREG: destination register 912 + */ 913 + enum nft_socket_attributes { 914 + NFTA_SOCKET_UNSPEC, 915 + NFTA_SOCKET_KEY, 916 + NFTA_SOCKET_DREG, 917 + __NFTA_SOCKET_MAX 918 + }; 919 + #define NFTA_SOCKET_MAX (__NFTA_SOCKET_MAX - 1) 920 + 921 + /* 922 + * enum nft_socket_keys - nf_tables socket expression keys 923 + * 924 + * @NFT_SOCKET_TRANSPARENT: Value of the IP(V6)_TRANSPARENT socket option_ 925 + */ 926 + enum nft_socket_keys { 927 + NFT_SOCKET_TRANSPARENT, 928 + __NFT_SOCKET_MAX 929 + }; 930 + #define NFT_SOCKET_MAX (__NFT_SOCKET_MAX - 1) 931 + 932 + /** 908 933 * enum nft_ct_keys - nf_tables ct expression keys 909 934 * 910 935 * @NFT_CT_STATE: conntrack state (bitmask of enum ip_conntrack_info) ··· 1079 1054 __NFTA_LOG_MAX 1080 1055 }; 1081 1056 #define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1) 1057 + 1058 + /** 1059 + * LOGLEVEL_AUDIT - a pseudo log level enabling audit logging 1060 + */ 1061 + #define LOGLEVEL_AUDIT 8 1082 1062 1083 1063 /** 1084 1064 * enum nft_queue_attributes - nf_tables queue expression netlink attributes ··· 1260 1230 * enum nft_fwd_attributes - nf_tables fwd expression netlink attributes 1261 1231 * 1262 1232 * @NFTA_FWD_SREG_DEV: source register of output interface (NLA_U32: nft_register) 1233 + * @NFTA_FWD_SREG_ADDR: source register of destination address (NLA_U32: nft_register) 1234 + * @NFTA_FWD_NFPROTO: layer 3 family of source register address (NLA_U32: enum nfproto) 1263 1235 */ 1264 1236 enum nft_fwd_attributes { 1265 1237 NFTA_FWD_UNSPEC, 1266 1238 NFTA_FWD_SREG_DEV, 1239 + NFTA_FWD_SREG_ADDR, 1240 + NFTA_FWD_NFPROTO, 1267 1241 __NFTA_FWD_MAX 1268 1242 }; 1269 1243 #define NFTA_FWD_MAX (__NFTA_FWD_MAX - 1)
+1 -4
net/ipv4/netfilter/Kconfig
··· 129 129 source and destination ports. 130 130 131 131 config NF_NAT_MASQUERADE_IPV4 132 - tristate "IPv4 masquerade support" 133 - help 134 - This is the kernel functionality to provide NAT in the masquerade 135 - flavour (automatic source address selection). 132 + bool 136 133 137 134 config NFT_MASQ_IPV4 138 135 tristate "IPv4 masquerading support for nf_tables"
+1 -3
net/ipv4/netfilter/Makefile
··· 10 10 obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o 11 11 12 12 nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o 13 + nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o 13 14 obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o 14 15 15 16 # defrag ··· 32 31 nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o 33 32 $(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h 34 33 obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o 35 - 36 - obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o 37 - 38 34 39 35 # NAT protocols (nf_nat) 40 36 obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-4
net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
··· 7 7 */ 8 8 9 9 #include <linux/types.h> 10 - #include <linux/module.h> 11 10 #include <linux/atomic.h> 12 11 #include <linux/inetdevice.h> 13 12 #include <linux/ip.h> ··· 156 157 unregister_inetaddr_notifier(&masq_inet_notifier); 157 158 } 158 159 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); 159 - 160 - MODULE_LICENSE("GPL"); 161 - MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
+1 -4
net/ipv6/netfilter/Kconfig
··· 136 136 if NF_NAT_IPV6 137 137 138 138 config NF_NAT_MASQUERADE_IPV6 139 - tristate "IPv6 masquerade support" 140 - help 141 - This is the kernel functionality to provide NAT in the masquerade 142 - flavour (automatic source address selection) for IPv6. 139 + bool 143 140 144 141 endif # NF_NAT_IPV6 145 142
+1 -1
net/ipv6/netfilter/Makefile
··· 18 18 obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 19 19 20 20 nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o 21 + nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o 21 22 obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o 22 - obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o 23 23 24 24 # defrag 25 25 nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
-4
net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
··· 10 10 */ 11 11 12 12 #include <linux/kernel.h> 13 - #include <linux/module.h> 14 13 #include <linux/atomic.h> 15 14 #include <linux/netdevice.h> 16 15 #include <linux/ipv6.h> ··· 185 186 unregister_netdevice_notifier(&masq_dev_notifier); 186 187 } 187 188 EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier); 188 - 189 - MODULE_LICENSE("GPL"); 190 - MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+10 -5
net/netfilter/Kconfig
··· 433 433 default NF_NAT && NF_CONNTRACK_TFTP 434 434 435 435 config NF_NAT_REDIRECT 436 - tristate "IPv4/IPv6 redirect support" 437 - depends on NF_NAT 438 - help 439 - This is the kernel functionality to redirect packets to local 440 - machine through NAT. 436 + bool 441 437 442 438 config NETFILTER_SYNPROXY 443 439 tristate ··· 612 616 This option allows using the FIB expression from the inet table. 613 617 The lookup will be delegated to the IPv4 or IPv6 FIB depending 614 618 on the protocol of the packet. 619 + 620 + config NFT_SOCKET 621 + tristate "Netfilter nf_tables socket match support" 622 + depends on IPV6 || IPV6=n 623 + select NF_SOCKET_IPV4 624 + select NF_SOCKET_IPV6 if IPV6 625 + help 626 + This option allows matching for the presence or absence of a 627 + corresponding socket and its attributes. 615 628 616 629 if NF_TABLES_NETDEV 617 630
+2 -1
net/netfilter/Makefile
··· 55 55 obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o 56 56 57 57 obj-$(CONFIG_NF_NAT) += nf_nat.o 58 - obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o 58 + nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o 59 59 60 60 # NAT helpers 61 61 obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o ··· 102 102 obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o 103 103 obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o 104 104 obj-$(CONFIG_NF_OSF) += nf_osf.o 105 + obj-$(CONFIG_NFT_SOCKET) += nft_socket.o 105 106 106 107 # nf_tables netdev 107 108 obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
+14 -10
net/netfilter/ipvs/ip_vs_app.c
··· 355 355 } 356 356 357 357 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 358 - struct ip_vs_app *app) 358 + struct ip_vs_app *app, 359 + struct ip_vs_iphdr *ipvsh) 359 360 { 360 361 int diff; 361 362 const unsigned int tcp_offset = ip_hdrlen(skb); ··· 387 386 if (app->pkt_out == NULL) 388 387 return 1; 389 388 390 - if (!app->pkt_out(app, cp, skb, &diff)) 389 + if (!app->pkt_out(app, cp, skb, &diff, ipvsh)) 391 390 return 0; 392 391 393 392 /* ··· 405 404 * called by ipvs packet handler, assumes previously checked cp!=NULL 406 405 * returns false if it can't handle packet (oom) 407 406 */ 408 - int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) 407 + int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, 408 + struct ip_vs_iphdr *ipvsh) 409 409 { 410 410 struct ip_vs_app *app; 411 411 ··· 419 417 420 418 /* TCP is complicated */ 421 419 if (cp->protocol == IPPROTO_TCP) 422 - return app_tcp_pkt_out(cp, skb, app); 420 + return app_tcp_pkt_out(cp, skb, app, ipvsh); 423 421 424 422 /* 425 423 * Call private output hook function ··· 427 425 if (app->pkt_out == NULL) 428 426 return 1; 429 427 430 - return app->pkt_out(app, cp, skb, NULL); 428 + return app->pkt_out(app, cp, skb, NULL, ipvsh); 431 429 } 432 430 433 431 434 432 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 435 - struct ip_vs_app *app) 433 + struct ip_vs_app *app, 434 + struct ip_vs_iphdr *ipvsh) 436 435 { 437 436 int diff; 438 437 const unsigned int tcp_offset = ip_hdrlen(skb); ··· 464 461 if (app->pkt_in == NULL) 465 462 return 1; 466 463 467 - if (!app->pkt_in(app, cp, skb, &diff)) 464 + if (!app->pkt_in(app, cp, skb, &diff, ipvsh)) 468 465 return 0; 469 466 470 467 /* ··· 482 479 * called by ipvs packet handler, assumes previously checked cp!=NULL. 483 480 * returns false if can't handle packet (oom). 
484 481 */ 485 - int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) 482 + int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, 483 + struct ip_vs_iphdr *ipvsh) 486 484 { 487 485 struct ip_vs_app *app; 488 486 ··· 496 492 497 493 /* TCP is complicated */ 498 494 if (cp->protocol == IPPROTO_TCP) 499 - return app_tcp_pkt_in(cp, skb, app); 495 + return app_tcp_pkt_in(cp, skb, app, ipvsh); 500 496 501 497 /* 502 498 * Call private input hook function ··· 504 500 if (app->pkt_in == NULL) 505 501 return 1; 506 502 507 - return app->pkt_in(app, cp, skb, NULL); 503 + return app->pkt_in(app, cp, skb, NULL, ipvsh); 508 504 } 509 505 510 506
+312 -169
net/netfilter/ipvs/ip_vs_ftp.c
··· 29 29 #include <linux/moduleparam.h> 30 30 #include <linux/kernel.h> 31 31 #include <linux/skbuff.h> 32 + #include <linux/ctype.h> 33 + #include <linux/inet.h> 32 34 #include <linux/in.h> 33 35 #include <linux/ip.h> 34 36 #include <linux/netfilter.h> ··· 46 44 #include <net/ip_vs.h> 47 45 48 46 49 - #define SERVER_STRING "227 " 50 - #define CLIENT_STRING "PORT" 47 + #define SERVER_STRING_PASV "227 " 48 + #define CLIENT_STRING_PORT "PORT" 49 + #define SERVER_STRING_EPSV "229 " 50 + #define CLIENT_STRING_EPRT "EPRT" 51 51 52 + enum { 53 + IP_VS_FTP_ACTIVE = 0, 54 + IP_VS_FTP_PORT = 0, 55 + IP_VS_FTP_PASV, 56 + IP_VS_FTP_EPRT, 57 + IP_VS_FTP_EPSV, 58 + }; 52 59 53 60 /* 54 61 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper ··· 69 58 MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands"); 70 59 71 60 72 - /* Dummy variable */ 73 - static int ip_vs_ftp_pasv; 61 + static char *ip_vs_ftp_data_ptr(struct sk_buff *skb, struct ip_vs_iphdr *ipvsh) 62 + { 63 + struct tcphdr *th = (struct tcphdr *)((char *)skb->data + ipvsh->len); 74 64 65 + if ((th->doff << 2) < sizeof(struct tcphdr)) 66 + return NULL; 67 + 68 + return (char *)th + (th->doff << 2); 69 + } 75 70 76 71 static int 77 72 ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) ··· 95 78 } 96 79 97 80 98 - /* 99 - * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started 100 - * with the "pattern", ignoring before "skip" and terminated with 101 - * the "term" character. 102 - * <addr,port> is in network order. 81 + /* Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started 82 + * with the "pattern". <addr,port> is in network order. 83 + * Parse extended format depending on ext. In this case addr can be pre-set. 
103 84 */ 104 85 static int ip_vs_ftp_get_addrport(char *data, char *data_limit, 105 86 const char *pattern, size_t plen, 106 - char skip, char term, 107 - __be32 *addr, __be16 *port, 108 - char **start, char **end) 87 + char skip, bool ext, int mode, 88 + union nf_inet_addr *addr, __be16 *port, 89 + __u16 af, char **start, char **end) 109 90 { 110 91 char *s, c; 111 92 unsigned char p[6]; 93 + char edelim; 94 + __u16 hport; 112 95 int i = 0; 113 96 114 97 if (data_limit - data < plen) { ··· 130 113 if (s == data_limit) 131 114 return -1; 132 115 if (!found) { 116 + /* "(" is optional for non-extended format, 117 + * so catch the start of IPv4 address 118 + */ 119 + if (!ext && isdigit(*s)) 120 + break; 133 121 if (*s == skip) 134 122 found = 1; 135 123 } else if (*s != skip) { ··· 142 120 } 143 121 } 144 122 } 145 - 146 - for (data = s; ; data++) { 147 - if (data == data_limit) 148 - return -1; 149 - if (*data == term) 150 - break; 151 - } 152 - *end = data; 153 - 154 - memset(p, 0, sizeof(p)); 155 - for (data = s; ; data++) { 156 - c = *data; 157 - if (c == term) 158 - break; 159 - if (c >= '0' && c <= '9') { 160 - p[i] = p[i]*10 + c - '0'; 161 - } else if (c == ',' && i < 5) { 162 - i++; 163 - } else { 164 - /* unexpected character */ 165 - return -1; 123 + /* Old IPv4-only format? 
*/ 124 + if (!ext) { 125 + p[0] = 0; 126 + for (data = s; ; data++) { 127 + if (data == data_limit) 128 + return -1; 129 + c = *data; 130 + if (isdigit(c)) { 131 + p[i] = p[i]*10 + c - '0'; 132 + } else if (c == ',' && i < 5) { 133 + i++; 134 + p[i] = 0; 135 + } else { 136 + /* unexpected character or terminator */ 137 + break; 138 + } 166 139 } 140 + 141 + if (i != 5) 142 + return -1; 143 + 144 + *start = s; 145 + *end = data; 146 + addr->ip = get_unaligned((__be32 *) p); 147 + *port = get_unaligned((__be16 *) (p + 4)); 148 + return 1; 167 149 } 168 - 169 - if (i != 5) 150 + if (s == data_limit) 170 151 return -1; 171 - 172 152 *start = s; 173 - *addr = get_unaligned((__be32 *) p); 174 - *port = get_unaligned((__be16 *) (p + 4)); 153 + edelim = *s++; 154 + if (edelim < 33 || edelim > 126) 155 + return -1; 156 + if (s == data_limit) 157 + return -1; 158 + if (*s == edelim) { 159 + /* Address family is usually missing for EPSV response */ 160 + if (mode != IP_VS_FTP_EPSV) 161 + return -1; 162 + s++; 163 + if (s == data_limit) 164 + return -1; 165 + /* Then address should be missing too */ 166 + if (*s != edelim) 167 + return -1; 168 + /* Caller can pre-set addr, if needed */ 169 + s++; 170 + } else { 171 + const char *ep; 172 + 173 + /* We allow address only from same family */ 174 + if (af == AF_INET6 && *s != '2') 175 + return -1; 176 + if (af == AF_INET && *s != '1') 177 + return -1; 178 + s++; 179 + if (s == data_limit) 180 + return -1; 181 + if (*s != edelim) 182 + return -1; 183 + s++; 184 + if (s == data_limit) 185 + return -1; 186 + if (af == AF_INET6) { 187 + if (in6_pton(s, data_limit - s, (u8 *)addr, edelim, 188 + &ep) <= 0) 189 + return -1; 190 + } else { 191 + if (in4_pton(s, data_limit - s, (u8 *)addr, edelim, 192 + &ep) <= 0) 193 + return -1; 194 + } 195 + s = (char *) ep; 196 + if (s == data_limit) 197 + return -1; 198 + if (*s != edelim) 199 + return -1; 200 + s++; 201 + } 202 + for (hport = 0; ; s++) 203 + { 204 + if (s == data_limit) 205 + return 
-1; 206 + if (!isdigit(*s)) 207 + break; 208 + hport = hport * 10 + *s - '0'; 209 + } 210 + if (s == data_limit || !hport || *s != edelim) 211 + return -1; 212 + s++; 213 + *end = s; 214 + *port = htons(hport); 175 215 return 1; 176 216 } 177 217 178 - /* 179 - * Look at outgoing ftp packets to catch the response to a PASV command 218 + /* Look at outgoing ftp packets to catch the response to a PASV/EPSV command 180 219 * from the server (inside-to-outside). 181 220 * When we see one, we build a connection entry with the client address, 182 221 * client port 0 (unknown at the moment), the server address and the ··· 248 165 * The outgoing packet should be something like 249 166 * "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)". 250 167 * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. 168 + * The extended format for EPSV response provides usually only port: 169 + * "229 Entering Extended Passive Mode (|||ppp|)" 251 170 */ 252 171 static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, 253 - struct sk_buff *skb, int *diff) 172 + struct sk_buff *skb, int *diff, 173 + struct ip_vs_iphdr *ipvsh) 254 174 { 255 - struct iphdr *iph; 256 - struct tcphdr *th; 257 175 char *data, *data_limit; 258 176 char *start, *end; 259 177 union nf_inet_addr from; ··· 268 184 269 185 *diff = 0; 270 186 271 - #ifdef CONFIG_IP_VS_IPV6 272 - /* This application helper doesn't work with IPv6 yet, 273 - * so turn this into a no-op for IPv6 packets 274 - */ 275 - if (cp->af == AF_INET6) 276 - return 1; 277 - #endif 278 - 279 187 /* Only useful for established sessions */ 280 188 if (cp->state != IP_VS_TCP_S_ESTABLISHED) 281 189 return 1; ··· 276 200 if (!skb_make_writable(skb, skb->len)) 277 201 return 0; 278 202 279 - if (cp->app_data == &ip_vs_ftp_pasv) { 280 - iph = ip_hdr(skb); 281 - th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); 282 - data = (char *)th + (th->doff << 2); 203 + if (cp->app_data == (void *) IP_VS_FTP_PASV) { 204 + data = 
ip_vs_ftp_data_ptr(skb, ipvsh); 283 205 data_limit = skb_tail_pointer(skb); 284 206 207 + if (!data || data >= data_limit) 208 + return 1; 209 + 285 210 if (ip_vs_ftp_get_addrport(data, data_limit, 286 - SERVER_STRING, 287 - sizeof(SERVER_STRING)-1, 288 - '(', ')', 289 - &from.ip, &port, 211 + SERVER_STRING_PASV, 212 + sizeof(SERVER_STRING_PASV)-1, 213 + '(', false, IP_VS_FTP_PASV, 214 + &from, &port, cp->af, 290 215 &start, &end) != 1) 291 216 return 1; 292 217 293 - IP_VS_DBG(7, "PASV response (%pI4:%d) -> %pI4:%d detected\n", 218 + IP_VS_DBG(7, "PASV response (%pI4:%u) -> %pI4:%u detected\n", 294 219 &from.ip, ntohs(port), &cp->caddr.ip, 0); 220 + } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) { 221 + data = ip_vs_ftp_data_ptr(skb, ipvsh); 222 + data_limit = skb_tail_pointer(skb); 295 223 296 - /* 297 - * Now update or create an connection entry for it 224 + if (!data || data >= data_limit) 225 + return 1; 226 + 227 + /* Usually, data address is not specified but 228 + * we support different address, so pre-set it. 
298 229 */ 299 - { 300 - struct ip_vs_conn_param p; 301 - ip_vs_conn_fill_param(cp->ipvs, AF_INET, 302 - iph->protocol, &from, port, 303 - &cp->caddr, 0, &p); 304 - n_cp = ip_vs_conn_out_get(&p); 305 - } 306 - if (!n_cp) { 307 - struct ip_vs_conn_param p; 308 - ip_vs_conn_fill_param(cp->ipvs, 309 - AF_INET, IPPROTO_TCP, &cp->caddr, 310 - 0, &cp->vaddr, port, &p); 311 - /* As above, this is ipv4 only */ 312 - n_cp = ip_vs_conn_new(&p, AF_INET, &from, port, 313 - IP_VS_CONN_F_NO_CPORT | 314 - IP_VS_CONN_F_NFCT, 315 - cp->dest, skb->mark); 316 - if (!n_cp) 317 - return 0; 230 + from = cp->daddr; 231 + if (ip_vs_ftp_get_addrport(data, data_limit, 232 + SERVER_STRING_EPSV, 233 + sizeof(SERVER_STRING_EPSV)-1, 234 + '(', true, IP_VS_FTP_EPSV, 235 + &from, &port, cp->af, 236 + &start, &end) != 1) 237 + return 1; 318 238 319 - /* add its controller */ 320 - ip_vs_control_add(n_cp, cp); 321 - } 239 + IP_VS_DBG_BUF(7, "EPSV response (%s:%u) -> %s:%u detected\n", 240 + IP_VS_DBG_ADDR(cp->af, &from), ntohs(port), 241 + IP_VS_DBG_ADDR(cp->af, &cp->caddr), 0); 242 + } else { 243 + return 1; 244 + } 322 245 323 - /* 324 - * Replace the old passive address with the new one 325 - */ 246 + /* Now update or create a connection entry for it */ 247 + { 248 + struct ip_vs_conn_param p; 249 + 250 + ip_vs_conn_fill_param(cp->ipvs, cp->af, 251 + ipvsh->protocol, &from, port, 252 + &cp->caddr, 0, &p); 253 + n_cp = ip_vs_conn_out_get(&p); 254 + } 255 + if (!n_cp) { 256 + struct ip_vs_conn_param p; 257 + 258 + ip_vs_conn_fill_param(cp->ipvs, 259 + cp->af, ipvsh->protocol, &cp->caddr, 260 + 0, &cp->vaddr, port, &p); 261 + n_cp = ip_vs_conn_new(&p, cp->af, &from, port, 262 + IP_VS_CONN_F_NO_CPORT | 263 + IP_VS_CONN_F_NFCT, 264 + cp->dest, skb->mark); 265 + if (!n_cp) 266 + return 0; 267 + 268 + /* add its controller */ 269 + ip_vs_control_add(n_cp, cp); 270 + } 271 + 272 + /* Replace the old passive address with the new one */ 273 + if (cp->app_data == (void *) IP_VS_FTP_PASV) { 326 274 from.ip 
= n_cp->vaddr.ip; 327 275 port = n_cp->vport; 328 276 snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u", ··· 356 256 ((unsigned char *)&from.ip)[3], 357 257 ntohs(port) >> 8, 358 258 ntohs(port) & 0xFF); 359 - 360 - buf_len = strlen(buf); 361 - 362 - ct = nf_ct_get(skb, &ctinfo); 363 - if (ct) { 364 - bool mangled; 365 - 366 - /* If mangling fails this function will return 0 367 - * which will cause the packet to be dropped. 368 - * Mangling can only fail under memory pressure, 369 - * hopefully it will succeed on the retransmitted 370 - * packet. 371 - */ 372 - mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 373 - iph->ihl * 4, 374 - start - data, 375 - end - start, 376 - buf, buf_len); 377 - if (mangled) { 378 - ip_vs_nfct_expect_related(skb, ct, n_cp, 379 - IPPROTO_TCP, 0, 0); 380 - if (skb->ip_summed == CHECKSUM_COMPLETE) 381 - skb->ip_summed = CHECKSUM_UNNECESSARY; 382 - /* csum is updated */ 383 - ret = 1; 384 - } 385 - } 386 - 387 - /* 388 - * Not setting 'diff' is intentional, otherwise the sequence 389 - * would be adjusted twice. 390 - */ 391 - 392 - cp->app_data = NULL; 393 - ip_vs_tcp_conn_listen(n_cp); 394 - ip_vs_conn_put(n_cp); 395 - return ret; 259 + } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) { 260 + from = n_cp->vaddr; 261 + port = n_cp->vport; 262 + /* Only port, client will use VIP for the data connection */ 263 + snprintf(buf, sizeof(buf), "|||%u|", 264 + ntohs(port)); 265 + } else { 266 + *buf = 0; 396 267 } 397 - return 1; 268 + buf_len = strlen(buf); 269 + 270 + ct = nf_ct_get(skb, &ctinfo); 271 + if (ct) { 272 + bool mangled; 273 + 274 + /* If mangling fails this function will return 0 275 + * which will cause the packet to be dropped. 276 + * Mangling can only fail under memory pressure, 277 + * hopefully it will succeed on the retransmitted 278 + * packet. 
279 + */ 280 + mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 281 + ipvsh->len, 282 + start - data, 283 + end - start, 284 + buf, buf_len); 285 + if (mangled) { 286 + ip_vs_nfct_expect_related(skb, ct, n_cp, 287 + ipvsh->protocol, 0, 0); 288 + if (skb->ip_summed == CHECKSUM_COMPLETE) 289 + skb->ip_summed = CHECKSUM_UNNECESSARY; 290 + /* csum is updated */ 291 + ret = 1; 292 + } 293 + } 294 + 295 + /* Not setting 'diff' is intentional, otherwise the sequence 296 + * would be adjusted twice. 297 + */ 298 + 299 + cp->app_data = (void *) IP_VS_FTP_ACTIVE; 300 + ip_vs_tcp_conn_listen(n_cp); 301 + ip_vs_conn_put(n_cp); 302 + return ret; 398 303 } 399 304 400 305 401 - /* 402 - * Look at incoming ftp packets to catch the PASV/PORT command 306 + /* Look at incoming ftp packets to catch the PASV/PORT/EPRT/EPSV command 403 307 * (outside-to-inside). 404 308 * 405 309 * The incoming packet having the PORT command should be something like ··· 412 308 * In this case, we create a connection entry using the client address and 413 309 * port, so that the active ftp data connection from the server can reach 414 310 * the client. 
311 + * Extended format: 312 + * "EPSV\r\n" when client requests server address from same family 313 + * "EPSV 1\r\n" when client requests IPv4 server address 314 + * "EPSV 2\r\n" when client requests IPv6 server address 315 + * "EPSV ALL\r\n" - not supported 316 + * EPRT with specified delimiter (ASCII 33..126), "|" by default: 317 + * "EPRT |1|IPv4ADDR|PORT|\r\n" when client provides IPv4 addrport 318 + * "EPRT |2|IPv6ADDR|PORT|\r\n" when client provides IPv6 addrport 415 319 */ 416 320 static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, 417 - struct sk_buff *skb, int *diff) 321 + struct sk_buff *skb, int *diff, 322 + struct ip_vs_iphdr *ipvsh) 418 323 { 419 - struct iphdr *iph; 420 - struct tcphdr *th; 421 324 char *data, *data_start, *data_limit; 422 325 char *start, *end; 423 326 union nf_inet_addr to; ··· 434 323 /* no diff required for incoming packets */ 435 324 *diff = 0; 436 325 437 - #ifdef CONFIG_IP_VS_IPV6 438 - /* This application helper doesn't work with IPv6 yet, 439 - * so turn this into a no-op for IPv6 packets 440 - */ 441 - if (cp->af == AF_INET6) 442 - return 1; 443 - #endif 444 - 445 326 /* Only useful for established sessions */ 446 327 if (cp->state != IP_VS_TCP_S_ESTABLISHED) 447 328 return 1; ··· 442 339 if (!skb_make_writable(skb, skb->len)) 443 340 return 0; 444 341 445 - /* 446 - * Detecting whether it is passive 447 - */ 448 - iph = ip_hdr(skb); 449 - th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); 450 - 451 - /* Since there may be OPTIONS in the TCP packet and the HLEN is 452 - the length of the header in 32-bit multiples, it is accurate 453 - to calculate data address by th+HLEN*4 */ 454 - data = data_start = (char *)th + (th->doff << 2); 342 + data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh); 455 343 data_limit = skb_tail_pointer(skb); 344 + if (!data || data >= data_limit) 345 + return 1; 456 346 457 347 while (data <= data_limit - 6) { 458 - if (strncasecmp(data, "PASV\r\n", 6) == 0) { 348 + if (cp->af == 
AF_INET && 349 + strncasecmp(data, "PASV\r\n", 6) == 0) { 459 350 /* Passive mode on */ 460 351 IP_VS_DBG(7, "got PASV at %td of %td\n", 461 352 data - data_start, 462 353 data_limit - data_start); 463 - cp->app_data = &ip_vs_ftp_pasv; 354 + cp->app_data = (void *) IP_VS_FTP_PASV; 464 355 return 1; 465 356 } 357 + 358 + /* EPSV or EPSV<space><net-prt> */ 359 + if (strncasecmp(data, "EPSV", 4) == 0 && 360 + (data[4] == ' ' || data[4] == '\r')) { 361 + if (data[4] == ' ') { 362 + char proto = data[5]; 363 + 364 + if (data > data_limit - 7 || data[6] != '\r') 365 + return 1; 366 + 367 + #ifdef CONFIG_IP_VS_IPV6 368 + if (cp->af == AF_INET6 && proto == '2') { 369 + } else 370 + #endif 371 + if (cp->af == AF_INET && proto == '1') { 372 + } else { 373 + return 1; 374 + } 375 + } 376 + /* Extended Passive mode on */ 377 + IP_VS_DBG(7, "got EPSV at %td of %td\n", 378 + data - data_start, 379 + data_limit - data_start); 380 + cp->app_data = (void *) IP_VS_FTP_EPSV; 381 + return 1; 382 + } 383 + 466 384 data++; 467 385 } 468 386 ··· 494 370 * then create a new connection entry for the coming data 495 371 * connection. 
496 372 */ 497 - if (ip_vs_ftp_get_addrport(data_start, data_limit, 498 - CLIENT_STRING, sizeof(CLIENT_STRING)-1, 499 - ' ', '\r', &to.ip, &port, 500 - &start, &end) != 1) 501 - return 1; 373 + if (cp->af == AF_INET && 374 + ip_vs_ftp_get_addrport(data_start, data_limit, 375 + CLIENT_STRING_PORT, 376 + sizeof(CLIENT_STRING_PORT)-1, 377 + ' ', false, IP_VS_FTP_PORT, 378 + &to, &port, cp->af, 379 + &start, &end) == 1) { 502 380 503 - IP_VS_DBG(7, "PORT %pI4:%d detected\n", &to.ip, ntohs(port)); 381 + IP_VS_DBG(7, "PORT %pI4:%u detected\n", &to.ip, ntohs(port)); 382 + 383 + /* Now update or create a connection entry for it */ 384 + IP_VS_DBG(7, "protocol %s %pI4:%u %pI4:%u\n", 385 + ip_vs_proto_name(ipvsh->protocol), 386 + &to.ip, ntohs(port), &cp->vaddr.ip, 387 + ntohs(cp->vport)-1); 388 + } else if (ip_vs_ftp_get_addrport(data_start, data_limit, 389 + CLIENT_STRING_EPRT, 390 + sizeof(CLIENT_STRING_EPRT)-1, 391 + ' ', true, IP_VS_FTP_EPRT, 392 + &to, &port, cp->af, 393 + &start, &end) == 1) { 394 + 395 + IP_VS_DBG_BUF(7, "EPRT %s:%u detected\n", 396 + IP_VS_DBG_ADDR(cp->af, &to), ntohs(port)); 397 + 398 + /* Now update or create a connection entry for it */ 399 + IP_VS_DBG_BUF(7, "protocol %s %s:%u %s:%u\n", 400 + ip_vs_proto_name(ipvsh->protocol), 401 + IP_VS_DBG_ADDR(cp->af, &to), ntohs(port), 402 + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), 403 + ntohs(cp->vport)-1); 404 + } else { 405 + return 1; 406 + } 504 407 505 408 /* Passive mode off */ 506 - cp->app_data = NULL; 507 - 508 - /* 509 - * Now update or create a connection entry for it 510 - */ 511 - IP_VS_DBG(7, "protocol %s %pI4:%d %pI4:%d\n", 512 - ip_vs_proto_name(iph->protocol), 513 - &to.ip, ntohs(port), &cp->vaddr.ip, 0); 409 + cp->app_data = (void *) IP_VS_FTP_ACTIVE; 514 410 515 411 { 516 412 struct ip_vs_conn_param p; 517 - ip_vs_conn_fill_param(cp->ipvs, AF_INET, 518 - iph->protocol, &to, port, &cp->vaddr, 413 + ip_vs_conn_fill_param(cp->ipvs, cp->af, 414 + ipvsh->protocol, &to, port, &cp->vaddr, 519 415 
htons(ntohs(cp->vport)-1), &p); 520 416 n_cp = ip_vs_conn_in_get(&p); 521 417 if (!n_cp) { 522 - /* This is ipv4 only */ 523 - n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr, 418 + n_cp = ip_vs_conn_new(&p, cp->af, &cp->daddr, 524 419 htons(ntohs(cp->dport)-1), 525 420 IP_VS_CONN_F_NFCT, cp->dest, 526 421 skb->mark); ··· 597 454 ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]); 598 455 if (ret) 599 456 goto err_unreg; 600 - pr_info("%s: loaded support on port[%d] = %d\n", 457 + pr_info("%s: loaded support on port[%d] = %u\n", 601 458 app->name, i, ports[i]); 602 459 } 603 460 return 0;
+49 -52
net/netfilter/ipvs/ip_vs_nfct.c
··· 67 67 #include <net/netfilter/nf_conntrack_zones.h> 68 68 69 69 70 - #define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" 71 - #define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ 72 - &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ 70 + #define FMT_TUPLE "%s:%u->%s:%u/%u" 71 + #define ARG_TUPLE(T) IP_VS_DBG_ADDR((T)->src.l3num, &(T)->src.u3), \ 72 + ntohs((T)->src.u.all), \ 73 + IP_VS_DBG_ADDR((T)->src.l3num, &(T)->dst.u3), \ 74 + ntohs((T)->dst.u.all), \ 73 75 (T)->dst.protonum 74 76 75 - #define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" 76 - #define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ 77 - &((C)->vaddr.ip), ntohs((C)->vport), \ 78 - &((C)->daddr.ip), ntohs((C)->dport), \ 77 + #define FMT_CONN "%s:%u->%s:%u->%s:%u/%u:%u" 78 + #define ARG_CONN(C) IP_VS_DBG_ADDR((C)->af, &((C)->caddr)), \ 79 + ntohs((C)->cport), \ 80 + IP_VS_DBG_ADDR((C)->af, &((C)->vaddr)), \ 81 + ntohs((C)->vport), \ 82 + IP_VS_DBG_ADDR((C)->daf, &((C)->daddr)), \ 83 + ntohs((C)->dport), \ 79 84 (C)->protocol, (C)->state 80 85 81 86 void ··· 132 127 new_tuple.dst.protonum != IPPROTO_ICMPV6) 133 128 new_tuple.dst.u.tcp.port = cp->vport; 134 129 } 135 - IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " 136 - "ctinfo=%d, old reply=" FMT_TUPLE 137 - ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", 138 - __func__, ct, ct->status, ctinfo, 139 - ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), 140 - ARG_TUPLE(&new_tuple), ARG_CONN(cp)); 130 + IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, " 131 + "ctinfo=%d, old reply=" FMT_TUPLE "\n", 132 + __func__, ct, ct->status, ctinfo, 133 + ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)); 134 + IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, " 135 + "ctinfo=%d, new reply=" FMT_TUPLE "\n", 136 + __func__, ct, ct->status, ctinfo, 137 + ARG_TUPLE(&new_tuple)); 141 138 nf_conntrack_alter_reply(ct, &new_tuple); 139 + IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n", 140 + __func__, ct, 
ARG_CONN(cp)); 142 141 } 143 142 144 143 int ip_vs_confirm_conntrack(struct sk_buff *skb) ··· 161 152 struct ip_vs_conn_param p; 162 153 struct net *net = nf_ct_net(ct); 163 154 164 - if (exp->tuple.src.l3num != PF_INET) 165 - return; 166 - 167 155 /* 168 156 * We assume that no NF locks are held before this callback. 169 157 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their ··· 177 171 cp = ip_vs_conn_out_get(&p); 178 172 if (cp) { 179 173 /* Change reply CLIENT->RS to CLIENT->VS */ 174 + IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp=" 175 + FMT_CONN "\n", 176 + __func__, ct, ct->status, ARG_CONN(cp)); 180 177 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 181 - IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 182 - FMT_TUPLE ", found inout cp=" FMT_CONN "\n", 183 - __func__, ct, ct->status, 184 - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 185 - ARG_CONN(cp)); 178 + IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple=" 179 + FMT_TUPLE "\n", 180 + __func__, ct, ARG_TUPLE(&new_reply)); 186 181 new_reply.dst.u3 = cp->vaddr; 187 182 new_reply.dst.u.tcp.port = cp->vport; 188 - IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE 189 - ", inout cp=" FMT_CONN "\n", 190 - __func__, ct, 191 - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 192 - ARG_CONN(cp)); 193 183 goto alter; 194 184 } 195 185 ··· 193 191 cp = ip_vs_conn_in_get(&p); 194 192 if (cp) { 195 193 /* Change reply VS->CLIENT to RS->CLIENT */ 194 + IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp=" 195 + FMT_CONN "\n", 196 + __func__, ct, ct->status, ARG_CONN(cp)); 196 197 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; 197 - IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " 198 - FMT_TUPLE ", found outin cp=" FMT_CONN "\n", 199 - __func__, ct, ct->status, 200 - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 201 - ARG_CONN(cp)); 198 + IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple=" 199 + FMT_TUPLE "\n", 200 + __func__, ct, 
ARG_TUPLE(&new_reply)); 202 201 new_reply.src.u3 = cp->daddr; 203 202 new_reply.src.u.tcp.port = cp->dport; 204 - IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " 205 - FMT_TUPLE ", outin cp=" FMT_CONN "\n", 206 - __func__, ct, 207 - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), 208 - ARG_CONN(cp)); 209 203 goto alter; 210 204 } 211 205 212 - IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE 213 - " - unknown expect\n", 214 - __func__, ct, ct->status, ARG_TUPLE(orig)); 206 + IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE 207 + " - unknown expect\n", 208 + __func__, ct, ct->status, ARG_TUPLE(orig)); 215 209 return; 216 210 217 211 alter: ··· 245 247 246 248 exp->expectfn = ip_vs_nfct_expect_callback; 247 249 248 - IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", 249 - __func__, ct, ARG_TUPLE(&exp->tuple)); 250 + IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", 251 + __func__, ct, ARG_TUPLE(&exp->tuple)); 250 252 nf_ct_expect_related(exp); 251 253 nf_ct_expect_put(exp); 252 254 } ··· 272 274 tuple.dst.u3 = cp->vaddr; 273 275 tuple.dst.u.all = cp->vport; 274 276 275 - IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE 276 - " for conn " FMT_CONN "\n", 277 - __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); 277 + IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n", 278 + __func__, ARG_CONN(cp)); 278 279 279 280 h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); 280 281 if (h) { 281 282 ct = nf_ct_tuplehash_to_ctrack(h); 282 283 if (nf_ct_kill(ct)) { 283 - IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" 284 - FMT_TUPLE "\n", 285 - __func__, ct, ARG_TUPLE(&tuple)); 284 + IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple=" 285 + FMT_TUPLE "\n", 286 + __func__, ct, ARG_TUPLE(&tuple)); 286 287 } else { 287 - IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" 288 - FMT_TUPLE "\n", 289 - __func__, ct, ARG_TUPLE(&tuple)); 288 + IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple=" 289 + FMT_TUPLE 
"\n", 290 + __func__, ct, ARG_TUPLE(&tuple)); 290 291 } 291 292 nf_ct_put(ct); 292 293 } else { 293 - IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", 294 - __func__, ARG_TUPLE(&tuple)); 294 + IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", 295 + __func__, ARG_TUPLE(&tuple)); 295 296 } 296 297 } 297 298
+2 -2
net/netfilter/ipvs/ip_vs_proto_sctp.c
··· 109 109 return 0; 110 110 111 111 /* Call application helper if needed */ 112 - ret = ip_vs_app_pkt_out(cp, skb); 112 + ret = ip_vs_app_pkt_out(cp, skb, iph); 113 113 if (ret == 0) 114 114 return 0; 115 115 /* ret=2: csum update is needed after payload mangling */ ··· 156 156 return 0; 157 157 158 158 /* Call application helper if needed */ 159 - ret = ip_vs_app_pkt_in(cp, skb); 159 + ret = ip_vs_app_pkt_in(cp, skb, iph); 160 160 if (ret == 0) 161 161 return 0; 162 162 /* ret=2: csum update is needed after payload mangling */
+2 -2
net/netfilter/ipvs/ip_vs_proto_tcp.c
··· 170 170 return 0; 171 171 172 172 /* Call application helper if needed */ 173 - if (!(ret = ip_vs_app_pkt_out(cp, skb))) 173 + if (!(ret = ip_vs_app_pkt_out(cp, skb, iph))) 174 174 return 0; 175 175 /* ret=2: csum update is needed after payload mangling */ 176 176 if (ret == 1) ··· 251 251 * Attempt ip_vs_app call. 252 252 * It will fix ip_vs_conn and iph ack_seq stuff 253 253 */ 254 - if (!(ret = ip_vs_app_pkt_in(cp, skb))) 254 + if (!(ret = ip_vs_app_pkt_in(cp, skb, iph))) 255 255 return 0; 256 256 /* ret=2: csum update is needed after payload mangling */ 257 257 if (ret == 1)
+2 -2
net/netfilter/ipvs/ip_vs_proto_udp.c
··· 162 162 /* 163 163 * Call application helper if needed 164 164 */ 165 - if (!(ret = ip_vs_app_pkt_out(cp, skb))) 165 + if (!(ret = ip_vs_app_pkt_out(cp, skb, iph))) 166 166 return 0; 167 167 /* ret=2: csum update is needed after payload mangling */ 168 168 if (ret == 1) ··· 246 246 * Attempt ip_vs_app call. 247 247 * It will fix ip_vs_conn 248 248 */ 249 - if (!(ret = ip_vs_app_pkt_in(cp, skb))) 249 + if (!(ret = ip_vs_app_pkt_in(cp, skb, iph))) 250 250 return 0; 251 251 /* ret=2: csum update is needed after payload mangling */ 252 252 if (ret == 1)
+4 -2
net/netfilter/nf_flow_table_ip.c
··· 220 220 enum flow_offload_tuple_dir dir; 221 221 struct flow_offload *flow; 222 222 struct net_device *outdev; 223 - const struct rtable *rt; 223 + struct rtable *rt; 224 224 unsigned int thoff; 225 225 struct iphdr *iph; 226 226 __be32 nexthop; ··· 241 241 242 242 dir = tuplehash->tuple.dir; 243 243 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); 244 - rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache; 244 + rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; 245 245 246 246 if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && 247 247 (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) ··· 264 264 265 265 skb->dev = outdev; 266 266 nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr); 267 + skb_dst_set_noref(skb, &rt->dst); 267 268 neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb); 268 269 269 270 return NF_STOLEN; ··· 481 480 482 481 skb->dev = outdev; 483 482 nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6); 483 + skb_dst_set_noref(skb, &rt->dst); 484 484 neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb); 485 485 486 486 return NF_STOLEN;
+1 -1
net/netfilter/nf_nat_core.c
··· 1036 1036 .size = sizeof(struct nat_net), 1037 1037 }; 1038 1038 1039 - struct nf_nat_hook nat_hook = { 1039 + static struct nf_nat_hook nat_hook = { 1040 1040 .parse_nat_setup = nfnetlink_parse_nat_setup, 1041 1041 #ifdef CONFIG_XFRM 1042 1042 .decode_session = __nf_nat_decode_session,
-4
net/netfilter/nf_nat_redirect.c
··· 15 15 #include <linux/inetdevice.h> 16 16 #include <linux/ip.h> 17 17 #include <linux/kernel.h> 18 - #include <linux/module.h> 19 18 #include <linux/netdevice.h> 20 19 #include <linux/netfilter.h> 21 20 #include <linux/types.h> ··· 123 124 return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); 124 125 } 125 126 EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6); 126 - 127 - MODULE_LICENSE("GPL"); 128 - MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+399 -70
net/netfilter/nf_tables_api.c
··· 28 28 static LIST_HEAD(nf_tables_flowtables); 29 29 static u64 table_handle; 30 30 31 + enum { 32 + NFT_VALIDATE_SKIP = 0, 33 + NFT_VALIDATE_NEED, 34 + NFT_VALIDATE_DO, 35 + }; 36 + 37 + static void nft_validate_state_update(struct net *net, u8 new_validate_state) 38 + { 39 + switch (net->nft.validate_state) { 40 + case NFT_VALIDATE_SKIP: 41 + WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO); 42 + break; 43 + case NFT_VALIDATE_NEED: 44 + break; 45 + case NFT_VALIDATE_DO: 46 + if (new_validate_state == NFT_VALIDATE_NEED) 47 + return; 48 + } 49 + 50 + net->nft.validate_state = new_validate_state; 51 + } 52 + 31 53 static void nft_ctx_init(struct nft_ctx *ctx, 32 54 struct net *net, 33 55 const struct sk_buff *skb, ··· 395 373 if (nla == NULL) 396 374 return ERR_PTR(-EINVAL); 397 375 398 - list_for_each_entry(table, &net->nft.tables, list) { 376 + list_for_each_entry_rcu(table, &net->nft.tables, list) { 399 377 if (!nla_strcmp(nla, table->name) && 400 378 table->family == family && 401 379 nft_active_genmask(table, genmask)) ··· 568 546 return skb->len; 569 547 } 570 548 549 + static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb, 550 + const struct nlmsghdr *nlh, 551 + struct netlink_dump_control *c) 552 + { 553 + int err; 554 + 555 + if (!try_module_get(THIS_MODULE)) 556 + return -EINVAL; 557 + 558 + rcu_read_unlock(); 559 + err = netlink_dump_start(nlsk, skb, nlh, c); 560 + rcu_read_lock(); 561 + module_put(THIS_MODULE); 562 + 563 + return err; 564 + } 565 + 566 + /* called with rcu_read_lock held */ 571 567 static int nf_tables_gettable(struct net *net, struct sock *nlsk, 572 568 struct sk_buff *skb, const struct nlmsghdr *nlh, 573 569 const struct nlattr * const nla[], ··· 601 561 if (nlh->nlmsg_flags & NLM_F_DUMP) { 602 562 struct netlink_dump_control c = { 603 563 .dump = nf_tables_dump_tables, 564 + .module = THIS_MODULE, 604 565 }; 605 - return netlink_dump_start(nlsk, skb, nlh, &c); 566 + 567 + return 
nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 606 568 } 607 569 608 570 table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask); ··· 613 571 return PTR_ERR(table); 614 572 } 615 573 616 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 574 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 617 575 if (!skb2) 618 576 return -ENOMEM; 619 577 ··· 975 933 if (nla == NULL) 976 934 return ERR_PTR(-EINVAL); 977 935 978 - list_for_each_entry(chain, &table->chains, list) { 936 + list_for_each_entry_rcu(chain, &table->chains, list) { 979 937 if (!nla_strcmp(nla, chain->name) && 980 938 nft_active_genmask(chain, genmask)) 981 939 return chain; ··· 1177 1135 return skb->len; 1178 1136 } 1179 1137 1138 + /* called with rcu_read_lock held */ 1180 1139 static int nf_tables_getchain(struct net *net, struct sock *nlsk, 1181 1140 struct sk_buff *skb, const struct nlmsghdr *nlh, 1182 1141 const struct nlattr * const nla[], ··· 1194 1151 if (nlh->nlmsg_flags & NLM_F_DUMP) { 1195 1152 struct netlink_dump_control c = { 1196 1153 .dump = nf_tables_dump_chains, 1154 + .module = THIS_MODULE, 1197 1155 }; 1198 - return netlink_dump_start(nlsk, skb, nlh, &c); 1156 + 1157 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 1199 1158 } 1200 1159 1201 1160 table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask); ··· 1212 1167 return PTR_ERR(chain); 1213 1168 } 1214 1169 1215 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1170 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 1216 1171 if (!skb2) 1217 1172 return -ENOMEM; 1218 1173 ··· 1282 1237 rcu_assign_pointer(chain->stats, newstats); 1283 1238 } 1284 1239 1240 + static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) 1241 + { 1242 + struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0); 1243 + struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1); 1244 + 1245 + if (g0 != g1) 1246 + kvfree(g1); 1247 + kvfree(g0); 1248 + 1249 + /* should be NULL either via abort or via successful 
commit */ 1250 + WARN_ON_ONCE(chain->rules_next); 1251 + kvfree(chain->rules_next); 1252 + } 1253 + 1285 1254 static void nf_tables_chain_destroy(struct nft_ctx *ctx) 1286 1255 { 1287 1256 struct nft_chain *chain = ctx->chain; 1288 1257 1289 1258 BUG_ON(chain->use > 0); 1259 + 1260 + /* no concurrent access possible anymore */ 1261 + nf_tables_chain_free_chain_rules(chain); 1290 1262 1291 1263 if (nft_is_base_chain(chain)) { 1292 1264 struct nft_base_chain *basechain = nft_base_chain(chain); ··· 1397 1335 module_put(hook->type->owner); 1398 1336 } 1399 1337 1338 + struct nft_rules_old { 1339 + struct rcu_head h; 1340 + struct nft_rule **start; 1341 + }; 1342 + 1343 + static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain, 1344 + unsigned int alloc) 1345 + { 1346 + if (alloc > INT_MAX) 1347 + return NULL; 1348 + 1349 + alloc += 1; /* NULL, ends rules */ 1350 + if (sizeof(struct nft_rule *) > INT_MAX / alloc) 1351 + return NULL; 1352 + 1353 + alloc *= sizeof(struct nft_rule *); 1354 + alloc += sizeof(struct nft_rules_old); 1355 + 1356 + return kvmalloc(alloc, GFP_KERNEL); 1357 + } 1358 + 1400 1359 static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, 1401 1360 u8 policy, bool create) 1402 1361 { ··· 1427 1344 struct nft_stats __percpu *stats; 1428 1345 struct net *net = ctx->net; 1429 1346 struct nft_chain *chain; 1347 + struct nft_rule **rules; 1430 1348 int err; 1431 1349 1432 1350 if (table->use == UINT_MAX) ··· 1489 1405 err = -ENOMEM; 1490 1406 goto err1; 1491 1407 } 1408 + 1409 + rules = nf_tables_chain_alloc_rules(chain, 0); 1410 + if (!rules) { 1411 + err = -ENOMEM; 1412 + goto err1; 1413 + } 1414 + 1415 + *rules = NULL; 1416 + rcu_assign_pointer(chain->rules_gen_0, rules); 1417 + rcu_assign_pointer(chain->rules_gen_1, rules); 1492 1418 1493 1419 err = nf_tables_register_hook(net, table, chain); 1494 1420 if (err < 0) ··· 1943 1849 goto err1; 1944 1850 } 1945 1851 1946 - if (ops->validate) { 1947 - const struct 
nft_data *data = NULL; 1948 - 1949 - err = ops->validate(ctx, expr, &data); 1950 - if (err < 0) 1951 - goto err2; 1952 - } 1953 - 1954 1852 return 0; 1955 - 1956 - err2: 1957 - if (ops->destroy) 1958 - ops->destroy(ctx, expr); 1959 1853 err1: 1960 1854 expr->ops = NULL; 1961 1855 return err; ··· 2002 1920 struct nft_rule *rule; 2003 1921 2004 1922 // FIXME: this sucks 2005 - list_for_each_entry(rule, &chain->rules, list) { 1923 + list_for_each_entry_rcu(rule, &chain->rules, list) { 2006 1924 if (handle == rule->handle) 2007 1925 return rule; 2008 1926 } ··· 2198 2116 return 0; 2199 2117 } 2200 2118 2119 + /* called with rcu_read_lock held */ 2201 2120 static int nf_tables_getrule(struct net *net, struct sock *nlsk, 2202 2121 struct sk_buff *skb, const struct nlmsghdr *nlh, 2203 2122 const struct nlattr * const nla[], ··· 2217 2134 struct netlink_dump_control c = { 2218 2135 .dump = nf_tables_dump_rules, 2219 2136 .done = nf_tables_dump_rules_done, 2137 + .module = THIS_MODULE, 2220 2138 }; 2221 2139 2222 2140 if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { 2223 2141 struct nft_rule_dump_ctx *ctx; 2224 2142 2225 - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 2143 + ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC); 2226 2144 if (!ctx) 2227 2145 return -ENOMEM; 2228 2146 2229 2147 if (nla[NFTA_RULE_TABLE]) { 2230 2148 ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], 2231 - GFP_KERNEL); 2149 + GFP_ATOMIC); 2232 2150 if (!ctx->table) { 2233 2151 kfree(ctx); 2234 2152 return -ENOMEM; ··· 2237 2153 } 2238 2154 if (nla[NFTA_RULE_CHAIN]) { 2239 2155 ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], 2240 - GFP_KERNEL); 2156 + GFP_ATOMIC); 2241 2157 if (!ctx->chain) { 2242 2158 kfree(ctx->table); 2243 2159 kfree(ctx); ··· 2247 2163 c.data = ctx; 2248 2164 } 2249 2165 2250 - return netlink_dump_start(nlsk, skb, nlh, &c); 2166 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 2251 2167 } 2252 2168 2253 2169 table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask); ··· 2268 
2184 return PTR_ERR(rule); 2269 2185 } 2270 2186 2271 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2187 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 2272 2188 if (!skb2) 2273 2189 return -ENOMEM; 2274 2190 ··· 2307 2223 { 2308 2224 nft_rule_expr_deactivate(ctx, rule); 2309 2225 nf_tables_rule_destroy(ctx, rule); 2226 + } 2227 + 2228 + int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) 2229 + { 2230 + struct nft_expr *expr, *last; 2231 + const struct nft_data *data; 2232 + struct nft_rule *rule; 2233 + int err; 2234 + 2235 + list_for_each_entry(rule, &chain->rules, list) { 2236 + if (!nft_is_active_next(ctx->net, rule)) 2237 + continue; 2238 + 2239 + nft_rule_for_each_expr(expr, last, rule) { 2240 + if (!expr->ops->validate) 2241 + continue; 2242 + 2243 + err = expr->ops->validate(ctx, expr, &data); 2244 + if (err < 0) 2245 + return err; 2246 + } 2247 + } 2248 + 2249 + return 0; 2250 + } 2251 + EXPORT_SYMBOL_GPL(nft_chain_validate); 2252 + 2253 + static int nft_table_validate(struct net *net, const struct nft_table *table) 2254 + { 2255 + struct nft_chain *chain; 2256 + struct nft_ctx ctx = { 2257 + .net = net, 2258 + .family = table->family, 2259 + }; 2260 + int err; 2261 + 2262 + list_for_each_entry(chain, &table->chains, list) { 2263 + if (!nft_is_base_chain(chain)) 2264 + continue; 2265 + 2266 + ctx.chain = chain; 2267 + err = nft_chain_validate(&ctx, chain); 2268 + if (err < 0) 2269 + return err; 2270 + } 2271 + 2272 + return 0; 2310 2273 } 2311 2274 2312 2275 #define NFT_RULE_MAXEXPRS 128 ··· 2483 2352 err = nf_tables_newexpr(&ctx, &info[i], expr); 2484 2353 if (err < 0) 2485 2354 goto err2; 2355 + 2356 + if (info[i].ops->validate) 2357 + nft_validate_state_update(net, NFT_VALIDATE_NEED); 2358 + 2486 2359 info[i].ops = NULL; 2487 2360 expr = nft_expr_next(expr); 2488 2361 } ··· 2530 2395 } 2531 2396 } 2532 2397 chain->use++; 2533 - return 0; 2534 2398 2399 + if (net->nft.validate_state == NFT_VALIDATE_DO) 2400 + return 
nft_table_validate(net, table); 2401 + 2402 + return 0; 2535 2403 err2: 2536 2404 nf_tables_rule_release(&ctx, rule); 2537 2405 err1: ··· 2793 2655 if (nla == NULL) 2794 2656 return ERR_PTR(-EINVAL); 2795 2657 2796 - list_for_each_entry(set, &table->sets, list) { 2658 + list_for_each_entry_rcu(set, &table->sets, list) { 2797 2659 if (!nla_strcmp(nla, set->name) && 2798 2660 nft_active_genmask(set, genmask)) 2799 2661 return set; ··· 2919 2781 return 0; 2920 2782 } 2921 2783 2922 - static u64 nf_jiffies64_to_msecs(u64 input) 2784 + static __be64 nf_jiffies64_to_msecs(u64 input) 2923 2785 { 2924 2786 u64 ms = jiffies64_to_nsecs(input); 2925 2787 ··· 3098 2960 return 0; 3099 2961 } 3100 2962 2963 + /* called with rcu_read_lock held */ 3101 2964 static int nf_tables_getset(struct net *net, struct sock *nlsk, 3102 2965 struct sk_buff *skb, const struct nlmsghdr *nlh, 3103 2966 const struct nlattr * const nla[], ··· 3121 2982 struct netlink_dump_control c = { 3122 2983 .dump = nf_tables_dump_sets, 3123 2984 .done = nf_tables_dump_sets_done, 2985 + .module = THIS_MODULE, 3124 2986 }; 3125 2987 struct nft_ctx *ctx_dump; 3126 2988 3127 - ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL); 2989 + ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC); 3128 2990 if (ctx_dump == NULL) 3129 2991 return -ENOMEM; 3130 2992 3131 2993 *ctx_dump = ctx; 3132 2994 c.data = ctx_dump; 3133 2995 3134 - return netlink_dump_start(nlsk, skb, nlh, &c); 2996 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 3135 2997 } 3136 2998 3137 2999 /* Only accept unspec with dump */ ··· 3145 3005 if (IS_ERR(set)) 3146 3006 return PTR_ERR(set); 3147 3007 3148 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 3008 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 3149 3009 if (skb2 == NULL) 3150 3010 return -ENOMEM; 3151 3011 ··· 3886 3746 ext = nft_set_elem_ext(set, &elem); 3887 3747 3888 3748 err = -ENOMEM; 3889 - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); 3749 + skb = nlmsg_new(NLMSG_GOODSIZE, 
GFP_ATOMIC); 3890 3750 if (skb == NULL) 3891 3751 goto err1; 3892 3752 ··· 3908 3768 return err == -EAGAIN ? -ENOBUFS : err; 3909 3769 } 3910 3770 3771 + /* called with rcu_read_lock held */ 3911 3772 static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, 3912 3773 struct sk_buff *skb, const struct nlmsghdr *nlh, 3913 3774 const struct nlattr * const nla[], ··· 3933 3792 struct netlink_dump_control c = { 3934 3793 .dump = nf_tables_dump_set, 3935 3794 .done = nf_tables_dump_set_done, 3795 + .module = THIS_MODULE, 3936 3796 }; 3937 3797 struct nft_set_dump_ctx *dump_ctx; 3938 3798 3939 - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); 3799 + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC); 3940 3800 if (!dump_ctx) 3941 3801 return -ENOMEM; 3942 3802 ··· 3945 3803 dump_ctx->ctx = ctx; 3946 3804 3947 3805 c.data = dump_ctx; 3948 - return netlink_dump_start(nlsk, skb, nlh, &c); 3806 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 3949 3807 } 3950 3808 3951 3809 if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) ··· 4176 4034 d2.type, d2.len); 4177 4035 if (err < 0) 4178 4036 goto err3; 4037 + 4038 + if (d2.type == NFT_DATA_VERDICT && 4039 + (data.verdict.code == NFT_GOTO || 4040 + data.verdict.code == NFT_JUMP)) 4041 + nft_validate_state_update(ctx->net, 4042 + NFT_VALIDATE_NEED); 4179 4043 } 4180 4044 4181 4045 nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); ··· 4281 4133 const struct nlattr *attr; 4282 4134 struct nft_set *set; 4283 4135 struct nft_ctx ctx; 4284 - int rem, err = 0; 4136 + int rem, err; 4285 4137 4286 4138 if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) 4287 4139 return -EINVAL; ··· 4302 4154 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 4303 4155 err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); 4304 4156 if (err < 0) 4305 - break; 4157 + return err; 4306 4158 } 4307 - return err; 4159 + 4160 + if (net->nft.validate_state == NFT_VALIDATE_DO) 4161 + return nft_table_validate(net, ctx.table); 4162 + 
4163 + return 0; 4308 4164 } 4309 4165 4310 4166 /** ··· 4578 4426 { 4579 4427 struct nft_object *obj; 4580 4428 4581 - list_for_each_entry(obj, &table->objects, list) { 4429 + list_for_each_entry_rcu(obj, &table->objects, list) { 4582 4430 if (!nla_strcmp(nla, obj->name) && 4583 4431 objtype == obj->ops->type->type && 4584 4432 nft_active_genmask(obj, genmask)) ··· 4908 4756 { 4909 4757 struct nft_obj_filter *filter; 4910 4758 4911 - filter = kzalloc(sizeof(*filter), GFP_KERNEL); 4759 + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); 4912 4760 if (!filter) 4913 4761 return ERR_PTR(-ENOMEM); 4914 4762 4915 4763 if (nla[NFTA_OBJ_TABLE]) { 4916 - filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL); 4764 + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC); 4917 4765 if (!filter->table) { 4918 4766 kfree(filter); 4919 4767 return ERR_PTR(-ENOMEM); ··· 4925 4773 return filter; 4926 4774 } 4927 4775 4776 + /* called with rcu_read_lock held */ 4928 4777 static int nf_tables_getobj(struct net *net, struct sock *nlsk, 4929 4778 struct sk_buff *skb, const struct nlmsghdr *nlh, 4930 4779 const struct nlattr * const nla[], ··· 4945 4792 struct netlink_dump_control c = { 4946 4793 .dump = nf_tables_dump_obj, 4947 4794 .done = nf_tables_dump_obj_done, 4795 + .module = THIS_MODULE, 4948 4796 }; 4949 4797 4950 4798 if (nla[NFTA_OBJ_TABLE] || ··· 4958 4804 4959 4805 c.data = filter; 4960 4806 } 4961 - return netlink_dump_start(nlsk, skb, nlh, &c); 4807 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 4962 4808 } 4963 4809 4964 4810 if (!nla[NFTA_OBJ_NAME] || ··· 4978 4824 return PTR_ERR(obj); 4979 4825 } 4980 4826 4981 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4827 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 4982 4828 if (!skb2) 4983 4829 return -ENOMEM; 4984 4830 ··· 5123 4969 { 5124 4970 struct nft_flowtable *flowtable; 5125 4971 5126 - list_for_each_entry(flowtable, &table->flowtables, list) { 4972 + list_for_each_entry_rcu(flowtable, 
&table->flowtables, list) { 5127 4973 if (!nla_strcmp(nla, flowtable->name) && 5128 4974 nft_active_genmask(flowtable, genmask)) 5129 4975 return flowtable; ··· 5584 5430 { 5585 5431 struct nft_flowtable_filter *filter; 5586 5432 5587 - filter = kzalloc(sizeof(*filter), GFP_KERNEL); 5433 + filter = kzalloc(sizeof(*filter), GFP_ATOMIC); 5588 5434 if (!filter) 5589 5435 return ERR_PTR(-ENOMEM); 5590 5436 5591 5437 if (nla[NFTA_FLOWTABLE_TABLE]) { 5592 5438 filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE], 5593 - GFP_KERNEL); 5439 + GFP_ATOMIC); 5594 5440 if (!filter->table) { 5595 5441 kfree(filter); 5596 5442 return ERR_PTR(-ENOMEM); ··· 5599 5445 return filter; 5600 5446 } 5601 5447 5448 + /* called with rcu_read_lock held */ 5602 5449 static int nf_tables_getflowtable(struct net *net, struct sock *nlsk, 5603 5450 struct sk_buff *skb, 5604 5451 const struct nlmsghdr *nlh, ··· 5618 5463 struct netlink_dump_control c = { 5619 5464 .dump = nf_tables_dump_flowtable, 5620 5465 .done = nf_tables_dump_flowtable_done, 5466 + .module = THIS_MODULE, 5621 5467 }; 5622 5468 5623 5469 if (nla[NFTA_FLOWTABLE_TABLE]) { ··· 5630 5474 5631 5475 c.data = filter; 5632 5476 } 5633 - return netlink_dump_start(nlsk, skb, nlh, &c); 5477 + return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c); 5634 5478 } 5635 5479 5636 5480 if (!nla[NFTA_FLOWTABLE_NAME]) ··· 5646 5490 if (IS_ERR(flowtable)) 5647 5491 return PTR_ERR(flowtable); 5648 5492 5649 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 5493 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 5650 5494 if (!skb2) 5651 5495 return -ENOMEM; 5652 5496 ··· 5810 5654 struct sk_buff *skb2; 5811 5655 int err; 5812 5656 5813 - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 5657 + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 5814 5658 if (skb2 == NULL) 5815 5659 return -ENOMEM; 5816 5660 ··· 5832 5676 .policy = nft_table_policy, 5833 5677 }, 5834 5678 [NFT_MSG_GETTABLE] = { 5835 - .call = nf_tables_gettable, 5679 + .call_rcu = 
nf_tables_gettable, 5836 5680 .attr_count = NFTA_TABLE_MAX, 5837 5681 .policy = nft_table_policy, 5838 5682 }, ··· 5847 5691 .policy = nft_chain_policy, 5848 5692 }, 5849 5693 [NFT_MSG_GETCHAIN] = { 5850 - .call = nf_tables_getchain, 5694 + .call_rcu = nf_tables_getchain, 5851 5695 .attr_count = NFTA_CHAIN_MAX, 5852 5696 .policy = nft_chain_policy, 5853 5697 }, ··· 5862 5706 .policy = nft_rule_policy, 5863 5707 }, 5864 5708 [NFT_MSG_GETRULE] = { 5865 - .call = nf_tables_getrule, 5709 + .call_rcu = nf_tables_getrule, 5866 5710 .attr_count = NFTA_RULE_MAX, 5867 5711 .policy = nft_rule_policy, 5868 5712 }, ··· 5877 5721 .policy = nft_set_policy, 5878 5722 }, 5879 5723 [NFT_MSG_GETSET] = { 5880 - .call = nf_tables_getset, 5724 + .call_rcu = nf_tables_getset, 5881 5725 .attr_count = NFTA_SET_MAX, 5882 5726 .policy = nft_set_policy, 5883 5727 }, ··· 5892 5736 .policy = nft_set_elem_list_policy, 5893 5737 }, 5894 5738 [NFT_MSG_GETSETELEM] = { 5895 - .call = nf_tables_getsetelem, 5739 + .call_rcu = nf_tables_getsetelem, 5896 5740 .attr_count = NFTA_SET_ELEM_LIST_MAX, 5897 5741 .policy = nft_set_elem_list_policy, 5898 5742 }, ··· 5902 5746 .policy = nft_set_elem_list_policy, 5903 5747 }, 5904 5748 [NFT_MSG_GETGEN] = { 5905 - .call = nf_tables_getgen, 5749 + .call_rcu = nf_tables_getgen, 5906 5750 }, 5907 5751 [NFT_MSG_NEWOBJ] = { 5908 5752 .call_batch = nf_tables_newobj, ··· 5910 5754 .policy = nft_obj_policy, 5911 5755 }, 5912 5756 [NFT_MSG_GETOBJ] = { 5913 - .call = nf_tables_getobj, 5757 + .call_rcu = nf_tables_getobj, 5914 5758 .attr_count = NFTA_OBJ_MAX, 5915 5759 .policy = nft_obj_policy, 5916 5760 }, ··· 5920 5764 .policy = nft_obj_policy, 5921 5765 }, 5922 5766 [NFT_MSG_GETOBJ_RESET] = { 5923 - .call = nf_tables_getobj, 5767 + .call_rcu = nf_tables_getobj, 5924 5768 .attr_count = NFTA_OBJ_MAX, 5925 5769 .policy = nft_obj_policy, 5926 5770 }, ··· 5930 5774 .policy = nft_flowtable_policy, 5931 5775 }, 5932 5776 [NFT_MSG_GETFLOWTABLE] = { 5933 - .call = 
nf_tables_getflowtable, 5777 + .call_rcu = nf_tables_getflowtable, 5934 5778 .attr_count = NFTA_FLOWTABLE_MAX, 5935 5779 .policy = nft_flowtable_policy, 5936 5780 }, ··· 5940 5784 .policy = nft_flowtable_policy, 5941 5785 }, 5942 5786 }; 5787 + 5788 + static int nf_tables_validate(struct net *net) 5789 + { 5790 + struct nft_table *table; 5791 + 5792 + switch (net->nft.validate_state) { 5793 + case NFT_VALIDATE_SKIP: 5794 + break; 5795 + case NFT_VALIDATE_NEED: 5796 + nft_validate_state_update(net, NFT_VALIDATE_DO); 5797 + /* fall through */ 5798 + case NFT_VALIDATE_DO: 5799 + list_for_each_entry(table, &net->nft.tables, list) { 5800 + if (nft_table_validate(net, table) < 0) 5801 + return -EAGAIN; 5802 + } 5803 + break; 5804 + } 5805 + 5806 + return 0; 5807 + } 5943 5808 5944 5809 static void nft_chain_commit_update(struct nft_trans *trans) 5945 5810 { ··· 6027 5850 } 6028 5851 } 6029 5852 5853 + static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) 5854 + { 5855 + struct nft_rule *rule; 5856 + unsigned int alloc = 0; 5857 + int i; 5858 + 5859 + /* already handled or inactive chain? 
*/ 5860 + if (chain->rules_next || !nft_is_active_next(net, chain)) 5861 + return 0; 5862 + 5863 + rule = list_entry(&chain->rules, struct nft_rule, list); 5864 + i = 0; 5865 + 5866 + list_for_each_entry_continue(rule, &chain->rules, list) { 5867 + if (nft_is_active_next(net, rule)) 5868 + alloc++; 5869 + } 5870 + 5871 + chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc); 5872 + if (!chain->rules_next) 5873 + return -ENOMEM; 5874 + 5875 + list_for_each_entry_continue(rule, &chain->rules, list) { 5876 + if (nft_is_active_next(net, rule)) 5877 + chain->rules_next[i++] = rule; 5878 + } 5879 + 5880 + chain->rules_next[i] = NULL; 5881 + return 0; 5882 + } 5883 + 5884 + static void nf_tables_commit_chain_prepare_cancel(struct net *net) 5885 + { 5886 + struct nft_trans *trans, *next; 5887 + 5888 + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { 5889 + struct nft_chain *chain = trans->ctx.chain; 5890 + 5891 + if (trans->msg_type == NFT_MSG_NEWRULE || 5892 + trans->msg_type == NFT_MSG_DELRULE) { 5893 + kvfree(chain->rules_next); 5894 + chain->rules_next = NULL; 5895 + } 5896 + } 5897 + } 5898 + 5899 + static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h) 5900 + { 5901 + struct nft_rules_old *o = container_of(h, struct nft_rules_old, h); 5902 + 5903 + kvfree(o->start); 5904 + } 5905 + 5906 + static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules) 5907 + { 5908 + struct nft_rule **r = rules; 5909 + struct nft_rules_old *old; 5910 + 5911 + while (*r) 5912 + r++; 5913 + 5914 + r++; /* rcu_head is after end marker */ 5915 + old = (void *) r; 5916 + old->start = rules; 5917 + 5918 + call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old); 5919 + } 5920 + 5921 + static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain) 5922 + { 5923 + struct nft_rule **g0, **g1; 5924 + bool next_genbit; 5925 + 5926 + next_genbit = nft_gencursor_next(net); 5927 + 5928 + g0 = 
rcu_dereference_protected(chain->rules_gen_0, 5929 + lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 5930 + g1 = rcu_dereference_protected(chain->rules_gen_1, 5931 + lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES)); 5932 + 5933 + /* No changes to this chain? */ 5934 + if (chain->rules_next == NULL) { 5935 + /* chain had no change in last or next generation */ 5936 + if (g0 == g1) 5937 + return; 5938 + /* 5939 + * chain had no change in this generation; make sure next 5940 + * one uses same rules as current generation. 5941 + */ 5942 + if (next_genbit) { 5943 + rcu_assign_pointer(chain->rules_gen_1, g0); 5944 + nf_tables_commit_chain_free_rules_old(g1); 5945 + } else { 5946 + rcu_assign_pointer(chain->rules_gen_0, g1); 5947 + nf_tables_commit_chain_free_rules_old(g0); 5948 + } 5949 + 5950 + return; 5951 + } 5952 + 5953 + if (next_genbit) 5954 + rcu_assign_pointer(chain->rules_gen_1, chain->rules_next); 5955 + else 5956 + rcu_assign_pointer(chain->rules_gen_0, chain->rules_next); 5957 + 5958 + chain->rules_next = NULL; 5959 + 5960 + if (g0 == g1) 5961 + return; 5962 + 5963 + if (next_genbit) 5964 + nf_tables_commit_chain_free_rules_old(g1); 5965 + else 5966 + nf_tables_commit_chain_free_rules_old(g0); 5967 + } 5968 + 6030 5969 static int nf_tables_commit(struct net *net, struct sk_buff *skb) 6031 5970 { 6032 5971 struct nft_trans *trans, *next; 6033 5972 struct nft_trans_elem *te; 5973 + struct nft_chain *chain; 5974 + struct nft_table *table; 6034 5975 6035 - /* Bump generation counter, invalidate any dump in progress */ 5976 + /* 0. Validate ruleset, otherwise roll back for error reporting. */ 5977 + if (nf_tables_validate(net) < 0) 5978 + return -EAGAIN; 5979 + 5980 + /* 1. 
Allocate space for next generation rules_gen_X[] */ 5981 + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { 5982 + int ret; 5983 + 5984 + if (trans->msg_type == NFT_MSG_NEWRULE || 5985 + trans->msg_type == NFT_MSG_DELRULE) { 5986 + chain = trans->ctx.chain; 5987 + 5988 + ret = nf_tables_commit_chain_prepare(net, chain); 5989 + if (ret < 0) { 5990 + nf_tables_commit_chain_prepare_cancel(net); 5991 + return ret; 5992 + } 5993 + } 5994 + } 5995 + 5996 + /* step 2. Make rules_gen_X visible to packet path */ 5997 + list_for_each_entry(table, &net->nft.tables, list) { 5998 + list_for_each_entry(chain, &table->chains, list) { 5999 + if (!nft_is_active_next(net, chain)) 6000 + continue; 6001 + nf_tables_commit_chain_active(net, chain); 6002 + } 6003 + } 6004 + 6005 + /* 6006 + * Bump generation counter, invalidate any dump in progress. 6007 + * Cannot fail after this point. 6008 + */ 6036 6009 while (++net->nft.base_seq == 0); 6037 6010 6038 - /* A new generation has just started */ 6011 + /* step 3. Start new generation, rules_gen_X now in use. */ 6039 6012 net->nft.gencursor = nft_gencursor_next(net); 6040 - 6041 - /* Make sure all packets have left the previous generation before 6042 - * purging old rules. 
6043 - */ 6044 - synchronize_rcu(); 6045 6013 6046 6014 list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { 6047 6015 switch (trans->msg_type) { ··· 6448 6126 return 0; 6449 6127 } 6450 6128 6129 + static void nf_tables_cleanup(struct net *net) 6130 + { 6131 + nft_validate_state_update(net, NFT_VALIDATE_SKIP); 6132 + } 6133 + 6451 6134 static bool nf_tables_valid_genid(struct net *net, u32 genid) 6452 6135 { 6453 6136 return net->nft.base_seq == genid; ··· 6465 6138 .cb = nf_tables_cb, 6466 6139 .commit = nf_tables_commit, 6467 6140 .abort = nf_tables_abort, 6141 + .cleanup = nf_tables_cleanup, 6468 6142 .valid_genid = nf_tables_valid_genid, 6469 6143 }; 6470 6144 ··· 6549 6221 6550 6222 list_for_each_entry(rule, &chain->rules, list) { 6551 6223 nft_rule_for_each_expr(expr, last, rule) { 6552 - const struct nft_data *data = NULL; 6224 + struct nft_immediate_expr *priv; 6225 + const struct nft_data *data; 6553 6226 int err; 6554 6227 6555 - if (!expr->ops->validate) 6228 + if (strcmp(expr->ops->type->name, "immediate")) 6556 6229 continue; 6557 6230 6558 - err = expr->ops->validate(ctx, expr, &data); 6559 - if (err < 0) 6560 - return err; 6561 - 6562 - if (data == NULL) 6231 + priv = nft_expr_priv(expr); 6232 + if (priv->dreg != NFT_REG_VERDICT) 6563 6233 continue; 6564 6234 6235 + data = &priv->data; 6565 6236 switch (data->verdict.code) { 6566 6237 case NFT_JUMP: 6567 6238 case NFT_GOTO: ··· 7040 6713 INIT_LIST_HEAD(&net->nft.tables); 7041 6714 INIT_LIST_HEAD(&net->nft.commit_list); 7042 6715 net->nft.base_seq = 1; 6716 + net->nft.validate_state = NFT_VALIDATE_SKIP; 6717 + 7043 6718 return 0; 7044 6719 } 7045 6720
+13 -27
net/netfilter/nf_tables_core.c
··· 23 23 #include <net/netfilter/nf_tables.h> 24 24 #include <net/netfilter/nf_log.h> 25 25 26 - static const char *const comments[__NFT_TRACETYPE_MAX] = { 27 - [NFT_TRACETYPE_POLICY] = "policy", 28 - [NFT_TRACETYPE_RETURN] = "return", 29 - [NFT_TRACETYPE_RULE] = "rule", 30 - }; 31 - 32 - static const struct nf_loginfo trace_loginfo = { 33 - .type = NF_LOG_TYPE_LOG, 34 - .u = { 35 - .log = { 36 - .level = LOGLEVEL_WARNING, 37 - .logflags = NF_LOG_DEFAULT_MASK, 38 - }, 39 - }, 40 - }; 41 - 42 26 static noinline void __nft_trace_packet(struct nft_traceinfo *info, 43 27 const struct nft_chain *chain, 44 28 enum nft_trace_types type) ··· 117 133 118 134 struct nft_jumpstack { 119 135 const struct nft_chain *chain; 120 - const struct nft_rule *rule; 136 + struct nft_rule *const *rules; 121 137 }; 122 138 123 139 unsigned int ··· 125 141 { 126 142 const struct nft_chain *chain = priv, *basechain = chain; 127 143 const struct net *net = nft_net(pkt); 144 + struct nft_rule *const *rules; 128 145 const struct nft_rule *rule; 129 146 const struct nft_expr *expr, *last; 130 147 struct nft_regs regs; 131 148 unsigned int stackptr = 0; 132 149 struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; 133 - unsigned int gencursor = nft_genmask_cur(net); 150 + bool genbit = READ_ONCE(net->nft.gencursor); 134 151 struct nft_traceinfo info; 135 152 136 153 info.trace = false; 137 154 if (static_branch_unlikely(&nft_trace_enabled)) 138 155 nft_trace_init(&info, pkt, &regs.verdict, basechain); 139 156 do_chain: 140 - rule = list_entry(&chain->rules, struct nft_rule, list); 157 + if (genbit) 158 + rules = rcu_dereference(chain->rules_gen_1); 159 + else 160 + rules = rcu_dereference(chain->rules_gen_0); 161 + 141 162 next_rule: 163 + rule = *rules; 142 164 regs.verdict.code = NFT_CONTINUE; 143 - list_for_each_entry_continue_rcu(rule, &chain->rules, list) { 144 - 145 - /* This rule is not active, skip. 
*/ 146 - if (unlikely(rule->genmask & gencursor)) 147 - continue; 148 - 165 + for (; *rules ; rules++) { 166 + rule = *rules; 149 167 nft_rule_for_each_expr(expr, last, rule) { 150 168 if (expr->ops == &nft_cmp_fast_ops) 151 169 nft_cmp_fast_eval(expr, &regs); ··· 185 199 case NFT_JUMP: 186 200 BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); 187 201 jumpstack[stackptr].chain = chain; 188 - jumpstack[stackptr].rule = rule; 202 + jumpstack[stackptr].rules = rules + 1; 189 203 stackptr++; 190 204 /* fall through */ 191 205 case NFT_GOTO: ··· 207 221 if (stackptr > 0) { 208 222 stackptr--; 209 223 chain = jumpstack[stackptr].chain; 210 - rule = jumpstack[stackptr].rule; 224 + rules = jumpstack[stackptr].rules; 211 225 goto next_rule; 212 226 } 213 227
+41 -3
net/netfilter/nfnetlink.c
··· 25 25 #include <linux/uaccess.h> 26 26 #include <net/sock.h> 27 27 #include <linux/init.h> 28 + #include <linux/sched/signal.h> 28 29 29 30 #include <net/netlink.h> 30 31 #include <linux/netfilter/nfnetlink.h> ··· 37 36 #define nfnl_dereference_protected(id) \ 38 37 rcu_dereference_protected(table[(id)].subsys, \ 39 38 lockdep_nfnl_is_held((id))) 39 + 40 + #define NFNL_MAX_ATTR_COUNT 32 40 41 41 42 static struct { 42 43 struct mutex mutex; ··· 79 76 80 77 int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) 81 78 { 79 + u8 cb_id; 80 + 81 + /* Sanity-check attr_count size to avoid stack buffer overflow. */ 82 + for (cb_id = 0; cb_id < n->cb_count; cb_id++) 83 + if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT)) 84 + return -EINVAL; 85 + 82 86 nfnl_lock(n->subsys_id); 83 87 if (table[n->subsys_id].subsys) { 84 88 nfnl_unlock(n->subsys_id); ··· 195 185 { 196 186 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 197 187 u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); 198 - struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; 188 + struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; 199 189 struct nlattr *attr = (void *)nlh + min_len; 200 190 int attrlen = nlh->nlmsg_len - min_len; 201 191 __u8 subsys_id = NFNL_SUBSYS_ID(type); 192 + 193 + /* Sanity-check NFNL_MAX_ATTR_COUNT */ 194 + if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { 195 + rcu_read_unlock(); 196 + return -ENOMEM; 197 + } 202 198 203 199 err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen, 204 200 ss->cb[cb_id].policy, extack); ··· 346 330 while (skb->len >= nlmsg_total_size(0)) { 347 331 int msglen, type; 348 332 333 + if (fatal_signal_pending(current)) { 334 + nfnl_err_reset(&err_list); 335 + err = -EINTR; 336 + status = NFNL_BATCH_FAILURE; 337 + goto done; 338 + } 339 + 349 340 memset(&extack, 0, sizeof(extack)); 350 341 nlh = nlmsg_hdr(skb); 351 342 err = 0; ··· 402 379 { 403 380 int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); 404 381 u8 cb_id = 
NFNL_MSG_TYPE(nlh->nlmsg_type); 405 - struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; 382 + struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; 406 383 struct nlattr *attr = (void *)nlh + min_len; 407 384 int attrlen = nlh->nlmsg_len - min_len; 385 + 386 + /* Sanity-check NFTA_MAX_ATTR */ 387 + if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { 388 + err = -ENOMEM; 389 + goto ack; 390 + } 408 391 409 392 err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, 410 393 attrlen, ss->cb[cb_id].policy, NULL); ··· 470 441 kfree_skb(skb); 471 442 goto replay; 472 443 } else if (status == NFNL_BATCH_DONE) { 473 - ss->commit(net, oskb); 444 + err = ss->commit(net, oskb); 445 + if (err == -EAGAIN) { 446 + status |= NFNL_BATCH_REPLAY; 447 + goto done; 448 + } else if (err) { 449 + ss->abort(net, oskb); 450 + netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); 451 + } 474 452 } else { 475 453 ss->abort(net, oskb); 476 454 } 455 + if (ss->cleanup) 456 + ss->cleanup(net); 477 457 478 458 nfnl_err_deliver(&err_list, oskb); 479 459 nfnl_unlock(subsys_id);
+18 -11
net/netfilter/nft_compat.c
··· 611 611 return -1; 612 612 } 613 613 614 - static int nfnl_compat_get(struct net *net, struct sock *nfnl, 615 - struct sk_buff *skb, const struct nlmsghdr *nlh, 616 - const struct nlattr * const tb[], 617 - struct netlink_ext_ack *extack) 614 + static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl, 615 + struct sk_buff *skb, const struct nlmsghdr *nlh, 616 + const struct nlattr * const tb[], 617 + struct netlink_ext_ack *extack) 618 618 { 619 619 int ret = 0, target; 620 620 struct nfgenmsg *nfmsg; ··· 653 653 return -EINVAL; 654 654 } 655 655 656 + if (!try_module_get(THIS_MODULE)) 657 + return -EINVAL; 658 + 659 + rcu_read_unlock(); 656 660 try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name, 657 661 rev, target, &ret), 658 662 fmt, name); 659 - 660 663 if (ret < 0) 661 - return ret; 664 + goto out_put; 662 665 663 666 skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 664 - if (skb2 == NULL) 665 - return -ENOMEM; 667 + if (skb2 == NULL) { 668 + ret = -ENOMEM; 669 + goto out_put; 670 + } 666 671 667 672 /* include the best revision for this extension in the message */ 668 673 if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid, ··· 677 672 nfmsg->nfgen_family, 678 673 name, ret, target) <= 0) { 679 674 kfree_skb(skb2); 680 - return -ENOSPC; 675 + goto out_put; 681 676 } 682 677 683 678 ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, 684 679 MSG_DONTWAIT); 685 680 if (ret > 0) 686 681 ret = 0; 687 - 682 + out_put: 683 + rcu_read_lock(); 684 + module_put(THIS_MODULE); 688 685 return ret == -EAGAIN ? -ENOBUFS : ret; 689 686 } 690 687 ··· 698 691 }; 699 692 700 693 static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = { 701 - [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get, 694 + [NFNL_MSG_COMPAT_GET] = { .call_rcu = nfnl_compat_get_rcu, 702 695 .attr_count = NFTA_COMPAT_MAX, 703 696 .policy = nfnl_compat_policy_get }, 704 697 };
+145 -1
net/netfilter/nft_fwd_netdev.c
··· 12 12 #include <linux/netlink.h> 13 13 #include <linux/netfilter.h> 14 14 #include <linux/netfilter/nf_tables.h> 15 + #include <linux/ip.h> 16 + #include <linux/ipv6.h> 15 17 #include <net/netfilter/nf_tables.h> 16 18 #include <net/netfilter/nf_dup_netdev.h> 19 + #include <net/neighbour.h> 20 + #include <net/ip.h> 17 21 18 22 struct nft_fwd_netdev { 19 23 enum nft_registers sreg_dev:8; ··· 36 32 37 33 static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = { 38 34 [NFTA_FWD_SREG_DEV] = { .type = NLA_U32 }, 35 + [NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 }, 36 + [NFTA_FWD_NFPROTO] = { .type = NLA_U32 }, 39 37 }; 40 38 41 39 static int nft_fwd_netdev_init(const struct nft_ctx *ctx, ··· 68 62 return -1; 69 63 } 70 64 65 + struct nft_fwd_neigh { 66 + enum nft_registers sreg_dev:8; 67 + enum nft_registers sreg_addr:8; 68 + u8 nfproto; 69 + }; 70 + 71 + static void nft_fwd_neigh_eval(const struct nft_expr *expr, 72 + struct nft_regs *regs, 73 + const struct nft_pktinfo *pkt) 74 + { 75 + struct nft_fwd_neigh *priv = nft_expr_priv(expr); 76 + void *addr = &regs->data[priv->sreg_addr]; 77 + int oif = regs->data[priv->sreg_dev]; 78 + unsigned int verdict = NF_STOLEN; 79 + struct sk_buff *skb = pkt->skb; 80 + struct net_device *dev; 81 + int neigh_table; 82 + 83 + switch (priv->nfproto) { 84 + case NFPROTO_IPV4: { 85 + struct iphdr *iph; 86 + 87 + if (skb->protocol != htons(ETH_P_IP)) { 88 + verdict = NFT_BREAK; 89 + goto out; 90 + } 91 + if (skb_try_make_writable(skb, sizeof(*iph))) { 92 + verdict = NF_DROP; 93 + goto out; 94 + } 95 + iph = ip_hdr(skb); 96 + ip_decrease_ttl(iph); 97 + neigh_table = NEIGH_ARP_TABLE; 98 + break; 99 + } 100 + case NFPROTO_IPV6: { 101 + struct ipv6hdr *ip6h; 102 + 103 + if (skb->protocol != htons(ETH_P_IPV6)) { 104 + verdict = NFT_BREAK; 105 + goto out; 106 + } 107 + if (skb_try_make_writable(skb, sizeof(*ip6h))) { 108 + verdict = NF_DROP; 109 + goto out; 110 + } 111 + ip6h = ipv6_hdr(skb); 112 + ip6h->hop_limit--; 113 + 
neigh_table = NEIGH_ND_TABLE; 114 + break; 115 + } 116 + default: 117 + verdict = NFT_BREAK; 118 + goto out; 119 + } 120 + 121 + dev = dev_get_by_index_rcu(nft_net(pkt), oif); 122 + if (dev == NULL) 123 + return; 124 + 125 + skb->dev = dev; 126 + neigh_xmit(neigh_table, dev, addr, skb); 127 + out: 128 + regs->verdict.code = verdict; 129 + } 130 + 131 + static int nft_fwd_neigh_init(const struct nft_ctx *ctx, 132 + const struct nft_expr *expr, 133 + const struct nlattr * const tb[]) 134 + { 135 + struct nft_fwd_neigh *priv = nft_expr_priv(expr); 136 + unsigned int addr_len; 137 + int err; 138 + 139 + if (!tb[NFTA_FWD_SREG_DEV] || 140 + !tb[NFTA_FWD_SREG_ADDR] || 141 + !tb[NFTA_FWD_NFPROTO]) 142 + return -EINVAL; 143 + 144 + priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]); 145 + priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]); 146 + priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO])); 147 + 148 + switch (priv->nfproto) { 149 + case NFPROTO_IPV4: 150 + addr_len = sizeof(struct in_addr); 151 + break; 152 + case NFPROTO_IPV6: 153 + addr_len = sizeof(struct in6_addr); 154 + break; 155 + default: 156 + return -EOPNOTSUPP; 157 + } 158 + 159 + err = nft_validate_register_load(priv->sreg_dev, sizeof(int)); 160 + if (err < 0) 161 + return err; 162 + 163 + return nft_validate_register_load(priv->sreg_addr, addr_len); 164 + } 165 + 166 + static const struct nft_expr_ops nft_fwd_netdev_ingress_ops; 167 + 168 + static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr) 169 + { 170 + struct nft_fwd_neigh *priv = nft_expr_priv(expr); 171 + 172 + if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev) || 173 + nft_dump_register(skb, NFTA_FWD_SREG_ADDR, priv->sreg_addr) || 174 + nla_put_be32(skb, NFTA_FWD_NFPROTO, htonl(priv->nfproto))) 175 + goto nla_put_failure; 176 + 177 + return 0; 178 + 179 + nla_put_failure: 180 + return -1; 181 + } 182 + 71 183 static struct nft_expr_type nft_fwd_netdev_type; 184 + static const struct 
nft_expr_ops nft_fwd_neigh_netdev_ops = { 185 + .type = &nft_fwd_netdev_type, 186 + .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_neigh)), 187 + .eval = nft_fwd_neigh_eval, 188 + .init = nft_fwd_neigh_init, 189 + .dump = nft_fwd_neigh_dump, 190 + }; 191 + 72 192 static const struct nft_expr_ops nft_fwd_netdev_ops = { 73 193 .type = &nft_fwd_netdev_type, 74 194 .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)), ··· 203 71 .dump = nft_fwd_netdev_dump, 204 72 }; 205 73 74 + static const struct nft_expr_ops * 75 + nft_fwd_select_ops(const struct nft_ctx *ctx, 76 + const struct nlattr * const tb[]) 77 + { 78 + if (tb[NFTA_FWD_SREG_ADDR]) 79 + return &nft_fwd_neigh_netdev_ops; 80 + if (tb[NFTA_FWD_SREG_DEV]) 81 + return &nft_fwd_netdev_ops; 82 + 83 + return ERR_PTR(-EOPNOTSUPP); 84 + } 85 + 206 86 static struct nft_expr_type nft_fwd_netdev_type __read_mostly = { 207 87 .family = NFPROTO_NETDEV, 208 88 .name = "fwd", 209 - .ops = &nft_fwd_netdev_ops, 89 + .select_ops = nft_fwd_select_ops, 210 90 .policy = nft_fwd_netdev_policy, 211 91 .maxattr = NFTA_FWD_MAX, 212 92 .owner = THIS_MODULE,
+2 -8
net/netfilter/nft_hash.c
··· 177 177 priv->map = nft_set_lookup_global(ctx->net, ctx->table, 178 178 tb[NFTA_HASH_SET_NAME], 179 179 tb[NFTA_HASH_SET_ID], genmask); 180 - if (IS_ERR(priv->map)) 181 - return PTR_ERR(priv->map); 182 - 183 - return 0; 180 + return PTR_ERR_OR_ZERO(priv->map); 184 181 } 185 182 186 183 static int nft_symhash_init(const struct nft_ctx *ctx, ··· 217 220 priv->map = nft_set_lookup_global(ctx->net, ctx->table, 218 221 tb[NFTA_HASH_SET_NAME], 219 222 tb[NFTA_HASH_SET_ID], genmask); 220 - if (IS_ERR(priv->map)) 221 - return PTR_ERR(priv->map); 222 - 223 - return 0; 223 + return PTR_ERR_OR_ZERO(priv->map); 224 224 } 225 225 226 226 static int nft_jhash_dump(struct sk_buff *skb,
+18 -9
net/netfilter/nft_immediate.c
··· 17 17 #include <net/netfilter/nf_tables_core.h> 18 18 #include <net/netfilter/nf_tables.h> 19 19 20 - struct nft_immediate_expr { 21 - struct nft_data data; 22 - enum nft_registers dreg:8; 23 - u8 dlen; 24 - }; 25 - 26 20 static void nft_immediate_eval(const struct nft_expr *expr, 27 21 struct nft_regs *regs, 28 22 const struct nft_pktinfo *pkt) ··· 95 101 96 102 static int nft_immediate_validate(const struct nft_ctx *ctx, 97 103 const struct nft_expr *expr, 98 - const struct nft_data **data) 104 + const struct nft_data **d) 99 105 { 100 106 const struct nft_immediate_expr *priv = nft_expr_priv(expr); 107 + const struct nft_data *data; 108 + int err; 101 109 102 - if (priv->dreg == NFT_REG_VERDICT) 103 - *data = &priv->data; 110 + if (priv->dreg != NFT_REG_VERDICT) 111 + return 0; 112 + 113 + data = &priv->data; 114 + 115 + switch (data->verdict.code) { 116 + case NFT_JUMP: 117 + case NFT_GOTO: 118 + err = nft_chain_validate(ctx, data->verdict.chain); 119 + if (err < 0) 120 + return err; 121 + break; 122 + default: 123 + break; 124 + } 104 125 105 126 return 0; 106 127 }
+91 -1
net/netfilter/nft_log.c
··· 9 9 * Development of this code funded by Astaro AG (http://www.astaro.com/) 10 10 */ 11 11 12 + #include <linux/audit.h> 12 13 #include <linux/kernel.h> 13 14 #include <linux/init.h> 14 15 #include <linux/module.h> 15 16 #include <linux/netlink.h> 16 17 #include <linux/netfilter.h> 17 18 #include <linux/netfilter/nf_tables.h> 19 + #include <net/ipv6.h> 20 + #include <net/ip.h> 18 21 #include <net/netfilter/nf_tables.h> 19 22 #include <net/netfilter/nf_log.h> 20 23 #include <linux/netdevice.h> ··· 29 26 char *prefix; 30 27 }; 31 28 29 + static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) 30 + { 31 + struct iphdr _iph; 32 + const struct iphdr *ih; 33 + 34 + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph); 35 + if (!ih) 36 + return false; 37 + 38 + audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu", 39 + &ih->saddr, &ih->daddr, ih->protocol); 40 + 41 + return true; 42 + } 43 + 44 + static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) 45 + { 46 + struct ipv6hdr _ip6h; 47 + const struct ipv6hdr *ih; 48 + u8 nexthdr; 49 + __be16 frag_off; 50 + 51 + ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); 52 + if (!ih) 53 + return false; 54 + 55 + nexthdr = ih->nexthdr; 56 + ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off); 57 + 58 + audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", 59 + &ih->saddr, &ih->daddr, nexthdr); 60 + 61 + return true; 62 + } 63 + 64 + static void nft_log_eval_audit(const struct nft_pktinfo *pkt) 65 + { 66 + struct sk_buff *skb = pkt->skb; 67 + struct audit_buffer *ab; 68 + int fam = -1; 69 + 70 + if (!audit_enabled) 71 + return; 72 + 73 + ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); 74 + if (!ab) 75 + return; 76 + 77 + audit_log_format(ab, "mark=%#x", skb->mark); 78 + 79 + switch (nft_pf(pkt)) { 80 + case NFPROTO_BRIDGE: 81 + switch (eth_hdr(skb)->h_proto) { 82 + case htons(ETH_P_IP): 83 + fam = 
audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; 84 + break; 85 + case htons(ETH_P_IPV6): 86 + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; 87 + break; 88 + } 89 + break; 90 + case NFPROTO_IPV4: 91 + fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; 92 + break; 93 + case NFPROTO_IPV6: 94 + fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; 95 + break; 96 + } 97 + 98 + if (fam == -1) 99 + audit_log_format(ab, " saddr=? daddr=? proto=-1"); 100 + 101 + audit_log_end(ab); 102 + } 103 + 32 104 static void nft_log_eval(const struct nft_expr *expr, 33 105 struct nft_regs *regs, 34 106 const struct nft_pktinfo *pkt) 35 107 { 36 108 const struct nft_log *priv = nft_expr_priv(expr); 109 + 110 + if (priv->loginfo.type == NF_LOG_TYPE_LOG && 111 + priv->loginfo.u.log.level == LOGLEVEL_AUDIT) { 112 + nft_log_eval_audit(pkt); 113 + return; 114 + } 37 115 38 116 nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, 39 117 nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", ··· 168 84 } else { 169 85 li->u.log.level = LOGLEVEL_WARNING; 170 86 } 171 - if (li->u.log.level > LOGLEVEL_DEBUG) { 87 + if (li->u.log.level > LOGLEVEL_AUDIT) { 172 88 err = -EINVAL; 173 89 goto err1; 174 90 } ··· 196 112 break; 197 113 } 198 114 115 + if (li->u.log.level == LOGLEVEL_AUDIT) 116 + return 0; 117 + 199 118 err = nf_logger_find_get(ctx->family, li->type); 200 119 if (err < 0) 201 120 goto err1; ··· 219 132 220 133 if (priv->prefix != nft_log_null_prefix) 221 134 kfree(priv->prefix); 135 + 136 + if (li->u.log.level == LOGLEVEL_AUDIT) 137 + return; 222 138 223 139 nf_logger_put(ctx->family, li->type); 224 140 }
+47
net/netfilter/nft_lookup.c
··· 149 149 return -1; 150 150 } 151 151 152 + static int nft_lookup_validate_setelem(const struct nft_ctx *ctx, 153 + struct nft_set *set, 154 + const struct nft_set_iter *iter, 155 + struct nft_set_elem *elem) 156 + { 157 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); 158 + const struct nft_data *data; 159 + 160 + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && 161 + *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) 162 + return 0; 163 + 164 + data = nft_set_ext_data(ext); 165 + switch (data->verdict.code) { 166 + case NFT_JUMP: 167 + case NFT_GOTO: 168 + return nft_chain_validate(ctx, data->verdict.chain); 169 + default: 170 + return 0; 171 + } 172 + } 173 + 174 + static int nft_lookup_validate(const struct nft_ctx *ctx, 175 + const struct nft_expr *expr, 176 + const struct nft_data **d) 177 + { 178 + const struct nft_lookup *priv = nft_expr_priv(expr); 179 + struct nft_set_iter iter; 180 + 181 + if (!(priv->set->flags & NFT_SET_MAP) || 182 + priv->set->dtype != NFT_DATA_VERDICT) 183 + return 0; 184 + 185 + iter.genmask = nft_genmask_next(ctx->net); 186 + iter.skip = 0; 187 + iter.count = 0; 188 + iter.err = 0; 189 + iter.fn = nft_lookup_validate_setelem; 190 + 191 + priv->set->ops->walk(ctx, priv->set, &iter); 192 + if (iter.err < 0) 193 + return iter.err; 194 + 195 + return 0; 196 + } 197 + 152 198 static const struct nft_expr_ops nft_lookup_ops = { 153 199 .type = &nft_lookup_type, 154 200 .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), ··· 202 156 .init = nft_lookup_init, 203 157 .destroy = nft_lookup_destroy, 204 158 .dump = nft_lookup_dump, 159 + .validate = nft_lookup_validate, 205 160 }; 206 161 207 162 struct nft_expr_type nft_lookup_type __read_mostly = {
+1 -4
net/netfilter/nft_numgen.c
··· 114 114 tb[NFTA_NG_SET_NAME], 115 115 tb[NFTA_NG_SET_ID], genmask); 116 116 117 - if (IS_ERR(priv->map)) 118 - return PTR_ERR(priv->map); 119 - 120 - return 0; 117 + return PTR_ERR_OR_ZERO(priv->map); 121 118 } 122 119 123 120 static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
+143
net/netfilter/nft_socket.c
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #include <linux/module.h> 3 + #include <linux/netfilter/nf_tables.h> 4 + #include <net/netfilter/nf_tables.h> 5 + #include <net/netfilter/nf_tables_core.h> 6 + #include <net/netfilter/nf_socket.h> 7 + #include <net/inet_sock.h> 8 + 9 + struct nft_socket { 10 + enum nft_socket_keys key:8; 11 + union { 12 + enum nft_registers dreg:8; 13 + }; 14 + }; 15 + 16 + static void nft_socket_eval(const struct nft_expr *expr, 17 + struct nft_regs *regs, 18 + const struct nft_pktinfo *pkt) 19 + { 20 + const struct nft_socket *priv = nft_expr_priv(expr); 21 + struct sk_buff *skb = pkt->skb; 22 + struct sock *sk = skb->sk; 23 + u32 *dest = &regs->data[priv->dreg]; 24 + 25 + if (!sk) 26 + switch(nft_pf(pkt)) { 27 + case NFPROTO_IPV4: 28 + sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt)); 29 + break; 30 + #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 31 + case NFPROTO_IPV6: 32 + sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt)); 33 + break; 34 + #endif 35 + default: 36 + WARN_ON_ONCE(1); 37 + regs->verdict.code = NFT_BREAK; 38 + return; 39 + } 40 + 41 + if(!sk) { 42 + nft_reg_store8(dest, 0); 43 + return; 44 + } 45 + 46 + /* So that subsequent socket matching not to require other lookups. 
*/ 47 + skb->sk = sk; 48 + 49 + switch(priv->key) { 50 + case NFT_SOCKET_TRANSPARENT: 51 + nft_reg_store8(dest, nf_sk_is_transparent(sk)); 52 + break; 53 + default: 54 + WARN_ON(1); 55 + regs->verdict.code = NFT_BREAK; 56 + } 57 + } 58 + 59 + static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { 60 + [NFTA_SOCKET_KEY] = { .type = NLA_U32 }, 61 + [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, 62 + }; 63 + 64 + static int nft_socket_init(const struct nft_ctx *ctx, 65 + const struct nft_expr *expr, 66 + const struct nlattr * const tb[]) 67 + { 68 + struct nft_socket *priv = nft_expr_priv(expr); 69 + unsigned int len; 70 + 71 + if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY]) 72 + return -EINVAL; 73 + 74 + switch(ctx->family) { 75 + case NFPROTO_IPV4: 76 + #if IS_ENABLED(CONFIG_NF_SOCKET_IPV6) 77 + case NFPROTO_IPV6: 78 + #endif 79 + case NFPROTO_INET: 80 + break; 81 + default: 82 + return -EOPNOTSUPP; 83 + } 84 + 85 + priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY])); 86 + switch(priv->key) { 87 + case NFT_SOCKET_TRANSPARENT: 88 + len = sizeof(u8); 89 + break; 90 + default: 91 + return -EOPNOTSUPP; 92 + } 93 + 94 + priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]); 95 + return nft_validate_register_store(ctx, priv->dreg, NULL, 96 + NFT_DATA_VALUE, len); 97 + } 98 + 99 + static int nft_socket_dump(struct sk_buff *skb, 100 + const struct nft_expr *expr) 101 + { 102 + const struct nft_socket *priv = nft_expr_priv(expr); 103 + 104 + if (nla_put_u32(skb, NFTA_SOCKET_KEY, htonl(priv->key))) 105 + return -1; 106 + if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg)) 107 + return -1; 108 + return 0; 109 + } 110 + 111 + static struct nft_expr_type nft_socket_type; 112 + static const struct nft_expr_ops nft_socket_ops = { 113 + .type = &nft_socket_type, 114 + .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)), 115 + .eval = nft_socket_eval, 116 + .init = nft_socket_init, 117 + .dump = nft_socket_dump, 118 + }; 119 + 120 + static struct nft_expr_type 
nft_socket_type __read_mostly = { 121 + .name = "socket", 122 + .ops = &nft_socket_ops, 123 + .policy = nft_socket_policy, 124 + .maxattr = NFTA_SOCKET_MAX, 125 + .owner = THIS_MODULE, 126 + }; 127 + 128 + static int __init nft_socket_module_init(void) 129 + { 130 + return nft_register_expr(&nft_socket_type); 131 + } 132 + 133 + static void __exit nft_socket_module_exit(void) 134 + { 135 + nft_unregister_expr(&nft_socket_type); 136 + } 137 + 138 + module_init(nft_socket_module_init); 139 + module_exit(nft_socket_module_exit); 140 + 141 + MODULE_LICENSE("GPL"); 142 + MODULE_AUTHOR("Máté Eckl"); 143 + MODULE_DESCRIPTION("nf_tables socket match module");