Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nat: move specific NAT IPv4 to core

Move the specific NAT IPv4 core functions that are called from the
hooks from iptable_nat.c to nf_nat_l3proto_ipv4.c. This prepares the
ground to allow iptables and nft to use the same NAT engine code that
comes in a follow-up patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+271 -199
+38
include/net/netfilter/nf_nat_l3proto.h
··· 42 42 int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, 43 43 enum ip_conntrack_info ctinfo, 44 44 unsigned int hooknum); 45 + 46 + unsigned int nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 47 + const struct net_device *in, 48 + const struct net_device *out, 49 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 50 + struct sk_buff *skb, 51 + const struct net_device *in, 52 + const struct net_device *out, 53 + struct nf_conn *ct)); 54 + 55 + unsigned int nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 56 + const struct net_device *in, 57 + const struct net_device *out, 58 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 59 + struct sk_buff *skb, 60 + const struct net_device *in, 61 + const struct net_device *out, 62 + struct nf_conn *ct)); 63 + 64 + unsigned int nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, 65 + struct sk_buff *skb, 66 + const struct net_device *in, 67 + const struct net_device *out, 68 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 69 + struct sk_buff *skb, 70 + const struct net_device *in, 71 + const struct net_device *out, 72 + struct nf_conn *ct)); 73 + 74 + unsigned int nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 75 + const struct net_device *in, 76 + const struct net_device *out, 77 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 78 + struct sk_buff *skb, 79 + const struct net_device *in, 80 + const struct net_device *out, 81 + struct nf_conn *ct)); 82 + 45 83 int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, 46 84 enum ip_conntrack_info ctinfo, 47 85 unsigned int hooknum, unsigned int hdrlen);
+34 -199
net/ipv4/netfilter/iptable_nat.c
··· 28 28 .af = NFPROTO_IPV4, 29 29 }; 30 30 31 - static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) 32 - { 33 - /* Force range to this IP; let proto decide mapping for 34 - * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). 35 - */ 36 - struct nf_nat_range range; 37 - 38 - range.flags = 0; 39 - pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, 40 - HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? 41 - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : 42 - &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); 43 - 44 - return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); 45 - } 46 - 47 - static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, 48 - const struct net_device *in, 49 - const struct net_device *out, 50 - struct nf_conn *ct) 31 + static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops, 32 + struct sk_buff *skb, 33 + const struct net_device *in, 34 + const struct net_device *out, 35 + struct nf_conn *ct) 51 36 { 52 37 struct net *net = nf_ct_net(ct); 53 - unsigned int ret; 54 38 55 - ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); 56 - if (ret == NF_ACCEPT) { 57 - if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) 58 - ret = alloc_null_binding(ct, hooknum); 59 - } 60 - return ret; 39 + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.nat_table); 61 40 } 62 41 63 - static unsigned int 64 - nf_nat_ipv4_fn(const struct nf_hook_ops *ops, 65 - struct sk_buff *skb, 66 - const struct net_device *in, 67 - const struct net_device *out, 68 - int (*okfn)(struct sk_buff *)) 42 + static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops, 43 + struct sk_buff *skb, 44 + const struct net_device *in, 45 + const struct net_device *out, 46 + int (*okfn)(struct sk_buff *)) 69 47 { 70 - struct nf_conn *ct; 71 - enum ip_conntrack_info ctinfo; 72 - struct nf_conn_nat *nat; 73 - /* maniptype == SRC for postrouting. 
*/ 74 - enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); 75 - 76 - /* We never see fragments: conntrack defrags on pre-routing 77 - * and local-out, and nf_nat_out protects post-routing. 78 - */ 79 - NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); 80 - 81 - ct = nf_ct_get(skb, &ctinfo); 82 - /* Can't track? It's not due to stress, or conntrack would 83 - * have dropped it. Hence it's the user's responsibilty to 84 - * packet filter it out, or implement conntrack/NAT for that 85 - * protocol. 8) --RR 86 - */ 87 - if (!ct) 88 - return NF_ACCEPT; 89 - 90 - /* Don't try to NAT if this packet is not conntracked */ 91 - if (nf_ct_is_untracked(ct)) 92 - return NF_ACCEPT; 93 - 94 - nat = nf_ct_nat_ext_add(ct); 95 - if (nat == NULL) 96 - return NF_ACCEPT; 97 - 98 - switch (ctinfo) { 99 - case IP_CT_RELATED: 100 - case IP_CT_RELATED_REPLY: 101 - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 102 - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 103 - ops->hooknum)) 104 - return NF_DROP; 105 - else 106 - return NF_ACCEPT; 107 - } 108 - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 109 - case IP_CT_NEW: 110 - /* Seen it before? This can happen for loopback, retrans, 111 - * or local packets. 112 - */ 113 - if (!nf_nat_initialized(ct, maniptype)) { 114 - unsigned int ret; 115 - 116 - ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); 117 - if (ret != NF_ACCEPT) 118 - return ret; 119 - } else { 120 - pr_debug("Already setup manip %s for ct %p\n", 121 - maniptype == NF_NAT_MANIP_SRC ? 
"SRC" : "DST", 122 - ct); 123 - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 124 - goto oif_changed; 125 - } 126 - break; 127 - 128 - default: 129 - /* ESTABLISHED */ 130 - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 131 - ctinfo == IP_CT_ESTABLISHED_REPLY); 132 - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 133 - goto oif_changed; 134 - } 135 - 136 - return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); 137 - 138 - oif_changed: 139 - nf_ct_kill_acct(ct, ctinfo, skb); 140 - return NF_DROP; 48 + return nf_nat_ipv4_fn(ops, skb, in, out, iptable_nat_do_chain); 141 49 } 142 50 143 - static unsigned int 144 - nf_nat_ipv4_in(const struct nf_hook_ops *ops, 145 - struct sk_buff *skb, 146 - const struct net_device *in, 147 - const struct net_device *out, 148 - int (*okfn)(struct sk_buff *)) 51 + static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops, 52 + struct sk_buff *skb, 53 + const struct net_device *in, 54 + const struct net_device *out, 55 + int (*okfn)(struct sk_buff *)) 149 56 { 150 - unsigned int ret; 151 - __be32 daddr = ip_hdr(skb)->daddr; 152 - 153 - ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); 154 - if (ret != NF_DROP && ret != NF_STOLEN && 155 - daddr != ip_hdr(skb)->daddr) 156 - skb_dst_drop(skb); 157 - 158 - return ret; 57 + return nf_nat_ipv4_in(ops, skb, in, out, iptable_nat_do_chain); 159 58 } 160 59 161 - static unsigned int 162 - nf_nat_ipv4_out(const struct nf_hook_ops *ops, 163 - struct sk_buff *skb, 164 - const struct net_device *in, 165 - const struct net_device *out, 166 - int (*okfn)(struct sk_buff *)) 60 + static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops, 61 + struct sk_buff *skb, 62 + const struct net_device *in, 63 + const struct net_device *out, 64 + int (*okfn)(struct sk_buff *)) 167 65 { 168 - #ifdef CONFIG_XFRM 169 - const struct nf_conn *ct; 170 - enum ip_conntrack_info ctinfo; 171 - int err; 172 - #endif 173 - unsigned int ret; 174 - 175 - /* root is playing with raw sockets. 
*/ 176 - if (skb->len < sizeof(struct iphdr) || 177 - ip_hdrlen(skb) < sizeof(struct iphdr)) 178 - return NF_ACCEPT; 179 - 180 - ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); 181 - #ifdef CONFIG_XFRM 182 - if (ret != NF_DROP && ret != NF_STOLEN && 183 - !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 184 - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 185 - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 186 - 187 - if ((ct->tuplehash[dir].tuple.src.u3.ip != 188 - ct->tuplehash[!dir].tuple.dst.u3.ip) || 189 - (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 190 - ct->tuplehash[dir].tuple.src.u.all != 191 - ct->tuplehash[!dir].tuple.dst.u.all)) { 192 - err = nf_xfrm_me_harder(skb, AF_INET); 193 - if (err < 0) 194 - ret = NF_DROP_ERR(err); 195 - } 196 - } 197 - #endif 198 - return ret; 66 + return nf_nat_ipv4_out(ops, skb, in, out, iptable_nat_do_chain); 199 67 } 200 68 201 - static unsigned int 202 - nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, 203 - struct sk_buff *skb, 204 - const struct net_device *in, 205 - const struct net_device *out, 206 - int (*okfn)(struct sk_buff *)) 69 + static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops, 70 + struct sk_buff *skb, 71 + const struct net_device *in, 72 + const struct net_device *out, 73 + int (*okfn)(struct sk_buff *)) 207 74 { 208 - const struct nf_conn *ct; 209 - enum ip_conntrack_info ctinfo; 210 - unsigned int ret; 211 - int err; 212 - 213 - /* root is playing with raw sockets. 
*/ 214 - if (skb->len < sizeof(struct iphdr) || 215 - ip_hdrlen(skb) < sizeof(struct iphdr)) 216 - return NF_ACCEPT; 217 - 218 - ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); 219 - if (ret != NF_DROP && ret != NF_STOLEN && 220 - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 221 - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 222 - 223 - if (ct->tuplehash[dir].tuple.dst.u3.ip != 224 - ct->tuplehash[!dir].tuple.src.u3.ip) { 225 - err = ip_route_me_harder(skb, RTN_UNSPEC); 226 - if (err < 0) 227 - ret = NF_DROP_ERR(err); 228 - } 229 - #ifdef CONFIG_XFRM 230 - else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 231 - ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 232 - ct->tuplehash[dir].tuple.dst.u.all != 233 - ct->tuplehash[!dir].tuple.src.u.all) { 234 - err = nf_xfrm_me_harder(skb, AF_INET); 235 - if (err < 0) 236 - ret = NF_DROP_ERR(err); 237 - } 238 - #endif 239 - } 240 - return ret; 75 + return nf_nat_ipv4_local_fn(ops, skb, in, out, iptable_nat_do_chain); 241 76 } 242 77 243 78 static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { 244 79 /* Before packet filtering, change destination */ 245 80 { 246 - .hook = nf_nat_ipv4_in, 81 + .hook = iptable_nat_ipv4_in, 247 82 .owner = THIS_MODULE, 248 83 .pf = NFPROTO_IPV4, 249 84 .hooknum = NF_INET_PRE_ROUTING, ··· 86 251 }, 87 252 /* After packet filtering, change source */ 88 253 { 89 - .hook = nf_nat_ipv4_out, 254 + .hook = iptable_nat_ipv4_out, 90 255 .owner = THIS_MODULE, 91 256 .pf = NFPROTO_IPV4, 92 257 .hooknum = NF_INET_POST_ROUTING, ··· 94 259 }, 95 260 /* Before packet filtering, change destination */ 96 261 { 97 - .hook = nf_nat_ipv4_local_fn, 262 + .hook = iptable_nat_ipv4_local_fn, 98 263 .owner = THIS_MODULE, 99 264 .pf = NFPROTO_IPV4, 100 265 .hooknum = NF_INET_LOCAL_OUT, ··· 102 267 }, 103 268 /* After packet filtering, change source */ 104 269 { 105 - .hook = nf_nat_ipv4_fn, 270 + .hook = iptable_nat_ipv4_fn, 106 271 .owner = THIS_MODULE, 107 272 .pf = NFPROTO_IPV4, 108 273 .hooknum 
= NF_INET_LOCAL_IN,
+199
net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
··· 254 254 } 255 255 EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); 256 256 257 + unsigned int 258 + nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 259 + const struct net_device *in, const struct net_device *out, 260 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 261 + struct sk_buff *skb, 262 + const struct net_device *in, 263 + const struct net_device *out, 264 + struct nf_conn *ct)) 265 + { 266 + struct nf_conn *ct; 267 + enum ip_conntrack_info ctinfo; 268 + struct nf_conn_nat *nat; 269 + /* maniptype == SRC for postrouting. */ 270 + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); 271 + 272 + /* We never see fragments: conntrack defrags on pre-routing 273 + * and local-out, and nf_nat_out protects post-routing. 274 + */ 275 + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); 276 + 277 + ct = nf_ct_get(skb, &ctinfo); 278 + /* Can't track? It's not due to stress, or conntrack would 279 + * have dropped it. Hence it's the user's responsibilty to 280 + * packet filter it out, or implement conntrack/NAT for that 281 + * protocol. 8) --RR 282 + */ 283 + if (!ct) 284 + return NF_ACCEPT; 285 + 286 + /* Don't try to NAT if this packet is not conntracked */ 287 + if (nf_ct_is_untracked(ct)) 288 + return NF_ACCEPT; 289 + 290 + nat = nf_ct_nat_ext_add(ct); 291 + if (nat == NULL) 292 + return NF_ACCEPT; 293 + 294 + switch (ctinfo) { 295 + case IP_CT_RELATED: 296 + case IP_CT_RELATED_REPLY: 297 + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { 298 + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, 299 + ops->hooknum)) 300 + return NF_DROP; 301 + else 302 + return NF_ACCEPT; 303 + } 304 + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ 305 + case IP_CT_NEW: 306 + /* Seen it before? This can happen for loopback, retrans, 307 + * or local packets. 
308 + */ 309 + if (!nf_nat_initialized(ct, maniptype)) { 310 + unsigned int ret; 311 + 312 + ret = do_chain(ops, skb, in, out, ct); 313 + if (ret != NF_ACCEPT) 314 + return ret; 315 + 316 + if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum))) 317 + break; 318 + 319 + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); 320 + if (ret != NF_ACCEPT) 321 + return ret; 322 + } else { 323 + pr_debug("Already setup manip %s for ct %p\n", 324 + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", 325 + ct); 326 + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 327 + goto oif_changed; 328 + } 329 + break; 330 + 331 + default: 332 + /* ESTABLISHED */ 333 + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || 334 + ctinfo == IP_CT_ESTABLISHED_REPLY); 335 + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) 336 + goto oif_changed; 337 + } 338 + 339 + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); 340 + 341 + oif_changed: 342 + nf_ct_kill_acct(ct, ctinfo, skb); 343 + return NF_DROP; 344 + } 345 + EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn); 346 + 347 + unsigned int 348 + nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb, 349 + const struct net_device *in, const struct net_device *out, 350 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 351 + struct sk_buff *skb, 352 + const struct net_device *in, 353 + const struct net_device *out, 354 + struct nf_conn *ct)) 355 + { 356 + unsigned int ret; 357 + __be32 daddr = ip_hdr(skb)->daddr; 358 + 359 + ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 360 + if (ret != NF_DROP && ret != NF_STOLEN && 361 + daddr != ip_hdr(skb)->daddr) 362 + skb_dst_drop(skb); 363 + 364 + return ret; 365 + } 366 + EXPORT_SYMBOL_GPL(nf_nat_ipv4_in); 367 + 368 + unsigned int 369 + nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb, 370 + const struct net_device *in, const struct net_device *out, 371 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 372 + struct sk_buff *skb, 373 + const struct net_device *in, 374 + 
const struct net_device *out, 375 + struct nf_conn *ct)) 376 + { 377 + #ifdef CONFIG_XFRM 378 + const struct nf_conn *ct; 379 + enum ip_conntrack_info ctinfo; 380 + int err; 381 + #endif 382 + unsigned int ret; 383 + 384 + /* root is playing with raw sockets. */ 385 + if (skb->len < sizeof(struct iphdr) || 386 + ip_hdrlen(skb) < sizeof(struct iphdr)) 387 + return NF_ACCEPT; 388 + 389 + ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 390 + #ifdef CONFIG_XFRM 391 + if (ret != NF_DROP && ret != NF_STOLEN && 392 + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 393 + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 394 + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 395 + 396 + if ((ct->tuplehash[dir].tuple.src.u3.ip != 397 + ct->tuplehash[!dir].tuple.dst.u3.ip) || 398 + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 399 + ct->tuplehash[dir].tuple.src.u.all != 400 + ct->tuplehash[!dir].tuple.dst.u.all)) { 401 + err = nf_xfrm_me_harder(skb, AF_INET); 402 + if (err < 0) 403 + ret = NF_DROP_ERR(err); 404 + } 405 + } 406 + #endif 407 + return ret; 408 + } 409 + EXPORT_SYMBOL_GPL(nf_nat_ipv4_out); 410 + 411 + unsigned int 412 + nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, 413 + const struct net_device *in, const struct net_device *out, 414 + unsigned int (*do_chain)(const struct nf_hook_ops *ops, 415 + struct sk_buff *skb, 416 + const struct net_device *in, 417 + const struct net_device *out, 418 + struct nf_conn *ct)) 419 + { 420 + const struct nf_conn *ct; 421 + enum ip_conntrack_info ctinfo; 422 + unsigned int ret; 423 + int err; 424 + 425 + /* root is playing with raw sockets. 
*/ 426 + if (skb->len < sizeof(struct iphdr) || 427 + ip_hdrlen(skb) < sizeof(struct iphdr)) 428 + return NF_ACCEPT; 429 + 430 + ret = nf_nat_ipv4_fn(ops, skb, in, out, do_chain); 431 + if (ret != NF_DROP && ret != NF_STOLEN && 432 + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { 433 + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 434 + 435 + if (ct->tuplehash[dir].tuple.dst.u3.ip != 436 + ct->tuplehash[!dir].tuple.src.u3.ip) { 437 + err = ip_route_me_harder(skb, RTN_UNSPEC); 438 + if (err < 0) 439 + ret = NF_DROP_ERR(err); 440 + } 441 + #ifdef CONFIG_XFRM 442 + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && 443 + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && 444 + ct->tuplehash[dir].tuple.dst.u.all != 445 + ct->tuplehash[!dir].tuple.src.u.all) { 446 + err = nf_xfrm_me_harder(skb, AF_INET); 447 + if (err < 0) 448 + ret = NF_DROP_ERR(err); 449 + } 450 + #endif 451 + } 452 + return ret; 453 + } 454 + EXPORT_SYMBOL_GPL(nf_nat_ipv4_local_fn); 455 + 257 456 static int __init nf_nat_l3proto_ipv4_init(void) 258 457 { 259 458 int err;