Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v3.1-rc6 1043 lines 28 kB view raw
1/* 2 * Handle firewalling 3 * Linux ethernet bridge 4 * 5 * Authors: 6 * Lennert Buytenhek <buytenh@gnu.org> 7 * Bart De Schuymer <bdschuym@pandora.be> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 * 14 * Lennert dedicates this file to Kerstin Wurdinger. 15 */ 16 17#include <linux/module.h> 18#include <linux/kernel.h> 19#include <linux/slab.h> 20#include <linux/ip.h> 21#include <linux/netdevice.h> 22#include <linux/skbuff.h> 23#include <linux/if_arp.h> 24#include <linux/if_ether.h> 25#include <linux/if_vlan.h> 26#include <linux/if_pppox.h> 27#include <linux/ppp_defs.h> 28#include <linux/netfilter_bridge.h> 29#include <linux/netfilter_ipv4.h> 30#include <linux/netfilter_ipv6.h> 31#include <linux/netfilter_arp.h> 32#include <linux/in_route.h> 33#include <linux/inetdevice.h> 34 35#include <net/ip.h> 36#include <net/ipv6.h> 37#include <net/route.h> 38 39#include <asm/uaccess.h> 40#include "br_private.h" 41#ifdef CONFIG_SYSCTL 42#include <linux/sysctl.h> 43#endif 44 45#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ 46 (skb->nf_bridge->data))->daddr.ipv4) 47#define store_orig_dstaddr(skb) (skb_origaddr(skb) = ip_hdr(skb)->daddr) 48#define dnat_took_place(skb) (skb_origaddr(skb) != ip_hdr(skb)->daddr) 49 50#ifdef CONFIG_SYSCTL 51static struct ctl_table_header *brnf_sysctl_header; 52static int brnf_call_iptables __read_mostly = 1; 53static int brnf_call_ip6tables __read_mostly = 1; 54static int brnf_call_arptables __read_mostly = 1; 55static int brnf_filter_vlan_tagged __read_mostly = 0; 56static int brnf_filter_pppoe_tagged __read_mostly = 0; 57#else 58#define brnf_call_iptables 1 59#define brnf_call_ip6tables 1 60#define brnf_call_arptables 1 61#define brnf_filter_vlan_tagged 0 62#define brnf_filter_pppoe_tagged 0 63#endif 64 65static inline __be16 vlan_proto(const struct sk_buff *skb) 66{ 67 if (vlan_tx_tag_present(skb)) 68 return skb->protocol; 69 else if (skb->protocol == htons(ETH_P_8021Q)) 70 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 71 else 72 return 0; 73} 74 75#define IS_VLAN_IP(skb) \ 76 (vlan_proto(skb) == htons(ETH_P_IP) && \ 77 brnf_filter_vlan_tagged) 78 79#define IS_VLAN_IPV6(skb) \ 80 (vlan_proto(skb) == htons(ETH_P_IPV6) && \ 81 brnf_filter_vlan_tagged) 82 83#define IS_VLAN_ARP(skb) \ 84 (vlan_proto(skb) == htons(ETH_P_ARP) && \ 85 brnf_filter_vlan_tagged) 86 87static inline __be16 pppoe_proto(const struct sk_buff *skb) 88{ 89 return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN + 90 sizeof(struct pppoe_hdr))); 91} 92 93#define IS_PPPOE_IP(skb) \ 94 (skb->protocol == htons(ETH_P_PPP_SES) && \ 95 pppoe_proto(skb) == htons(PPP_IP) && \ 96 brnf_filter_pppoe_tagged) 97 98#define IS_PPPOE_IPV6(skb) \ 99 (skb->protocol == htons(ETH_P_PPP_SES) && \ 100 pppoe_proto(skb) == htons(PPP_IPV6) && \ 101 brnf_filter_pppoe_tagged) 102 103static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) 104{ 105} 106 107static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old) 108{ 109 return NULL; 110} 111 112static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr) 113{ 114 return NULL; 115} 116 117static struct dst_ops fake_dst_ops = { 118 .family = AF_INET, 119 .protocol = cpu_to_be16(ETH_P_IP), 120 .update_pmtu = fake_update_pmtu, 121 .cow_metrics = fake_cow_metrics, 122 .neigh_lookup = fake_neigh_lookup, 123}; 124 125/* 126 * Initialize bogus route table used to keep netfilter happy. 127 * Currently, we fill in the PMTU entry because netfilter 128 * refragmentation needs it, and the rt_flags entry because 129 * ipt_REJECT needs it. Future netfilter modules might 130 * require us to fill additional fields. 131 */ 132static const u32 br_dst_default_metrics[RTAX_MAX] = { 133 [RTAX_MTU - 1] = 1500, 134}; 135 136void br_netfilter_rtable_init(struct net_bridge *br) 137{ 138 struct rtable *rt = &br->fake_rtable; 139 140 atomic_set(&rt->dst.__refcnt, 1); 141 rt->dst.dev = br->dev; 142 rt->dst.path = &rt->dst; 143 dst_init_metrics(&rt->dst, br_dst_default_metrics, true); 144 rt->dst.flags = DST_NOXFRM; 145 rt->dst.ops = &fake_dst_ops; 146} 147 148static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) 149{ 150 struct net_bridge_port *port; 151 152 port = br_port_get_rcu(dev); 153 return port ? &port->br->fake_rtable : NULL; 154} 155 156static inline struct net_device *bridge_parent(const struct net_device *dev) 157{ 158 struct net_bridge_port *port; 159 160 port = br_port_get_rcu(dev); 161 return port ? port->br->dev : NULL; 162} 163 164static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) 165{ 166 skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC); 167 if (likely(skb->nf_bridge)) 168 atomic_set(&(skb->nf_bridge->use), 1); 169 170 return skb->nf_bridge; 171} 172 173static inline struct nf_bridge_info *nf_bridge_unshare(struct sk_buff *skb) 174{ 175 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 176 177 if (atomic_read(&nf_bridge->use) > 1) { 178 struct nf_bridge_info *tmp = nf_bridge_alloc(skb); 179 180 if (tmp) { 181 memcpy(tmp, nf_bridge, sizeof(struct nf_bridge_info)); 182 atomic_set(&tmp->use, 1); 183 } 184 nf_bridge_put(nf_bridge); 185 nf_bridge = tmp; 186 } 187 return nf_bridge; 188} 189 190static inline void nf_bridge_push_encap_header(struct sk_buff *skb) 191{ 192 unsigned int len = nf_bridge_encap_header_len(skb); 193 194 skb_push(skb, len); 195 skb->network_header -= len; 196} 197 198static inline void nf_bridge_pull_encap_header(struct sk_buff *skb) 199{ 200 unsigned int len = nf_bridge_encap_header_len(skb); 201 202 skb_pull(skb, len); 203 skb->network_header += len; 204} 205 206static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) 207{ 208 unsigned int len = nf_bridge_encap_header_len(skb); 209 210 skb_pull_rcsum(skb, len); 211 skb->network_header += len; 212} 213 214static inline void nf_bridge_save_header(struct sk_buff *skb) 215{ 216 int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); 217 218 skb_copy_from_linear_data_offset(skb, -header_size, 219 skb->nf_bridge->data, header_size); 220} 221 222static inline void nf_bridge_update_protocol(struct sk_buff *skb) 223{ 224 if (skb->nf_bridge->mask & BRNF_8021Q) 225 skb->protocol = htons(ETH_P_8021Q); 226 else if (skb->nf_bridge->mask & BRNF_PPPoE) 227 skb->protocol = htons(ETH_P_PPP_SES); 228} 229 230/* When handing a packet over to the IP layer 231 * check whether we have a skb that is in the 232 * expected format 233 */ 234 235static int br_parse_ip_options(struct sk_buff *skb) 236{ 237 struct ip_options *opt; 238 const struct iphdr *iph; 239 struct net_device *dev = skb->dev; 240 u32 len; 241 242 iph = ip_hdr(skb); 243 opt = &(IPCB(skb)->opt); 244 245 /* Basic sanity checks */ 246 if (iph->ihl < 5 || iph->version != 4) 247 goto inhdr_error; 248 249 if (!pskb_may_pull(skb, iph->ihl*4)) 250 goto inhdr_error; 251 252 iph = ip_hdr(skb); 253 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 254 goto inhdr_error; 255 256 len = ntohs(iph->tot_len); 257 if (skb->len < len) { 258 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); 259 goto drop; 260 } else if (len < (iph->ihl*4)) 261 goto inhdr_error; 262 263 if (pskb_trim_rcsum(skb, len)) { 264 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); 265 goto drop; 266 } 267 268 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 269 if (iph->ihl == 5) 270 return 0; 271 272 opt->optlen = iph->ihl*4 - sizeof(struct iphdr); 273 if (ip_options_compile(dev_net(dev), opt, skb)) 274 goto inhdr_error; 275 276 /* Check correct handling of SRR option */ 277 if (unlikely(opt->srr)) { 278 struct in_device *in_dev = __in_dev_get_rcu(dev); 279 if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev)) 280 goto drop; 281 282 if (ip_options_rcv_srr(skb)) 283 goto drop; 284 } 285 286 return 0; 287 288inhdr_error: 289 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); 290drop: 291 return -1; 292} 293 294/* Fill in the header for fragmented IP packets handled by 295 * the IPv4 connection tracking code. 296 */ 297int nf_bridge_copy_header(struct sk_buff *skb) 298{ 299 int err; 300 unsigned int header_size; 301 302 nf_bridge_update_protocol(skb); 303 header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); 304 err = skb_cow_head(skb, header_size); 305 if (err) 306 return err; 307 308 skb_copy_to_linear_data_offset(skb, -header_size, 309 skb->nf_bridge->data, header_size); 310 __skb_push(skb, nf_bridge_encap_header_len(skb)); 311 return 0; 312} 313 314/* PF_BRIDGE/PRE_ROUTING *********************************************/ 315/* Undo the changes made for ip6tables PREROUTING and continue the 316 * bridge PRE_ROUTING hook. */ 317static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) 318{ 319 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 320 struct rtable *rt; 321 322 if (nf_bridge->mask & BRNF_PKT_TYPE) { 323 skb->pkt_type = PACKET_OTHERHOST; 324 nf_bridge->mask ^= BRNF_PKT_TYPE; 325 } 326 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 327 328 rt = bridge_parent_rtable(nf_bridge->physindev); 329 if (!rt) { 330 kfree_skb(skb); 331 return 0; 332 } 333 skb_dst_set_noref(skb, &rt->dst); 334 335 skb->dev = nf_bridge->physindev; 336 nf_bridge_update_protocol(skb); 337 nf_bridge_push_encap_header(skb); 338 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 339 br_handle_frame_finish, 1); 340 341 return 0; 342} 343 344/* Obtain the correct destination MAC address, while preserving the original 345 * source MAC address. If we already know this address, we just copy it. If we 346 * don't, we use the neighbour framework to find out. In both cases, we make 347 * sure that br_handle_frame_finish() is called afterwards. 348 */ 349static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) 350{ 351 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 352 struct neighbour *neigh; 353 struct dst_entry *dst; 354 355 skb->dev = bridge_parent(skb->dev); 356 if (!skb->dev) 357 goto free_skb; 358 dst = skb_dst(skb); 359 neigh = dst_get_neighbour(dst); 360 if (neigh->hh.hh_len) { 361 neigh_hh_bridge(&neigh->hh, skb); 362 skb->dev = nf_bridge->physindev; 363 return br_handle_frame_finish(skb); 364 } else { 365 /* the neighbour function below overwrites the complete 366 * MAC header, so we save the Ethernet source address and 367 * protocol number. */ 368 skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); 369 /* tell br_dev_xmit to continue with forwarding */ 370 nf_bridge->mask |= BRNF_BRIDGED_DNAT; 371 return neigh->output(neigh, skb); 372 } 373free_skb: 374 kfree_skb(skb); 375 return 0; 376} 377 378/* This requires some explaining. If DNAT has taken place, 379 * we will need to fix up the destination Ethernet address. 380 * 381 * There are two cases to consider: 382 * 1. The packet was DNAT'ed to a device in the same bridge 383 * port group as it was received on. We can still bridge 384 * the packet. 385 * 2. The packet was DNAT'ed to a different device, either 386 * a non-bridged device or another bridge port group. 387 * The packet will need to be routed. 388 * 389 * The correct way of distinguishing between these two cases is to 390 * call ip_route_input() and to look at skb->dst->dev, which is 391 * changed to the destination device if ip_route_input() succeeds. 392 * 393 * Let's first consider the case that ip_route_input() succeeds: 394 * 395 * If the output device equals the logical bridge device the packet 396 * came in on, we can consider this bridging. The corresponding MAC 397 * address will be obtained in br_nf_pre_routing_finish_bridge. 398 * Otherwise, the packet is considered to be routed and we just 399 * change the destination MAC address so that the packet will 400 * later be passed up to the IP stack to be routed. For a redirected 401 * packet, ip_route_input() will give back the localhost as output device, 402 * which differs from the bridge device. 403 * 404 * Let's now consider the case that ip_route_input() fails: 405 * 406 * This can be because the destination address is martian, in which case 407 * the packet will be dropped. 408 * If IP forwarding is disabled, ip_route_input() will fail, while 409 * ip_route_output_key() can return success. The source 410 * address for ip_route_output_key() is set to zero, so ip_route_output_key() 411 * thinks we're handling a locally generated packet and won't care 412 * if IP forwarding is enabled. If the output device equals the logical bridge 413 * device, we proceed as if ip_route_input() succeeded. If it differs from the 414 * logical bridge port or if ip_route_output_key() fails we drop the packet. 415 */ 416static int br_nf_pre_routing_finish(struct sk_buff *skb) 417{ 418 struct net_device *dev = skb->dev; 419 struct iphdr *iph = ip_hdr(skb); 420 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 421 struct rtable *rt; 422 int err; 423 424 if (nf_bridge->mask & BRNF_PKT_TYPE) { 425 skb->pkt_type = PACKET_OTHERHOST; 426 nf_bridge->mask ^= BRNF_PKT_TYPE; 427 } 428 nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; 429 if (dnat_took_place(skb)) { 430 if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { 431 struct in_device *in_dev = __in_dev_get_rcu(dev); 432 433 /* If err equals -EHOSTUNREACH the error is due to a 434 * martian destination or due to the fact that 435 * forwarding is disabled. For most martian packets, 436 * ip_route_output_key() will fail. It won't fail for 2 types of 437 * martian destinations: loopback destinations and destination 438 * 0.0.0.0. In both cases the packet will be dropped because the 439 * destination is the loopback device and not the bridge. */ 440 if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev)) 441 goto free_skb; 442 443 rt = ip_route_output(dev_net(dev), iph->daddr, 0, 444 RT_TOS(iph->tos), 0); 445 if (!IS_ERR(rt)) { 446 /* - Bridged-and-DNAT'ed traffic doesn't 447 * require ip_forwarding. */ 448 if (rt->dst.dev == dev) { 449 skb_dst_set(skb, &rt->dst); 450 goto bridged_dnat; 451 } 452 ip_rt_put(rt); 453 } 454free_skb: 455 kfree_skb(skb); 456 return 0; 457 } else { 458 if (skb_dst(skb)->dev == dev) { 459bridged_dnat: 460 skb->dev = nf_bridge->physindev; 461 nf_bridge_update_protocol(skb); 462 nf_bridge_push_encap_header(skb); 463 NF_HOOK_THRESH(NFPROTO_BRIDGE, 464 NF_BR_PRE_ROUTING, 465 skb, skb->dev, NULL, 466 br_nf_pre_routing_finish_bridge, 467 1); 468 return 0; 469 } 470 memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN); 471 skb->pkt_type = PACKET_HOST; 472 } 473 } else { 474 rt = bridge_parent_rtable(nf_bridge->physindev); 475 if (!rt) { 476 kfree_skb(skb); 477 return 0; 478 } 479 skb_dst_set_noref(skb, &rt->dst); 480 } 481 482 skb->dev = nf_bridge->physindev; 483 nf_bridge_update_protocol(skb); 484 nf_bridge_push_encap_header(skb); 485 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, 486 br_handle_frame_finish, 1); 487 488 return 0; 489} 490 491/* Some common code for IPv4/IPv6 */ 492static struct net_device *setup_pre_routing(struct sk_buff *skb) 493{ 494 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 495 496 if (skb->pkt_type == PACKET_OTHERHOST) { 497 skb->pkt_type = PACKET_HOST; 498 nf_bridge->mask |= BRNF_PKT_TYPE; 499 } 500 501 nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; 502 nf_bridge->physindev = skb->dev; 503 skb->dev = bridge_parent(skb->dev); 504 if (skb->protocol == htons(ETH_P_8021Q)) 505 nf_bridge->mask |= BRNF_8021Q; 506 else if (skb->protocol == htons(ETH_P_PPP_SES)) 507 nf_bridge->mask |= BRNF_PPPoE; 508 509 return skb->dev; 510} 511 512/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */ 513static int check_hbh_len(struct sk_buff *skb) 514{ 515 unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1); 516 u32 pkt_len; 517 const unsigned char *nh = skb_network_header(skb); 518 int off = raw - nh; 519 int len = (raw[1] + 1) << 3; 520 521 if ((raw + len) - skb->data > skb_headlen(skb)) 522 goto bad; 523 524 off += 2; 525 len -= 2; 526 527 while (len > 0) { 528 int optlen = nh[off + 1] + 2; 529 530 switch (nh[off]) { 531 case IPV6_TLV_PAD0: 532 optlen = 1; 533 break; 534 535 case IPV6_TLV_PADN: 536 break; 537 538 case IPV6_TLV_JUMBO: 539 if (nh[off + 1] != 4 || (off & 3) != 2) 540 goto bad; 541 pkt_len = ntohl(*(__be32 *) (nh + off + 2)); 542 if (pkt_len <= IPV6_MAXPLEN || 543 ipv6_hdr(skb)->payload_len) 544 goto bad; 545 if (pkt_len > skb->len - sizeof(struct ipv6hdr)) 546 goto bad; 547 if (pskb_trim_rcsum(skb, 548 pkt_len + sizeof(struct ipv6hdr))) 549 goto bad; 550 nh = skb_network_header(skb); 551 break; 552 default: 553 if (optlen > len) 554 goto bad; 555 break; 556 } 557 off += optlen; 558 len -= optlen; 559 } 560 if (len == 0) 561 return 0; 562bad: 563 return -1; 564 565} 566 567/* Replicate the checks that IPv6 does on packet reception and pass the packet 568 * to ip6tables, which doesn't support NAT, so things are fairly simple. */ 569static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, 570 struct sk_buff *skb, 571 const struct net_device *in, 572 const struct net_device *out, 573 int (*okfn)(struct sk_buff *)) 574{ 575 const struct ipv6hdr *hdr; 576 u32 pkt_len; 577 578 if (skb->len < sizeof(struct ipv6hdr)) 579 return NF_DROP; 580 581 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 582 return NF_DROP; 583 584 hdr = ipv6_hdr(skb); 585 586 if (hdr->version != 6) 587 return NF_DROP; 588 589 pkt_len = ntohs(hdr->payload_len); 590 591 if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { 592 if (pkt_len + sizeof(struct ipv6hdr) > skb->len) 593 return NF_DROP; 594 if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) 595 return NF_DROP; 596 } 597 if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb)) 598 return NF_DROP; 599 600 nf_bridge_put(skb->nf_bridge); 601 if (!nf_bridge_alloc(skb)) 602 return NF_DROP; 603 if (!setup_pre_routing(skb)) 604 return NF_DROP; 605 606 skb->protocol = htons(ETH_P_IPV6); 607 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 608 br_nf_pre_routing_finish_ipv6); 609 610 return NF_STOLEN; 611} 612 613/* Direct IPv6 traffic to br_nf_pre_routing_ipv6. 614 * Replicate the checks that IPv4 does on packet reception. 615 * Set skb->dev to the bridge device (i.e. parent of the 616 * receiving device) to make netfilter happy, the REDIRECT 617 * target in particular. Save the original destination IP 618 * address to be able to detect DNAT afterwards. */ 619static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, 620 const struct net_device *in, 621 const struct net_device *out, 622 int (*okfn)(struct sk_buff *)) 623{ 624 struct net_bridge_port *p; 625 struct net_bridge *br; 626 __u32 len = nf_bridge_encap_header_len(skb); 627 628 if (unlikely(!pskb_may_pull(skb, len))) 629 return NF_DROP; 630 631 p = br_port_get_rcu(in); 632 if (p == NULL) 633 return NF_DROP; 634 br = p->br; 635 636 if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || 637 IS_PPPOE_IPV6(skb)) { 638 if (!brnf_call_ip6tables && !br->nf_call_ip6tables) 639 return NF_ACCEPT; 640 641 nf_bridge_pull_encap_header_rcsum(skb); 642 return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); 643 } 644 645 if (!brnf_call_iptables && !br->nf_call_iptables) 646 return NF_ACCEPT; 647 648 if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) && 649 !IS_PPPOE_IP(skb)) 650 return NF_ACCEPT; 651 652 nf_bridge_pull_encap_header_rcsum(skb); 653 654 if (br_parse_ip_options(skb)) 655 return NF_DROP; 656 657 nf_bridge_put(skb->nf_bridge); 658 if (!nf_bridge_alloc(skb)) 659 return NF_DROP; 660 if (!setup_pre_routing(skb)) 661 return NF_DROP; 662 store_orig_dstaddr(skb); 663 skb->protocol = htons(ETH_P_IP); 664 665 NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, 666 br_nf_pre_routing_finish); 667 668 return NF_STOLEN; 669} 670 671 672/* PF_BRIDGE/LOCAL_IN ************************************************/ 673/* The packet is locally destined, which requires a real 674 * dst_entry, so detach the fake one. On the way up, the 675 * packet would pass through PRE_ROUTING again (which already 676 * took place when the packet entered the bridge), but we 677 * register an IPv4 PRE_ROUTING 'sabotage' hook that will 678 * prevent this from happening. */ 679static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, 680 const struct net_device *in, 681 const struct net_device *out, 682 int (*okfn)(struct sk_buff *)) 683{ 684 struct rtable *rt = skb_rtable(skb); 685 686 if (rt && rt == bridge_parent_rtable(in)) 687 skb_dst_drop(skb); 688 689 return NF_ACCEPT; 690} 691 692/* PF_BRIDGE/FORWARD *************************************************/ 693static int br_nf_forward_finish(struct sk_buff *skb) 694{ 695 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 696 struct net_device *in; 697 698 if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) { 699 in = nf_bridge->physindev; 700 if (nf_bridge->mask & BRNF_PKT_TYPE) { 701 skb->pkt_type = PACKET_OTHERHOST; 702 nf_bridge->mask ^= BRNF_PKT_TYPE; 703 } 704 nf_bridge_update_protocol(skb); 705 } else { 706 in = *((struct net_device **)(skb->cb)); 707 } 708 nf_bridge_push_encap_header(skb); 709 710 NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, 711 skb->dev, br_forward_finish, 1); 712 return 0; 713} 714 715/* This is the 'purely bridged' case. For IP, we pass the packet to 716 * netfilter with indev and outdev set to the bridge device, 717 * but we are still able to filter on the 'real' indev/outdev 718 * because of the physdev module. For ARP, indev and outdev are the 719 * bridge ports. */ 720static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, 721 const struct net_device *in, 722 const struct net_device *out, 723 int (*okfn)(struct sk_buff *)) 724{ 725 struct nf_bridge_info *nf_bridge; 726 struct net_device *parent; 727 u_int8_t pf; 728 729 if (!skb->nf_bridge) 730 return NF_ACCEPT; 731 732 /* Need exclusive nf_bridge_info since we might have multiple 733 * different physoutdevs. */ 734 if (!nf_bridge_unshare(skb)) 735 return NF_DROP; 736 737 parent = bridge_parent(out); 738 if (!parent) 739 return NF_DROP; 740 741 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || 742 IS_PPPOE_IP(skb)) 743 pf = PF_INET; 744 else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || 745 IS_PPPOE_IPV6(skb)) 746 pf = PF_INET6; 747 else 748 return NF_ACCEPT; 749 750 nf_bridge_pull_encap_header(skb); 751 752 nf_bridge = skb->nf_bridge; 753 if (skb->pkt_type == PACKET_OTHERHOST) { 754 skb->pkt_type = PACKET_HOST; 755 nf_bridge->mask |= BRNF_PKT_TYPE; 756 } 757 758 if (pf == PF_INET && br_parse_ip_options(skb)) 759 return NF_DROP; 760 761 /* The physdev module checks on this */ 762 nf_bridge->mask |= BRNF_BRIDGED; 763 nf_bridge->physoutdev = skb->dev; 764 if (pf == PF_INET) 765 skb->protocol = htons(ETH_P_IP); 766 else 767 skb->protocol = htons(ETH_P_IPV6); 768 769 NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent, 770 br_nf_forward_finish); 771 772 return NF_STOLEN; 773} 774 775static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, 776 const struct net_device *in, 777 const struct net_device *out, 778 int (*okfn)(struct sk_buff *)) 779{ 780 struct net_bridge_port *p; 781 struct net_bridge *br; 782 struct net_device **d = (struct net_device **)(skb->cb); 783 784 p = br_port_get_rcu(out); 785 if (p == NULL) 786 return NF_ACCEPT; 787 br = p->br; 788 789 if (!brnf_call_arptables && !br->nf_call_arptables) 790 return NF_ACCEPT; 791 792 if (skb->protocol != htons(ETH_P_ARP)) { 793 if (!IS_VLAN_ARP(skb)) 794 return NF_ACCEPT; 795 nf_bridge_pull_encap_header(skb); 796 } 797 798 if (arp_hdr(skb)->ar_pln != 4) { 799 if (IS_VLAN_ARP(skb)) 800 nf_bridge_push_encap_header(skb); 801 return NF_ACCEPT; 802 } 803 *d = (struct net_device *)in; 804 NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, 805 (struct net_device *)out, br_nf_forward_finish); 806 807 return NF_STOLEN; 808} 809 810#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) 811static int br_nf_dev_queue_xmit(struct sk_buff *skb) 812{ 813 int ret; 814 815 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && 816 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && 817 !skb_is_gso(skb)) { 818 if (br_parse_ip_options(skb)) 819 /* Drop invalid packet */ 820 return NF_DROP; 821 ret = ip_fragment(skb, br_dev_queue_push_xmit); 822 } else 823 ret = br_dev_queue_push_xmit(skb); 824 825 return ret; 826} 827#else 828static int br_nf_dev_queue_xmit(struct sk_buff *skb) 829{ 830 return br_dev_queue_push_xmit(skb); 831} 832#endif 833 834/* PF_BRIDGE/POST_ROUTING ********************************************/ 835static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, 836 const struct net_device *in, 837 const struct net_device *out, 838 int (*okfn)(struct sk_buff *)) 839{ 840 struct nf_bridge_info *nf_bridge = skb->nf_bridge; 841 struct net_device *realoutdev = bridge_parent(skb->dev); 842 u_int8_t pf; 843 844 if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED)) 845 return NF_ACCEPT; 846 847 if (!realoutdev) 848 return NF_DROP; 849 850 if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || 851 IS_PPPOE_IP(skb)) 852 pf = PF_INET; 853 else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || 854 IS_PPPOE_IPV6(skb)) 855 pf = PF_INET6; 856 else 857 return NF_ACCEPT; 858 859 /* We assume any code from br_dev_queue_push_xmit onwards doesn't care 860 * about the value of skb->pkt_type. */ 861 if (skb->pkt_type == PACKET_OTHERHOST) { 862 skb->pkt_type = PACKET_HOST; 863 nf_bridge->mask |= BRNF_PKT_TYPE; 864 } 865 866 nf_bridge_pull_encap_header(skb); 867 nf_bridge_save_header(skb); 868 if (pf == PF_INET) 869 skb->protocol = htons(ETH_P_IP); 870 else 871 skb->protocol = htons(ETH_P_IPV6); 872 873 NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, 874 br_nf_dev_queue_xmit); 875 876 return NF_STOLEN; 877} 878 879/* IP/SABOTAGE *****************************************************/ 880/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING 881 * for the second time. */ 882static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, 883 const struct net_device *in, 884 const struct net_device *out, 885 int (*okfn)(struct sk_buff *)) 886{ 887 if (skb->nf_bridge && 888 !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { 889 return NF_STOP; 890 } 891 892 return NF_ACCEPT; 893} 894 895/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because 896 * br_dev_queue_push_xmit is called afterwards */ 897static struct nf_hook_ops br_nf_ops[] __read_mostly = { 898 { 899 .hook = br_nf_pre_routing, 900 .owner = THIS_MODULE, 901 .pf = PF_BRIDGE, 902 .hooknum = NF_BR_PRE_ROUTING, 903 .priority = NF_BR_PRI_BRNF, 904 }, 905 { 906 .hook = br_nf_local_in, 907 .owner = THIS_MODULE, 908 .pf = PF_BRIDGE, 909 .hooknum = NF_BR_LOCAL_IN, 910 .priority = NF_BR_PRI_BRNF, 911 }, 912 { 913 .hook = br_nf_forward_ip, 914 .owner = THIS_MODULE, 915 .pf = PF_BRIDGE, 916 .hooknum = NF_BR_FORWARD, 917 .priority = NF_BR_PRI_BRNF - 1, 918 }, 919 { 920 .hook = br_nf_forward_arp, 921 .owner = THIS_MODULE, 922 .pf = PF_BRIDGE, 923 .hooknum = NF_BR_FORWARD, 924 .priority = NF_BR_PRI_BRNF, 925 }, 926 { 927 .hook = br_nf_post_routing, 928 .owner = THIS_MODULE, 929 .pf = PF_BRIDGE, 930 .hooknum = NF_BR_POST_ROUTING, 931 .priority = NF_BR_PRI_LAST, 932 }, 933 { 934 .hook = ip_sabotage_in, 935 .owner = THIS_MODULE, 936 .pf = PF_INET, 937 .hooknum = NF_INET_PRE_ROUTING, 938 .priority = NF_IP_PRI_FIRST, 939 }, 940 { 941 .hook = ip_sabotage_in, 942 .owner = THIS_MODULE, 943 .pf = PF_INET6, 944 .hooknum = NF_INET_PRE_ROUTING, 945 .priority = NF_IP6_PRI_FIRST, 946 }, 947}; 948 949#ifdef CONFIG_SYSCTL 950static 951int brnf_sysctl_call_tables(ctl_table * ctl, int write, 952 void __user * buffer, size_t * lenp, loff_t * ppos) 953{ 954 int ret; 955 956 ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 957 958 if (write && *(int *)(ctl->data)) 959 *(int *)(ctl->data) = 1; 960 return ret; 961} 962 963static ctl_table brnf_table[] = { 964 { 965 .procname = "bridge-nf-call-arptables", 966 .data = &brnf_call_arptables, 967 .maxlen = sizeof(int), 968 .mode = 0644, 969 .proc_handler = brnf_sysctl_call_tables, 970 }, 971 { 972 .procname = "bridge-nf-call-iptables", 973 .data = &brnf_call_iptables, 974 .maxlen = sizeof(int), 975 .mode = 0644, 976 .proc_handler = brnf_sysctl_call_tables, 977 }, 978 { 979 .procname = "bridge-nf-call-ip6tables", 980 .data = &brnf_call_ip6tables, 981 .maxlen = sizeof(int), 982 .mode = 0644, 983 .proc_handler = brnf_sysctl_call_tables, 984 }, 985 { 986 .procname = "bridge-nf-filter-vlan-tagged", 987 .data = &brnf_filter_vlan_tagged, 988 .maxlen = sizeof(int), 989 .mode = 0644, 990 .proc_handler = brnf_sysctl_call_tables, 991 }, 992 { 993 .procname = "bridge-nf-filter-pppoe-tagged", 994 .data = &brnf_filter_pppoe_tagged, 995 .maxlen = sizeof(int), 996 .mode = 0644, 997 .proc_handler = brnf_sysctl_call_tables, 998 }, 999 { } 1000}; 1001 1002static struct ctl_path brnf_path[] = { 1003 { .procname = "net", }, 1004 { .procname = "bridge", }, 1005 { } 1006}; 1007#endif 1008 1009int __init br_netfilter_init(void) 1010{ 1011 int ret; 1012 1013 ret = dst_entries_init(&fake_dst_ops); 1014 if (ret < 0) 1015 return ret; 1016 1017 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1018 if (ret < 0) { 1019 dst_entries_destroy(&fake_dst_ops); 1020 return ret; 1021 } 1022#ifdef CONFIG_SYSCTL 1023 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); 1024 if (brnf_sysctl_header == NULL) { 1025 printk(KERN_WARNING 1026 "br_netfilter: can't register to sysctl.\n"); 1027 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1028 dst_entries_destroy(&fake_dst_ops); 1029 return -ENOMEM; 1030 } 1031#endif 1032 printk(KERN_NOTICE "Bridge firewalling registered\n"); 1033 return 0; 1034} 1035 1036void br_netfilter_fini(void) 1037{ 1038 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1039#ifdef CONFIG_SYSCTL 1040 unregister_sysctl_table(brnf_sysctl_header); 1041#endif 1042 dst_entries_destroy(&fake_dst_ops); 1043}