Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/ipv4: VTI support new module for ip_vti.

New VTI tunnel kernel module, Kconfig and Makefile changes.

Signed-off-by: Saurabh Mohan <saurabh.mohan@vyatta.com>
Reviewed-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Saurabh and committed by
David S. Miller
1181412c eb8637cd

+982
+14
include/linux/if_tunnel.h
··· 80 80 81 81 #define IFLA_GRE_MAX (__IFLA_GRE_MAX - 1) 82 82 83 + /* VTI-mode i_flags */ 84 + #define VTI_ISVTI 0x0001 85 + 86 + enum { 87 + IFLA_VTI_UNSPEC, 88 + IFLA_VTI_LINK, 89 + IFLA_VTI_IKEY, 90 + IFLA_VTI_OKEY, 91 + IFLA_VTI_LOCAL, 92 + IFLA_VTI_REMOTE, 93 + __IFLA_VTI_MAX, 94 + }; 95 + 96 + #define IFLA_VTI_MAX (__IFLA_VTI_MAX - 1) 83 97 #endif /* _IF_TUNNEL_H_ */
+11
net/ipv4/Kconfig
··· 310 310 311 311 If unsure, say N. 312 312 313 + config NET_IPVTI 314 + tristate "Virtual (secure) IP: tunneling" 315 + select INET_TUNNEL 316 + depends on INET_XFRM_MODE_TUNNEL 317 + ---help--- 318 + Tunneling means encapsulating data of one protocol type within 319 + another protocol and sending it over a channel that understands the 320 + encapsulating protocol. This can be used with xfrm tunnel mode to give 321 + the notion of a secure tunnel for IPsec and then run a routing protocol 322 + on top. 323 + 313 324 config INET_AH 314 325 tristate "IP: AH transformation" 315 326 select XFRM_ALGO
+1
net/ipv4/Makefile
··· 20 20 obj-$(CONFIG_NET_IPIP) += ipip.o 21 21 obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o 22 22 obj-$(CONFIG_NET_IPGRE) += ip_gre.o 23 + obj-$(CONFIG_NET_IPVTI) += ip_vti.o 23 24 obj-$(CONFIG_SYN_COOKIES) += syncookies.o 24 25 obj-$(CONFIG_INET_AH) += ah4.o 25 26 obj-$(CONFIG_INET_ESP) += esp4.o
+956
net/ipv4/ip_vti.c
··· 1 + /* 2 + * Linux NET3: IP/IP protocol decoder modified to support 3 + * virtual tunnel interface 4 + * 5 + * Authors: 6 + * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License 10 + * as published by the Free Software Foundation; either version 11 + * 2 of the License, or (at your option) any later version. 12 + * 13 + */ 14 + 15 + /* 16 + This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c 17 + 18 + For comments look at net/ipv4/ip_gre.c --ANK 19 + */ 20 + 21 + 22 + #include <linux/capability.h> 23 + #include <linux/module.h> 24 + #include <linux/types.h> 25 + #include <linux/kernel.h> 26 + #include <linux/uaccess.h> 27 + #include <linux/skbuff.h> 28 + #include <linux/netdevice.h> 29 + #include <linux/in.h> 30 + #include <linux/tcp.h> 31 + #include <linux/udp.h> 32 + #include <linux/if_arp.h> 33 + #include <linux/mroute.h> 34 + #include <linux/init.h> 35 + #include <linux/netfilter_ipv4.h> 36 + #include <linux/if_ether.h> 37 + 38 + #include <net/sock.h> 39 + #include <net/ip.h> 40 + #include <net/icmp.h> 41 + #include <net/ipip.h> 42 + #include <net/inet_ecn.h> 43 + #include <net/xfrm.h> 44 + #include <net/net_namespace.h> 45 + #include <net/netns/generic.h> 46 + 47 + #define HASH_SIZE 16 48 + #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1)) 49 + 50 + static struct rtnl_link_ops vti_link_ops __read_mostly; 51 + 52 + static int vti_net_id __read_mostly; 53 + struct vti_net { 54 + struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; 55 + struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; 56 + struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; 57 + struct ip_tunnel __rcu *tunnels_wc[1]; 58 + struct ip_tunnel **tunnels[4]; 59 + 60 + struct net_device *fb_tunnel_dev; 61 + }; 62 + 63 + static int vti_fb_tunnel_init(struct net_device *dev); 64 + static int vti_tunnel_init(struct net_device *dev); 65 + 
static void vti_tunnel_setup(struct net_device *dev); 66 + static void vti_dev_free(struct net_device *dev); 67 + static int vti_tunnel_bind_dev(struct net_device *dev); 68 + 69 + /* Locking : hash tables are protected by RCU and RTNL */ 70 + 71 + #define for_each_ip_tunnel_rcu(start) \ 72 + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 73 + 74 + /* often modified stats are per cpu, other are shared (netdev->stats) */ 75 + struct pcpu_tstats { 76 + u64 rx_packets; 77 + u64 rx_bytes; 78 + u64 tx_packets; 79 + u64 tx_bytes; 80 + struct u64_stats_sync syncp; 81 + }; 82 + 83 + #define VTI_XMIT(stats1, stats2) do { \ 84 + int err; \ 85 + int pkt_len = skb->len; \ 86 + err = dst_output(skb); \ 87 + if (net_xmit_eval(err) == 0) { \ 88 + u64_stats_update_begin(&(stats1)->syncp); \ 89 + (stats1)->tx_bytes += pkt_len; \ 90 + (stats1)->tx_packets++; \ 91 + u64_stats_update_end(&(stats1)->syncp); \ 92 + } else { \ 93 + (stats2)->tx_errors++; \ 94 + (stats2)->tx_aborted_errors++; \ 95 + } \ 96 + } while (0) 97 + 98 + 99 + static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev, 100 + struct rtnl_link_stats64 *tot) 101 + { 102 + int i; 103 + 104 + for_each_possible_cpu(i) { 105 + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); 106 + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; 107 + unsigned int start; 108 + 109 + do { 110 + start = u64_stats_fetch_begin_bh(&tstats->syncp); 111 + rx_packets = tstats->rx_packets; 112 + tx_packets = tstats->tx_packets; 113 + rx_bytes = tstats->rx_bytes; 114 + tx_bytes = tstats->tx_bytes; 115 + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); 116 + 117 + tot->rx_packets += rx_packets; 118 + tot->tx_packets += tx_packets; 119 + tot->rx_bytes += rx_bytes; 120 + tot->tx_bytes += tx_bytes; 121 + } 122 + 123 + tot->multicast = dev->stats.multicast; 124 + tot->rx_crc_errors = dev->stats.rx_crc_errors; 125 + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; 126 + tot->rx_length_errors = 
dev->stats.rx_length_errors; 127 + tot->rx_errors = dev->stats.rx_errors; 128 + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; 129 + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; 130 + tot->tx_dropped = dev->stats.tx_dropped; 131 + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; 132 + tot->tx_errors = dev->stats.tx_errors; 133 + 134 + return tot; 135 + } 136 + 137 + static struct ip_tunnel *vti_tunnel_lookup(struct net *net, 138 + __be32 remote, __be32 local) 139 + { 140 + unsigned h0 = HASH(remote); 141 + unsigned h1 = HASH(local); 142 + struct ip_tunnel *t; 143 + struct vti_net *ipn = net_generic(net, vti_net_id); 144 + 145 + for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) 146 + if (local == t->parms.iph.saddr && 147 + remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 148 + return t; 149 + for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) 150 + if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) 151 + return t; 152 + 153 + for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) 154 + if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) 155 + return t; 156 + 157 + for_each_ip_tunnel_rcu(ipn->tunnels_wc[0]) 158 + if (t && (t->dev->flags&IFF_UP)) 159 + return t; 160 + return NULL; 161 + } 162 + 163 + static struct ip_tunnel **__vti_bucket(struct vti_net *ipn, 164 + struct ip_tunnel_parm *parms) 165 + { 166 + __be32 remote = parms->iph.daddr; 167 + __be32 local = parms->iph.saddr; 168 + unsigned h = 0; 169 + int prio = 0; 170 + 171 + if (remote) { 172 + prio |= 2; 173 + h ^= HASH(remote); 174 + } 175 + if (local) { 176 + prio |= 1; 177 + h ^= HASH(local); 178 + } 179 + return &ipn->tunnels[prio][h]; 180 + } 181 + 182 + static inline struct ip_tunnel **vti_bucket(struct vti_net *ipn, 183 + struct ip_tunnel *t) 184 + { 185 + return __vti_bucket(ipn, &t->parms); 186 + } 187 + 188 + static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t) 189 + { 190 + struct ip_tunnel __rcu **tp; 191 + struct ip_tunnel *iter; 192 + 193 + for (tp = 
vti_bucket(ipn, t); 194 + (iter = rtnl_dereference(*tp)) != NULL; 195 + tp = &iter->next) { 196 + if (t == iter) { 197 + rcu_assign_pointer(*tp, t->next); 198 + break; 199 + } 200 + } 201 + } 202 + 203 + static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t) 204 + { 205 + struct ip_tunnel __rcu **tp = vti_bucket(ipn, t); 206 + 207 + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); 208 + rcu_assign_pointer(*tp, t); 209 + } 210 + 211 + static struct ip_tunnel *vti_tunnel_locate(struct net *net, 212 + struct ip_tunnel_parm *parms, 213 + int create) 214 + { 215 + __be32 remote = parms->iph.daddr; 216 + __be32 local = parms->iph.saddr; 217 + struct ip_tunnel *t, *nt; 218 + struct ip_tunnel __rcu **tp; 219 + struct net_device *dev; 220 + char name[IFNAMSIZ]; 221 + struct vti_net *ipn = net_generic(net, vti_net_id); 222 + 223 + for (tp = __vti_bucket(ipn, parms); 224 + (t = rtnl_dereference(*tp)) != NULL; 225 + tp = &t->next) { 226 + if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 227 + return t; 228 + } 229 + if (!create) 230 + return NULL; 231 + 232 + if (parms->name[0]) 233 + strlcpy(name, parms->name, IFNAMSIZ); 234 + else 235 + strcpy(name, "vti%d"); 236 + 237 + dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup); 238 + if (dev == NULL) 239 + return NULL; 240 + 241 + dev_net_set(dev, net); 242 + 243 + nt = netdev_priv(dev); 244 + nt->parms = *parms; 245 + dev->rtnl_link_ops = &vti_link_ops; 246 + 247 + vti_tunnel_bind_dev(dev); 248 + 249 + if (register_netdevice(dev) < 0) 250 + goto failed_free; 251 + 252 + dev_hold(dev); 253 + vti_tunnel_link(ipn, nt); 254 + return nt; 255 + 256 + failed_free: 257 + free_netdev(dev); 258 + return NULL; 259 + } 260 + 261 + static void vti_tunnel_uninit(struct net_device *dev) 262 + { 263 + struct net *net = dev_net(dev); 264 + struct vti_net *ipn = net_generic(net, vti_net_id); 265 + 266 + vti_tunnel_unlink(ipn, netdev_priv(dev)); 267 + dev_put(dev); 268 + } 269 + 270 + static int vti_err(struct 
sk_buff *skb, u32 info) 271 + { 272 + 273 + /* All the routers (except for Linux) return only 274 + * 8 bytes of packet payload. It means, that precise relaying of 275 + * ICMP in the real Internet is absolutely infeasible. 276 + */ 277 + struct iphdr *iph = (struct iphdr *)skb->data; 278 + const int type = icmp_hdr(skb)->type; 279 + const int code = icmp_hdr(skb)->code; 280 + struct ip_tunnel *t; 281 + int err; 282 + 283 + switch (type) { 284 + default: 285 + case ICMP_PARAMETERPROB: 286 + return 0; 287 + 288 + case ICMP_DEST_UNREACH: 289 + switch (code) { 290 + case ICMP_SR_FAILED: 291 + case ICMP_PORT_UNREACH: 292 + /* Impossible event. */ 293 + return 0; 294 + default: 295 + /* All others are translated to HOST_UNREACH. */ 296 + break; 297 + } 298 + break; 299 + case ICMP_TIME_EXCEEDED: 300 + if (code != ICMP_EXC_TTL) 301 + return 0; 302 + break; 303 + } 304 + 305 + err = -ENOENT; 306 + 307 + rcu_read_lock(); 308 + t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); 309 + if (t == NULL) 310 + goto out; 311 + 312 + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { 313 + ipv4_update_pmtu(skb, dev_net(skb->dev), info, 314 + t->parms.link, 0, IPPROTO_IPIP, 0); 315 + err = 0; 316 + goto out; 317 + } 318 + 319 + err = 0; 320 + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 321 + goto out; 322 + 323 + if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) 324 + t->err_count++; 325 + else 326 + t->err_count = 1; 327 + t->err_time = jiffies; 328 + out: 329 + rcu_read_unlock(); 330 + return err; 331 + } 332 + 333 + /* We dont digest the packet therefore let the packet pass */ 334 + static int vti_rcv(struct sk_buff *skb) 335 + { 336 + struct ip_tunnel *tunnel; 337 + const struct iphdr *iph = ip_hdr(skb); 338 + 339 + rcu_read_lock(); 340 + tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); 341 + if (tunnel != NULL) { 342 + struct pcpu_tstats *tstats; 343 + 344 + tstats = this_cpu_ptr(tunnel->dev->tstats); 345 + 
u64_stats_update_begin(&tstats->syncp); 346 + tstats->rx_packets++; 347 + tstats->rx_bytes += skb->len; 348 + u64_stats_update_end(&tstats->syncp); 349 + 350 + skb->dev = tunnel->dev; 351 + rcu_read_unlock(); 352 + return 1; 353 + } 354 + rcu_read_unlock(); 355 + 356 + return -1; 357 + } 358 + 359 + /* This function assumes it is being called from dev_queue_xmit() 360 + * and that skb is filled properly by that function. 361 + */ 362 + 363 + static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 364 + { 365 + struct ip_tunnel *tunnel = netdev_priv(dev); 366 + struct pcpu_tstats *tstats; 367 + struct iphdr *tiph = &tunnel->parms.iph; 368 + u8 tos; 369 + struct rtable *rt; /* Route to the other host */ 370 + struct net_device *tdev; /* Device to other host */ 371 + struct iphdr *old_iph = ip_hdr(skb); 372 + __be32 dst = tiph->daddr; 373 + struct flowi4 fl4; 374 + 375 + if (skb->protocol != htons(ETH_P_IP)) 376 + goto tx_error; 377 + 378 + tos = old_iph->tos; 379 + 380 + memset(&fl4, 0, sizeof(fl4)); 381 + flowi4_init_output(&fl4, tunnel->parms.link, 382 + htonl(tunnel->parms.i_key), RT_TOS(tos), 383 + RT_SCOPE_UNIVERSE, 384 + IPPROTO_IPIP, 0, 385 + dst, tiph->saddr, 0, 0); 386 + rt = ip_route_output_key(dev_net(dev), &fl4); 387 + if (IS_ERR(rt)) { 388 + dev->stats.tx_carrier_errors++; 389 + goto tx_error_icmp; 390 + } 391 + /* if there is no transform then this tunnel is not functional. 392 + * Or if the xfrm is not mode tunnel. 
393 + */ 394 + if (!rt->dst.xfrm || 395 + rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { 396 + dev->stats.tx_carrier_errors++; 397 + goto tx_error_icmp; 398 + } 399 + tdev = rt->dst.dev; 400 + 401 + if (tdev == dev) { 402 + ip_rt_put(rt); 403 + dev->stats.collisions++; 404 + goto tx_error; 405 + } 406 + 407 + if (tunnel->err_count > 0) { 408 + if (time_before(jiffies, 409 + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { 410 + tunnel->err_count--; 411 + dst_link_failure(skb); 412 + } else 413 + tunnel->err_count = 0; 414 + } 415 + 416 + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | 417 + IPSKB_REROUTED); 418 + skb_dst_drop(skb); 419 + skb_dst_set(skb, &rt->dst); 420 + nf_reset(skb); 421 + skb->dev = skb_dst(skb)->dev; 422 + 423 + tstats = this_cpu_ptr(dev->tstats); 424 + VTI_XMIT(tstats, &dev->stats); 425 + return NETDEV_TX_OK; 426 + 427 + tx_error_icmp: 428 + dst_link_failure(skb); 429 + tx_error: 430 + dev->stats.tx_errors++; 431 + dev_kfree_skb(skb); 432 + return NETDEV_TX_OK; 433 + } 434 + 435 + static int vti_tunnel_bind_dev(struct net_device *dev) 436 + { 437 + struct net_device *tdev = NULL; 438 + struct ip_tunnel *tunnel; 439 + struct iphdr *iph; 440 + 441 + tunnel = netdev_priv(dev); 442 + iph = &tunnel->parms.iph; 443 + 444 + if (iph->daddr) { 445 + struct rtable *rt; 446 + struct flowi4 fl4; 447 + memset(&fl4, 0, sizeof(fl4)); 448 + flowi4_init_output(&fl4, tunnel->parms.link, 449 + htonl(tunnel->parms.i_key), 450 + RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, 451 + IPPROTO_IPIP, 0, 452 + iph->daddr, iph->saddr, 0, 0); 453 + rt = ip_route_output_key(dev_net(dev), &fl4); 454 + if (!IS_ERR(rt)) { 455 + tdev = rt->dst.dev; 456 + ip_rt_put(rt); 457 + } 458 + dev->flags |= IFF_POINTOPOINT; 459 + } 460 + 461 + if (!tdev && tunnel->parms.link) 462 + tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); 463 + 464 + if (tdev) { 465 + dev->hard_header_len = tdev->hard_header_len + 466 + sizeof(struct iphdr); 467 + dev->mtu = tdev->mtu; 468 + } 
469 + dev->iflink = tunnel->parms.link; 470 + return dev->mtu; 471 + } 472 + 473 + static int 474 + vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) 475 + { 476 + int err = 0; 477 + struct ip_tunnel_parm p; 478 + struct ip_tunnel *t; 479 + struct net *net = dev_net(dev); 480 + struct vti_net *ipn = net_generic(net, vti_net_id); 481 + 482 + switch (cmd) { 483 + case SIOCGETTUNNEL: 484 + t = NULL; 485 + if (dev == ipn->fb_tunnel_dev) { 486 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, 487 + sizeof(p))) { 488 + err = -EFAULT; 489 + break; 490 + } 491 + t = vti_tunnel_locate(net, &p, 0); 492 + } 493 + if (t == NULL) 494 + t = netdev_priv(dev); 495 + memcpy(&p, &t->parms, sizeof(p)); 496 + p.i_flags |= GRE_KEY | VTI_ISVTI; 497 + p.o_flags |= GRE_KEY; 498 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) 499 + err = -EFAULT; 500 + break; 501 + 502 + case SIOCADDTUNNEL: 503 + case SIOCCHGTUNNEL: 504 + err = -EPERM; 505 + if (!capable(CAP_NET_ADMIN)) 506 + goto done; 507 + 508 + err = -EFAULT; 509 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) 510 + goto done; 511 + 512 + err = -EINVAL; 513 + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || 514 + p.iph.ihl != 5) 515 + goto done; 516 + 517 + t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); 518 + 519 + if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { 520 + if (t != NULL) { 521 + if (t->dev != dev) { 522 + err = -EEXIST; 523 + break; 524 + } 525 + } else { 526 + if (((dev->flags&IFF_POINTOPOINT) && 527 + !p.iph.daddr) || 528 + (!(dev->flags&IFF_POINTOPOINT) && 529 + p.iph.daddr)) { 530 + err = -EINVAL; 531 + break; 532 + } 533 + t = netdev_priv(dev); 534 + vti_tunnel_unlink(ipn, t); 535 + synchronize_net(); 536 + t->parms.iph.saddr = p.iph.saddr; 537 + t->parms.iph.daddr = p.iph.daddr; 538 + t->parms.i_key = p.i_key; 539 + t->parms.o_key = p.o_key; 540 + t->parms.iph.protocol = IPPROTO_IPIP; 541 + memcpy(dev->dev_addr, &p.iph.saddr, 4); 542 + 
memcpy(dev->broadcast, &p.iph.daddr, 4); 543 + vti_tunnel_link(ipn, t); 544 + netdev_state_change(dev); 545 + } 546 + } 547 + 548 + if (t) { 549 + err = 0; 550 + if (cmd == SIOCCHGTUNNEL) { 551 + t->parms.i_key = p.i_key; 552 + t->parms.o_key = p.o_key; 553 + if (t->parms.link != p.link) { 554 + t->parms.link = p.link; 555 + vti_tunnel_bind_dev(dev); 556 + netdev_state_change(dev); 557 + } 558 + } 559 + p.i_flags |= GRE_KEY | VTI_ISVTI; 560 + p.o_flags |= GRE_KEY; 561 + if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, 562 + sizeof(p))) 563 + err = -EFAULT; 564 + } else 565 + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); 566 + break; 567 + 568 + case SIOCDELTUNNEL: 569 + err = -EPERM; 570 + if (!capable(CAP_NET_ADMIN)) 571 + goto done; 572 + 573 + if (dev == ipn->fb_tunnel_dev) { 574 + err = -EFAULT; 575 + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, 576 + sizeof(p))) 577 + goto done; 578 + err = -ENOENT; 579 + 580 + t = vti_tunnel_locate(net, &p, 0); 581 + if (t == NULL) 582 + goto done; 583 + err = -EPERM; 584 + if (t->dev == ipn->fb_tunnel_dev) 585 + goto done; 586 + dev = t->dev; 587 + } 588 + unregister_netdevice(dev); 589 + err = 0; 590 + break; 591 + 592 + default: 593 + err = -EINVAL; 594 + } 595 + 596 + done: 597 + return err; 598 + } 599 + 600 + static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu) 601 + { 602 + if (new_mtu < 68 || new_mtu > 0xFFF8) 603 + return -EINVAL; 604 + dev->mtu = new_mtu; 605 + return 0; 606 + } 607 + 608 + static const struct net_device_ops vti_netdev_ops = { 609 + .ndo_init = vti_tunnel_init, 610 + .ndo_uninit = vti_tunnel_uninit, 611 + .ndo_start_xmit = vti_tunnel_xmit, 612 + .ndo_do_ioctl = vti_tunnel_ioctl, 613 + .ndo_change_mtu = vti_tunnel_change_mtu, 614 + .ndo_get_stats64 = vti_get_stats64, 615 + }; 616 + 617 + static void vti_dev_free(struct net_device *dev) 618 + { 619 + free_percpu(dev->tstats); 620 + free_netdev(dev); 621 + } 622 + 623 + static void vti_tunnel_setup(struct net_device *dev) 
624 + { 625 + dev->netdev_ops = &vti_netdev_ops; 626 + dev->destructor = vti_dev_free; 627 + 628 + dev->type = ARPHRD_TUNNEL; 629 + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 630 + dev->mtu = ETH_DATA_LEN; 631 + dev->flags = IFF_NOARP; 632 + dev->iflink = 0; 633 + dev->addr_len = 4; 634 + dev->features |= NETIF_F_NETNS_LOCAL; 635 + dev->features |= NETIF_F_LLTX; 636 + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 637 + } 638 + 639 + static int vti_tunnel_init(struct net_device *dev) 640 + { 641 + struct ip_tunnel *tunnel = netdev_priv(dev); 642 + 643 + tunnel->dev = dev; 644 + strcpy(tunnel->parms.name, dev->name); 645 + 646 + memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); 647 + memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 648 + 649 + dev->tstats = alloc_percpu(struct pcpu_tstats); 650 + if (!dev->tstats) 651 + return -ENOMEM; 652 + 653 + return 0; 654 + } 655 + 656 + static int __net_init vti_fb_tunnel_init(struct net_device *dev) 657 + { 658 + struct ip_tunnel *tunnel = netdev_priv(dev); 659 + struct iphdr *iph = &tunnel->parms.iph; 660 + struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); 661 + 662 + tunnel->dev = dev; 663 + strcpy(tunnel->parms.name, dev->name); 664 + 665 + iph->version = 4; 666 + iph->protocol = IPPROTO_IPIP; 667 + iph->ihl = 5; 668 + 669 + dev->tstats = alloc_percpu(struct pcpu_tstats); 670 + if (!dev->tstats) 671 + return -ENOMEM; 672 + 673 + dev_hold(dev); 674 + rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); 675 + return 0; 676 + } 677 + 678 + static struct xfrm_tunnel vti_handler __read_mostly = { 679 + .handler = vti_rcv, 680 + .err_handler = vti_err, 681 + .priority = 1, 682 + }; 683 + 684 + static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head) 685 + { 686 + int prio; 687 + 688 + for (prio = 1; prio < 4; prio++) { 689 + int h; 690 + for (h = 0; h < HASH_SIZE; h++) { 691 + struct ip_tunnel *t; 692 + 693 + t = rtnl_dereference(ipn->tunnels[prio][h]); 694 + while (t != NULL) { 695 
+ unregister_netdevice_queue(t->dev, head); 696 + t = rtnl_dereference(t->next); 697 + } 698 + } 699 + } 700 + } 701 + 702 + static int __net_init vti_init_net(struct net *net) 703 + { 704 + int err; 705 + struct vti_net *ipn = net_generic(net, vti_net_id); 706 + 707 + ipn->tunnels[0] = ipn->tunnels_wc; 708 + ipn->tunnels[1] = ipn->tunnels_l; 709 + ipn->tunnels[2] = ipn->tunnels_r; 710 + ipn->tunnels[3] = ipn->tunnels_r_l; 711 + 712 + ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), 713 + "ip_vti0", 714 + vti_tunnel_setup); 715 + if (!ipn->fb_tunnel_dev) { 716 + err = -ENOMEM; 717 + goto err_alloc_dev; 718 + } 719 + dev_net_set(ipn->fb_tunnel_dev, net); 720 + 721 + err = vti_fb_tunnel_init(ipn->fb_tunnel_dev); 722 + if (err) 723 + goto err_reg_dev; 724 + ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops; 725 + 726 + err = register_netdev(ipn->fb_tunnel_dev); 727 + if (err) 728 + goto err_reg_dev; 729 + return 0; 730 + 731 + err_reg_dev: 732 + vti_dev_free(ipn->fb_tunnel_dev); 733 + err_alloc_dev: 734 + /* nothing */ 735 + return err; 736 + } 737 + 738 + static void __net_exit vti_exit_net(struct net *net) 739 + { 740 + struct vti_net *ipn = net_generic(net, vti_net_id); 741 + LIST_HEAD(list); 742 + 743 + rtnl_lock(); 744 + vti_destroy_tunnels(ipn, &list); 745 + unregister_netdevice_many(&list); 746 + rtnl_unlock(); 747 + } 748 + 749 + static struct pernet_operations vti_net_ops = { 750 + .init = vti_init_net, 751 + .exit = vti_exit_net, 752 + .id = &vti_net_id, 753 + .size = sizeof(struct vti_net), 754 + }; 755 + 756 + static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) 757 + { 758 + return 0; 759 + } 760 + 761 + static void vti_netlink_parms(struct nlattr *data[], 762 + struct ip_tunnel_parm *parms) 763 + { 764 + memset(parms, 0, sizeof(*parms)); 765 + 766 + parms->iph.protocol = IPPROTO_IPIP; 767 + 768 + if (!data) 769 + return; 770 + 771 + if (data[IFLA_VTI_LINK]) 772 + parms->link = nla_get_u32(data[IFLA_VTI_LINK]); 773 + 774 + 
if (data[IFLA_VTI_IKEY]) 775 + parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); 776 + 777 + if (data[IFLA_VTI_OKEY]) 778 + parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); 779 + 780 + if (data[IFLA_VTI_LOCAL]) 781 + parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]); 782 + 783 + if (data[IFLA_VTI_REMOTE]) 784 + parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]); 785 + 786 + } 787 + 788 + static int vti_newlink(struct net *src_net, struct net_device *dev, 789 + struct nlattr *tb[], struct nlattr *data[]) 790 + { 791 + struct ip_tunnel *nt; 792 + struct net *net = dev_net(dev); 793 + struct vti_net *ipn = net_generic(net, vti_net_id); 794 + int mtu; 795 + int err; 796 + 797 + nt = netdev_priv(dev); 798 + vti_netlink_parms(data, &nt->parms); 799 + 800 + if (vti_tunnel_locate(net, &nt->parms, 0)) 801 + return -EEXIST; 802 + 803 + mtu = vti_tunnel_bind_dev(dev); 804 + if (!tb[IFLA_MTU]) 805 + dev->mtu = mtu; 806 + 807 + err = register_netdevice(dev); 808 + if (err) 809 + goto out; 810 + 811 + dev_hold(dev); 812 + vti_tunnel_link(ipn, nt); 813 + 814 + out: 815 + return err; 816 + } 817 + 818 + static int vti_changelink(struct net_device *dev, struct nlattr *tb[], 819 + struct nlattr *data[]) 820 + { 821 + struct ip_tunnel *t, *nt; 822 + struct net *net = dev_net(dev); 823 + struct vti_net *ipn = net_generic(net, vti_net_id); 824 + struct ip_tunnel_parm p; 825 + int mtu; 826 + 827 + if (dev == ipn->fb_tunnel_dev) 828 + return -EINVAL; 829 + 830 + nt = netdev_priv(dev); 831 + vti_netlink_parms(data, &p); 832 + 833 + t = vti_tunnel_locate(net, &p, 0); 834 + 835 + if (t) { 836 + if (t->dev != dev) 837 + return -EEXIST; 838 + } else { 839 + t = nt; 840 + 841 + vti_tunnel_unlink(ipn, t); 842 + t->parms.iph.saddr = p.iph.saddr; 843 + t->parms.iph.daddr = p.iph.daddr; 844 + t->parms.i_key = p.i_key; 845 + t->parms.o_key = p.o_key; 846 + if (dev->type != ARPHRD_ETHER) { 847 + memcpy(dev->dev_addr, &p.iph.saddr, 4); 848 + memcpy(dev->broadcast, &p.iph.daddr, 4); 849 + } 
850 + vti_tunnel_link(ipn, t); 851 + netdev_state_change(dev); 852 + } 853 + 854 + if (t->parms.link != p.link) { 855 + t->parms.link = p.link; 856 + mtu = vti_tunnel_bind_dev(dev); 857 + if (!tb[IFLA_MTU]) 858 + dev->mtu = mtu; 859 + netdev_state_change(dev); 860 + } 861 + 862 + return 0; 863 + } 864 + 865 + static size_t vti_get_size(const struct net_device *dev) 866 + { 867 + return 868 + /* IFLA_VTI_LINK */ 869 + nla_total_size(4) + 870 + /* IFLA_VTI_IKEY */ 871 + nla_total_size(4) + 872 + /* IFLA_VTI_OKEY */ 873 + nla_total_size(4) + 874 + /* IFLA_VTI_LOCAL */ 875 + nla_total_size(4) + 876 + /* IFLA_VTI_REMOTE */ 877 + nla_total_size(4) + 878 + 0; 879 + } 880 + 881 + static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev) 882 + { 883 + struct ip_tunnel *t = netdev_priv(dev); 884 + struct ip_tunnel_parm *p = &t->parms; 885 + 886 + nla_put_u32(skb, IFLA_VTI_LINK, p->link); 887 + nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key); 888 + nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key); 889 + nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr); 890 + nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr); 891 + 892 + return 0; 893 + } 894 + 895 + static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = { 896 + [IFLA_VTI_LINK] = { .type = NLA_U32 }, 897 + [IFLA_VTI_IKEY] = { .type = NLA_U32 }, 898 + [IFLA_VTI_OKEY] = { .type = NLA_U32 }, 899 + [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, 900 + [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, 901 + }; 902 + 903 + static struct rtnl_link_ops vti_link_ops __read_mostly = { 904 + .kind = "vti", 905 + .maxtype = IFLA_VTI_MAX, 906 + .policy = vti_policy, 907 + .priv_size = sizeof(struct ip_tunnel), 908 + .setup = vti_tunnel_setup, 909 + .validate = vti_tunnel_validate, 910 + .newlink = vti_newlink, 911 + .changelink = vti_changelink, 912 + .get_size = vti_get_size, 913 + .fill_info = vti_fill_info, 914 + }; 915 + 916 + static int __init vti_init(void) 917 + { 918 + int err; 919 + 920 
+ pr_info("IPv4 over IPSec tunneling driver\n"); 921 + 922 + err = register_pernet_device(&vti_net_ops); 923 + if (err < 0) 924 + return err; 925 + err = xfrm4_mode_tunnel_input_register(&vti_handler); 926 + if (err < 0) { 927 + unregister_pernet_device(&vti_net_ops); 928 + pr_info(KERN_INFO "vti init: can't register tunnel\n"); 929 + } 930 + 931 + err = rtnl_link_register(&vti_link_ops); 932 + if (err < 0) 933 + goto rtnl_link_failed; 934 + 935 + return err; 936 + 937 + rtnl_link_failed: 938 + xfrm4_mode_tunnel_input_deregister(&vti_handler); 939 + unregister_pernet_device(&vti_net_ops); 940 + return err; 941 + } 942 + 943 + static void __exit vti_fini(void) 944 + { 945 + rtnl_link_unregister(&vti_link_ops); 946 + if (xfrm4_mode_tunnel_input_deregister(&vti_handler)) 947 + pr_info("vti close: can't deregister tunnel\n"); 948 + 949 + unregister_pernet_device(&vti_net_ops); 950 + } 951 + 952 + module_init(vti_init); 953 + module_exit(vti_fini); 954 + MODULE_LICENSE("GPL"); 955 + MODULE_ALIAS_RTNL_LINK("vti"); 956 + MODULE_ALIAS_NETDEV("ip_vti0");