Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.23-rc7 2597 lines 61 kB view raw
1/* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16/* Changes: 17 * 18 * YOSHIFUJI Hideaki @USAGI 19 * reworked default router selection. 20 * - respect outgoing interface 21 * - select from (probably) reachable routers (i.e. 22 * routers in REACHABLE, STALE, DELAY or PROBE states). 23 * - always select the same router if it is (probably) 24 * reachable. otherwise, round-robin the list. 25 * Ville Nuorvala 26 * Fixed routing subtrees. 27 */ 28 29#include <linux/capability.h> 30#include <linux/errno.h> 31#include <linux/types.h> 32#include <linux/times.h> 33#include <linux/socket.h> 34#include <linux/sockios.h> 35#include <linux/net.h> 36#include <linux/route.h> 37#include <linux/netdevice.h> 38#include <linux/in6.h> 39#include <linux/init.h> 40#include <linux/if_arp.h> 41 42#ifdef CONFIG_PROC_FS 43#include <linux/proc_fs.h> 44#include <linux/seq_file.h> 45#endif 46 47#include <net/snmp.h> 48#include <net/ipv6.h> 49#include <net/ip6_fib.h> 50#include <net/ip6_route.h> 51#include <net/ndisc.h> 52#include <net/addrconf.h> 53#include <net/tcp.h> 54#include <linux/rtnetlink.h> 55#include <net/dst.h> 56#include <net/xfrm.h> 57#include <net/netevent.h> 58#include <net/netlink.h> 59 60#include <asm/uaccess.h> 61 62#ifdef CONFIG_SYSCTL 63#include <linux/sysctl.h> 64#endif 65 66/* Set to 3 to get tracing. */ 67#define RT6_DEBUG 2 68 69#if RT6_DEBUG >= 3 70#define RDBG(x) printk x 71#define RT6_TRACE(x...) printk(KERN_DEBUG x) 72#else 73#define RDBG(x) 74#define RT6_TRACE(x...) do { ; } while (0) 75#endif 76 77#define CLONE_OFFLINK_ROUTE 0 78 79static int ip6_rt_max_size = 4096; 80static int ip6_rt_gc_min_interval = HZ / 2; 81static int ip6_rt_gc_timeout = 60*HZ; 82int ip6_rt_gc_interval = 30*HZ; 83static int ip6_rt_gc_elasticity = 9; 84static int ip6_rt_mtu_expires = 10*60*HZ; 85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 86 87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 89static struct dst_entry *ip6_negative_advice(struct dst_entry *); 90static void ip6_dst_destroy(struct dst_entry *); 91static void ip6_dst_ifdown(struct dst_entry *, 92 struct net_device *dev, int how); 93static int ip6_dst_gc(void); 94 95static int ip6_pkt_discard(struct sk_buff *skb); 96static int ip6_pkt_discard_out(struct sk_buff *skb); 97static void ip6_link_failure(struct sk_buff *skb); 98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 99 100#ifdef CONFIG_IPV6_ROUTE_INFO 101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 102 struct in6_addr *gwaddr, int ifindex, 103 unsigned pref); 104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 105 struct in6_addr *gwaddr, int ifindex); 106#endif 107 108static struct dst_ops ip6_dst_ops = { 109 .family = AF_INET6, 110 .protocol = __constant_htons(ETH_P_IPV6), 111 .gc = ip6_dst_gc, 112 .gc_thresh = 1024, 113 .check = ip6_dst_check, 114 .destroy = ip6_dst_destroy, 115 .ifdown = ip6_dst_ifdown, 116 .negative_advice = ip6_negative_advice, 117 .link_failure = ip6_link_failure, 118 .update_pmtu = ip6_rt_update_pmtu, 119 .entry_size = sizeof(struct rt6_info), 120}; 121 122static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 123{ 124} 125 126static struct dst_ops ip6_dst_blackhole_ops = { 127 .family = AF_INET6, 128 .protocol = __constant_htons(ETH_P_IPV6), 129 .destroy = ip6_dst_destroy, 130 .check = ip6_dst_check, 131 .update_pmtu = ip6_rt_blackhole_update_pmtu, 132 .entry_size = sizeof(struct rt6_info), 133}; 134 135struct rt6_info ip6_null_entry = { 136 .u = { 137 .dst = { 138 .__refcnt = ATOMIC_INIT(1), 139 .__use = 1, 140 .dev = &loopback_dev, 141 .obsolete = -1, 142 .error = -ENETUNREACH, 143 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 144 .input = ip6_pkt_discard, 145 .output = ip6_pkt_discard_out, 146 .ops = &ip6_dst_ops, 147 .path = (struct dst_entry*)&ip6_null_entry, 148 } 149 }, 150 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 151 .rt6i_metric = ~(u32) 0, 152 .rt6i_ref = ATOMIC_INIT(1), 153}; 154 155#ifdef CONFIG_IPV6_MULTIPLE_TABLES 156 157static int ip6_pkt_prohibit(struct sk_buff *skb); 158static int ip6_pkt_prohibit_out(struct sk_buff *skb); 159static int ip6_pkt_blk_hole(struct sk_buff *skb); 160 161struct rt6_info ip6_prohibit_entry = { 162 .u = { 163 .dst = { 164 .__refcnt = ATOMIC_INIT(1), 165 .__use = 1, 166 .dev = &loopback_dev, 167 .obsolete = -1, 168 .error = -EACCES, 169 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 170 .input = ip6_pkt_prohibit, 171 .output = ip6_pkt_prohibit_out, 172 .ops = &ip6_dst_ops, 173 .path = (struct dst_entry*)&ip6_prohibit_entry, 174 } 175 }, 176 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 177 .rt6i_metric = ~(u32) 0, 178 .rt6i_ref = ATOMIC_INIT(1), 179}; 180 181struct rt6_info ip6_blk_hole_entry = { 182 .u = { 183 .dst = { 184 .__refcnt = ATOMIC_INIT(1), 185 .__use = 1, 186 .dev = &loopback_dev, 187 .obsolete = -1, 188 .error = -EINVAL, 189 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 190 .input = ip6_pkt_blk_hole, 191 .output = ip6_pkt_blk_hole, 192 .ops = &ip6_dst_ops, 193 .path = (struct dst_entry*)&ip6_blk_hole_entry, 194 } 195 }, 196 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 197 .rt6i_metric = ~(u32) 0, 198 .rt6i_ref = ATOMIC_INIT(1), 199}; 200 201#endif 202 203/* allocate dst with ip6_dst_ops */ 204static __inline__ struct rt6_info *ip6_dst_alloc(void) 205{ 206 return (struct rt6_info *)dst_alloc(&ip6_dst_ops); 207} 208 209static void ip6_dst_destroy(struct dst_entry *dst) 210{ 211 struct rt6_info *rt = (struct rt6_info *)dst; 212 struct inet6_dev *idev = rt->rt6i_idev; 213 214 if (idev != NULL) { 215 rt->rt6i_idev = NULL; 216 in6_dev_put(idev); 217 } 218} 219 220static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 221 int how) 222{ 223 struct rt6_info *rt = (struct rt6_info *)dst; 224 struct inet6_dev *idev = rt->rt6i_idev; 225 226 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { 227 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); 228 if (loopback_idev != NULL) { 229 rt->rt6i_idev = loopback_idev; 230 in6_dev_put(idev); 231 } 232 } 233} 234 235static __inline__ int rt6_check_expired(const struct rt6_info *rt) 236{ 237 return (rt->rt6i_flags & RTF_EXPIRES && 238 time_after(jiffies, rt->rt6i_expires)); 239} 240 241static inline int rt6_need_strict(struct in6_addr *daddr) 242{ 243 return (ipv6_addr_type(daddr) & 244 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); 245} 246 247/* 248 * Route lookup. Any table->tb6_lock is implied. 249 */ 250 251static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, 252 int oif, 253 int strict) 254{ 255 struct rt6_info *local = NULL; 256 struct rt6_info *sprt; 257 258 if (oif) { 259 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 260 struct net_device *dev = sprt->rt6i_dev; 261 if (dev->ifindex == oif) 262 return sprt; 263 if (dev->flags & IFF_LOOPBACK) { 264 if (sprt->rt6i_idev == NULL || 265 sprt->rt6i_idev->dev->ifindex != oif) { 266 if (strict && oif) 267 continue; 268 if (local && (!oif || 269 local->rt6i_idev->dev->ifindex == oif)) 270 continue; 271 } 272 local = sprt; 273 } 274 } 275 276 if (local) 277 return local; 278 279 if (strict) 280 return &ip6_null_entry; 281 } 282 return rt; 283} 284 285#ifdef CONFIG_IPV6_ROUTER_PREF 286static void rt6_probe(struct rt6_info *rt) 287{ 288 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 289 /* 290 * Okay, this does not seem to be appropriate 291 * for now, however, we need to check if it 292 * is really so; aka Router Reachability Probing. 293 * 294 * Router Reachability Probe MUST be rate-limited 295 * to no more than one per minute. 296 */ 297 if (!neigh || (neigh->nud_state & NUD_VALID)) 298 return; 299 read_lock_bh(&neigh->lock); 300 if (!(neigh->nud_state & NUD_VALID) && 301 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 302 struct in6_addr mcaddr; 303 struct in6_addr *target; 304 305 neigh->updated = jiffies; 306 read_unlock_bh(&neigh->lock); 307 308 target = (struct in6_addr *)&neigh->primary_key; 309 addrconf_addr_solict_mult(target, &mcaddr); 310 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 311 } else 312 read_unlock_bh(&neigh->lock); 313} 314#else 315static inline void rt6_probe(struct rt6_info *rt) 316{ 317 return; 318} 319#endif 320 321/* 322 * Default Router Selection (RFC 2461 6.3.6) 323 */ 324static inline int rt6_check_dev(struct rt6_info *rt, int oif) 325{ 326 struct net_device *dev = rt->rt6i_dev; 327 if (!oif || dev->ifindex == oif) 328 return 2; 329 if ((dev->flags & IFF_LOOPBACK) && 330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 331 return 1; 332 return 0; 333} 334 335static inline int rt6_check_neigh(struct rt6_info *rt) 336{ 337 struct neighbour *neigh = rt->rt6i_nexthop; 338 int m = 0; 339 if (rt->rt6i_flags & RTF_NONEXTHOP || 340 !(rt->rt6i_flags & RTF_GATEWAY)) 341 m = 1; 342 else if (neigh) { 343 read_lock_bh(&neigh->lock); 344 if (neigh->nud_state & NUD_VALID) 345 m = 2; 346 else if (!(neigh->nud_state & NUD_FAILED)) 347 m = 1; 348 read_unlock_bh(&neigh->lock); 349 } 350 return m; 351} 352 353static int rt6_score_route(struct rt6_info *rt, int oif, 354 int strict) 355{ 356 int m, n; 357 358 m = rt6_check_dev(rt, oif); 359 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 360 return -1; 361#ifdef CONFIG_IPV6_ROUTER_PREF 362 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 363#endif 364 n = rt6_check_neigh(rt); 365 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 366 return -1; 367 return m; 368} 369 370static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 371 int *mpri, struct rt6_info *match) 372{ 373 int m; 374 375 if (rt6_check_expired(rt)) 376 goto out; 377 378 m = rt6_score_route(rt, oif, strict); 379 if (m < 0) 380 goto out; 381 382 if (m > *mpri) { 383 if (strict & RT6_LOOKUP_F_REACHABLE) 384 rt6_probe(match); 385 *mpri = m; 386 match = rt; 387 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 388 rt6_probe(rt); 389 } 390 391out: 392 return match; 393} 394 395static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 396 struct rt6_info *rr_head, 397 u32 metric, int oif, int strict) 398{ 399 struct rt6_info *rt, *match; 400 int mpri = -1; 401 402 match = NULL; 403 for (rt = rr_head; rt && rt->rt6i_metric == metric; 404 rt = rt->u.dst.rt6_next) 405 match = find_match(rt, oif, strict, &mpri, match); 406 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 407 rt = rt->u.dst.rt6_next) 408 match = find_match(rt, oif, strict, &mpri, match); 409 410 return match; 411} 412 413static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 414{ 415 struct rt6_info *match, *rt0; 416 417 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 418 __FUNCTION__, fn->leaf, oif); 419 420 rt0 = fn->rr_ptr; 421 if (!rt0) 422 fn->rr_ptr = rt0 = fn->leaf; 423 424 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 425 426 if (!match && 427 (strict & RT6_LOOKUP_F_REACHABLE)) { 428 struct rt6_info *next = rt0->u.dst.rt6_next; 429 430 /* no entries matched; do round-robin */ 431 if (!next || next->rt6i_metric != rt0->rt6i_metric) 432 next = fn->leaf; 433 434 if (next != rt0) 435 fn->rr_ptr = next; 436 } 437 438 RT6_TRACE("%s() => %p\n", 439 __FUNCTION__, match); 440 441 return (match ? match : &ip6_null_entry); 442} 443 444#ifdef CONFIG_IPV6_ROUTE_INFO 445int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 446 struct in6_addr *gwaddr) 447{ 448 struct route_info *rinfo = (struct route_info *) opt; 449 struct in6_addr prefix_buf, *prefix; 450 unsigned int pref; 451 u32 lifetime; 452 struct rt6_info *rt; 453 454 if (len < sizeof(struct route_info)) { 455 return -EINVAL; 456 } 457 458 /* Sanity check for prefix_len and length */ 459 if (rinfo->length > 3) { 460 return -EINVAL; 461 } else if (rinfo->prefix_len > 128) { 462 return -EINVAL; 463 } else if (rinfo->prefix_len > 64) { 464 if (rinfo->length < 2) { 465 return -EINVAL; 466 } 467 } else if (rinfo->prefix_len > 0) { 468 if (rinfo->length < 1) { 469 return -EINVAL; 470 } 471 } 472 473 pref = rinfo->route_pref; 474 if (pref == ICMPV6_ROUTER_PREF_INVALID) 475 pref = ICMPV6_ROUTER_PREF_MEDIUM; 476 477 lifetime = ntohl(rinfo->lifetime); 478 if (lifetime == 0xffffffff) { 479 /* infinity */ 480 } else if (lifetime > 0x7fffffff/HZ) { 481 /* Avoid arithmetic overflow */ 482 lifetime = 0x7fffffff/HZ - 1; 483 } 484 485 if (rinfo->length == 3) 486 prefix = (struct in6_addr *)rinfo->prefix; 487 else { 488 /* this function is safe */ 489 ipv6_addr_prefix(&prefix_buf, 490 (struct in6_addr *)rinfo->prefix, 491 rinfo->prefix_len); 492 prefix = &prefix_buf; 493 } 494 495 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); 496 497 if (rt && !lifetime) { 498 ip6_del_rt(rt); 499 rt = NULL; 500 } 501 502 if (!rt && lifetime) 503 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 504 pref); 505 else if (rt) 506 rt->rt6i_flags = RTF_ROUTEINFO | 507 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 508 509 if (rt) { 510 if (lifetime == 0xffffffff) { 511 rt->rt6i_flags &= ~RTF_EXPIRES; 512 } else { 513 rt->rt6i_expires = jiffies + HZ * lifetime; 514 rt->rt6i_flags |= RTF_EXPIRES; 515 } 516 dst_release(&rt->u.dst); 517 } 518 return 0; 519} 520#endif 521 522#define BACKTRACK(saddr) \ 523do { \ 524 if (rt == &ip6_null_entry) { \ 525 struct fib6_node *pn; \ 526 while (1) { \ 527 if (fn->fn_flags & RTN_TL_ROOT) \ 528 goto out; \ 529 pn = fn->parent; \ 530 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 531 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 532 else \ 533 fn = pn; \ 534 if (fn->fn_flags & RTN_RTINFO) \ 535 goto restart; \ 536 } \ 537 } \ 538} while(0) 539 540static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, 541 struct flowi *fl, int flags) 542{ 543 struct fib6_node *fn; 544 struct rt6_info *rt; 545 546 read_lock_bh(&table->tb6_lock); 547 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 548restart: 549 rt = fn->leaf; 550 rt = rt6_device_match(rt, fl->oif, flags); 551 BACKTRACK(&fl->fl6_src); 552out: 553 dst_hold(&rt->u.dst); 554 read_unlock_bh(&table->tb6_lock); 555 556 rt->u.dst.lastuse = jiffies; 557 rt->u.dst.__use++; 558 559 return rt; 560 561} 562 563struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 564 int oif, int strict) 565{ 566 struct flowi fl = { 567 .oif = oif, 568 .nl_u = { 569 .ip6_u = { 570 .daddr = *daddr, 571 }, 572 }, 573 }; 574 struct dst_entry *dst; 575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 576 577 if (saddr) { 578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 579 flags |= RT6_LOOKUP_F_HAS_SADDR; 580 } 581 582 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); 583 if (dst->error == 0) 584 return (struct rt6_info *) dst; 585 586 dst_release(dst); 587 588 return NULL; 589} 590 591EXPORT_SYMBOL(rt6_lookup); 592 593/* ip6_ins_rt is called with FREE table->tb6_lock. 594 It takes new route entry, the addition fails by any reason the 595 route is freed. In any case, if caller does not hold it, it may 596 be destroyed. 597 */ 598 599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 600{ 601 int err; 602 struct fib6_table *table; 603 604 table = rt->rt6i_table; 605 write_lock_bh(&table->tb6_lock); 606 err = fib6_add(&table->tb6_root, rt, info); 607 write_unlock_bh(&table->tb6_lock); 608 609 return err; 610} 611 612int ip6_ins_rt(struct rt6_info *rt) 613{ 614 return __ip6_ins_rt(rt, NULL); 615} 616 617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 618 struct in6_addr *saddr) 619{ 620 struct rt6_info *rt; 621 622 /* 623 * Clone the route. 624 */ 625 626 rt = ip6_rt_copy(ort); 627 628 if (rt) { 629 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 630 if (rt->rt6i_dst.plen != 128 && 631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 632 rt->rt6i_flags |= RTF_ANYCAST; 633 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 634 } 635 636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 637 rt->rt6i_dst.plen = 128; 638 rt->rt6i_flags |= RTF_CACHE; 639 rt->u.dst.flags |= DST_HOST; 640 641#ifdef CONFIG_IPV6_SUBTREES 642 if (rt->rt6i_src.plen && saddr) { 643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 644 rt->rt6i_src.plen = 128; 645 } 646#endif 647 648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 649 650 } 651 652 return rt; 653} 654 655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 656{ 657 struct rt6_info *rt = ip6_rt_copy(ort); 658 if (rt) { 659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 660 rt->rt6i_dst.plen = 128; 661 rt->rt6i_flags |= RTF_CACHE; 662 rt->u.dst.flags |= DST_HOST; 663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 664 } 665 return rt; 666} 667 668static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, 669 struct flowi *fl, int flags) 670{ 671 struct fib6_node *fn; 672 struct rt6_info *rt, *nrt; 673 int strict = 0; 674 int attempts = 3; 675 int err; 676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 677 678 strict |= flags & RT6_LOOKUP_F_IFACE; 679 680relookup: 681 read_lock_bh(&table->tb6_lock); 682 683restart_2: 684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 685 686restart: 687 rt = rt6_select(fn, fl->iif, strict | reachable); 688 BACKTRACK(&fl->fl6_src); 689 if (rt == &ip6_null_entry || 690 rt->rt6i_flags & RTF_CACHE) 691 goto out; 692 693 dst_hold(&rt->u.dst); 694 read_unlock_bh(&table->tb6_lock); 695 696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 698 else { 699#if CLONE_OFFLINK_ROUTE 700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 701#else 702 goto out2; 703#endif 704 } 705 706 dst_release(&rt->u.dst); 707 rt = nrt ? : &ip6_null_entry; 708 709 dst_hold(&rt->u.dst); 710 if (nrt) { 711 err = ip6_ins_rt(nrt); 712 if (!err) 713 goto out2; 714 } 715 716 if (--attempts <= 0) 717 goto out2; 718 719 /* 720 * Race condition! In the gap, when table->tb6_lock was 721 * released someone could insert this route. Relookup. 722 */ 723 dst_release(&rt->u.dst); 724 goto relookup; 725 726out: 727 if (reachable) { 728 reachable = 0; 729 goto restart_2; 730 } 731 dst_hold(&rt->u.dst); 732 read_unlock_bh(&table->tb6_lock); 733out2: 734 rt->u.dst.lastuse = jiffies; 735 rt->u.dst.__use++; 736 737 return rt; 738} 739 740void ip6_route_input(struct sk_buff *skb) 741{ 742 struct ipv6hdr *iph = ipv6_hdr(skb); 743 int flags = RT6_LOOKUP_F_HAS_SADDR; 744 struct flowi fl = { 745 .iif = skb->dev->ifindex, 746 .nl_u = { 747 .ip6_u = { 748 .daddr = iph->daddr, 749 .saddr = iph->saddr, 750 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 751 }, 752 }, 753 .mark = skb->mark, 754 .proto = iph->nexthdr, 755 }; 756 757 if (rt6_need_strict(&iph->daddr)) 758 flags |= RT6_LOOKUP_F_IFACE; 759 760 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); 761} 762 763static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, 764 struct flowi *fl, int flags) 765{ 766 struct fib6_node *fn; 767 struct rt6_info *rt, *nrt; 768 int strict = 0; 769 int attempts = 3; 770 int err; 771 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 772 773 strict |= flags & RT6_LOOKUP_F_IFACE; 774 775relookup: 776 read_lock_bh(&table->tb6_lock); 777 778restart_2: 779 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 780 781restart: 782 rt = rt6_select(fn, fl->oif, strict | reachable); 783 BACKTRACK(&fl->fl6_src); 784 if (rt == &ip6_null_entry || 785 rt->rt6i_flags & RTF_CACHE) 786 goto out; 787 788 dst_hold(&rt->u.dst); 789 read_unlock_bh(&table->tb6_lock); 790 791 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 792 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 793 else { 794#if CLONE_OFFLINK_ROUTE 795 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 796#else 797 goto out2; 798#endif 799 } 800 801 dst_release(&rt->u.dst); 802 rt = nrt ? : &ip6_null_entry; 803 804 dst_hold(&rt->u.dst); 805 if (nrt) { 806 err = ip6_ins_rt(nrt); 807 if (!err) 808 goto out2; 809 } 810 811 if (--attempts <= 0) 812 goto out2; 813 814 /* 815 * Race condition! In the gap, when table->tb6_lock was 816 * released someone could insert this route. Relookup. 817 */ 818 dst_release(&rt->u.dst); 819 goto relookup; 820 821out: 822 if (reachable) { 823 reachable = 0; 824 goto restart_2; 825 } 826 dst_hold(&rt->u.dst); 827 read_unlock_bh(&table->tb6_lock); 828out2: 829 rt->u.dst.lastuse = jiffies; 830 rt->u.dst.__use++; 831 return rt; 832} 833 834struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) 835{ 836 int flags = 0; 837 838 if (rt6_need_strict(&fl->fl6_dst)) 839 flags |= RT6_LOOKUP_F_IFACE; 840 841 if (!ipv6_addr_any(&fl->fl6_src)) 842 flags |= RT6_LOOKUP_F_HAS_SADDR; 843 844 return fib6_rule_lookup(fl, flags, ip6_pol_route_output); 845} 846 847EXPORT_SYMBOL(ip6_route_output); 848 849static int ip6_blackhole_output(struct sk_buff *skb) 850{ 851 kfree_skb(skb); 852 return 0; 853} 854 855int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 856{ 857 struct rt6_info *ort = (struct rt6_info *) *dstp; 858 struct rt6_info *rt = (struct rt6_info *) 859 dst_alloc(&ip6_dst_blackhole_ops); 860 struct dst_entry *new = NULL; 861 862 if (rt) { 863 new = &rt->u.dst; 864 865 atomic_set(&new->__refcnt, 1); 866 new->__use = 1; 867 new->input = ip6_blackhole_output; 868 new->output = ip6_blackhole_output; 869 870 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 871 new->dev = ort->u.dst.dev; 872 if (new->dev) 873 dev_hold(new->dev); 874 rt->rt6i_idev = ort->rt6i_idev; 875 if (rt->rt6i_idev) 876 in6_dev_hold(rt->rt6i_idev); 877 rt->rt6i_expires = 0; 878 879 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 880 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 881 rt->rt6i_metric = 0; 882 883 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 884#ifdef CONFIG_IPV6_SUBTREES 885 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 886#endif 887 888 dst_free(new); 889 } 890 891 dst_release(*dstp); 892 *dstp = new; 893 return (new ? 0 : -ENOMEM); 894} 895EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 896 897/* 898 * Destination cache support functions 899 */ 900 901static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 902{ 903 struct rt6_info *rt; 904 905 rt = (struct rt6_info *) dst; 906 907 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 908 return dst; 909 910 return NULL; 911} 912 913static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 914{ 915 struct rt6_info *rt = (struct rt6_info *) dst; 916 917 if (rt) { 918 if (rt->rt6i_flags & RTF_CACHE) 919 ip6_del_rt(rt); 920 else 921 dst_release(dst); 922 } 923 return NULL; 924} 925 926static void ip6_link_failure(struct sk_buff *skb) 927{ 928 struct rt6_info *rt; 929 930 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 931 932 rt = (struct rt6_info *) skb->dst; 933 if (rt) { 934 if (rt->rt6i_flags&RTF_CACHE) { 935 dst_set_expires(&rt->u.dst, 0); 936 rt->rt6i_flags |= RTF_EXPIRES; 937 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 938 rt->rt6i_node->fn_sernum = -1; 939 } 940} 941 942static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 943{ 944 struct rt6_info *rt6 = (struct rt6_info*)dst; 945 946 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 947 rt6->rt6i_flags |= RTF_MODIFIED; 948 if (mtu < IPV6_MIN_MTU) { 949 mtu = IPV6_MIN_MTU; 950 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 951 } 952 dst->metrics[RTAX_MTU-1] = mtu; 953 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 954 } 955} 956 957static int ipv6_get_mtu(struct net_device *dev); 958 959static inline unsigned int ipv6_advmss(unsigned int mtu) 960{ 961 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 962 963 if (mtu < ip6_rt_min_advmss) 964 mtu = ip6_rt_min_advmss; 965 966 /* 967 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 968 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 969 * IPV6_MAXPLEN is also valid and means: "any MSS, 970 * rely only on pmtu discovery" 971 */ 972 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 973 mtu = IPV6_MAXPLEN; 974 return mtu; 975} 976 977static struct dst_entry *ndisc_dst_gc_list; 978static DEFINE_SPINLOCK(ndisc_lock); 979 980struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 981 struct neighbour *neigh, 982 struct in6_addr *addr, 983 int (*output)(struct sk_buff *)) 984{ 985 struct rt6_info *rt; 986 struct inet6_dev *idev = in6_dev_get(dev); 987 988 if (unlikely(idev == NULL)) 989 return NULL; 990 991 rt = ip6_dst_alloc(); 992 if (unlikely(rt == NULL)) { 993 in6_dev_put(idev); 994 goto out; 995 } 996 997 dev_hold(dev); 998 if (neigh) 999 neigh_hold(neigh); 1000 else 1001 neigh = ndisc_get_neigh(dev, addr); 1002 1003 rt->rt6i_dev = dev; 1004 rt->rt6i_idev = idev; 1005 rt->rt6i_nexthop = neigh; 1006 atomic_set(&rt->u.dst.__refcnt, 1); 1007 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 1008 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1009 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1010 rt->u.dst.output = output; 1011 1012#if 0 /* there's no chance to use these for ndisc */ 1013 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 1014 ? DST_HOST 1015 : 0; 1016 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1017 rt->rt6i_dst.plen = 128; 1018#endif 1019 1020 spin_lock_bh(&ndisc_lock); 1021 rt->u.dst.next = ndisc_dst_gc_list; 1022 ndisc_dst_gc_list = &rt->u.dst; 1023 spin_unlock_bh(&ndisc_lock); 1024 1025 fib6_force_start_gc(); 1026 1027out: 1028 return &rt->u.dst; 1029} 1030 1031int ndisc_dst_gc(int *more) 1032{ 1033 struct dst_entry *dst, *next, **pprev; 1034 int freed; 1035 1036 next = NULL; 1037 freed = 0; 1038 1039 spin_lock_bh(&ndisc_lock); 1040 pprev = &ndisc_dst_gc_list; 1041 1042 while ((dst = *pprev) != NULL) { 1043 if (!atomic_read(&dst->__refcnt)) { 1044 *pprev = dst->next; 1045 dst_free(dst); 1046 freed++; 1047 } else { 1048 pprev = &dst->next; 1049 (*more)++; 1050 } 1051 } 1052 1053 spin_unlock_bh(&ndisc_lock); 1054 1055 return freed; 1056} 1057 1058static int ip6_dst_gc(void) 1059{ 1060 static unsigned expire = 30*HZ; 1061 static unsigned long last_gc; 1062 unsigned long now = jiffies; 1063 1064 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 1065 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 1066 goto out; 1067 1068 expire++; 1069 fib6_run_gc(expire); 1070 last_gc = now; 1071 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 1072 expire = ip6_rt_gc_timeout>>1; 1073 1074out: 1075 expire -= expire>>ip6_rt_gc_elasticity; 1076 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 1077} 1078 1079/* Clean host part of a prefix. Not necessary in radix tree, 1080 but results in cleaner routing tables. 1081 1082 Remove it only when all the things will work! 1083 */ 1084 1085static int ipv6_get_mtu(struct net_device *dev) 1086{ 1087 int mtu = IPV6_MIN_MTU; 1088 struct inet6_dev *idev; 1089 1090 idev = in6_dev_get(dev); 1091 if (idev) { 1092 mtu = idev->cnf.mtu6; 1093 in6_dev_put(idev); 1094 } 1095 return mtu; 1096} 1097 1098int ipv6_get_hoplimit(struct net_device *dev) 1099{ 1100 int hoplimit = ipv6_devconf.hop_limit; 1101 struct inet6_dev *idev; 1102 1103 idev = in6_dev_get(dev); 1104 if (idev) { 1105 hoplimit = idev->cnf.hop_limit; 1106 in6_dev_put(idev); 1107 } 1108 return hoplimit; 1109} 1110 1111/* 1112 * 1113 */ 1114 1115int ip6_route_add(struct fib6_config *cfg) 1116{ 1117 int err; 1118 struct rt6_info *rt = NULL; 1119 struct net_device *dev = NULL; 1120 struct inet6_dev *idev = NULL; 1121 struct fib6_table *table; 1122 int addr_type; 1123 1124 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1125 return -EINVAL; 1126#ifndef CONFIG_IPV6_SUBTREES 1127 if (cfg->fc_src_len) 1128 return -EINVAL; 1129#endif 1130 if (cfg->fc_ifindex) { 1131 err = -ENODEV; 1132 dev = dev_get_by_index(cfg->fc_ifindex); 1133 if (!dev) 1134 goto out; 1135 idev = in6_dev_get(dev); 1136 if (!idev) 1137 goto out; 1138 } 1139 1140 if (cfg->fc_metric == 0) 1141 cfg->fc_metric = IP6_RT_PRIO_USER; 1142 1143 table = fib6_new_table(cfg->fc_table); 1144 if (table == NULL) { 1145 err = -ENOBUFS; 1146 goto out; 1147 } 1148 1149 rt = ip6_dst_alloc(); 1150 1151 if (rt == NULL) { 1152 err = -ENOMEM; 1153 goto out; 1154 } 1155 1156 rt->u.dst.obsolete = -1; 1157 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); 1158 1159 if (cfg->fc_protocol == RTPROT_UNSPEC) 1160 cfg->fc_protocol = RTPROT_BOOT; 1161 rt->rt6i_protocol = cfg->fc_protocol; 1162 1163 addr_type = ipv6_addr_type(&cfg->fc_dst); 1164 1165 if (addr_type & IPV6_ADDR_MULTICAST) 1166 rt->u.dst.input = ip6_mc_input; 1167 else 1168 rt->u.dst.input = ip6_forward; 1169 1170 rt->u.dst.output = ip6_output; 1171 1172 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1173 rt->rt6i_dst.plen = cfg->fc_dst_len; 1174 if (rt->rt6i_dst.plen == 128) 1175 rt->u.dst.flags = DST_HOST; 1176 1177#ifdef CONFIG_IPV6_SUBTREES 1178 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1179 rt->rt6i_src.plen = cfg->fc_src_len; 1180#endif 1181 1182 rt->rt6i_metric = cfg->fc_metric; 1183 1184 /* We cannot add true routes via loopback here, 1185 they would result in kernel looping; promote them to reject routes 1186 */ 1187 if ((cfg->fc_flags & RTF_REJECT) || 1188 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1189 /* hold loopback dev/idev if we haven't done so. */ 1190 if (dev != &loopback_dev) { 1191 if (dev) { 1192 dev_put(dev); 1193 in6_dev_put(idev); 1194 } 1195 dev = &loopback_dev; 1196 dev_hold(dev); 1197 idev = in6_dev_get(dev); 1198 if (!idev) { 1199 err = -ENODEV; 1200 goto out; 1201 } 1202 } 1203 rt->u.dst.output = ip6_pkt_discard_out; 1204 rt->u.dst.input = ip6_pkt_discard; 1205 rt->u.dst.error = -ENETUNREACH; 1206 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1207 goto install_route; 1208 } 1209 1210 if (cfg->fc_flags & RTF_GATEWAY) { 1211 struct in6_addr *gw_addr; 1212 int gwa_type; 1213 1214 gw_addr = &cfg->fc_gateway; 1215 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1216 gwa_type = ipv6_addr_type(gw_addr); 1217 1218 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1219 struct rt6_info *grt; 1220 1221 /* IPv6 strictly inhibits using not link-local 1222 addresses as nexthop address. 1223 Otherwise, router will not able to send redirects. 1224 It is very good, but in some (rare!) circumstances 1225 (SIT, PtP, NBMA NOARP links) it is handy to allow 1226 some exceptions. --ANK 1227 */ 1228 err = -EINVAL; 1229 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1230 goto out; 1231 1232 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); 1233 1234 err = -EHOSTUNREACH; 1235 if (grt == NULL) 1236 goto out; 1237 if (dev) { 1238 if (dev != grt->rt6i_dev) { 1239 dst_release(&grt->u.dst); 1240 goto out; 1241 } 1242 } else { 1243 dev = grt->rt6i_dev; 1244 idev = grt->rt6i_idev; 1245 dev_hold(dev); 1246 in6_dev_hold(grt->rt6i_idev); 1247 } 1248 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1249 err = 0; 1250 dst_release(&grt->u.dst); 1251 1252 if (err) 1253 goto out; 1254 } 1255 err = -EINVAL; 1256 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1257 goto out; 1258 } 1259 1260 err = -ENODEV; 1261 if (dev == NULL) 1262 goto out; 1263 1264 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1265 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1266 if (IS_ERR(rt->rt6i_nexthop)) { 1267 err = PTR_ERR(rt->rt6i_nexthop); 1268 rt->rt6i_nexthop = NULL; 1269 goto out; 1270 } 1271 } 1272 1273 rt->rt6i_flags = cfg->fc_flags; 1274 1275install_route: 1276 if (cfg->fc_mx) { 1277 struct nlattr *nla; 1278 int remaining; 1279 1280 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1281 int type = nla->nla_type; 1282 1283 if (type) { 1284 if (type > RTAX_MAX) { 1285 err = -EINVAL; 1286 goto out; 1287 } 1288 1289 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1290 } 1291 } 1292 } 1293 1294 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1295 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1296 if (!rt->u.dst.metrics[RTAX_MTU-1]) 1297 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1298 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 1299 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1300 rt->u.dst.dev = dev; 1301 rt->rt6i_idev = idev; 1302 rt->rt6i_table = table; 1303 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1304 1305out: 1306 if (dev) 1307 dev_put(dev); 1308 if (idev) 1309 in6_dev_put(idev); 1310 if (rt) 1311 dst_free(&rt->u.dst); 1312 return err; 1313} 1314 1315static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1316{ 1317 int err; 1318 struct fib6_table *table; 1319 1320 if (rt == &ip6_null_entry) 1321 return -ENOENT; 1322 1323 table = rt->rt6i_table; 1324 write_lock_bh(&table->tb6_lock); 1325 1326 err = fib6_del(rt, info); 1327 dst_release(&rt->u.dst); 1328 1329 write_unlock_bh(&table->tb6_lock); 1330 1331 return err; 1332} 1333 1334int ip6_del_rt(struct rt6_info *rt) 1335{ 1336 return __ip6_del_rt(rt, NULL); 1337} 1338 1339static int ip6_route_del(struct fib6_config *cfg) 1340{ 1341 struct fib6_table *table; 1342 struct fib6_node *fn; 1343 struct rt6_info *rt; 1344 int err = -ESRCH; 1345 1346 table = fib6_get_table(cfg->fc_table); 1347 if (table == NULL) 1348 return err; 1349 1350 read_lock_bh(&table->tb6_lock); 1351 1352 fn = fib6_locate(&table->tb6_root, 1353 &cfg->fc_dst, cfg->fc_dst_len, 1354 &cfg->fc_src, cfg->fc_src_len); 1355 1356 if (fn) { 1357 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1358 if (cfg->fc_ifindex && 1359 (rt->rt6i_dev == NULL || 1360 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1361 continue; 1362 if (cfg->fc_flags & RTF_GATEWAY && 1363 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1364 continue; 1365 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1366 continue; 1367 dst_hold(&rt->u.dst); 1368 read_unlock_bh(&table->tb6_lock); 1369 1370 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1371 } 1372 } 1373 read_unlock_bh(&table->tb6_lock); 1374 1375 return err; 1376} 1377 1378/* 1379 * Handle redirects 1380 */ 1381struct ip6rd_flowi { 1382 struct flowi fl; 1383 struct in6_addr gateway; 1384}; 1385 1386static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, 1387 struct flowi *fl, 1388 int flags) 1389{ 1390 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1391 struct rt6_info *rt; 1392 struct fib6_node *fn; 1393 1394 /* 1395 * Get the "current" route for this destination and 1396 * check if the redirect has come from approriate router. 1397 * 1398 * RFC 2461 specifies that redirects should only be 1399 * accepted if they come from the nexthop to the target. 1400 * Due to the way the routes are chosen, this notion 1401 * is a bit fuzzy and one might need to check all possible 1402 * routes. 1403 */ 1404 1405 read_lock_bh(&table->tb6_lock); 1406 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1407restart: 1408 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1409 /* 1410 * Current route is on-link; redirect is always invalid. 1411 * 1412 * Seems, previous statement is not true. It could 1413 * be node, which looks for us as on-link (f.e. proxy ndisc) 1414 * But then router serving it might decide, that we should 1415 * know truth 8)8) --ANK (980726). 1416 */ 1417 if (rt6_check_expired(rt)) 1418 continue; 1419 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1420 continue; 1421 if (fl->oif != rt->rt6i_dev->ifindex) 1422 continue; 1423 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1424 continue; 1425 break; 1426 } 1427 1428 if (!rt) 1429 rt = &ip6_null_entry; 1430 BACKTRACK(&fl->fl6_src); 1431out: 1432 dst_hold(&rt->u.dst); 1433 1434 read_unlock_bh(&table->tb6_lock); 1435 1436 return rt; 1437}; 1438 1439static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1440 struct in6_addr *src, 1441 struct in6_addr *gateway, 1442 struct net_device *dev) 1443{ 1444 int flags = RT6_LOOKUP_F_HAS_SADDR; 1445 struct ip6rd_flowi rdfl = { 1446 .fl = { 1447 .oif = dev->ifindex, 1448 .nl_u = { 1449 .ip6_u = { 1450 .daddr = *dest, 1451 .saddr = *src, 1452 }, 1453 }, 1454 }, 1455 .gateway = *gateway, 1456 }; 1457 1458 if (rt6_need_strict(dest)) 1459 flags |= RT6_LOOKUP_F_IFACE; 1460 1461 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); 1462} 1463 1464void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1465 struct in6_addr *saddr, 1466 struct neighbour *neigh, u8 *lladdr, int on_link) 1467{ 1468 struct rt6_info *rt, *nrt = NULL; 1469 struct netevent_redirect netevent; 1470 1471 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1472 1473 if (rt == &ip6_null_entry) { 1474 if (net_ratelimit()) 1475 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1476 "for redirect target\n"); 1477 goto out; 1478 } 1479 1480 /* 1481 * We have finally decided to accept it. 1482 */ 1483 1484 neigh_update(neigh, lladdr, NUD_STALE, 1485 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1486 NEIGH_UPDATE_F_OVERRIDE| 1487 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1488 NEIGH_UPDATE_F_ISROUTER)) 1489 ); 1490 1491 /* 1492 * Redirect received -> path was valid. 1493 * Look, redirects are sent only in response to data packets, 1494 * so that this nexthop apparently is reachable. --ANK 1495 */ 1496 dst_confirm(&rt->u.dst); 1497 1498 /* Duplicate redirect: silently ignore. */ 1499 if (neigh == rt->u.dst.neighbour) 1500 goto out; 1501 1502 nrt = ip6_rt_copy(rt); 1503 if (nrt == NULL) 1504 goto out; 1505 1506 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1507 if (on_link) 1508 nrt->rt6i_flags &= ~RTF_GATEWAY; 1509 1510 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1511 nrt->rt6i_dst.plen = 128; 1512 nrt->u.dst.flags |= DST_HOST; 1513 1514 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1515 nrt->rt6i_nexthop = neigh_clone(neigh); 1516 /* Reset pmtu, it may be better */ 1517 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1518 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); 1519 1520 if (ip6_ins_rt(nrt)) 1521 goto out; 1522 1523 netevent.old = &rt->u.dst; 1524 netevent.new = &nrt->u.dst; 1525 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1526 1527 if (rt->rt6i_flags&RTF_CACHE) { 1528 ip6_del_rt(rt); 1529 return; 1530 } 1531 1532out: 1533 dst_release(&rt->u.dst); 1534 return; 1535} 1536 1537/* 1538 * Handle ICMP "packet too big" messages 1539 * i.e. Path MTU discovery 1540 */ 1541 1542void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1543 struct net_device *dev, u32 pmtu) 1544{ 1545 struct rt6_info *rt, *nrt; 1546 int allfrag = 0; 1547 1548 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); 1549 if (rt == NULL) 1550 return; 1551 1552 if (pmtu >= dst_mtu(&rt->u.dst)) 1553 goto out; 1554 1555 if (pmtu < IPV6_MIN_MTU) { 1556 /* 1557 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1558 * MTU (1280) and a fragment header should always be included 1559 * after a node receiving Too Big message reporting PMTU is 1560 * less than the IPv6 Minimum Link MTU. 1561 */ 1562 pmtu = IPV6_MIN_MTU; 1563 allfrag = 1; 1564 } 1565 1566 /* New mtu received -> path was valid. 1567 They are sent only in response to data packets, 1568 so that this nexthop apparently is reachable. --ANK 1569 */ 1570 dst_confirm(&rt->u.dst); 1571 1572 /* Host route. If it is static, it would be better 1573 not to override it, but add new one, so that 1574 when cache entry will expire old pmtu 1575 would return automatically. 1576 */ 1577 if (rt->rt6i_flags & RTF_CACHE) { 1578 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1579 if (allfrag) 1580 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1581 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1582 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1583 goto out; 1584 } 1585 1586 /* Network route. 1587 Two cases are possible: 1588 1. It is connected route. Action: COW 1589 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1590 */ 1591 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1592 nrt = rt6_alloc_cow(rt, daddr, saddr); 1593 else 1594 nrt = rt6_alloc_clone(rt, daddr); 1595 1596 if (nrt) { 1597 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1598 if (allfrag) 1599 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1600 1601 /* According to RFC 1981, detecting PMTU increase shouldn't be 1602 * happened within 5 mins, the recommended timer is 10 mins. 1603 * Here this route expiration time is set to ip6_rt_mtu_expires 1604 * which is 10 mins. After 10 mins the decreased pmtu is expired 1605 * and detecting PMTU increase will be automatically happened. 1606 */ 1607 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1608 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1609 1610 ip6_ins_rt(nrt); 1611 } 1612out: 1613 dst_release(&rt->u.dst); 1614} 1615 1616/* 1617 * Misc support functions 1618 */ 1619 1620static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1621{ 1622 struct rt6_info *rt = ip6_dst_alloc(); 1623 1624 if (rt) { 1625 rt->u.dst.input = ort->u.dst.input; 1626 rt->u.dst.output = ort->u.dst.output; 1627 1628 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1629 rt->u.dst.error = ort->u.dst.error; 1630 rt->u.dst.dev = ort->u.dst.dev; 1631 if (rt->u.dst.dev) 1632 dev_hold(rt->u.dst.dev); 1633 rt->rt6i_idev = ort->rt6i_idev; 1634 if (rt->rt6i_idev) 1635 in6_dev_hold(rt->rt6i_idev); 1636 rt->u.dst.lastuse = jiffies; 1637 rt->rt6i_expires = 0; 1638 1639 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1640 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1641 rt->rt6i_metric = 0; 1642 1643 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1644#ifdef CONFIG_IPV6_SUBTREES 1645 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1646#endif 1647 rt->rt6i_table = ort->rt6i_table; 1648 } 1649 return rt; 1650} 1651 1652#ifdef CONFIG_IPV6_ROUTE_INFO 1653static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 1654 struct in6_addr *gwaddr, int ifindex) 1655{ 1656 struct fib6_node *fn; 1657 struct rt6_info *rt = NULL; 1658 struct fib6_table *table; 1659 1660 table = fib6_get_table(RT6_TABLE_INFO); 1661 if (table == NULL) 1662 return NULL; 1663 1664 write_lock_bh(&table->tb6_lock); 1665 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1666 if (!fn) 1667 goto out; 1668 1669 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1670 if (rt->rt6i_dev->ifindex != ifindex) 1671 continue; 1672 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1673 continue; 1674 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1675 continue; 1676 dst_hold(&rt->u.dst); 1677 break; 1678 } 1679out: 1680 write_unlock_bh(&table->tb6_lock); 1681 return rt; 1682} 1683 1684static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 1685 struct in6_addr *gwaddr, int ifindex, 1686 unsigned pref) 1687{ 1688 struct fib6_config cfg = { 1689 .fc_table = RT6_TABLE_INFO, 1690 .fc_metric = 1024, 1691 .fc_ifindex = ifindex, 1692 .fc_dst_len = prefixlen, 1693 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1694 RTF_UP | RTF_PREF(pref), 1695 }; 1696 1697 ipv6_addr_copy(&cfg.fc_dst, prefix); 1698 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1699 1700 /* We should treat it as a default route if prefix length is 0. */ 1701 if (!prefixlen) 1702 cfg.fc_flags |= RTF_DEFAULT; 1703 1704 ip6_route_add(&cfg); 1705 1706 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); 1707} 1708#endif 1709 1710struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1711{ 1712 struct rt6_info *rt; 1713 struct fib6_table *table; 1714 1715 table = fib6_get_table(RT6_TABLE_DFLT); 1716 if (table == NULL) 1717 return NULL; 1718 1719 write_lock_bh(&table->tb6_lock); 1720 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1721 if (dev == rt->rt6i_dev && 1722 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1723 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1724 break; 1725 } 1726 if (rt) 1727 dst_hold(&rt->u.dst); 1728 write_unlock_bh(&table->tb6_lock); 1729 return rt; 1730} 1731 1732struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1733 struct net_device *dev, 1734 unsigned int pref) 1735{ 1736 struct fib6_config cfg = { 1737 .fc_table = RT6_TABLE_DFLT, 1738 .fc_metric = 1024, 1739 .fc_ifindex = dev->ifindex, 1740 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1741 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1742 }; 1743 1744 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1745 1746 ip6_route_add(&cfg); 1747 1748 return rt6_get_dflt_router(gwaddr, dev); 1749} 1750 1751void rt6_purge_dflt_routers(void) 1752{ 1753 struct rt6_info *rt; 1754 struct fib6_table *table; 1755 1756 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1757 table = fib6_get_table(RT6_TABLE_DFLT); 1758 if (table == NULL) 1759 return; 1760 1761restart: 1762 read_lock_bh(&table->tb6_lock); 1763 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1764 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1765 dst_hold(&rt->u.dst); 1766 read_unlock_bh(&table->tb6_lock); 1767 ip6_del_rt(rt); 1768 goto restart; 1769 } 1770 } 1771 read_unlock_bh(&table->tb6_lock); 1772} 1773 1774static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, 1775 struct fib6_config *cfg) 1776{ 1777 memset(cfg, 0, sizeof(*cfg)); 1778 1779 cfg->fc_table = RT6_TABLE_MAIN; 1780 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1781 cfg->fc_metric = rtmsg->rtmsg_metric; 1782 cfg->fc_expires = rtmsg->rtmsg_info; 1783 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1784 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1785 cfg->fc_flags = rtmsg->rtmsg_flags; 1786 1787 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1788 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1789 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1790} 1791 1792int ipv6_route_ioctl(unsigned int cmd, void __user *arg) 1793{ 1794 struct fib6_config cfg; 1795 struct in6_rtmsg rtmsg; 1796 int err; 1797 1798 switch(cmd) { 1799 case SIOCADDRT: /* Add a route */ 1800 case SIOCDELRT: /* Delete a route */ 1801 if (!capable(CAP_NET_ADMIN)) 1802 return -EPERM; 1803 err = copy_from_user(&rtmsg, arg, 1804 sizeof(struct in6_rtmsg)); 1805 if (err) 1806 return -EFAULT; 1807 1808 rtmsg_to_fib6_config(&rtmsg, &cfg); 1809 1810 rtnl_lock(); 1811 switch (cmd) { 1812 case SIOCADDRT: 1813 err = ip6_route_add(&cfg); 1814 break; 1815 case SIOCDELRT: 1816 err = ip6_route_del(&cfg); 1817 break; 1818 default: 1819 err = -EINVAL; 1820 } 1821 rtnl_unlock(); 1822 1823 return err; 1824 } 1825 1826 return -EINVAL; 1827} 1828 1829/* 1830 * Drop the packet on the floor 1831 */ 1832 1833static inline int ip6_pkt_drop(struct sk_buff *skb, int code, 1834 int ipstats_mib_noroutes) 1835{ 1836 int type; 1837 switch (ipstats_mib_noroutes) { 1838 case IPSTATS_MIB_INNOROUTES: 1839 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1840 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { 1841 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); 1842 break; 1843 } 1844 /* FALLTHROUGH */ 1845 case IPSTATS_MIB_OUTNOROUTES: 1846 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); 1847 break; 1848 } 1849 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); 1850 kfree_skb(skb); 1851 return 0; 1852} 1853 1854static int ip6_pkt_discard(struct sk_buff *skb) 1855{ 1856 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 1857} 1858 1859static int ip6_pkt_discard_out(struct sk_buff *skb) 1860{ 1861 skb->dev = skb->dst->dev; 1862 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 1863} 1864 1865#ifdef CONFIG_IPV6_MULTIPLE_TABLES 1866 1867static int ip6_pkt_prohibit(struct sk_buff *skb) 1868{ 1869 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 1870} 1871 1872static int ip6_pkt_prohibit_out(struct sk_buff *skb) 1873{ 1874 skb->dev = skb->dst->dev; 1875 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1876} 1877 1878static int ip6_pkt_blk_hole(struct sk_buff *skb) 1879{ 1880 kfree_skb(skb); 1881 return 0; 1882} 1883 1884#endif 1885 1886/* 1887 * Allocate a dst for local (unicast / anycast) address. 1888 */ 1889 1890struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1891 const struct in6_addr *addr, 1892 int anycast) 1893{ 1894 struct rt6_info *rt = ip6_dst_alloc(); 1895 1896 if (rt == NULL) 1897 return ERR_PTR(-ENOMEM); 1898 1899 dev_hold(&loopback_dev); 1900 in6_dev_hold(idev); 1901 1902 rt->u.dst.flags = DST_HOST; 1903 rt->u.dst.input = ip6_input; 1904 rt->u.dst.output = ip6_output; 1905 rt->rt6i_dev = &loopback_dev; 1906 rt->rt6i_idev = idev; 1907 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1908 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1909 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1910 rt->u.dst.obsolete = -1; 1911 1912 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1913 if (anycast) 1914 rt->rt6i_flags |= RTF_ANYCAST; 1915 else 1916 rt->rt6i_flags |= RTF_LOCAL; 1917 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1918 if (rt->rt6i_nexthop == NULL) { 1919 dst_free(&rt->u.dst); 1920 return ERR_PTR(-ENOMEM); 1921 } 1922 1923 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1924 rt->rt6i_dst.plen = 128; 1925 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); 1926 1927 atomic_set(&rt->u.dst.__refcnt, 1); 1928 1929 return rt; 1930} 1931 1932static int fib6_ifdown(struct rt6_info *rt, void *arg) 1933{ 1934 if (((void*)rt->rt6i_dev == arg || arg == NULL) && 1935 rt != &ip6_null_entry) { 1936 RT6_TRACE("deleted by ifdown %p\n", rt); 1937 return -1; 1938 } 1939 return 0; 1940} 1941 1942void rt6_ifdown(struct net_device *dev) 1943{ 1944 fib6_clean_all(fib6_ifdown, 0, dev); 1945} 1946 1947struct rt6_mtu_change_arg 1948{ 1949 struct net_device *dev; 1950 unsigned mtu; 1951}; 1952 1953static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 1954{ 1955 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 1956 struct inet6_dev *idev; 1957 1958 /* In IPv6 pmtu discovery is not optional, 1959 so that RTAX_MTU lock cannot disable it. 1960 We still use this lock to block changes 1961 caused by addrconf/ndisc. 1962 */ 1963 1964 idev = __in6_dev_get(arg->dev); 1965 if (idev == NULL) 1966 return 0; 1967 1968 /* For administrative MTU increase, there is no way to discover 1969 IPv6 PMTU increase, so PMTU increase should be updated here. 1970 Since RFC 1981 doesn't include administrative MTU increase 1971 update PMTU increase is a MUST. (i.e. jumbo frame) 1972 */ 1973 /* 1974 If new MTU is less than route PMTU, this new MTU will be the 1975 lowest MTU in the path, update the route PMTU to reflect PMTU 1976 decreases; if new MTU is greater than route PMTU, and the 1977 old MTU is the lowest MTU in the path, update the route PMTU 1978 to reflect the increase. In this case if the other nodes' MTU 1979 also have the lowest MTU, TOO BIG MESSAGE will be lead to 1980 PMTU discouvery. 1981 */ 1982 if (rt->rt6i_dev == arg->dev && 1983 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1984 (dst_mtu(&rt->u.dst) > arg->mtu || 1985 (dst_mtu(&rt->u.dst) < arg->mtu && 1986 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 1987 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1988 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); 1989 } 1990 return 0; 1991} 1992 1993void rt6_mtu_change(struct net_device *dev, unsigned mtu) 1994{ 1995 struct rt6_mtu_change_arg arg = { 1996 .dev = dev, 1997 .mtu = mtu, 1998 }; 1999 2000 fib6_clean_all(rt6_mtu_change_route, 0, &arg); 2001} 2002 2003static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2004 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2005 [RTA_OIF] = { .type = NLA_U32 }, 2006 [RTA_IIF] = { .type = NLA_U32 }, 2007 [RTA_PRIORITY] = { .type = NLA_U32 }, 2008 [RTA_METRICS] = { .type = NLA_NESTED }, 2009}; 2010 2011static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2012 struct fib6_config *cfg) 2013{ 2014 struct rtmsg *rtm; 2015 struct nlattr *tb[RTA_MAX+1]; 2016 int err; 2017 2018 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2019 if (err < 0) 2020 goto errout; 2021 2022 err = -EINVAL; 2023 rtm = nlmsg_data(nlh); 2024 memset(cfg, 0, sizeof(*cfg)); 2025 2026 cfg->fc_table = rtm->rtm_table; 2027 cfg->fc_dst_len = rtm->rtm_dst_len; 2028 cfg->fc_src_len = rtm->rtm_src_len; 2029 cfg->fc_flags = RTF_UP; 2030 cfg->fc_protocol = rtm->rtm_protocol; 2031 2032 if (rtm->rtm_type == RTN_UNREACHABLE) 2033 cfg->fc_flags |= RTF_REJECT; 2034 2035 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2036 cfg->fc_nlinfo.nlh = nlh; 2037 2038 if (tb[RTA_GATEWAY]) { 2039 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2040 cfg->fc_flags |= RTF_GATEWAY; 2041 } 2042 2043 if (tb[RTA_DST]) { 2044 int plen = (rtm->rtm_dst_len + 7) >> 3; 2045 2046 if (nla_len(tb[RTA_DST]) < plen) 2047 goto errout; 2048 2049 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2050 } 2051 2052 if (tb[RTA_SRC]) { 2053 int plen = (rtm->rtm_src_len + 7) >> 3; 2054 2055 if (nla_len(tb[RTA_SRC]) < plen) 2056 goto errout; 2057 2058 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2059 } 2060 2061 if (tb[RTA_OIF]) 2062 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2063 2064 if (tb[RTA_PRIORITY]) 2065 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2066 2067 if (tb[RTA_METRICS]) { 2068 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2069 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2070 } 2071 2072 if (tb[RTA_TABLE]) 2073 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2074 2075 err = 0; 2076errout: 2077 return err; 2078} 2079 2080static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2081{ 2082 struct fib6_config cfg; 2083 int err; 2084 2085 err = rtm_to_fib6_config(skb, nlh, &cfg); 2086 if (err < 0) 2087 return err; 2088 2089 return ip6_route_del(&cfg); 2090} 2091 2092static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2093{ 2094 struct fib6_config cfg; 2095 int err; 2096 2097 err = rtm_to_fib6_config(skb, nlh, &cfg); 2098 if (err < 0) 2099 return err; 2100 2101 return ip6_route_add(&cfg); 2102} 2103 2104static inline size_t rt6_nlmsg_size(void) 2105{ 2106 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2107 + nla_total_size(16) /* RTA_SRC */ 2108 + nla_total_size(16) /* RTA_DST */ 2109 + nla_total_size(16) /* RTA_GATEWAY */ 2110 + nla_total_size(16) /* RTA_PREFSRC */ 2111 + nla_total_size(4) /* RTA_TABLE */ 2112 + nla_total_size(4) /* RTA_IIF */ 2113 + nla_total_size(4) /* RTA_OIF */ 2114 + nla_total_size(4) /* RTA_PRIORITY */ 2115 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2116 + nla_total_size(sizeof(struct rta_cacheinfo)); 2117} 2118 2119static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, 2120 struct in6_addr *dst, struct in6_addr *src, 2121 int iif, int type, u32 pid, u32 seq, 2122 int prefix, unsigned int flags) 2123{ 2124 struct rtmsg *rtm; 2125 struct nlmsghdr *nlh; 2126 long expires; 2127 u32 table; 2128 2129 if (prefix) { /* user wants prefix routes only */ 2130 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2131 /* success since this is not a prefix route */ 2132 return 1; 2133 } 2134 } 2135 2136 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2137 if (nlh == NULL) 2138 return -EMSGSIZE; 2139 2140 rtm = nlmsg_data(nlh); 2141 rtm->rtm_family = AF_INET6; 2142 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2143 rtm->rtm_src_len = rt->rt6i_src.plen; 2144 rtm->rtm_tos = 0; 2145 if (rt->rt6i_table) 2146 table = rt->rt6i_table->tb6_id; 2147 else 2148 table = RT6_TABLE_UNSPEC; 2149 rtm->rtm_table = table; 2150 NLA_PUT_U32(skb, RTA_TABLE, table); 2151 if (rt->rt6i_flags&RTF_REJECT) 2152 rtm->rtm_type = RTN_UNREACHABLE; 2153 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2154 rtm->rtm_type = RTN_LOCAL; 2155 else 2156 rtm->rtm_type = RTN_UNICAST; 2157 rtm->rtm_flags = 0; 2158 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2159 rtm->rtm_protocol = rt->rt6i_protocol; 2160 if (rt->rt6i_flags&RTF_DYNAMIC) 2161 rtm->rtm_protocol = RTPROT_REDIRECT; 2162 else if (rt->rt6i_flags & RTF_ADDRCONF) 2163 rtm->rtm_protocol = RTPROT_KERNEL; 2164 else if (rt->rt6i_flags&RTF_DEFAULT) 2165 rtm->rtm_protocol = RTPROT_RA; 2166 2167 if (rt->rt6i_flags&RTF_CACHE) 2168 rtm->rtm_flags |= RTM_F_CLONED; 2169 2170 if (dst) { 2171 NLA_PUT(skb, RTA_DST, 16, dst); 2172 rtm->rtm_dst_len = 128; 2173 } else if (rtm->rtm_dst_len) 2174 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2175#ifdef CONFIG_IPV6_SUBTREES 2176 if (src) { 2177 NLA_PUT(skb, RTA_SRC, 16, src); 2178 rtm->rtm_src_len = 128; 2179 } else if (rtm->rtm_src_len) 2180 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2181#endif 2182 if (iif) 2183 NLA_PUT_U32(skb, RTA_IIF, iif); 2184 else if (dst) { 2185 struct in6_addr saddr_buf; 2186 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) 2187 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2188 } 2189 2190 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2191 goto nla_put_failure; 2192 2193 if (rt->u.dst.neighbour) 2194 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2195 2196 if (rt->u.dst.dev) 2197 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2198 2199 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2200 2201 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; 2202 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2203 expires, rt->u.dst.error) < 0) 2204 goto nla_put_failure; 2205 2206 return nlmsg_end(skb, nlh); 2207 2208nla_put_failure: 2209 nlmsg_cancel(skb, nlh); 2210 return -EMSGSIZE; 2211} 2212 2213int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2214{ 2215 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2216 int prefix; 2217 2218 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2219 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2220 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2221 } else 2222 prefix = 0; 2223 2224 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2225 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2226 prefix, NLM_F_MULTI); 2227} 2228 2229static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2230{ 2231 struct nlattr *tb[RTA_MAX+1]; 2232 struct rt6_info *rt; 2233 struct sk_buff *skb; 2234 struct rtmsg *rtm; 2235 struct flowi fl; 2236 int err, iif = 0; 2237 2238 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2239 if (err < 0) 2240 goto errout; 2241 2242 err = -EINVAL; 2243 memset(&fl, 0, sizeof(fl)); 2244 2245 if (tb[RTA_SRC]) { 2246 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2247 goto errout; 2248 2249 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2250 } 2251 2252 if (tb[RTA_DST]) { 2253 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2254 goto errout; 2255 2256 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2257 } 2258 2259 if (tb[RTA_IIF]) 2260 iif = nla_get_u32(tb[RTA_IIF]); 2261 2262 if (tb[RTA_OIF]) 2263 fl.oif = nla_get_u32(tb[RTA_OIF]); 2264 2265 if (iif) { 2266 struct net_device *dev; 2267 dev = __dev_get_by_index(iif); 2268 if (!dev) { 2269 err = -ENODEV; 2270 goto errout; 2271 } 2272 } 2273 2274 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2275 if (skb == NULL) { 2276 err = -ENOBUFS; 2277 goto errout; 2278 } 2279 2280 /* Reserve room for dummy headers, this skb can pass 2281 through good chunk of routing engine. 2282 */ 2283 skb_reset_mac_header(skb); 2284 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2285 2286 rt = (struct rt6_info*) ip6_route_output(NULL, &fl); 2287 skb->dst = &rt->u.dst; 2288 2289 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2290 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2291 nlh->nlmsg_seq, 0, 0); 2292 if (err < 0) { 2293 kfree_skb(skb); 2294 goto errout; 2295 } 2296 2297 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 2298errout: 2299 return err; 2300} 2301 2302void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2303{ 2304 struct sk_buff *skb; 2305 u32 pid = 0, seq = 0; 2306 struct nlmsghdr *nlh = NULL; 2307 int err = -ENOBUFS; 2308 2309 if (info) { 2310 pid = info->pid; 2311 nlh = info->nlh; 2312 if (nlh) 2313 seq = nlh->nlmsg_seq; 2314 } 2315 2316 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2317 if (skb == NULL) 2318 goto errout; 2319 2320 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); 2321 if (err < 0) { 2322 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2323 WARN_ON(err == -EMSGSIZE); 2324 kfree_skb(skb); 2325 goto errout; 2326 } 2327 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); 2328errout: 2329 if (err < 0) 2330 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); 2331} 2332 2333/* 2334 * /proc 2335 */ 2336 2337#ifdef CONFIG_PROC_FS 2338 2339#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2340 2341struct rt6_proc_arg 2342{ 2343 char *buffer; 2344 int offset; 2345 int length; 2346 int skip; 2347 int len; 2348}; 2349 2350static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2351{ 2352 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; 2353 2354 if (arg->skip < arg->offset / RT6_INFO_LEN) { 2355 arg->skip++; 2356 return 0; 2357 } 2358 2359 if (arg->len >= arg->length) 2360 return 0; 2361 2362 arg->len += sprintf(arg->buffer + arg->len, 2363 NIP6_SEQFMT " %02x ", 2364 NIP6(rt->rt6i_dst.addr), 2365 rt->rt6i_dst.plen); 2366 2367#ifdef CONFIG_IPV6_SUBTREES 2368 arg->len += sprintf(arg->buffer + arg->len, 2369 NIP6_SEQFMT " %02x ", 2370 NIP6(rt->rt6i_src.addr), 2371 rt->rt6i_src.plen); 2372#else 2373 arg->len += sprintf(arg->buffer + arg->len, 2374 "00000000000000000000000000000000 00 "); 2375#endif 2376 2377 if (rt->rt6i_nexthop) { 2378 arg->len += sprintf(arg->buffer + arg->len, 2379 NIP6_SEQFMT, 2380 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); 2381 } else { 2382 arg->len += sprintf(arg->buffer + arg->len, 2383 "00000000000000000000000000000000"); 2384 } 2385 arg->len += sprintf(arg->buffer + arg->len, 2386 " %08x %08x %08x %08x %8s\n", 2387 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2388 rt->u.dst.__use, rt->rt6i_flags, 2389 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2390 return 0; 2391} 2392 2393static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) 2394{ 2395 struct rt6_proc_arg arg = { 2396 .buffer = buffer, 2397 .offset = offset, 2398 .length = length, 2399 }; 2400 2401 fib6_clean_all(rt6_info_route, 0, &arg); 2402 2403 *start = buffer; 2404 if (offset) 2405 *start += offset % RT6_INFO_LEN; 2406 2407 arg.len -= offset % RT6_INFO_LEN; 2408 2409 if (arg.len > length) 2410 arg.len = length; 2411 if (arg.len < 0) 2412 arg.len = 0; 2413 2414 return arg.len; 2415} 2416 2417static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2418{ 2419 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2420 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, 2421 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, 2422 rt6_stats.fib_rt_cache, 2423 atomic_read(&ip6_dst_ops.entries), 2424 rt6_stats.fib_discarded_routes); 2425 2426 return 0; 2427} 2428 2429static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2430{ 2431 return single_open(file, rt6_stats_seq_show, NULL); 2432} 2433 2434static const struct file_operations rt6_stats_seq_fops = { 2435 .owner = THIS_MODULE, 2436 .open = rt6_stats_seq_open, 2437 .read = seq_read, 2438 .llseek = seq_lseek, 2439 .release = single_release, 2440}; 2441#endif /* CONFIG_PROC_FS */ 2442 2443#ifdef CONFIG_SYSCTL 2444 2445static int flush_delay; 2446 2447static 2448int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2449 void __user *buffer, size_t *lenp, loff_t *ppos) 2450{ 2451 if (write) { 2452 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2453 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); 2454 return 0; 2455 } else 2456 return -EINVAL; 2457} 2458 2459ctl_table ipv6_route_table[] = { 2460 { 2461 .ctl_name = NET_IPV6_ROUTE_FLUSH, 2462 .procname = "flush", 2463 .data = &flush_delay, 2464 .maxlen = sizeof(int), 2465 .mode = 0200, 2466 .proc_handler = &ipv6_sysctl_rtcache_flush 2467 }, 2468 { 2469 .ctl_name = NET_IPV6_ROUTE_GC_THRESH, 2470 .procname = "gc_thresh", 2471 .data = &ip6_dst_ops.gc_thresh, 2472 .maxlen = sizeof(int), 2473 .mode = 0644, 2474 .proc_handler = &proc_dointvec, 2475 }, 2476 { 2477 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2478 .procname = "max_size", 2479 .data = &ip6_rt_max_size, 2480 .maxlen = sizeof(int), 2481 .mode = 0644, 2482 .proc_handler = &proc_dointvec, 2483 }, 2484 { 2485 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2486 .procname = "gc_min_interval", 2487 .data = &ip6_rt_gc_min_interval, 2488 .maxlen = sizeof(int), 2489 .mode = 0644, 2490 .proc_handler = &proc_dointvec_jiffies, 2491 .strategy = &sysctl_jiffies, 2492 }, 2493 { 2494 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2495 .procname = "gc_timeout", 2496 .data = &ip6_rt_gc_timeout, 2497 .maxlen = sizeof(int), 2498 .mode = 0644, 2499 .proc_handler = &proc_dointvec_jiffies, 2500 .strategy = &sysctl_jiffies, 2501 }, 2502 { 2503 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2504 .procname = "gc_interval", 2505 .data = &ip6_rt_gc_interval, 2506 .maxlen = sizeof(int), 2507 .mode = 0644, 2508 .proc_handler = &proc_dointvec_jiffies, 2509 .strategy = &sysctl_jiffies, 2510 }, 2511 { 2512 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2513 .procname = "gc_elasticity", 2514 .data = &ip6_rt_gc_elasticity, 2515 .maxlen = sizeof(int), 2516 .mode = 0644, 2517 .proc_handler = &proc_dointvec_jiffies, 2518 .strategy = &sysctl_jiffies, 2519 }, 2520 { 2521 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2522 .procname = "mtu_expires", 2523 .data = &ip6_rt_mtu_expires, 2524 .maxlen = sizeof(int), 2525 .mode = 0644, 2526 .proc_handler = &proc_dointvec_jiffies, 2527 .strategy = &sysctl_jiffies, 2528 }, 2529 { 2530 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2531 .procname = "min_adv_mss", 2532 .data = &ip6_rt_min_advmss, 2533 .maxlen = sizeof(int), 2534 .mode = 0644, 2535 .proc_handler = &proc_dointvec_jiffies, 2536 .strategy = &sysctl_jiffies, 2537 }, 2538 { 2539 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2540 .procname = "gc_min_interval_ms", 2541 .data = &ip6_rt_gc_min_interval, 2542 .maxlen = sizeof(int), 2543 .mode = 0644, 2544 .proc_handler = &proc_dointvec_ms_jiffies, 2545 .strategy = &sysctl_ms_jiffies, 2546 }, 2547 { .ctl_name = 0 } 2548}; 2549 2550#endif 2551 2552void __init ip6_route_init(void) 2553{ 2554#ifdef CONFIG_PROC_FS 2555 struct proc_dir_entry *p; 2556#endif 2557 ip6_dst_ops.kmem_cachep = 2558 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2559 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 2560 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; 2561 2562 fib6_init(); 2563#ifdef CONFIG_PROC_FS 2564 p = proc_net_create("ipv6_route", 0, rt6_proc_info); 2565 if (p) 2566 p->owner = THIS_MODULE; 2567 2568 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2569#endif 2570#ifdef CONFIG_XFRM 2571 xfrm6_init(); 2572#endif 2573#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2574 fib6_rules_init(); 2575#endif 2576 2577 __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL); 2578 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL); 2579 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL); 2580} 2581 2582void ip6_route_cleanup(void) 2583{ 2584#ifdef CONFIG_IPV6_MULTIPLE_TABLES 2585 fib6_rules_cleanup(); 2586#endif 2587#ifdef CONFIG_PROC_FS 2588 proc_net_remove("ipv6_route"); 2589 proc_net_remove("rt6_stats"); 2590#endif 2591#ifdef CONFIG_XFRM 2592 xfrm6_fini(); 2593#endif 2594 rt6_ifdown(NULL); 2595 fib6_gc_cleanup(); 2596 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2597}