Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.19-rc2 1199 lines 28 kB view raw
1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18#include <asm/uaccess.h> 19#include <asm/system.h> 20#include <linux/bitops.h> 21#include <linux/types.h> 22#include <linux/kernel.h> 23#include <linux/jiffies.h> 24#include <linux/mm.h> 25#include <linux/string.h> 26#include <linux/socket.h> 27#include <linux/sockios.h> 28#include <linux/errno.h> 29#include <linux/in.h> 30#include <linux/inet.h> 31#include <linux/inetdevice.h> 32#include <linux/netdevice.h> 33#include <linux/if_arp.h> 34#include <linux/proc_fs.h> 35#include <linux/skbuff.h> 36#include <linux/init.h> 37 38#include <net/arp.h> 39#include <net/ip.h> 40#include <net/protocol.h> 41#include <net/route.h> 42#include <net/tcp.h> 43#include <net/sock.h> 44#include <net/ip_fib.h> 45#include <net/ip_mp_alg.h> 46#include <net/netlink.h> 47#include <net/nexthop.h> 48 49#include "fib_lookup.h" 50 51#define FSprintk(a...) 52 53static DEFINE_SPINLOCK(fib_info_lock); 54static struct hlist_head *fib_info_hash; 55static struct hlist_head *fib_info_laddrhash; 56static unsigned int fib_hash_size; 57static unsigned int fib_info_cnt; 58 59#define DEVINDEX_HASHBITS 8 60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 62 63#ifdef CONFIG_IP_ROUTE_MULTIPATH 64 65static DEFINE_SPINLOCK(fib_multipath_lock); 66 67#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 68for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 69 70#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 71for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 72 73#else /* CONFIG_IP_ROUTE_MULTIPATH */ 74 75/* Hope, that gcc will optimize it to get rid of dummy loop */ 76 77#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 78for (nhsel=0; nhsel < 1; nhsel++) 79 80#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 81for (nhsel=0; nhsel < 1; nhsel++) 82 83#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 84 85#define endfor_nexthops(fi) } 86 87 88static const struct 89{ 90 int error; 91 u8 scope; 92} fib_props[RTA_MAX + 1] = { 93 { 94 .error = 0, 95 .scope = RT_SCOPE_NOWHERE, 96 }, /* RTN_UNSPEC */ 97 { 98 .error = 0, 99 .scope = RT_SCOPE_UNIVERSE, 100 }, /* RTN_UNICAST */ 101 { 102 .error = 0, 103 .scope = RT_SCOPE_HOST, 104 }, /* RTN_LOCAL */ 105 { 106 .error = 0, 107 .scope = RT_SCOPE_LINK, 108 }, /* RTN_BROADCAST */ 109 { 110 .error = 0, 111 .scope = RT_SCOPE_LINK, 112 }, /* RTN_ANYCAST */ 113 { 114 .error = 0, 115 .scope = RT_SCOPE_UNIVERSE, 116 }, /* RTN_MULTICAST */ 117 { 118 .error = -EINVAL, 119 .scope = RT_SCOPE_UNIVERSE, 120 }, /* RTN_BLACKHOLE */ 121 { 122 .error = -EHOSTUNREACH, 123 .scope = RT_SCOPE_UNIVERSE, 124 }, /* RTN_UNREACHABLE */ 125 { 126 .error = -EACCES, 127 .scope = RT_SCOPE_UNIVERSE, 128 }, /* RTN_PROHIBIT */ 129 { 130 .error = -EAGAIN, 131 .scope = RT_SCOPE_UNIVERSE, 132 }, /* RTN_THROW */ 133 { 134 .error = -EINVAL, 135 .scope = RT_SCOPE_NOWHERE, 136 }, /* RTN_NAT */ 137 { 138 .error = -EINVAL, 139 .scope = RT_SCOPE_NOWHERE, 140 }, /* RTN_XRESOLVE */ 141}; 142 143 144/* Release a nexthop info record */ 145 146void free_fib_info(struct fib_info *fi) 147{ 148 if (fi->fib_dead == 0) { 149 printk("Freeing alive fib_info %p\n", fi); 150 return; 151 } 152 change_nexthops(fi) { 153 if (nh->nh_dev) 154 dev_put(nh->nh_dev); 155 nh->nh_dev = NULL; 156 } endfor_nexthops(fi); 157 fib_info_cnt--; 158 kfree(fi); 159} 160 161void fib_release_info(struct fib_info *fi) 162{ 163 spin_lock_bh(&fib_info_lock); 164 if (fi && --fi->fib_treeref == 0) { 165 hlist_del(&fi->fib_hash); 166 if (fi->fib_prefsrc) 167 hlist_del(&fi->fib_lhash); 168 change_nexthops(fi) { 169 if (!nh->nh_dev) 170 continue; 171 hlist_del(&nh->nh_hash); 172 } endfor_nexthops(fi) 173 fi->fib_dead = 1; 174 fib_info_put(fi); 175 } 176 spin_unlock_bh(&fib_info_lock); 177} 178 179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 180{ 181 const struct fib_nh *onh = ofi->fib_nh; 182 183 for_nexthops(fi) { 184 if (nh->nh_oif != onh->nh_oif || 185 nh->nh_gw != onh->nh_gw || 186 nh->nh_scope != onh->nh_scope || 187#ifdef CONFIG_IP_ROUTE_MULTIPATH 188 nh->nh_weight != onh->nh_weight || 189#endif 190#ifdef CONFIG_NET_CLS_ROUTE 191 nh->nh_tclassid != onh->nh_tclassid || 192#endif 193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 194 return -1; 195 onh++; 196 } endfor_nexthops(fi); 197 return 0; 198} 199 200static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 201{ 202 unsigned int mask = (fib_hash_size - 1); 203 unsigned int val = fi->fib_nhs; 204 205 val ^= fi->fib_protocol; 206 val ^= (__force u32)fi->fib_prefsrc; 207 val ^= fi->fib_priority; 208 209 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 210} 211 212static struct fib_info *fib_find_info(const struct fib_info *nfi) 213{ 214 struct hlist_head *head; 215 struct hlist_node *node; 216 struct fib_info *fi; 217 unsigned int hash; 218 219 hash = fib_info_hashfn(nfi); 220 head = &fib_info_hash[hash]; 221 222 hlist_for_each_entry(fi, node, head, fib_hash) { 223 if (fi->fib_nhs != nfi->fib_nhs) 224 continue; 225 if (nfi->fib_protocol == fi->fib_protocol && 226 nfi->fib_prefsrc == fi->fib_prefsrc && 227 nfi->fib_priority == fi->fib_priority && 228 memcmp(nfi->fib_metrics, fi->fib_metrics, 229 sizeof(fi->fib_metrics)) == 0 && 230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 232 return fi; 233 } 234 235 return NULL; 236} 237 238static inline unsigned int fib_devindex_hashfn(unsigned int val) 239{ 240 unsigned int mask = DEVINDEX_HASHSIZE - 1; 241 242 return (val ^ 243 (val >> DEVINDEX_HASHBITS) ^ 244 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 245} 246 247/* Check, that the gateway is already configured. 248 Used only by redirect accept routine. 249 */ 250 251int ip_fib_check_default(__be32 gw, struct net_device *dev) 252{ 253 struct hlist_head *head; 254 struct hlist_node *node; 255 struct fib_nh *nh; 256 unsigned int hash; 257 258 spin_lock(&fib_info_lock); 259 260 hash = fib_devindex_hashfn(dev->ifindex); 261 head = &fib_info_devhash[hash]; 262 hlist_for_each_entry(nh, node, head, nh_hash) { 263 if (nh->nh_dev == dev && 264 nh->nh_gw == gw && 265 !(nh->nh_flags&RTNH_F_DEAD)) { 266 spin_unlock(&fib_info_lock); 267 return 0; 268 } 269 } 270 271 spin_unlock(&fib_info_lock); 272 273 return -1; 274} 275 276void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 277 int dst_len, u32 tb_id, struct nl_info *info) 278{ 279 struct sk_buff *skb; 280 int payload = sizeof(struct rtmsg) + 256; 281 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 282 int err = -ENOBUFS; 283 284 skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); 285 if (skb == NULL) 286 goto errout; 287 288 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 289 fa->fa_type, fa->fa_scope, key, dst_len, 290 fa->fa_tos, fa->fa_info, 0); 291 if (err < 0) { 292 kfree_skb(skb); 293 goto errout; 294 } 295 296 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, 297 info->nlh, GFP_KERNEL); 298errout: 299 if (err < 0) 300 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); 301} 302 303/* Return the first fib alias matching TOS with 304 * priority less than or equal to PRIO. 305 */ 306struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 307{ 308 if (fah) { 309 struct fib_alias *fa; 310 list_for_each_entry(fa, fah, fa_list) { 311 if (fa->fa_tos > tos) 312 continue; 313 if (fa->fa_info->fib_priority >= prio || 314 fa->fa_tos < tos) 315 return fa; 316 } 317 } 318 return NULL; 319} 320 321int fib_detect_death(struct fib_info *fi, int order, 322 struct fib_info **last_resort, int *last_idx, int *dflt) 323{ 324 struct neighbour *n; 325 int state = NUD_NONE; 326 327 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 328 if (n) { 329 state = n->nud_state; 330 neigh_release(n); 331 } 332 if (state==NUD_REACHABLE) 333 return 0; 334 if ((state&NUD_VALID) && order != *dflt) 335 return 0; 336 if ((state&NUD_VALID) || 337 (*last_idx<0 && order > *dflt)) { 338 *last_resort = fi; 339 *last_idx = order; 340 } 341 return 1; 342} 343 344#ifdef CONFIG_IP_ROUTE_MULTIPATH 345 346static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 347{ 348 int nhs = 0; 349 350 while (rtnh_ok(rtnh, remaining)) { 351 nhs++; 352 rtnh = rtnh_next(rtnh, &remaining); 353 } 354 355 /* leftover implies invalid nexthop configuration, discard it */ 356 return remaining > 0 ? 0 : nhs; 357} 358 359static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 360 int remaining, struct fib_config *cfg) 361{ 362 change_nexthops(fi) { 363 int attrlen; 364 365 if (!rtnh_ok(rtnh, remaining)) 366 return -EINVAL; 367 368 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 369 nh->nh_oif = rtnh->rtnh_ifindex; 370 nh->nh_weight = rtnh->rtnh_hops + 1; 371 372 attrlen = rtnh_attrlen(rtnh); 373 if (attrlen > 0) { 374 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 375 376 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 377 nh->nh_gw = nla ? nla_get_be32(nla) : 0; 378#ifdef CONFIG_NET_CLS_ROUTE 379 nla = nla_find(attrs, attrlen, RTA_FLOW); 380 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 381#endif 382 } 383 384 rtnh = rtnh_next(rtnh, &remaining); 385 } endfor_nexthops(fi); 386 387 return 0; 388} 389 390#endif 391 392int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 393{ 394#ifdef CONFIG_IP_ROUTE_MULTIPATH 395 struct rtnexthop *rtnh; 396 int remaining; 397#endif 398 399 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 400 return 1; 401 402 if (cfg->fc_oif || cfg->fc_gw) { 403 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 404 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 405 return 0; 406 return 1; 407 } 408 409#ifdef CONFIG_IP_ROUTE_MULTIPATH 410 if (cfg->fc_mp == NULL) 411 return 0; 412 413 rtnh = cfg->fc_mp; 414 remaining = cfg->fc_mp_len; 415 416 for_nexthops(fi) { 417 int attrlen; 418 419 if (!rtnh_ok(rtnh, remaining)) 420 return -EINVAL; 421 422 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 423 return 1; 424 425 attrlen = rtnh_attrlen(rtnh); 426 if (attrlen < 0) { 427 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 428 429 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 430 if (nla && nla_get_be32(nla) != nh->nh_gw) 431 return 1; 432#ifdef CONFIG_NET_CLS_ROUTE 433 nla = nla_find(attrs, attrlen, RTA_FLOW); 434 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 435 return 1; 436#endif 437 } 438 439 rtnh = rtnh_next(rtnh, &remaining); 440 } endfor_nexthops(fi); 441#endif 442 return 0; 443} 444 445 446/* 447 Picture 448 ------- 449 450 Semantics of nexthop is very messy by historical reasons. 451 We have to take into account, that: 452 a) gateway can be actually local interface address, 453 so that gatewayed route is direct. 454 b) gateway must be on-link address, possibly 455 described not by an ifaddr, but also by a direct route. 456 c) If both gateway and interface are specified, they should not 457 contradict. 458 d) If we use tunnel routes, gateway could be not on-link. 459 460 Attempt to reconcile all of these (alas, self-contradictory) conditions 461 results in pretty ugly and hairy code with obscure logic. 462 463 I chose to generalized it instead, so that the size 464 of code does not increase practically, but it becomes 465 much more general. 466 Every prefix is assigned a "scope" value: "host" is local address, 467 "link" is direct route, 468 [ ... "site" ... "interior" ... ] 469 and "universe" is true gateway route with global meaning. 470 471 Every prefix refers to a set of "nexthop"s (gw, oif), 472 where gw must have narrower scope. This recursion stops 473 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 474 which means that gw is forced to be on link. 475 476 Code is still hairy, but now it is apparently logically 477 consistent and very flexible. F.e. as by-product it allows 478 to co-exists in peace independent exterior and interior 479 routing processes. 480 481 Normally it looks as following. 482 483 {universe prefix} -> (gw, oif) [scope link] 484 | 485 |-> {link prefix} -> (gw, oif) [scope local] 486 | 487 |-> {local prefix} (terminal node) 488 */ 489 490static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 491 struct fib_nh *nh) 492{ 493 int err; 494 495 if (nh->nh_gw) { 496 struct fib_result res; 497 498#ifdef CONFIG_IP_ROUTE_PERVASIVE 499 if (nh->nh_flags&RTNH_F_PERVASIVE) 500 return 0; 501#endif 502 if (nh->nh_flags&RTNH_F_ONLINK) { 503 struct net_device *dev; 504 505 if (cfg->fc_scope >= RT_SCOPE_LINK) 506 return -EINVAL; 507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 508 return -EINVAL; 509 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL) 510 return -ENODEV; 511 if (!(dev->flags&IFF_UP)) 512 return -ENETDOWN; 513 nh->nh_dev = dev; 514 dev_hold(dev); 515 nh->nh_scope = RT_SCOPE_LINK; 516 return 0; 517 } 518 { 519 struct flowi fl = { 520 .nl_u = { 521 .ip4_u = { 522 .daddr = nh->nh_gw, 523 .scope = cfg->fc_scope + 1, 524 }, 525 }, 526 .oif = nh->nh_oif, 527 }; 528 529 /* It is not necessary, but requires a bit of thinking */ 530 if (fl.fl4_scope < RT_SCOPE_LINK) 531 fl.fl4_scope = RT_SCOPE_LINK; 532 if ((err = fib_lookup(&fl, &res)) != 0) 533 return err; 534 } 535 err = -EINVAL; 536 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 537 goto out; 538 nh->nh_scope = res.scope; 539 nh->nh_oif = FIB_RES_OIF(res); 540 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 541 goto out; 542 dev_hold(nh->nh_dev); 543 err = -ENETDOWN; 544 if (!(nh->nh_dev->flags & IFF_UP)) 545 goto out; 546 err = 0; 547out: 548 fib_res_put(&res); 549 return err; 550 } else { 551 struct in_device *in_dev; 552 553 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 554 return -EINVAL; 555 556 in_dev = inetdev_by_index(nh->nh_oif); 557 if (in_dev == NULL) 558 return -ENODEV; 559 if (!(in_dev->dev->flags&IFF_UP)) { 560 in_dev_put(in_dev); 561 return -ENETDOWN; 562 } 563 nh->nh_dev = in_dev->dev; 564 dev_hold(nh->nh_dev); 565 nh->nh_scope = RT_SCOPE_HOST; 566 in_dev_put(in_dev); 567 } 568 return 0; 569} 570 571static inline unsigned int fib_laddr_hashfn(__be32 val) 572{ 573 unsigned int mask = (fib_hash_size - 1); 574 575 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 576} 577 578static struct hlist_head *fib_hash_alloc(int bytes) 579{ 580 if (bytes <= PAGE_SIZE) 581 return kmalloc(bytes, GFP_KERNEL); 582 else 583 return (struct hlist_head *) 584 __get_free_pages(GFP_KERNEL, get_order(bytes)); 585} 586 587static void fib_hash_free(struct hlist_head *hash, int bytes) 588{ 589 if (!hash) 590 return; 591 592 if (bytes <= PAGE_SIZE) 593 kfree(hash); 594 else 595 free_pages((unsigned long) hash, get_order(bytes)); 596} 597 598static void fib_hash_move(struct hlist_head *new_info_hash, 599 struct hlist_head *new_laddrhash, 600 unsigned int new_size) 601{ 602 struct hlist_head *old_info_hash, *old_laddrhash; 603 unsigned int old_size = fib_hash_size; 604 unsigned int i, bytes; 605 606 spin_lock_bh(&fib_info_lock); 607 old_info_hash = fib_info_hash; 608 old_laddrhash = fib_info_laddrhash; 609 fib_hash_size = new_size; 610 611 for (i = 0; i < old_size; i++) { 612 struct hlist_head *head = &fib_info_hash[i]; 613 struct hlist_node *node, *n; 614 struct fib_info *fi; 615 616 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 617 struct hlist_head *dest; 618 unsigned int new_hash; 619 620 hlist_del(&fi->fib_hash); 621 622 new_hash = fib_info_hashfn(fi); 623 dest = &new_info_hash[new_hash]; 624 hlist_add_head(&fi->fib_hash, dest); 625 } 626 } 627 fib_info_hash = new_info_hash; 628 629 for (i = 0; i < old_size; i++) { 630 struct hlist_head *lhead = &fib_info_laddrhash[i]; 631 struct hlist_node *node, *n; 632 struct fib_info *fi; 633 634 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 635 struct hlist_head *ldest; 636 unsigned int new_hash; 637 638 hlist_del(&fi->fib_lhash); 639 640 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 641 ldest = &new_laddrhash[new_hash]; 642 hlist_add_head(&fi->fib_lhash, ldest); 643 } 644 } 645 fib_info_laddrhash = new_laddrhash; 646 647 spin_unlock_bh(&fib_info_lock); 648 649 bytes = old_size * sizeof(struct hlist_head *); 650 fib_hash_free(old_info_hash, bytes); 651 fib_hash_free(old_laddrhash, bytes); 652} 653 654struct fib_info *fib_create_info(struct fib_config *cfg) 655{ 656 int err; 657 struct fib_info *fi = NULL; 658 struct fib_info *ofi; 659 int nhs = 1; 660 661 /* Fast check to catch the most weird cases */ 662 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 663 goto err_inval; 664 665#ifdef CONFIG_IP_ROUTE_MULTIPATH 666 if (cfg->fc_mp) { 667 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 668 if (nhs == 0) 669 goto err_inval; 670 } 671#endif 672#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 673 if (cfg->fc_mp_alg) { 674 if (cfg->fc_mp_alg < IP_MP_ALG_NONE || 675 cfg->fc_mp_alg > IP_MP_ALG_MAX) 676 goto err_inval; 677 } 678#endif 679 680 err = -ENOBUFS; 681 if (fib_info_cnt >= fib_hash_size) { 682 unsigned int new_size = fib_hash_size << 1; 683 struct hlist_head *new_info_hash; 684 struct hlist_head *new_laddrhash; 685 unsigned int bytes; 686 687 if (!new_size) 688 new_size = 1; 689 bytes = new_size * sizeof(struct hlist_head *); 690 new_info_hash = fib_hash_alloc(bytes); 691 new_laddrhash = fib_hash_alloc(bytes); 692 if (!new_info_hash || !new_laddrhash) { 693 fib_hash_free(new_info_hash, bytes); 694 fib_hash_free(new_laddrhash, bytes); 695 } else { 696 memset(new_info_hash, 0, bytes); 697 memset(new_laddrhash, 0, bytes); 698 699 fib_hash_move(new_info_hash, new_laddrhash, new_size); 700 } 701 702 if (!fib_hash_size) 703 goto failure; 704 } 705 706 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 707 if (fi == NULL) 708 goto failure; 709 fib_info_cnt++; 710 711 fi->fib_protocol = cfg->fc_protocol; 712 fi->fib_flags = cfg->fc_flags; 713 fi->fib_priority = cfg->fc_priority; 714 fi->fib_prefsrc = cfg->fc_prefsrc; 715 716 fi->fib_nhs = nhs; 717 change_nexthops(fi) { 718 nh->nh_parent = fi; 719 } endfor_nexthops(fi) 720 721 if (cfg->fc_mx) { 722 struct nlattr *nla; 723 int remaining; 724 725 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 726 int type = nla->nla_type; 727 728 if (type) { 729 if (type > RTAX_MAX) 730 goto err_inval; 731 fi->fib_metrics[type - 1] = nla_get_u32(nla); 732 } 733 } 734 } 735 736 if (cfg->fc_mp) { 737#ifdef CONFIG_IP_ROUTE_MULTIPATH 738 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 739 if (err != 0) 740 goto failure; 741 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 742 goto err_inval; 743 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 744 goto err_inval; 745#ifdef CONFIG_NET_CLS_ROUTE 746 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 747 goto err_inval; 748#endif 749#else 750 goto err_inval; 751#endif 752 } else { 753 struct fib_nh *nh = fi->fib_nh; 754 755 nh->nh_oif = cfg->fc_oif; 756 nh->nh_gw = cfg->fc_gw; 757 nh->nh_flags = cfg->fc_flags; 758#ifdef CONFIG_NET_CLS_ROUTE 759 nh->nh_tclassid = cfg->fc_flow; 760#endif 761#ifdef CONFIG_IP_ROUTE_MULTIPATH 762 nh->nh_weight = 1; 763#endif 764 } 765 766#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 767 fi->fib_mp_alg = cfg->fc_mp_alg; 768#endif 769 770 if (fib_props[cfg->fc_type].error) { 771 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 772 goto err_inval; 773 goto link_it; 774 } 775 776 if (cfg->fc_scope > RT_SCOPE_HOST) 777 goto err_inval; 778 779 if (cfg->fc_scope == RT_SCOPE_HOST) { 780 struct fib_nh *nh = fi->fib_nh; 781 782 /* Local address is added. */ 783 if (nhs != 1 || nh->nh_gw) 784 goto err_inval; 785 nh->nh_scope = RT_SCOPE_NOWHERE; 786 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif); 787 err = -ENODEV; 788 if (nh->nh_dev == NULL) 789 goto failure; 790 } else { 791 change_nexthops(fi) { 792 if ((err = fib_check_nh(cfg, fi, nh)) != 0) 793 goto failure; 794 } endfor_nexthops(fi) 795 } 796 797 if (fi->fib_prefsrc) { 798 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 799 fi->fib_prefsrc != cfg->fc_dst) 800 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 801 goto err_inval; 802 } 803 804link_it: 805 if ((ofi = fib_find_info(fi)) != NULL) { 806 fi->fib_dead = 1; 807 free_fib_info(fi); 808 ofi->fib_treeref++; 809 return ofi; 810 } 811 812 fi->fib_treeref++; 813 atomic_inc(&fi->fib_clntref); 814 spin_lock_bh(&fib_info_lock); 815 hlist_add_head(&fi->fib_hash, 816 &fib_info_hash[fib_info_hashfn(fi)]); 817 if (fi->fib_prefsrc) { 818 struct hlist_head *head; 819 820 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 821 hlist_add_head(&fi->fib_lhash, head); 822 } 823 change_nexthops(fi) { 824 struct hlist_head *head; 825 unsigned int hash; 826 827 if (!nh->nh_dev) 828 continue; 829 hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 830 head = &fib_info_devhash[hash]; 831 hlist_add_head(&nh->nh_hash, head); 832 } endfor_nexthops(fi) 833 spin_unlock_bh(&fib_info_lock); 834 return fi; 835 836err_inval: 837 err = -EINVAL; 838 839failure: 840 if (fi) { 841 fi->fib_dead = 1; 842 free_fib_info(fi); 843 } 844 845 return ERR_PTR(err); 846} 847 848/* Note! fib_semantic_match intentionally uses RCU list functions. */ 849int fib_semantic_match(struct list_head *head, const struct flowi *flp, 850 struct fib_result *res, __be32 zone, __be32 mask, 851 int prefixlen) 852{ 853 struct fib_alias *fa; 854 int nh_sel = 0; 855 856 list_for_each_entry_rcu(fa, head, fa_list) { 857 int err; 858 859 if (fa->fa_tos && 860 fa->fa_tos != flp->fl4_tos) 861 continue; 862 863 if (fa->fa_scope < flp->fl4_scope) 864 continue; 865 866 fa->fa_state |= FA_S_ACCESSED; 867 868 err = fib_props[fa->fa_type].error; 869 if (err == 0) { 870 struct fib_info *fi = fa->fa_info; 871 872 if (fi->fib_flags & RTNH_F_DEAD) 873 continue; 874 875 switch (fa->fa_type) { 876 case RTN_UNICAST: 877 case RTN_LOCAL: 878 case RTN_BROADCAST: 879 case RTN_ANYCAST: 880 case RTN_MULTICAST: 881 for_nexthops(fi) { 882 if (nh->nh_flags&RTNH_F_DEAD) 883 continue; 884 if (!flp->oif || flp->oif == nh->nh_oif) 885 break; 886 } 887#ifdef CONFIG_IP_ROUTE_MULTIPATH 888 if (nhsel < fi->fib_nhs) { 889 nh_sel = nhsel; 890 goto out_fill_res; 891 } 892#else 893 if (nhsel < 1) { 894 goto out_fill_res; 895 } 896#endif 897 endfor_nexthops(fi); 898 continue; 899 900 default: 901 printk(KERN_DEBUG "impossible 102\n"); 902 return -EINVAL; 903 }; 904 } 905 return err; 906 } 907 return 1; 908 909out_fill_res: 910 res->prefixlen = prefixlen; 911 res->nh_sel = nh_sel; 912 res->type = fa->fa_type; 913 res->scope = fa->fa_scope; 914 res->fi = fa->fa_info; 915#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 916 res->netmask = mask; 917 res->network = zone & inet_make_mask(prefixlen); 918#endif 919 atomic_inc(&res->fi->fib_clntref); 920 return 0; 921} 922 923/* Find appropriate source address to this destination */ 924 925__be32 __fib_res_prefsrc(struct fib_result *res) 926{ 927 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 928} 929 930int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 931 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 932 struct fib_info *fi, unsigned int flags) 933{ 934 struct nlmsghdr *nlh; 935 struct rtmsg *rtm; 936 937 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 938 if (nlh == NULL) 939 return -ENOBUFS; 940 941 rtm = nlmsg_data(nlh); 942 rtm->rtm_family = AF_INET; 943 rtm->rtm_dst_len = dst_len; 944 rtm->rtm_src_len = 0; 945 rtm->rtm_tos = tos; 946 rtm->rtm_table = tb_id; 947 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 948 rtm->rtm_type = type; 949 rtm->rtm_flags = fi->fib_flags; 950 rtm->rtm_scope = scope; 951 rtm->rtm_protocol = fi->fib_protocol; 952 953 if (rtm->rtm_dst_len) 954 NLA_PUT_BE32(skb, RTA_DST, dst); 955 956 if (fi->fib_priority) 957 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 958 959 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 960 goto nla_put_failure; 961 962 if (fi->fib_prefsrc) 963 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 964 965 if (fi->fib_nhs == 1) { 966 if (fi->fib_nh->nh_gw) 967 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 968 969 if (fi->fib_nh->nh_oif) 970 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 971#ifdef CONFIG_NET_CLS_ROUTE 972 if (fi->fib_nh[0].nh_tclassid) 973 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 974#endif 975 } 976#ifdef CONFIG_IP_ROUTE_MULTIPATH 977 if (fi->fib_nhs > 1) { 978 struct rtnexthop *rtnh; 979 struct nlattr *mp; 980 981 mp = nla_nest_start(skb, RTA_MULTIPATH); 982 if (mp == NULL) 983 goto nla_put_failure; 984 985 for_nexthops(fi) { 986 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 987 if (rtnh == NULL) 988 goto nla_put_failure; 989 990 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 991 rtnh->rtnh_hops = nh->nh_weight - 1; 992 rtnh->rtnh_ifindex = nh->nh_oif; 993 994 if (nh->nh_gw) 995 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 996#ifdef CONFIG_NET_CLS_ROUTE 997 if (nh->nh_tclassid) 998 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 999#endif 1000 /* length of rtnetlink header + attributes */ 1001 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 1002 } endfor_nexthops(fi); 1003 1004 nla_nest_end(skb, mp); 1005 } 1006#endif 1007 return nlmsg_end(skb, nlh); 1008 1009nla_put_failure: 1010 return nlmsg_cancel(skb, nlh); 1011} 1012 1013/* 1014 Update FIB if: 1015 - local address disappeared -> we must delete all the entries 1016 referring to it. 1017 - device went down -> we must shutdown all nexthops going via it. 1018 */ 1019 1020int fib_sync_down(__be32 local, struct net_device *dev, int force) 1021{ 1022 int ret = 0; 1023 int scope = RT_SCOPE_NOWHERE; 1024 1025 if (force) 1026 scope = -1; 1027 1028 if (local && fib_info_laddrhash) { 1029 unsigned int hash = fib_laddr_hashfn(local); 1030 struct hlist_head *head = &fib_info_laddrhash[hash]; 1031 struct hlist_node *node; 1032 struct fib_info *fi; 1033 1034 hlist_for_each_entry(fi, node, head, fib_lhash) { 1035 if (fi->fib_prefsrc == local) { 1036 fi->fib_flags |= RTNH_F_DEAD; 1037 ret++; 1038 } 1039 } 1040 } 1041 1042 if (dev) { 1043 struct fib_info *prev_fi = NULL; 1044 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1045 struct hlist_head *head = &fib_info_devhash[hash]; 1046 struct hlist_node *node; 1047 struct fib_nh *nh; 1048 1049 hlist_for_each_entry(nh, node, head, nh_hash) { 1050 struct fib_info *fi = nh->nh_parent; 1051 int dead; 1052 1053 BUG_ON(!fi->fib_nhs); 1054 if (nh->nh_dev != dev || fi == prev_fi) 1055 continue; 1056 prev_fi = fi; 1057 dead = 0; 1058 change_nexthops(fi) { 1059 if (nh->nh_flags&RTNH_F_DEAD) 1060 dead++; 1061 else if (nh->nh_dev == dev && 1062 nh->nh_scope != scope) { 1063 nh->nh_flags |= RTNH_F_DEAD; 1064#ifdef CONFIG_IP_ROUTE_MULTIPATH 1065 spin_lock_bh(&fib_multipath_lock); 1066 fi->fib_power -= nh->nh_power; 1067 nh->nh_power = 0; 1068 spin_unlock_bh(&fib_multipath_lock); 1069#endif 1070 dead++; 1071 } 1072#ifdef CONFIG_IP_ROUTE_MULTIPATH 1073 if (force > 1 && nh->nh_dev == dev) { 1074 dead = fi->fib_nhs; 1075 break; 1076 } 1077#endif 1078 } endfor_nexthops(fi) 1079 if (dead == fi->fib_nhs) { 1080 fi->fib_flags |= RTNH_F_DEAD; 1081 ret++; 1082 } 1083 } 1084 } 1085 1086 return ret; 1087} 1088 1089#ifdef CONFIG_IP_ROUTE_MULTIPATH 1090 1091/* 1092 Dead device goes up. We wake up dead nexthops. 1093 It takes sense only on multipath routes. 1094 */ 1095 1096int fib_sync_up(struct net_device *dev) 1097{ 1098 struct fib_info *prev_fi; 1099 unsigned int hash; 1100 struct hlist_head *head; 1101 struct hlist_node *node; 1102 struct fib_nh *nh; 1103 int ret; 1104 1105 if (!(dev->flags&IFF_UP)) 1106 return 0; 1107 1108 prev_fi = NULL; 1109 hash = fib_devindex_hashfn(dev->ifindex); 1110 head = &fib_info_devhash[hash]; 1111 ret = 0; 1112 1113 hlist_for_each_entry(nh, node, head, nh_hash) { 1114 struct fib_info *fi = nh->nh_parent; 1115 int alive; 1116 1117 BUG_ON(!fi->fib_nhs); 1118 if (nh->nh_dev != dev || fi == prev_fi) 1119 continue; 1120 1121 prev_fi = fi; 1122 alive = 0; 1123 change_nexthops(fi) { 1124 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1125 alive++; 1126 continue; 1127 } 1128 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1129 continue; 1130 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 1131 continue; 1132 alive++; 1133 spin_lock_bh(&fib_multipath_lock); 1134 nh->nh_power = 0; 1135 nh->nh_flags &= ~RTNH_F_DEAD; 1136 spin_unlock_bh(&fib_multipath_lock); 1137 } endfor_nexthops(fi) 1138 1139 if (alive > 0) { 1140 fi->fib_flags &= ~RTNH_F_DEAD; 1141 ret++; 1142 } 1143 } 1144 1145 return ret; 1146} 1147 1148/* 1149 The algorithm is suboptimal, but it provides really 1150 fair weighted route distribution. 1151 */ 1152 1153void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1154{ 1155 struct fib_info *fi = res->fi; 1156 int w; 1157 1158 spin_lock_bh(&fib_multipath_lock); 1159 if (fi->fib_power <= 0) { 1160 int power = 0; 1161 change_nexthops(fi) { 1162 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1163 power += nh->nh_weight; 1164 nh->nh_power = nh->nh_weight; 1165 } 1166 } endfor_nexthops(fi); 1167 fi->fib_power = power; 1168 if (power <= 0) { 1169 spin_unlock_bh(&fib_multipath_lock); 1170 /* Race condition: route has just become dead. */ 1171 res->nh_sel = 0; 1172 return; 1173 } 1174 } 1175 1176 1177 /* w should be random number [0..fi->fib_power-1], 1178 it is pretty bad approximation. 1179 */ 1180 1181 w = jiffies % fi->fib_power; 1182 1183 change_nexthops(fi) { 1184 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 1185 if ((w -= nh->nh_power) <= 0) { 1186 nh->nh_power--; 1187 fi->fib_power--; 1188 res->nh_sel = nhsel; 1189 spin_unlock_bh(&fib_multipath_lock); 1190 return; 1191 } 1192 } 1193 } endfor_nexthops(fi); 1194 1195 /* Race condition: route has just become dead. */ 1196 res->nh_sel = 0; 1197 spin_unlock_bh(&fib_multipath_lock); 1198} 1199#endif