jcs's openbsd hax
openbsd
at jcs 1001 lines 21 kB view raw
1/* $OpenBSD: rtable.c,v 1.95 2025/07/16 13:48:38 jsg Exp $ */ 2 3/* 4 * Copyright (c) 2014-2016 Martin Pieuchot 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#ifndef _KERNEL 20#include "kern_compat.h" 21#else 22#include <sys/param.h> 23#include <sys/systm.h> 24#include <sys/socket.h> 25#include <sys/malloc.h> 26#include <sys/queue.h> 27#include <sys/domain.h> 28#include <sys/srp.h> 29#include <sys/smr.h> 30#endif 31 32#include <net/rtable.h> 33#include <net/route.h> 34#include <net/art.h> 35 36/* 37 * Structures used by rtable_get() to retrieve the corresponding 38 * routing table for a given pair of ``af'' and ``rtableid''. 39 * 40 * Note that once allocated routing table heads are never freed. 41 * This way we do not need to reference count them. 42 * 43 * afmap rtmap/dommp 44 * ----------- --------- ----- 45 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index) 46 * ----------- --------- ----- to rdomain/loopback (=value). 47 * | AF_INET |. 48 * ----------- `. .---------. .---------. 49 * ... `----> | rtable0 | ... | rtableN | Array of pointers for 50 * ----------- '---------' '---------' IPv4 routing tables 51 * | AF_MPLS | indexed by ``rtableid''. 
52 * ----------- 53 */ 54struct srp *afmap; 55uint8_t af2idx[AF_MAX+1]; /* To only allocate supported AF */ 56uint8_t af2idx_max; 57 58/* Array of routing table pointers. */ 59struct rtmap { 60 unsigned int limit; 61 void **tbl; 62}; 63 64/* 65 * Array of rtableid -> rdomain mapping. 66 * 67 * Only used for the first index as described above. 68 */ 69struct dommp { 70 unsigned int limit; 71 /* 72 * Array to get the routing domain and loopback interface related to 73 * a routing table. Format: 74 * 75 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain 76 */ 77 unsigned int *value; 78}; 79 80unsigned int rtmap_limit = 0; 81 82void rtmap_init(void); 83void rtmap_grow(unsigned int, sa_family_t); 84void rtmap_dtor(void *, void *); 85 86struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL); 87 88void rtable_init_backend(void); 89struct rtable *rtable_alloc(unsigned int, unsigned int, unsigned int); 90struct rtable *rtable_get(unsigned int, sa_family_t); 91 92void 93rtmap_init(void) 94{ 95 const struct domain *dp; 96 int i; 97 98 /* Start with a single table for every domain that requires it. */ 99 for (i = 0; (dp = domains[i]) != NULL; i++) { 100 if (dp->dom_rtoffset == 0) 101 continue; 102 103 rtmap_grow(1, dp->dom_family); 104 } 105 106 /* Initialize the rtableid->rdomain mapping table. */ 107 rtmap_grow(1, 0); 108 109 rtmap_limit = 1; 110} 111 112/* 113 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''. 
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap *map, *nmap;
	int i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	/* Build the replacement map with ``nlimit'' zeroed slots. */
	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	/* Carry over the entries of the current map, if there is one. */
	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	/* Publish the new map; ``rtmap_gc'' is handed along for the old one. */
	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}

/*
 * Destructor registered in ``rtmap_gc'': release a map and its array.
 * The first argument is not used.
 */
void
rtmap_dtor(void *null, void *xmap)
{
	struct rtmap *map = xmap;

	/*
	 * doesn't need to be serialized since this is the last reference
	 * to this map.  there's nothing to race against.
	 */
	free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
	free(map, M_RTABLE, sizeof(*map));
}

/*
 * Set up the routing table layer: assign ``afmap'' indexes, boot the
 * backend, allocate ``afmap'', create the initial maps and routing
 * table 0, then initialize the route timers.  Panics if table 0 cannot
 * be created.
 */
void
rtable_init(void)
{
	const struct domain *dp;
	int i;

	/* Both layouts are stored in the same ``afmap'' array. */
	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Assign the next free ``afmap'' index to every address family
	 * whose domain has a non-zero ``dom_rtoffset''.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");

	rt_timer_init();
}

/*
 * Create the routing tables with ID ``id'' for every domain that has a
 * non-zero ``dom_rtoffset'', growing the maps when ``id'' does not fit.
 *
 * Returns 0 on success or if the table already exists, EINVAL if ``id''
 * is greater than RT_TABLEID_MAX and ENOMEM if rtable_alloc() fails.
 * The kernel lock is taken for the duration of the operation.
 */
int
rtable_add(unsigned int id)
{
	const struct domain *dp;
	struct rtable *tbl;
	struct rtmap *map;
	struct dommp *dmm;
	sa_family_t af;
	unsigned int off, alen;
	int i, error = 0;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	KERNEL_LOCK();

	/* Nothing to do; ``error'' is still 0 here. */
	if (rtable_exists(id))
		goto out;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL) {
			error = ENOMEM;
			goto out;
		}

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;
out:
	KERNEL_UNLOCK();

	return (error);
}

/*
 * Return the routing table for ``rtableid'' and ``af''.
 *
 * NULL is returned when ``af'' is outside of ``af2idx'' or has no index
 * assigned, when ``rtableid'' is not below the map limit, or when the
 * slot itself is NULL.
 */
struct rtable *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap *map;
	struct rtable *tbl = NULL;
	struct srp_ref sr;

	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}

/*
 * Return 1 if rtable_get() finds a table with ID ``rtableid'' for at
 * least one domain, 0 otherwise.
 */
int
rtable_exists(unsigned int rtableid)
{
	const struct domain *dp;
	void *tbl;
	int i;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl != NULL)
			return (1);
	}

	return (0);
}

/*
 * Return 0 as soon as one of the tables with ID ``rtableid'' is
 * reported non-empty by art_is_empty(), 1 otherwise.  Domains without
 * such a table are skipped, so a non-existent ``rtableid'' yields 1.
 */
int
rtable_empty(unsigned int rtableid)
{
	const struct domain *dp;
	int i;
	struct rtable *tbl;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		tbl = rtable_get(rtableid, dp->dom_family);
		if (tbl == NULL)
			continue;
		if (!art_is_empty(tbl->r_art))
			return (0);
	}

	return (1);
}

/*
 * Return the routing domain stored for ``rtableid'', i.e. the bits of
 * its mapping word selected by RT_TABLEID_MASK, or 0 when ``rtableid''
 * is not below the mapping limit.
 */
unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int rdomain = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}

/*
 * Return the loopback interface index stored for ``rtableid'', i.e. its
 * mapping word shifted right by RT_TABLEID_BITS, or 0 when ``rtableid''
 * is not below the mapping limit.
 */
unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int loifidx = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}

/*
 * Record ``rdomain'' and ``loifidx'' as the routing domain and loopback
 * index of ``rtableid''.  The kernel lock must be held.  The call is
 * silently ignored unless both ``rtableid'' and ``rdomain'' exist.
 */
void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp *dmm;
	unsigned int value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	/* Pack both values in the format described in struct dommp. */
	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}


static inline const uint8_t *satoaddr(struct rtable *,
    const struct sockaddr *);

void rtable_mpath_insert(struct art_node *, struct rtentry *);

/* Backend initialization: boot ART. */
void
rtable_init_backend(void)
{
	art_boot();
}

/*
 * Allocate a routing table head backed by an ART created with
 * art_alloc(``alen''); ``off'' is stored in ``r_off''.  ``rtableid'' is
 * not used here.  Returns NULL if either allocation fails.
 */
struct rtable *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct rtable *tbl;

	tbl = malloc(sizeof(*tbl), M_RTABLE, M_NOWAIT|M_ZERO);
	if (tbl == NULL)
		return (NULL);

	tbl->r_art = art_alloc(alen);
	if (tbl->r_art == NULL) {
		free(tbl, M_RTABLE, sizeof(*tbl));
		return (NULL);
	}

	rw_init(&tbl->r_lock, "rtable");
	tbl->r_off = off;
	tbl->r_source = NULL;

	return (tbl);
}

/*
 * Store ``src'' as the source address of table ``rtableid'' for ``af''.
 * The pointer itself is stored, no copy is made.  Returns EAFNOSUPPORT
 * if there is no such table, 0 otherwise.
 */
int
rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
{
	struct rtable *tbl;

	NET_ASSERT_LOCKED_EXCLUSIVE();

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	tbl->r_source = src;

	return (0);
}

/*
 * Return the source address stored for table ``rtableid'' and ``af'',
 * or NULL if there is no such table.
 */
struct sockaddr *
rtable_getsource(unsigned int rtableid, int af)
{
	struct rtable *tbl;

	NET_ASSERT_LOCKED();

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (NULL);

	return (tbl->r_source);
}

/*
 * Reset the source address of table ``rtableid'' to NULL if the stored
 * address has the same length and content as ``src''.
 */
void
rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
{
	struct sockaddr *addr;

	addr = rtable_getsource(rtableid, src->sa_family);
	if (addr && (addr->sa_len == src->sa_len)) {
		if (memcmp(src, addr, addr->sa_len) == 0) {
			rtable_setsource(rtableid, src->sa_family, NULL);
		}
	}
}

/*
 * Look up a route to ``dst'' in table ``rtableid''.
 *
 * Without ``mask'' the node is found with art_match(), otherwise with
 * art_lookup() using the prefix length derived from ``mask''.  The
 * first entry of that node matching ``prio'' (unless RTP_ANY) and
 * ``gateway'' (unless NULL) is returned with a reference taken via
 * rtref().  NULL is returned if there is no table, the mask is invalid
 * or nothing matches.
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
{
	struct rtable *tbl;
	struct art_node *an;
	struct rtentry *rt = NULL;
	const uint8_t *addr;
	int plen;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (NULL);

	addr = satoaddr(tbl, dst);

	smr_read_enter();
	if (mask == NULL) {
		/* No need for a perfect match. */
		an = art_match(tbl->r_art, addr);
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			goto out;

		an = art_lookup(tbl->r_art, addr, plen);
	}
	if (an == NULL)
		goto out;

	/* ``rt'' is NULL after the loop if no entry qualified. */
	for (rt = SMR_PTR_GET(&an->an_value); rt != NULL;
	    rt = SMR_PTR_GET(&rt->rt_next)) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);

out:
	smr_read_leave();

	return (rt);
}

/*
 * Return the entry used to reach ``dst'' in table ``rtableid'', found
 * with art_match(), with a reference taken via rtref().  When rt_hash()
 * provides a hash for the first entry, one of the entries sharing its
 * priority is selected from that hash.  Returns NULL if there is no
 * table or no matching node.
 */
struct rtentry *
rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
{
	struct rtable *tbl;
	struct art_node *an;
	struct rtentry *rt = NULL;
	const uint8_t *addr;
	int hash;
	uint8_t prio;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (NULL);

	addr = satoaddr(tbl, dst);

	smr_read_enter();
	an = art_match(tbl->r_art, addr);
	if (an == NULL)
		goto out;

	rt = SMR_PTR_GET(&an->an_value);
	KASSERT(rt != NULL);
	prio = rt->rt_priority;

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry *mrt;
		int threshold, npaths = 1;

		KASSERT(hash <= 0xffff);

		/* Only count nexthops with the same priority. */
		mrt = rt;
		while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) {
			if (mrt->rt_priority == prio)
				npaths++;
		}

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes.  for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.
		 */
		mrt = rt;
		while (hash > threshold) {
			if (mrt->rt_priority == prio) {
				rt = mrt;
				hash -= threshold;
			}
			mrt = SMR_PTR_GET(&mrt->rt_next);
			if (mrt == NULL)
				break;
		}
	}
	rtref(rt);
out:
	smr_read_leave();
	return (rt);
}

/*
 * Insert ``rt'' for ``dst''/``mask'' into table ``rtableid''.
 *
 * Returns 0 on success, EAFNOSUPPORT if there is no table for the
 * address family, EINVAL for an invalid mask, ENOMEM if art_get() or
 * art_insert() fails and EEXIST if a conflicting entry is already
 * attached to the node.  The reference taken for the table is dropped
 * again on the ENOMEM (art_insert) and EEXIST paths.
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an, *prev;
	const uint8_t *addr;
	int plen;
	unsigned int rt_flags;
	int error = 0;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	an = art_get(addr, plen);
	if (an == NULL)
		return (ENOMEM);

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;	/* keep the caller's RTF_MPATH request */
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	rt->rt_next = NULL;

	rtref(rt); /* take a ref for the table */
	an->an_value = rt;

	rw_enter_write(&tbl->r_lock);
	prev = art_insert(tbl->r_art, an);
	if (prev == NULL) {
		error = ENOMEM;
		goto put;
	}

	if (prev != an) {
		struct rtentry *mrt;
		int mpathok = ISSET(rt_flags, RTF_MPATH);
		int mpath = 0;

		/*
		 * An ART node with the same destination/netmask already
		 * exists.
		 */
		art_put(an);
		an = prev;

		/* Do not permit exactly the same dst/mask/gw pair. */
		for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
		    mrt != NULL;
		    mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    memcmp(mrt->rt_gateway, gateway,
			    gateway->sa_len) == 0)) {
				error = EEXIST;
				goto leave;
			}
			mpath = RTF_MPATH;
		}

		/* The new route can be added to the list. */
		if (mpath) {
			SET(rt->rt_flags, RTF_MPATH);

			/* Flag the existing entries of that priority too. */
			for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
			    mrt != NULL;
			    mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
				if ((mrt->rt_priority & RTP_MASK) !=
				    (prio & RTP_MASK))
					continue;

				SET(mrt->rt_flags, RTF_MPATH);
			}
		}

		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
	}
	rw_exit_write(&tbl->r_lock);
	return (error);

put:
	art_put(an);
leave:
	rw_exit_write(&tbl->r_lock);
	rtfree(rt);
	return (error);
}

/*
 * Remove ``rt'' from the node for ``dst''/``mask'' in table ``rtableid''
 * and drop a reference on it with rtfree().
 *
 * Returns 0 on success, EAFNOSUPPORT if there is no table for the
 * address family, EINVAL for an invalid mask and ESRCH if no node is
 * found.  Panics if ``rt'' is not on the list of the node.
 */
int
rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an;
	const uint8_t *addr;
	int plen;
	struct rtentry *mrt;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rw_enter_write(&tbl->r_lock);
	smr_read_enter();
	an = art_lookup(tbl->r_art, addr, plen);
	smr_read_leave();
	if (an == NULL) {
		rw_exit_write(&tbl->r_lock);
		return (ESRCH);
	}

	/* If this is the only route in the list then we can delete the node */
	if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
		struct art_node *oan;
		oan = art_delete(tbl->r_art, addr, plen);
		if (oan != an)
			panic("art %p changed shape during delete", tbl->r_art);
		art_put(an);
		/*
		 * XXX an and the rt ref could still be alive on other cpus.
		 * this currently works because of the NET_LOCK/KERNEL_LOCK
		 * but should be fixed if we want to do route lookups outside
		 * these locks. - dlg@
		 */
	} else {
		struct rtentry **prt;
		struct rtentry *nrt;
		unsigned int found = 0;
		unsigned int npaths = 0;

		/*
		 * If other multipath route entries are still attached to
		 * this ART node we only have to unlink it.
		 */
		prt = (struct rtentry **)&an->an_value;
		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
			if (mrt == rt) {
				found = 1;
				SMR_PTR_SET_LOCKED(prt,
				    SMR_PTR_GET_LOCKED(&mrt->rt_next));
			} else if ((mrt->rt_priority & RTP_MASK) ==
			    (rt->rt_priority & RTP_MASK)) {
				/* Remaining entry with the same priority. */
				npaths++;
				nrt = mrt;
			}
			prt = &mrt->rt_next;
		}
		if (!found)
			panic("removing non-existent route");
		/*
		 * ``nrt'' is only read when npaths is 1, in which case it
		 * was assigned in the loop above.
		 */
		if (npaths == 1)
			CLR(nrt->rt_flags, RTF_MPATH);
	}
	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
	rw_exit_write(&tbl->r_lock);
	rtfree(rt);

	return (0);
}

/*
 * Call ``func'' on every entry of table ``rtableid'' for ``af''.  The
 * table lock is released around the calls to ``func''; each entry is
 * referenced while it is being processed.
 *
 * The walk stops at the first non-zero return value of ``func'', which
 * is then returned.  In that case, if ``prt'' is not NULL the current
 * entry is stored in ``*prt'' without dropping its reference, otherwise
 * the reference is dropped.  Returns EAFNOSUPPORT if there is no table.
 */
int
rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct rtable *tbl;
	struct art_iter ai;
	struct art_node *an;
	int error = 0;

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	rw_enter_write(&tbl->r_lock);
	ART_FOREACH(an, tbl->r_art, &ai) {
		/*
		 * ART nodes have a list of rtentries.
		 *
		 * art_iter holds references to the topology
		 * so it won't change, but not the an_node or rtentries.
		 */
		struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value);
		rtref(rt);

		rw_exit_write(&tbl->r_lock);
		do {
			struct rtentry *nrt;

			smr_read_enter();
			/* Get ready for the next entry. */
			nrt = SMR_PTR_GET(&rt->rt_next);
			if (nrt != NULL)
				rtref(nrt);
			smr_read_leave();

			error = func(rt, arg, rtableid);
			if (error != 0) {
				if (prt != NULL)
					*prt = rt;
				else
					rtfree(rt);

				if (nrt != NULL)
					rtfree(nrt);

				/* The iterator is closed with the lock held. */
				rw_enter_write(&tbl->r_lock);
				art_iter_close(&ai);
				rw_exit_write(&tbl->r_lock);
				return (error);
			}

			rtfree(rt);
			rt = nrt;
		} while (rt != NULL);
		rw_enter_write(&tbl->r_lock);
	}
	rw_exit_write(&tbl->r_lock);

	return (error);
}

/*
 * Call ``func'' on every entry of table ``rtableid'' for ``af'' while
 * holding the table lock for the whole traversal.  The walk stops at
 * the first non-zero return value of ``func'', which is then returned.
 * Returns EAFNOSUPPORT if there is no table.
 */
int
rtable_read(unsigned int rtableid, sa_family_t af,
    int (*func)(const struct rtentry *, void *, unsigned int), void *arg)
{
	struct rtable *tbl;
	struct art_iter ai;
	struct art_node *an;
	int error = 0;

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	rw_enter_write(&tbl->r_lock);
	ART_FOREACH(an, tbl->r_art, &ai) {
		struct rtentry *rt;
		for (rt = SMR_PTR_GET_LOCKED(&an->an_value); rt != NULL;
		    rt = SMR_PTR_GET_LOCKED(&rt->rt_next)) {
			error = func(rt, arg, rtableid);
			if (error != 0) {
				art_iter_close(&ai);
				goto leave;
			}
		}
	}
leave:
	rw_exit_write(&tbl->r_lock);

	return (error);
}

/*
 * Return the entry following ``rt0'' on its list, with a reference
 * taken via rtref(), or NULL if there is none.  The reference on
 * ``rt0'' is always dropped.
 */
struct rtentry *
rtable_iterate(struct rtentry *rt0)
{
	struct rtentry *rt = NULL;

	smr_read_enter();
	rt = SMR_PTR_GET(&rt0->rt_next);
	if (rt != NULL)
		rtref(rt);
	smr_read_leave();
	rtfree(rt0);
	return (rt);
}

/* Always returns 1; both arguments are ignored. */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	return (1);
}

/*
 * Change the priority of ``rt'', attached to the node for ``dst''/``plen''
 * in table ``rtableid'', to ``prio''.
 *
 * Returns EAFNOSUPPORT if there is no table for the address family,
 * ESRCH if no node is found, 0 if ``rt'' is the only entry of the node
 * and EAGAIN if ``rt'' had to be unlinked and inserted again.
 */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an;
	const uint8_t *addr;
	int error = 0;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);

	rw_enter_write(&tbl->r_lock);
	smr_read_enter();
	an = art_lookup(tbl->r_art, addr, plen);
	smr_read_leave();
	if (an == NULL) {
		error = ESRCH;
	} else if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle.  This is done to
		 * guarantee that the list at ``an->an_value'' is never
		 * empty when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		struct rtentry **prt;
		struct rtentry *mrt;

		/* Find the pointer that currently links to ``rt''. */
		prt = (struct rtentry **)&an->an_value;
		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
			if (mrt == rt)
				break;
			prt = &mrt->rt_next;
		}
		KASSERT(mrt != NULL);

		/* Unlink, update the priority and insert again. */
		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		error = EAGAIN;
	}
	rw_exit_write(&tbl->r_lock);

	return (error);
}

/*
 * Link ``rt'' into the list of ``an'' in front of the first entry whose
 * priority value is greater than the one of ``rt'', or at the tail if
 * there is no such entry.
 */
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry *mrt, **prt;
	uint8_t prio = rt->rt_priority;

	/* Iterate until we find the route to be placed after ``rt''. */

	prt = (struct rtentry **)&an->an_value;
	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
		if (mrt->rt_priority > prio)
			break;

		prt = &mrt->rt_next;
	}

	/* Set the next pointer of ``rt'' before making it reachable. */
	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
	SMR_PTR_SET_LOCKED(prt, rt);
}

/*
 * Return a pointer to the address (key).  This is a heritage from the
 * BSD radix tree needed to skip the non-address fields from the flavor
 * of "struct sockaddr" used by this routing table.
913 */ 914static inline const uint8_t * 915satoaddr(struct rtable *tbl, const struct sockaddr *sa) 916{ 917 return (((const uint8_t *)sa) + tbl->r_off); 918} 919 920/* 921 * Return the prefix length of a mask. 922 */ 923int 924rtable_satoplen(sa_family_t af, const struct sockaddr *mask) 925{ 926 const struct domain *dp; 927 uint8_t *ap, *ep; 928 int mlen, plen = 0; 929 int i; 930 931 for (i = 0; (dp = domains[i]) != NULL; i++) { 932 if (dp->dom_rtoffset == 0) 933 continue; 934 935 if (af == dp->dom_family) 936 break; 937 } 938 if (dp == NULL) 939 return (-1); 940 941 /* Host route */ 942 if (mask == NULL) 943 return (dp->dom_maxplen); 944 945 mlen = mask->sa_len; 946 947 /* Default route */ 948 if (mlen == 0) 949 return (0); 950 951 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset; 952 ep = (uint8_t *)((uint8_t *)mask) + mlen; 953 if (ap > ep) 954 return (-1); 955 956 /* Trim trailing zeroes. */ 957 while (ap < ep && ep[-1] == 0) 958 ep--; 959 960 if (ap == ep) 961 return (0); 962 963 /* "Beauty" adapted from sbin/route/show.c ... */ 964 while (ap < ep) { 965 switch (*ap++) { 966 case 0xff: 967 plen += 8; 968 break; 969 case 0xfe: 970 plen += 7; 971 goto out; 972 case 0xfc: 973 plen += 6; 974 goto out; 975 case 0xf8: 976 plen += 5; 977 goto out; 978 case 0xf0: 979 plen += 4; 980 goto out; 981 case 0xe0: 982 plen += 3; 983 goto out; 984 case 0xc0: 985 plen += 2; 986 goto out; 987 case 0x80: 988 plen += 1; 989 goto out; 990 default: 991 /* Non contiguous mask. */ 992 return (-1); 993 } 994 } 995 996out: 997 if (plen > dp->dom_maxplen || ap != ep) 998 return -1; 999 1000 return (plen); 1001}