/*
 * jcs's OpenBSD hacks — local copy of OpenBSD sys/net/rtable.c.
 */
1/* $OpenBSD: rtable.c,v 1.95 2025/07/16 13:48:38 jsg Exp $ */
2
3/*
4 * Copyright (c) 2014-2016 Martin Pieuchot
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#ifndef _KERNEL
20#include "kern_compat.h"
21#else
22#include <sys/param.h>
23#include <sys/systm.h>
24#include <sys/socket.h>
25#include <sys/malloc.h>
26#include <sys/queue.h>
27#include <sys/domain.h>
28#include <sys/srp.h>
29#include <sys/smr.h>
30#endif
31
32#include <net/rtable.h>
33#include <net/route.h>
34#include <net/art.h>
35
36/*
37 * Structures used by rtable_get() to retrieve the corresponding
38 * routing table for a given pair of ``af'' and ``rtableid''.
39 *
40 * Note that once allocated routing table heads are never freed.
41 * This way we do not need to reference count them.
42 *
43 * afmap rtmap/dommp
44 * ----------- --------- -----
45 * | 0 |--------> | 0 | 0 | ... | 0 | Array mapping rtableid (=index)
46 * ----------- --------- ----- to rdomain/loopback (=value).
47 * | AF_INET |.
48 * ----------- `. .---------. .---------.
49 * ... `----> | rtable0 | ... | rtableN | Array of pointers for
50 * ----------- '---------' '---------' IPv4 routing tables
51 * | AF_MPLS | indexed by ``rtableid''.
52 * -----------
53 */
/* Per-AF SRP-protected arrays; slot 0 holds the dommp, see diagram above. */
struct srp *afmap;
uint8_t af2idx[AF_MAX+1]; /* To only allocate supported AF */
uint8_t af2idx_max; /* 1 + number of AFs with a routing table */

/* Array of routing table pointers. */
struct rtmap {
	unsigned int limit; /* number of entries in ``tbl'' */
	void **tbl; /* indexed by rtableid */
};

/*
 * Array of rtableid -> rdomain mapping.
 *
 * Only used for the first index as described above.
 */
struct dommp {
	unsigned int limit; /* number of entries in ``value'' */
	/*
	 * Array to get the routing domain and loopback interface related to
	 * a routing table. Format:
	 *
	 * 8 unused bits | 16 bits for loopback index | 8 bits for rdomain
	 */
	unsigned int *value;
};

/* Common size of every rtmap/dommp array; grown by rtable_add(). */
unsigned int rtmap_limit = 0;

void rtmap_init(void);
void rtmap_grow(unsigned int, sa_family_t);
void rtmap_dtor(void *, void *);

/* Garbage collector that frees a replaced rtmap once readers are gone. */
struct srp_gc rtmap_gc = SRP_GC_INITIALIZER(rtmap_dtor, NULL);

void rtable_init_backend(void);
struct rtable *rtable_alloc(unsigned int, unsigned int, unsigned int);
struct rtable *rtable_get(unsigned int, sa_family_t);
91
/*
 * Create the initial (single entry) routing table array for every
 * domain that routes, plus the rtableid->rdomain map at index 0, and
 * publish the common limit.
 */
void
rtmap_init(void)
{
	const struct domain *dp;
	int i;

	/* Start with a single table for every domain that requires it. */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		rtmap_grow(1, dp->dom_family);
	}

	/* Initialize the rtableid->rdomain mapping table. */
	rtmap_grow(1, 0);

	rtmap_limit = 1;
}
111
/*
 * Grow the size of the array of routing table for AF ``af'' to ``nlimit''.
 *
 * A new array is allocated, the existing pointers are copied over and
 * the new array is published through SRP; the old one is reclaimed by
 * rtmap_dtor() once its last reader is gone.  The caller is in charge
 * of updating ``rtmap_limit'' once every AF has been grown.
 */
void
rtmap_grow(unsigned int nlimit, sa_family_t af)
{
	struct rtmap *map, *nmap;
	int i;

	KERNEL_ASSERT_LOCKED();

	KASSERT(nlimit > rtmap_limit);

	nmap = malloc(sizeof(*nmap), M_RTABLE, M_WAITOK);
	nmap->limit = nlimit;
	nmap->tbl = mallocarray(nlimit, sizeof(*nmap[0].tbl), M_RTABLE,
	    M_WAITOK|M_ZERO);

	map = srp_get_locked(&afmap[af2idx[af]]);
	if (map != NULL) {
		KASSERT(map->limit == rtmap_limit);

		/* Copy existing table pointers into the new array. */
		for (i = 0; i < map->limit; i++)
			nmap->tbl[i] = map->tbl[i];
	}

	/* Publish the new array; rtmap_gc frees the old one when safe. */
	srp_update_locked(&rtmap_gc, &afmap[af2idx[af]], nmap);
}
140
141void
142rtmap_dtor(void *null, void *xmap)
143{
144 struct rtmap *map = xmap;
145
146 /*
147 * doesn't need to be serialized since this is the last reference
148 * to this map. there's nothing to race against.
149 */
150 free(map->tbl, M_RTABLE, map->limit * sizeof(*map[0].tbl));
151 free(map, M_RTABLE, sizeof(*map));
152}
153
/*
 * Boot-time initialization of the routing table layer: assign a compact
 * index to every AF that routes, allocate ``afmap'' and create the
 * default routing table 0.
 */
void
rtable_init(void)
{
	const struct domain *dp;
	int i;

	/* rtmap and dommp share afmap slots; their layouts must match. */
	KASSERT(sizeof(struct rtmap) == sizeof(struct dommp));

	/* We use index 0 for the rtable/rdomain map. */
	af2idx_max = 1;
	memset(af2idx, 0, sizeof(af2idx));

	/*
	 * Compute the maximum supported key length in case the routing
	 * table backend needs it.
	 */
	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		/* Assign the next compact index to this family. */
		af2idx[dp->dom_family] = af2idx_max++;
	}
	rtable_init_backend();

	/*
	 * Allocate AF-to-id table now that we know how many AFs this
	 * kernel supports.
	 */
	afmap = mallocarray(af2idx_max + 1, sizeof(*afmap), M_RTABLE,
	    M_WAITOK|M_ZERO);

	rtmap_init();

	if (rtable_add(0) != 0)
		panic("unable to create default routing table");

	rt_timer_init();
}
192
/*
 * Create routing table ``id'' for every domain that routes.  Returns 0
 * when the table was created or already existed, EINVAL when ``id'' is
 * out of range and ENOMEM on allocation failure.
 */
int
rtable_add(unsigned int id)
{
	const struct domain *dp;
	struct rtable *tbl;
	struct rtmap *map;
	struct dommp *dmm;
	sa_family_t af;
	unsigned int off, alen;
	int i, error = 0;

	if (id > RT_TABLEID_MAX)
		return (EINVAL);

	KERNEL_LOCK();

	/* Creating an already existing table is not an error. */
	if (rtable_exists(id))
		goto out;

	for (i = 0; (dp = domains[i]) != NULL; i++) {
		if (dp->dom_rtoffset == 0)
			continue;

		af = dp->dom_family;
		off = dp->dom_rtoffset;
		alen = dp->dom_maxplen;

		/* Grow this AF's array so slot ``id'' exists. */
		if (id >= rtmap_limit)
			rtmap_grow(id + 1, af);

		tbl = rtable_alloc(id, alen, off);
		if (tbl == NULL) {
			/*
			 * NOTE(review): tables already created for previous
			 * AFs are left in place on this error path.
			 */
			error = ENOMEM;
			goto out;
		}

		map = srp_get_locked(&afmap[af2idx[af]]);
		map->tbl[id] = tbl;
	}

	/* Reflect possible growth. */
	if (id >= rtmap_limit) {
		rtmap_grow(id + 1, 0);
		rtmap_limit = id + 1;
	}

	/* Use main rtable/rdomain by default. */
	dmm = srp_get_locked(&afmap[0]);
	dmm->value[id] = 0;
out:
	KERNEL_UNLOCK();

	return (error);
}
247
/*
 * Return the routing table for ``rtableid'' and ``af'', or NULL if it
 * does not exist.  No reference is taken: table heads are never freed
 * (see the comment at the top of the file).
 */
struct rtable *
rtable_get(unsigned int rtableid, sa_family_t af)
{
	struct rtmap *map;
	struct rtable *tbl = NULL;
	struct srp_ref sr;

	/* Reject families without a routing table. */
	if (af >= nitems(af2idx) || af2idx[af] == 0)
		return (NULL);

	map = srp_enter(&sr, &afmap[af2idx[af]]);
	if (rtableid < map->limit)
		tbl = map->tbl[rtableid];
	srp_leave(&sr);

	return (tbl);
}
265
266int
267rtable_exists(unsigned int rtableid)
268{
269 const struct domain *dp;
270 void *tbl;
271 int i;
272
273 for (i = 0; (dp = domains[i]) != NULL; i++) {
274 if (dp->dom_rtoffset == 0)
275 continue;
276
277 tbl = rtable_get(rtableid, dp->dom_family);
278 if (tbl != NULL)
279 return (1);
280 }
281
282 return (0);
283}
284
285int
286rtable_empty(unsigned int rtableid)
287{
288 const struct domain *dp;
289 int i;
290 struct rtable *tbl;
291
292 for (i = 0; (dp = domains[i]) != NULL; i++) {
293 if (dp->dom_rtoffset == 0)
294 continue;
295
296 tbl = rtable_get(rtableid, dp->dom_family);
297 if (tbl == NULL)
298 continue;
299 if (!art_is_empty(tbl->r_art))
300 return (0);
301 }
302
303 return (1);
304}
305
/*
 * Return the routing domain associated with ``rtableid'' (0 when the
 * id is out of range).
 */
unsigned int
rtable_l2(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int rdomain = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		rdomain = (dmm->value[rtableid] & RT_TABLEID_MASK);
	srp_leave(&sr);

	return (rdomain);
}
320
/*
 * Return the loopback interface index associated with ``rtableid''
 * (0 when the id is out of range).
 */
unsigned int
rtable_loindex(unsigned int rtableid)
{
	struct dommp *dmm;
	unsigned int loifidx = 0;
	struct srp_ref sr;

	dmm = srp_enter(&sr, &afmap[0]);
	if (rtableid < dmm->limit)
		loifidx = (dmm->value[rtableid] >> RT_TABLEID_BITS);
	srp_leave(&sr);

	return (loifidx);
}
335
/*
 * Associate routing domain ``rdomain'' and loopback interface index
 * ``loifidx'' with ``rtableid''.  Silently does nothing unless both
 * tables exist.
 */
void
rtable_l2set(unsigned int rtableid, unsigned int rdomain, unsigned int loifidx)
{
	struct dommp *dmm;
	unsigned int value;

	KERNEL_ASSERT_LOCKED();

	if (!rtable_exists(rtableid) || !rtable_exists(rdomain))
		return;

	/* Pack loopback ifindex and rdomain into one word (format above). */
	value = (rdomain & RT_TABLEID_MASK) | (loifidx << RT_TABLEID_BITS);

	dmm = srp_get_locked(&afmap[0]);
	dmm->value[rtableid] = value;
}
352
353
/* Return a pointer to the address (key) bytes inside a sockaddr. */
static inline const uint8_t *satoaddr(struct rtable *,
    const struct sockaddr *);

void rtable_mpath_insert(struct art_node *, struct rtentry *);

/* Initialize the ART routing table backend. */
void
rtable_init_backend(void)
{
	art_boot();
}
364
/*
 * Allocate a routing table head for ``rtableid'' with a maximum key
 * length of ``alen'' bits and an address offset of ``off'' bytes.
 * Returns NULL on allocation failure.
 */
struct rtable *
rtable_alloc(unsigned int rtableid, unsigned int alen, unsigned int off)
{
	struct rtable *tbl;

	tbl = malloc(sizeof(*tbl), M_RTABLE, M_NOWAIT|M_ZERO);
	if (tbl == NULL)
		return (NULL);

	tbl->r_art = art_alloc(alen);
	if (tbl->r_art == NULL) {
		free(tbl, M_RTABLE, sizeof(*tbl));
		return (NULL);
	}

	rw_init(&tbl->r_lock, "rtable");
	tbl->r_off = off;	/* offset of the key inside a sockaddr */
	tbl->r_source = NULL;

	return (tbl);
}
386
387int
388rtable_setsource(unsigned int rtableid, int af, struct sockaddr *src)
389{
390 struct rtable *tbl;
391
392 NET_ASSERT_LOCKED_EXCLUSIVE();
393
394 tbl = rtable_get(rtableid, af);
395 if (tbl == NULL)
396 return (EAFNOSUPPORT);
397
398 tbl->r_source = src;
399
400 return (0);
401}
402
403struct sockaddr *
404rtable_getsource(unsigned int rtableid, int af)
405{
406 struct rtable *tbl;
407
408 NET_ASSERT_LOCKED();
409
410 tbl = rtable_get(rtableid, af);
411 if (tbl == NULL)
412 return (NULL);
413
414 return (tbl->r_source);
415}
416
417void
418rtable_clearsource(unsigned int rtableid, struct sockaddr *src)
419{
420 struct sockaddr *addr;
421
422 addr = rtable_getsource(rtableid, src->sa_family);
423 if (addr && (addr->sa_len == src->sa_len)) {
424 if (memcmp(src, addr, addr->sa_len) == 0) {
425 rtable_setsource(rtableid, src->sa_family, NULL);
426 }
427 }
428}
429
/*
 * Return the route matching ``dst'' (and, when given, ``mask'',
 * ``gateway'' and ``prio'') or NULL.  With a NULL ``mask'' a longest
 * prefix match is done instead of a perfect match.  The returned route
 * is referenced for the caller.
 */
struct rtentry *
rtable_lookup(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio)
{
	struct rtable *tbl;
	struct art_node *an;
	struct rtentry *rt = NULL;
	const uint8_t *addr;
	int plen;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (NULL);

	addr = satoaddr(tbl, dst);

	smr_read_enter();
	if (mask == NULL) {
		/* No need for a perfect match. */
		an = art_match(tbl->r_art, addr);
	} else {
		plen = rtable_satoplen(dst->sa_family, mask);
		if (plen == -1)
			goto out;

		an = art_lookup(tbl->r_art, addr, plen);
	}
	if (an == NULL)
		goto out;

	/* Walk the multipath list for an entry matching prio/gateway. */
	for (rt = SMR_PTR_GET(&an->an_value); rt != NULL;
	    rt = SMR_PTR_GET(&rt->rt_next)) {
		if (prio != RTP_ANY &&
		    (rt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
			continue;

		if (gateway == NULL)
			break;

		if (rt->rt_gateway->sa_len == gateway->sa_len &&
		    memcmp(rt->rt_gateway, gateway, gateway->sa_len) == 0)
			break;
	}
	if (rt != NULL)
		rtref(rt);	/* reference handed to the caller */

out:
	smr_read_leave();

	return (rt);
}
481
/*
 * Return the best (longest prefix) match for ``dst''.  When several
 * paths of equal priority exist, one is selected by Hash-Threshold
 * (RFC 2992) using ``src''.  The returned route is referenced for the
 * caller.
 */
struct rtentry *
rtable_match(unsigned int rtableid, const struct sockaddr *dst, uint32_t *src)
{
	struct rtable *tbl;
	struct art_node *an;
	struct rtentry *rt = NULL;
	const uint8_t *addr;
	int hash;
	uint8_t prio;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (NULL);

	addr = satoaddr(tbl, dst);

	smr_read_enter();
	an = art_match(tbl->r_art, addr);
	if (an == NULL)
		goto out;

	/* A node in the tree always carries at least one route. */
	rt = SMR_PTR_GET(&an->an_value);
	KASSERT(rt != NULL);
	prio = rt->rt_priority;

	/* Gateway selection by Hash-Threshold (RFC 2992) */
	if ((hash = rt_hash(rt, dst, src)) != -1) {
		struct rtentry *mrt;
		int threshold, npaths = 1;

		KASSERT(hash <= 0xffff);

		/* Only count nexthops with the same priority. */
		mrt = rt;
		while ((mrt = SMR_PTR_GET(&mrt->rt_next)) != NULL) {
			if (mrt->rt_priority == prio)
				npaths++;
		}

		threshold = (0xffff / npaths) + 1;

		/*
		 * we have no protection against concurrent modification of the
		 * route list attached to the node, so we won't necessarily
		 * have the same number of routes. for most modifications,
		 * we'll pick a route that we wouldn't have if we only saw the
		 * list before or after the change.
		 */
		mrt = rt;
		while (hash > threshold) {
			if (mrt->rt_priority == prio) {
				rt = mrt;
				hash -= threshold;
			}
			mrt = SMR_PTR_GET(&mrt->rt_next);
			if (mrt == NULL)
				break;
		}
	}
	rtref(rt);	/* reference handed to the caller */
out:
	smr_read_leave();
	return (rt);
}
546
/*
 * Insert ``rt'' for ``dst/mask'' into table ``rtableid''.  On success
 * the table holds its own reference on ``rt''.  Returns EEXIST when an
 * equivalent route already exists (or multipath is not permitted),
 * EINVAL for a bad mask, ENOMEM on allocation failure.
 */
int
rtable_insert(unsigned int rtableid, struct sockaddr *dst,
    const struct sockaddr *mask, const struct sockaddr *gateway, uint8_t prio,
    struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an, *prev;
	const uint8_t *addr;
	int plen;
	unsigned int rt_flags;
	int error = 0;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	an = art_get(addr, plen);
	if (an == NULL)
		return (ENOMEM);

	/* prepare for immediate operation if insert succeeds */
	rt_flags = rt->rt_flags;
	rt->rt_flags &= ~RTF_MPATH;
	rt->rt_dest = dst;
	rt->rt_plen = plen;
	rt->rt_next = NULL;

	rtref(rt); /* take a ref for the table */
	an->an_value = rt;

	rw_enter_write(&tbl->r_lock);
	prev = art_insert(tbl->r_art, an);
	if (prev == NULL) {
		error = ENOMEM;
		goto put;
	}

	if (prev != an) {
		struct rtentry *mrt;
		int mpathok = ISSET(rt_flags, RTF_MPATH);
		int mpath = 0;

		/*
		 * An ART node with the same destination/netmask already
		 * exists.
		 */
		art_put(an);
		an = prev;

		/* Do not permit exactly the same dst/mask/gw pair. */
		for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
		    mrt != NULL;
		    mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
			if (prio != RTP_ANY &&
			    (mrt->rt_priority & RTP_MASK) != (prio & RTP_MASK))
				continue;

			if (!mpathok ||
			    (mrt->rt_gateway->sa_len == gateway->sa_len &&
			    memcmp(mrt->rt_gateway, gateway,
			    gateway->sa_len) == 0)) {
				error = EEXIST;
				goto leave;
			}
			mpath = RTF_MPATH;
		}

		/* The new route can be added to the list. */
		if (mpath) {
			SET(rt->rt_flags, RTF_MPATH);

			/* Mark same-priority siblings as multipath too. */
			for (mrt = SMR_PTR_GET_LOCKED(&an->an_value);
			    mrt != NULL;
			    mrt = SMR_PTR_GET_LOCKED(&mrt->rt_next)) {
				if ((mrt->rt_priority & RTP_MASK) !=
				    (prio & RTP_MASK))
					continue;

				SET(mrt->rt_flags, RTF_MPATH);
			}
		}

		/* Put newly inserted entry at the right place. */
		rtable_mpath_insert(an, rt);
	}
	rw_exit_write(&tbl->r_lock);
	return (error);

put:
	art_put(an);
leave:
	rw_exit_write(&tbl->r_lock);
	rtfree(rt);	/* drop the reference taken for the table */
	return (error);
}
647
/*
 * Remove ``rt'' (keyed by ``dst/mask'') from table ``rtableid''.
 * Returns ESRCH when no matching node exists, EINVAL for a bad mask
 * and 0 on success; the table's reference on ``rt'' is dropped.
 */
int
rtable_delete(unsigned int rtableid, const struct sockaddr *dst,
    const struct sockaddr *mask, struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an;
	const uint8_t *addr;
	int plen;
	struct rtentry *mrt;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);
	plen = rtable_satoplen(dst->sa_family, mask);
	if (plen == -1)
		return (EINVAL);

	rw_enter_write(&tbl->r_lock);
	smr_read_enter();
	an = art_lookup(tbl->r_art, addr, plen);
	smr_read_leave();
	if (an == NULL) {
		rw_exit_write(&tbl->r_lock);
		return (ESRCH);
	}

	/* If this is the only route in the list then we can delete the node */
	if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
		struct art_node *oan;
		oan = art_delete(tbl->r_art, addr, plen);
		if (oan != an)
			panic("art %p changed shape during delete", tbl->r_art);
		art_put(an);
		/*
		 * XXX an and the rt ref could still be alive on other cpus.
		 * this currently works because of the NET_LOCK/KERNEL_LOCK
		 * but should be fixed if we want to do route lookups outside
		 * these locks. - dlg@
		 */
	} else {
		struct rtentry **prt;
		struct rtentry *nrt;
		unsigned int found = 0;
		unsigned int npaths = 0;

		/*
		 * If other multipath route entries are still attached to
		 * this ART node we only have to unlink it.
		 */
		prt = (struct rtentry **)&an->an_value;
		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
			if (mrt == rt) {
				found = 1;
				SMR_PTR_SET_LOCKED(prt,
				    SMR_PTR_GET_LOCKED(&mrt->rt_next));
			} else if ((mrt->rt_priority & RTP_MASK) ==
			    (rt->rt_priority & RTP_MASK)) {
				/* Remember the surviving sibling. */
				npaths++;
				nrt = mrt;
			}
			prt = &mrt->rt_next;
		}
		if (!found)
			panic("removing non-existent route");
		/* A single path left is no longer multipath. */
		if (npaths == 1)
			CLR(nrt->rt_flags, RTF_MPATH);
	}
	KASSERT(refcnt_read(&rt->rt_refcnt) >= 1);
	rw_exit_write(&tbl->r_lock);
	rtfree(rt);	/* drop the table's reference */

	return (0);
}
724
/*
 * Call ``func'' for every route of family ``af'' in table ``rtableid''.
 * The table lock is dropped around each callback so ``func'' may modify
 * the table.  If ``func'' returns non-zero the walk stops; when ``prt''
 * is non-NULL the current (referenced) route is handed back through it,
 * otherwise its reference is dropped.
 */
int
rtable_walk(unsigned int rtableid, sa_family_t af, struct rtentry **prt,
    int (*func)(struct rtentry *, void *, unsigned int), void *arg)
{
	struct rtable *tbl;
	struct art_iter ai;
	struct art_node *an;
	int error = 0;

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	rw_enter_write(&tbl->r_lock);
	ART_FOREACH(an, tbl->r_art, &ai) {
		/*
		 * ART nodes have a list of rtentries.
		 *
		 * art_iter holds references to the topology
		 * so it won't change, but not the an_node or rtentries.
		 */
		struct rtentry *rt = SMR_PTR_GET_LOCKED(&an->an_value);
		rtref(rt);

		/* Drop the lock so the callback can modify the table. */
		rw_exit_write(&tbl->r_lock);
		do {
			struct rtentry *nrt;

			smr_read_enter();
			/* Get ready for the next entry. */
			nrt = SMR_PTR_GET(&rt->rt_next);
			if (nrt != NULL)
				rtref(nrt);
			smr_read_leave();

			error = func(rt, arg, rtableid);
			if (error != 0) {
				if (prt != NULL)
					*prt = rt;
				else
					rtfree(rt);

				if (nrt != NULL)
					rtfree(nrt);

				/* Close the iterator before bailing out. */
				rw_enter_write(&tbl->r_lock);
				art_iter_close(&ai);
				rw_exit_write(&tbl->r_lock);
				return (error);
			}

			rtfree(rt);
			rt = nrt;
		} while (rt != NULL);
		rw_enter_write(&tbl->r_lock);
	}
	rw_exit_write(&tbl->r_lock);

	return (error);
}
785
/*
 * Call the read-only callback ``func'' for every route of family ``af''
 * in table ``rtableid'' while holding the table's write lock.  Stops at
 * the first non-zero return from ``func''.
 */
int
rtable_read(unsigned int rtableid, sa_family_t af,
    int (*func)(const struct rtentry *, void *, unsigned int), void *arg)
{
	struct rtable *tbl;
	struct art_iter ai;
	struct art_node *an;
	int error = 0;

	tbl = rtable_get(rtableid, af);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	rw_enter_write(&tbl->r_lock);
	ART_FOREACH(an, tbl->r_art, &ai) {
		struct rtentry *rt;
		/* Visit every entry of the node's multipath list. */
		for (rt = SMR_PTR_GET_LOCKED(&an->an_value); rt != NULL;
		    rt = SMR_PTR_GET_LOCKED(&rt->rt_next)) {
			error = func(rt, arg, rtableid);
			if (error != 0) {
				art_iter_close(&ai);
				goto leave;
			}
		}
	}
leave:
	rw_exit_write(&tbl->r_lock);

	return (error);
}
816
817struct rtentry *
818rtable_iterate(struct rtentry *rt0)
819{
820 struct rtentry *rt = NULL;
821
822 smr_read_enter();
823 rt = SMR_PTR_GET(&rt0->rt_next);
824 if (rt != NULL)
825 rtref(rt);
826 smr_read_leave();
827 rtfree(rt0);
828 return (rt);
829}
830
/*
 * The ART backend supports multipath routes for every table and
 * address family, so this always returns true.
 */
int
rtable_mpath_capable(unsigned int rtableid, sa_family_t af)
{
	(void)rtableid;
	(void)af;

	return (1);
}
836
/*
 * Change the priority of ``rt'' to ``prio'' and keep its multipath
 * list sorted.  Returns ESRCH when the node is gone, EAGAIN when the
 * entry had to be unlinked and re-inserted, 0 otherwise.
 */
int
rtable_mpath_reprio(unsigned int rtableid, struct sockaddr *dst,
    int plen, uint8_t prio, struct rtentry *rt)
{
	struct rtable *tbl;
	struct art_node *an;
	const uint8_t *addr;
	int error = 0;

	tbl = rtable_get(rtableid, dst->sa_family);
	if (tbl == NULL)
		return (EAFNOSUPPORT);

	addr = satoaddr(tbl, dst);

	rw_enter_write(&tbl->r_lock);
	smr_read_enter();
	an = art_lookup(tbl->r_art, addr, plen);
	smr_read_leave();
	if (an == NULL) {
		error = ESRCH;
	} else if (SMR_PTR_GET_LOCKED(&an->an_value) == rt &&
	    SMR_PTR_GET_LOCKED(&rt->rt_next) == NULL) {
		/*
		 * If there's only one entry on the list do not go
		 * through an insert/remove cycle. This is done to
		 * guarantee that ``an->an_rtlist'' is never empty
		 * when a node is in the tree.
		 */
		rt->rt_priority = prio;
	} else {
		struct rtentry **prt;
		struct rtentry *mrt;

		/* Unlink ``rt'' from the list... */
		prt = (struct rtentry **)&an->an_value;
		while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
			if (mrt == rt)
				break;
			prt = &mrt->rt_next;
		}
		KASSERT(mrt != NULL);

		SMR_PTR_SET_LOCKED(prt, SMR_PTR_GET_LOCKED(&rt->rt_next));
		/* ...and re-insert it at its new position. */
		rt->rt_priority = prio;
		rtable_mpath_insert(an, rt);
		error = EAGAIN;
	}
	rw_exit_write(&tbl->r_lock);

	return (error);
}
888
/*
 * Insert ``rt'' in the multipath list of ``an'', keeping the list
 * sorted by ascending priority (after existing entries of equal
 * priority).  Called with the table's write lock held.
 */
void
rtable_mpath_insert(struct art_node *an, struct rtentry *rt)
{
	struct rtentry *mrt, **prt;
	uint8_t prio = rt->rt_priority;

	/* Iterate until we find the route to be placed after ``rt''. */

	prt = (struct rtentry **)&an->an_value;
	while ((mrt = SMR_PTR_GET_LOCKED(prt)) != NULL) {
		if (mrt->rt_priority > prio)
			break;

		prt = &mrt->rt_next;
	}

	/* Set rt's successor first, then make rt itself visible. */
	SMR_PTR_SET_LOCKED(&rt->rt_next, mrt);
	SMR_PTR_SET_LOCKED(prt, rt);
}
908
/*
 * Return a pointer to the address (key). This is a heritage from the
 * BSD radix tree needed to skip the non-address fields from the flavor
 * of "struct sockaddr" used by this routing table.
 */
static inline const uint8_t *
satoaddr(struct rtable *tbl, const struct sockaddr *sa)
{
	return (((const uint8_t *)sa) + tbl->r_off);
}
919
920/*
921 * Return the prefix length of a mask.
922 */
923int
924rtable_satoplen(sa_family_t af, const struct sockaddr *mask)
925{
926 const struct domain *dp;
927 uint8_t *ap, *ep;
928 int mlen, plen = 0;
929 int i;
930
931 for (i = 0; (dp = domains[i]) != NULL; i++) {
932 if (dp->dom_rtoffset == 0)
933 continue;
934
935 if (af == dp->dom_family)
936 break;
937 }
938 if (dp == NULL)
939 return (-1);
940
941 /* Host route */
942 if (mask == NULL)
943 return (dp->dom_maxplen);
944
945 mlen = mask->sa_len;
946
947 /* Default route */
948 if (mlen == 0)
949 return (0);
950
951 ap = (uint8_t *)((uint8_t *)mask) + dp->dom_rtoffset;
952 ep = (uint8_t *)((uint8_t *)mask) + mlen;
953 if (ap > ep)
954 return (-1);
955
956 /* Trim trailing zeroes. */
957 while (ap < ep && ep[-1] == 0)
958 ep--;
959
960 if (ap == ep)
961 return (0);
962
963 /* "Beauty" adapted from sbin/route/show.c ... */
964 while (ap < ep) {
965 switch (*ap++) {
966 case 0xff:
967 plen += 8;
968 break;
969 case 0xfe:
970 plen += 7;
971 goto out;
972 case 0xfc:
973 plen += 6;
974 goto out;
975 case 0xf8:
976 plen += 5;
977 goto out;
978 case 0xf0:
979 plen += 4;
980 goto out;
981 case 0xe0:
982 plen += 3;
983 goto out;
984 case 0xc0:
985 plen += 2;
986 goto out;
987 case 0x80:
988 plen += 1;
989 goto out;
990 default:
991 /* Non contiguous mask. */
992 return (-1);
993 }
994 }
995
996out:
997 if (plen > dp->dom_maxplen || ap != ep)
998 return -1;
999
1000 return (plen);
1001}