jcs's openbsd hax
openbsd
at jcs 2643 lines 65 kB view raw
1/* $OpenBSD: ip_carp.c,v 1.372 2025/12/19 02:04:13 dlg Exp $ */ 2 3/* 4 * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 5 * Copyright (c) 2003 Ryan McBride. All rights reserved. 6 * Copyright (c) 2006-2008 Marco Pfatschbacher. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30/* 31 * TODO: 32 * - iface reconfigure 33 * - support for hardware checksum calculations; 34 * 35 */ 36 37#include "bridge.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/mbuf.h> 42#include <sys/socket.h> 43#include <sys/timeout.h> 44#include <sys/ioctl.h> 45#include <sys/errno.h> 46#include <sys/sysctl.h> 47#include <sys/syslog.h> 48#include <sys/smr.h> 49#include <sys/refcnt.h> 50 51#include <net/if.h> 52#include <net/if_var.h> 53#include <net/if_types.h> 54 55#include <crypto/sha1.h> 56 57#include <netinet/in.h> 58#include <netinet/in_var.h> 59#include <netinet/ip.h> 60#include <netinet/ip_var.h> 61#include <netinet/if_ether.h> 62 63#include <net/if_dl.h> 64 65#ifdef INET6 66#include <netinet6/in6_var.h> 67#include <netinet/icmp6.h> 68#include <netinet/ip6.h> 69#include <netinet6/ip6_var.h> 70#include <netinet6/nd6.h> 71#include <netinet6/in6_ifattach.h> 72#endif 73 74#include "bpfilter.h" 75#if NBPFILTER > 0 76#include <net/bpf.h> 77#endif 78 79#include <netinet/ip_carp.h> 80 81/* 82 * Locks used to protect data: 83 * a atomic 84 */ 85 86struct carp_mc_entry { 87 LIST_ENTRY(carp_mc_entry) mc_entries; 88 union { 89 struct ether_multi *mcu_enm; 90 } mc_u; 91 struct sockaddr_storage mc_addr; 92}; 93#define mc_enm mc_u.mcu_enm 94 95enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; 96 97struct carp_vhost_entry { 98 SMR_SLIST_ENTRY(carp_vhost_entry) vhost_entries; 99 100 struct carp_softc *parent_sc; 101 int vhe_leader; 102 int vhid; 103 int advskew; 104 enum { INIT = 0, BACKUP, MASTER } state; 105 struct timeout ad_tmo; /* advertisement timeout */ 106 struct timeout md_tmo; /* master down timeout */ 107 struct timeout md6_tmo; /* master down timeout */ 108 109 u_int64_t vhe_replay_cookie; 110 111 /* authentication */ 112#define CARP_HMAC_PAD 64 113 unsigned char vhe_pad[CARP_HMAC_PAD]; 114 SHA1_CTX vhe_sha1[HMAC_MAX]; 115 116 u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; 117}; 118 119struct carp_softc { 120 struct arpcom sc_ac; 121#define sc_if sc_ac.ac_if 122#define sc_carpdevidx sc_ac.ac_if.if_carpdevidx 123 struct task sc_atask; 124 struct task sc_ltask; 125 struct task sc_dtask; 126 struct ip_moptions sc_imo; 127#ifdef INET6 128 struct ip6_moptions sc_im6o; 129 struct task sc_itask; 130#endif /* INET6 */ 131 132 SMR_LIST_ENTRY(carp_softc) sc_list; 133 struct refcnt sc_refcnt; 134 135 int sc_suppress; 136 int sc_bow_out; 137 int sc_demote_cnt; 138 139 int sc_sendad_errors; 140#define CARP_SENDAD_MAX_ERRORS(sc) (3 * (sc)->sc_vhe_count) 141 int sc_sendad_success; 142#define CARP_SENDAD_MIN_SUCCESS(sc) (3 * (sc)->sc_vhe_count) 143 144 char sc_curlladdr[ETHER_ADDR_LEN]; 145 146 SMR_SLIST_HEAD(, carp_vhost_entry) carp_vhosts; 147 int sc_vhe_count; 148 u_int8_t sc_vhids[CARP_MAXNODES]; 149 u_int8_t sc_advskews[CARP_MAXNODES]; 150 u_int8_t sc_balancing; 151 152 int sc_naddrs; 153 int sc_naddrs6; 154 int sc_advbase; /* seconds */ 155 156 /* authentication */ 157 unsigned char sc_key[CARP_KEY_LEN]; 158 159 u_int32_t sc_hashkey[2]; 160 u_int32_t sc_lsmask; /* load sharing mask */ 161 int sc_lscount; /* # load sharing interfaces (max 32) */ 162 int sc_delayed_arp; /* delayed ARP request countdown */ 163#ifdef INET6 164 int sc_send_na; /* send NA when link state up */ 165#endif /* INET6 */ 166 int sc_realmac; /* using real mac */ 167 168 struct in_addr sc_peer; 169 170 LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; 171 struct carp_vhost_entry *cur_vhe; /* current active vhe */ 172}; 173 174int carpctl_allow = 1; /* [a] */ 175int carpctl_preempt = 0; /* [a] */ 176int carpctl_log = LOG_CRIT; /* [a] */ 177 178const struct sysctl_bounded_args carpctl_vars[] = { 179 {CARPCTL_ALLOW, &carpctl_allow, INT_MIN, INT_MAX}, 180 {CARPCTL_PREEMPT, &carpctl_preempt, INT_MIN, INT_MAX}, 181 {CARPCTL_LOG, &carpctl_log, INT_MIN, INT_MAX}, 182}; 183 184struct cpumem *carpcounters; 185 186int carp_send_all_recur = 0; 187 188#define CARP_LOG(l, sc, s) \ 189 do { \ 190 if ((int)atomic_load_int(&carpctl_log) >= l) { \ 191 if (sc) \ 192 log(l, "%s: ", \ 193 (sc)->sc_if.if_xname); \ 194 else \ 195 log(l, "carp: "); \ 196 addlog s; \ 197 addlog("\n"); \ 198 } \ 199 } while (0) 200 201void carp_hmac_prepare(struct carp_softc *); 202void carp_hmac_prepare_ctx(struct carp_vhost_entry *, u_int8_t); 203void carp_hmac_generate(struct carp_vhost_entry *, u_int32_t *, 204 unsigned char *, u_int8_t); 205int carp_hmac_verify(struct carp_vhost_entry *, u_int32_t *, 206 unsigned char *); 207void carp_proto_input_c(struct ifnet *, struct mbuf *, 208 struct carp_header *, int, sa_family_t); 209int carp_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 210#ifdef INET6 211int carp6_proto_input_if(struct ifnet *, struct mbuf **, int *, int); 212#endif 213void carpattach(int); 214void carpdetach(void *); 215void carp_prepare_ad(struct mbuf *, struct carp_vhost_entry *, 216 struct carp_header *); 217void carp_send_ad_all(void); 218void carp_vhe_send_ad_all(struct carp_softc *); 219void carp_timer_ad(void *); 220void carp_send_ad(struct carp_vhost_entry *); 221void carp_send_arp(struct carp_softc *); 222void carp_timer_down(void *); 223void carp_master_down(struct carp_vhost_entry *); 224int carp_ioctl(struct ifnet *, u_long, caddr_t); 225int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); 226int carp_check_dup_vhids(struct carp_softc *, struct carp_iflist *, 227 struct carpreq *); 228void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); 229void carp_ifgattr_ioctl(struct ifnet *, u_long, caddr_t); 230void carp_start(struct ifnet *); 231int carp_enqueue(struct ifnet *, struct mbuf *); 232void carp_transmit(struct carp_softc *, struct ifnet *, struct mbuf *); 233void carp_setrun_all(struct carp_softc *, sa_family_t); 234void carp_setrun(struct carp_vhost_entry *, sa_family_t); 235void carp_set_state_all(struct carp_softc *, int); 236void carp_set_state(struct carp_vhost_entry *, int); 237void carp_multicast_cleanup(struct carp_softc *); 238int carp_set_ifp(struct carp_softc *, struct ifnet *); 239void carp_set_enaddr(struct carp_softc *); 240void carp_set_vhe_enaddr(struct carp_vhost_entry *); 241void carp_addr_updated(void *); 242int carp_set_addr(struct carp_softc *, struct sockaddr_in *); 243int carp_join_multicast(struct carp_softc *); 244#ifdef INET6 245void carp_send_na(struct carp_softc *); 246int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *); 247int carp_join_multicast6(struct carp_softc *); 248void carp_if_linkstate(void *); 249#endif 250int carp_clone_create(struct if_clone *, int); 251int carp_clone_destroy(struct ifnet *); 252int carp_ether_addmulti(struct carp_softc *, struct ifreq *); 253int carp_ether_delmulti(struct carp_softc *, struct ifreq *); 254void carp_ether_purgemulti(struct carp_softc *); 255int carp_group_demote_count(struct carp_softc *); 256void carp_update_lsmask(struct carp_softc *); 257int carp_new_vhost(struct carp_softc *, int, int); 258void carp_destroy_vhosts(struct carp_softc *); 259void carp_del_all_timeouts(struct carp_softc *); 260int carp_vhe_match(struct carp_softc *, uint64_t); 261 262struct if_clone carp_cloner = 263 IF_CLONE_INITIALIZER("carp", carp_clone_create, carp_clone_destroy); 264 265#define carp_cksum(_m, _l) ((u_int16_t)in_cksum((_m), (_l))) 266#define CARP_IFQ_PRIO 6 267 268void 269carp_hmac_prepare(struct carp_softc *sc) 270{ 271 struct carp_vhost_entry *vhe; 272 u_int8_t i; 273 274 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 275 276 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 277 for (i = 0; i < HMAC_MAX; i++) { 278 carp_hmac_prepare_ctx(vhe, i); 279 } 280 } 281} 282 283void 284carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) 285{ 286 struct carp_softc *sc = vhe->parent_sc; 287 288 u_int8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; 289 u_int8_t vhid = vhe->vhid & 0xff; 290 SHA1_CTX sha1ctx; 291 u_int32_t kmd[5]; 292 struct ifaddr *ifa; 293 int i, found; 294 struct in_addr last, cur, in; 295#ifdef INET6 296 struct in6_addr last6, cur6, in6; 297#endif /* INET6 */ 298 299 /* compute ipad from key */ 300 memset(vhe->vhe_pad, 0, sizeof(vhe->vhe_pad)); 301 bcopy(sc->sc_key, vhe->vhe_pad, sizeof(sc->sc_key)); 302 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 303 vhe->vhe_pad[i] ^= 0x36; 304 305 /* precompute first part of inner hash */ 306 SHA1Init(&vhe->vhe_sha1[ctx]); 307 SHA1Update(&vhe->vhe_sha1[ctx], vhe->vhe_pad, sizeof(vhe->vhe_pad)); 308 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&version, sizeof(version)); 309 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); 310 311 /* generate a key for the arpbalance hash, before the vhid is hashed */ 312 if (vhe->vhe_leader) { 313 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 314 SHA1Final((unsigned char *)kmd, &sha1ctx); 315 sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; 316 sc->sc_hashkey[1] = kmd[2] ^ kmd[3]; 317 } 318 319 /* the rest of the precomputation */ 320 if (!sc->sc_realmac && vhe->vhe_leader && 321 memcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) 322 SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, 323 ETHER_ADDR_LEN); 324 325 SHA1Update(&vhe->vhe_sha1[ctx], (void *)&vhid, sizeof(vhid)); 326 327 /* Hash the addresses from smallest to largest, not interface order */ 328 cur.s_addr = 0; 329 do { 330 found = 0; 331 last = cur; 332 cur.s_addr = 0xffffffff; 333 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 334 if (ifa->ifa_addr->sa_family != AF_INET) 335 continue; 336 in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 337 if (ntohl(in.s_addr) > ntohl(last.s_addr) && 338 ntohl(in.s_addr) < ntohl(cur.s_addr)) { 339 cur.s_addr = in.s_addr; 340 found++; 341 } 342 } 343 if (found) 344 SHA1Update(&vhe->vhe_sha1[ctx], 345 (void *)&cur, sizeof(cur)); 346 } while (found); 347#ifdef INET6 348 memset(&cur6, 0x00, sizeof(cur6)); 349 do { 350 found = 0; 351 last6 = cur6; 352 memset(&cur6, 0xff, sizeof(cur6)); 353 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 354 if (ifa->ifa_addr->sa_family != AF_INET6) 355 continue; 356 in6 = ifatoia6(ifa)->ia_addr.sin6_addr; 357 if (IN6_IS_SCOPE_EMBED(&in6)) { 358 if (ctx == HMAC_NOV6LL) 359 continue; 360 in6.s6_addr16[1] = 0; 361 } 362 if (memcmp(&in6, &last6, sizeof(in6)) > 0 && 363 memcmp(&in6, &cur6, sizeof(in6)) < 0) { 364 cur6 = in6; 365 found++; 366 } 367 } 368 if (found) 369 SHA1Update(&vhe->vhe_sha1[ctx], 370 (void *)&cur6, sizeof(cur6)); 371 } while (found); 372#endif /* INET6 */ 373 374 /* convert ipad to opad */ 375 for (i = 0; i < sizeof(vhe->vhe_pad); i++) 376 vhe->vhe_pad[i] ^= 0x36 ^ 0x5c; 377} 378 379void 380carp_hmac_generate(struct carp_vhost_entry *vhe, u_int32_t counter[2], 381 unsigned char md[20], u_int8_t ctx) 382{ 383 SHA1_CTX sha1ctx; 384 385 /* fetch first half of inner hash */ 386 bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); 387 388 SHA1Update(&sha1ctx, (void *)counter, sizeof(vhe->vhe_replay_cookie)); 389 SHA1Final(md, &sha1ctx); 390 391 /* outer hash */ 392 SHA1Init(&sha1ctx); 393 SHA1Update(&sha1ctx, vhe->vhe_pad, sizeof(vhe->vhe_pad)); 394 SHA1Update(&sha1ctx, md, 20); 395 SHA1Final(md, &sha1ctx); 396} 397 398int 399carp_hmac_verify(struct carp_vhost_entry *vhe, u_int32_t counter[2], 400 unsigned char md[20]) 401{ 402 unsigned char md2[20]; 403 u_int8_t i; 404 405 for (i = 0; i < HMAC_MAX; i++) { 406 carp_hmac_generate(vhe, counter, md2, i); 407 if (!timingsafe_bcmp(md, md2, sizeof(md2))) 408 return (0); 409 } 410 return (1); 411} 412 413int 414carp_proto_input(struct mbuf **mp, int *offp, int proto, int af, 415 struct netstack *ns) 416{ 417 struct ifnet *ifp; 418 419 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 420 if (ifp == NULL) { 421 m_freemp(mp); 422 return IPPROTO_DONE; 423 } 424 425 proto = carp_proto_input_if(ifp, mp, offp, proto); 426 if_put(ifp); 427 return proto; 428} 429 430/* 431 * process input packet. 432 * we have rearranged checks order compared to the rfc, 433 * but it seems more efficient this way or not possible otherwise. 434 */ 435int 436carp_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 437{ 438 struct mbuf *m = *mp; 439 struct ip *ip = mtod(m, struct ip *); 440 struct carp_softc *sc = NULL; 441 struct carp_header *ch; 442 int iplen, len, ismulti; 443 444 carpstat_inc(carps_ipackets); 445 446 if (!atomic_load_int(&carpctl_allow)) { 447 m_freem(m); 448 return IPPROTO_DONE; 449 } 450 451 ismulti = IN_MULTICAST(ip->ip_dst.s_addr); 452 453 /* check if received on a valid carp interface */ 454 switch (ifp->if_type) { 455 case IFT_CARP: 456 break; 457 case IFT_ETHER: 458 if (ismulti || !SMR_LIST_EMPTY_LOCKED(&ifp->if_carp)) 459 break; 460 /* FALLTHROUGH */ 461 default: 462 carpstat_inc(carps_badif); 463 CARP_LOG(LOG_INFO, sc, 464 ("packet received on non-carp interface: %s", 465 ifp->if_xname)); 466 m_freem(m); 467 return IPPROTO_DONE; 468 } 469 470 /* verify that the IP TTL is 255. */ 471 if (ip->ip_ttl != CARP_DFLTTL) { 472 carpstat_inc(carps_badttl); 473 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 474 ip->ip_ttl, CARP_DFLTTL, ifp->if_xname)); 475 m_freem(m); 476 return IPPROTO_DONE; 477 } 478 479 /* 480 * verify that the received packet length is 481 * equal to the CARP header 482 */ 483 iplen = ip->ip_hl << 2; 484 len = iplen + sizeof(*ch); 485 if (len > m->m_pkthdr.len) { 486 carpstat_inc(carps_badlen); 487 CARP_LOG(LOG_INFO, sc, ("packet too short %d on %s", 488 m->m_pkthdr.len, ifp->if_xname)); 489 m_freem(m); 490 return IPPROTO_DONE; 491 } 492 493 if ((m = *mp = m_pullup(m, len)) == NULL) { 494 carpstat_inc(carps_hdrops); 495 return IPPROTO_DONE; 496 } 497 ip = mtod(m, struct ip *); 498 ch = (struct carp_header *)(mtod(m, caddr_t) + iplen); 499 500 /* verify the CARP checksum */ 501 m->m_data += iplen; 502 if (carp_cksum(m, len - iplen)) { 503 carpstat_inc(carps_badsum); 504 CARP_LOG(LOG_INFO, sc, ("checksum failed on %s", 505 ifp->if_xname)); 506 m_freem(m); 507 return IPPROTO_DONE; 508 } 509 m->m_data -= iplen; 510 511 KERNEL_LOCK(); 512 carp_proto_input_c(ifp, m, ch, ismulti, AF_INET); 513 KERNEL_UNLOCK(); 514 return IPPROTO_DONE; 515} 516 517#ifdef INET6 518int 519carp6_proto_input(struct mbuf **mp, int *offp, int proto, int af, 520 struct netstack *ns) 521{ 522 struct ifnet *ifp; 523 524 ifp = if_get((*mp)->m_pkthdr.ph_ifidx); 525 if (ifp == NULL) { 526 m_freemp(mp); 527 return IPPROTO_DONE; 528 } 529 530 proto = carp6_proto_input_if(ifp, mp, offp, proto); 531 if_put(ifp); 532 return proto; 533} 534 535int 536carp6_proto_input_if(struct ifnet *ifp, struct mbuf **mp, int *offp, int proto) 537{ 538 struct mbuf *m = *mp; 539 struct carp_softc *sc = NULL; 540 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 541 struct carp_header *ch; 542 u_int len; 543 544 carpstat_inc(carps_ipackets6); 545 546 if (!atomic_load_int(&carpctl_allow)) { 547 m_freem(m); 548 return IPPROTO_DONE; 549 } 550 551 /* check if received on a valid carp interface */ 552 if (ifp->if_type != IFT_CARP) { 553 carpstat_inc(carps_badif); 554 CARP_LOG(LOG_INFO, sc, ("packet received on non-carp interface: %s", 555 ifp->if_xname)); 556 m_freem(m); 557 return IPPROTO_DONE; 558 } 559 560 /* verify that the IP TTL is 255 */ 561 if (ip6->ip6_hlim != CARP_DFLTTL) { 562 carpstat_inc(carps_badttl); 563 CARP_LOG(LOG_NOTICE, sc, ("received ttl %d != %d on %s", 564 ip6->ip6_hlim, CARP_DFLTTL, ifp->if_xname)); 565 m_freem(m); 566 return IPPROTO_DONE; 567 } 568 569 /* verify that we have a complete carp packet */ 570 len = m->m_len; 571 if ((m = *mp = m_pullup(m, *offp + sizeof(*ch))) == NULL) { 572 carpstat_inc(carps_badlen); 573 CARP_LOG(LOG_INFO, sc, ("packet size %u too small", len)); 574 return IPPROTO_DONE; 575 } 576 ch = (struct carp_header *)(mtod(m, caddr_t) + *offp); 577 578 /* verify the CARP checksum */ 579 m->m_data += *offp; 580 if (carp_cksum(m, sizeof(*ch))) { 581 carpstat_inc(carps_badsum); 582 CARP_LOG(LOG_INFO, sc, ("checksum failed, on %s", 583 ifp->if_xname)); 584 m_freem(m); 585 return IPPROTO_DONE; 586 } 587 m->m_data -= *offp; 588 589 KERNEL_LOCK(); 590 carp_proto_input_c(ifp, m, ch, 1, AF_INET6); 591 KERNEL_UNLOCK(); 592 return IPPROTO_DONE; 593} 594#endif /* INET6 */ 595 596void 597carp_proto_input_c(struct ifnet *ifp, struct mbuf *m, struct carp_header *ch, 598 int ismulti, sa_family_t af) 599{ 600 struct carp_softc *sc; 601 struct ifnet *ifp0 = NULL; 602 struct carp_iflist *cif; 603 struct carp_vhost_entry *vhe; 604 struct timeval sc_tv, ch_tv; 605 606 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 607 608 if (ifp->if_type == IFT_CARP) { 609 /* 610 * If the parent of this carp(4) got destroyed while 611 * `m' was being processed, silently drop it. 612 */ 613 ifp0 = if_get(ifp->if_carpdevidx); 614 if (ifp0 == NULL) 615 goto rele; 616 cif = &ifp0->if_carp; 617 } else 618 cif = &ifp->if_carp; 619 620 SMR_LIST_FOREACH_LOCKED(sc, cif, sc_list) { 621 if (af == AF_INET && 622 ismulti != IN_MULTICAST(sc->sc_peer.s_addr)) 623 continue; 624 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 625 if (vhe->vhid == ch->carp_vhid) 626 goto found; 627 } 628 } 629 found: 630 631 if (!sc || (sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 632 (IFF_UP|IFF_RUNNING)) { 633 carpstat_inc(carps_badvhid); 634 goto rele; 635 } 636 637 getmicrotime(&sc->sc_if.if_lastchange); 638 639 /* verify the CARP version. */ 640 if (ch->carp_version != CARP_VERSION) { 641 carpstat_inc(carps_badver); 642 sc->sc_if.if_ierrors++; 643 CARP_LOG(LOG_NOTICE, sc, ("invalid version %d != %d", 644 ch->carp_version, CARP_VERSION)); 645 goto rele; 646 } 647 648 /* verify the hash */ 649 if (carp_hmac_verify(vhe, ch->carp_counter, ch->carp_md)) { 650 carpstat_inc(carps_badauth); 651 sc->sc_if.if_ierrors++; 652 CARP_LOG(LOG_INFO, sc, ("incorrect hash")); 653 goto rele; 654 } 655 656 if (!memcmp(&vhe->vhe_replay_cookie, ch->carp_counter, 657 sizeof(ch->carp_counter))) { 658 struct ifnet *ifp2; 659 660 ifp2 = if_get(sc->sc_carpdevidx); 661 /* Do not log duplicates from non simplex interfaces */ 662 if (ifp2 && ifp2->if_flags & IFF_SIMPLEX) { 663 carpstat_inc(carps_badauth); 664 sc->sc_if.if_ierrors++; 665 CARP_LOG(LOG_WARNING, sc, 666 ("replay or network loop detected")); 667 } 668 if_put(ifp2); 669 goto rele; 670 } 671 672 sc_tv.tv_sec = sc->sc_advbase; 673 sc_tv.tv_usec = vhe->advskew * 1000000 / 256; 674 ch_tv.tv_sec = ch->carp_advbase; 675 ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; 676 677 switch (vhe->state) { 678 case INIT: 679 break; 680 case MASTER: 681 /* 682 * If we receive an advertisement from a master who's going to 683 * be more frequent than us, and whose demote count is not higher 684 * than ours, go into BACKUP state. If his demote count is lower, 685 * also go into BACKUP. 686 */ 687 if (((timercmp(&sc_tv, &ch_tv, >) || 688 timercmp(&sc_tv, &ch_tv, ==)) && 689 (ch->carp_demote <= carp_group_demote_count(sc))) || 690 ch->carp_demote < carp_group_demote_count(sc)) { 691 timeout_del(&vhe->ad_tmo); 692 carp_set_state(vhe, BACKUP); 693 carp_setrun(vhe, 0); 694 } 695 break; 696 case BACKUP: 697 /* 698 * If we're pre-empting masters who advertise slower than us, 699 * and do not have a better demote count, treat them as down. 700 * 701 */ 702 if (atomic_load_int(&carpctl_preempt) && 703 timercmp(&sc_tv, &ch_tv, <) && 704 ch->carp_demote >= carp_group_demote_count(sc)) { 705 carp_master_down(vhe); 706 break; 707 } 708 709 /* 710 * Take over masters advertising with a higher demote count, 711 * regardless of CARPCTL_PREEMPT. 712 */ 713 if (ch->carp_demote > carp_group_demote_count(sc)) { 714 carp_master_down(vhe); 715 break; 716 } 717 718 /* 719 * If the master is going to advertise at such a low frequency 720 * that he's guaranteed to time out, we'd might as well just 721 * treat him as timed out now. 722 */ 723 sc_tv.tv_sec = sc->sc_advbase * 3; 724 if (sc->sc_advbase && timercmp(&sc_tv, &ch_tv, <)) { 725 carp_master_down(vhe); 726 break; 727 } 728 729 /* 730 * Otherwise, we reset the counter and wait for the next 731 * advertisement. 732 */ 733 carp_setrun(vhe, af); 734 break; 735 } 736 737rele: 738 if_put(ifp0); 739 m_freem(m); 740 return; 741} 742 743int 744carp_sysctl_carpstat(void *oldp, size_t *oldlenp, void *newp) 745{ 746 struct carpstats carpstat; 747 748 CTASSERT(sizeof(carpstat) == (carps_ncounters * sizeof(uint64_t))); 749 memset(&carpstat, 0, sizeof carpstat); 750 counters_read(carpcounters, (uint64_t *)&carpstat, carps_ncounters, 751 NULL); 752 return (sysctl_rdstruct(oldp, oldlenp, newp, 753 &carpstat, sizeof(carpstat))); 754} 755 756int 757carp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 758 size_t newlen) 759{ 760 /* All sysctl names at this level are terminal. */ 761 if (namelen != 1) 762 return (ENOTDIR); 763 764 switch (name[0]) { 765 case CARPCTL_STATS: 766 return (carp_sysctl_carpstat(oldp, oldlenp, newp)); 767 default: 768 return (sysctl_bounded_arr(carpctl_vars, nitems(carpctl_vars), 769 name, namelen, oldp, oldlenp, newp, newlen)); 770 } 771} 772 773/* 774 * Interface side of the CARP implementation. 775 */ 776 777void 778carpattach(int n) 779{ 780 if_creategroup("carp"); /* keep around even if empty */ 781 if_clone_attach(&carp_cloner); 782 carpcounters = counters_alloc(carps_ncounters); 783} 784 785int 786carp_clone_create(struct if_clone *ifc, int unit) 787{ 788 struct carp_softc *sc; 789 struct ifnet *ifp; 790 791 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); 792 refcnt_init(&sc->sc_refcnt); 793 794 SMR_SLIST_INIT(&sc->carp_vhosts); 795 sc->sc_vhe_count = 0; 796 if (carp_new_vhost(sc, 0, 0)) { 797 free(sc, M_DEVBUF, sizeof(*sc)); 798 return (ENOMEM); 799 } 800 801 task_set(&sc->sc_atask, carp_addr_updated, sc); 802 task_set(&sc->sc_ltask, carp_carpdev_state, sc); 803 task_set(&sc->sc_dtask, carpdetach, sc); 804#ifdef INET6 805 task_set(&sc->sc_itask, carp_if_linkstate, sc); 806#endif /* INET6 */ 807 808 sc->sc_suppress = 0; 809 sc->sc_advbase = CARP_DFLTINTV; 810 sc->sc_naddrs = sc->sc_naddrs6 = 0; 811#ifdef INET6 812 sc->sc_im6o.im6o_hlim = CARP_DFLTTL; 813#endif /* INET6 */ 814 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 815 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 816 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 817 818 LIST_INIT(&sc->carp_mc_listhead); 819 ifp = &sc->sc_if; 820 ifp->if_softc = sc; 821 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d", ifc->ifc_name, 822 unit); 823 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 824 ifp->if_ioctl = carp_ioctl; 825 ifp->if_start = carp_start; 826 ifp->if_enqueue = carp_enqueue; 827 ifp->if_xflags = IFXF_CLONED; 828 if_counters_alloc(ifp); 829 if_attach(ifp); 830 ether_ifattach(ifp); 831 ifp->if_type = IFT_CARP; 832 ifp->if_sadl->sdl_type = IFT_CARP; 833 ifp->if_output = carp_output; 834 ifp->if_priority = IF_CARP_DEFAULT_PRIORITY; 835 ifp->if_link_state = LINK_STATE_INVALID; 836 837 /* Hook carp_addr_updated to cope with address and route changes. */ 838 if_addrhook_add(&sc->sc_if, &sc->sc_atask); 839#ifdef INET6 840 if_linkstatehook_add(&sc->sc_if, &sc->sc_itask); 841#endif /* INET6 */ 842 843 return (0); 844} 845 846int 847carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) 848{ 849 struct carp_vhost_entry *vhe, *vhe0, *nvhe; 850 851 vhe = malloc(sizeof(*vhe), M_DEVBUF, M_NOWAIT | M_ZERO); 852 if (vhe == NULL) 853 return (ENOMEM); 854 855 refcnt_take(&sc->sc_refcnt); /* give a sc ref to the vhe */ 856 vhe->parent_sc = sc; 857 vhe->vhid = vhid; 858 vhe->advskew = advskew; 859 vhe->state = INIT; 860 timeout_set_proc(&vhe->ad_tmo, carp_timer_ad, vhe); 861 timeout_set_proc(&vhe->md_tmo, carp_timer_down, vhe); 862 timeout_set_proc(&vhe->md6_tmo, carp_timer_down, vhe); 863 864 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 865 866 /* mark the first vhe as leader */ 867 vhe0 = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 868 if (vhe0 == NULL) { 869 vhe->vhe_leader = 1; 870 SMR_SLIST_INSERT_HEAD_LOCKED(&sc->carp_vhosts, 871 vhe, vhost_entries); 872 sc->sc_vhe_count = 1; 873 return (0); 874 } 875 876 while ((nvhe = SMR_SLIST_NEXT_LOCKED(vhe0, vhost_entries)) != NULL) 877 vhe0 = nvhe; 878 879 SMR_SLIST_INSERT_AFTER_LOCKED(vhe0, vhe, vhost_entries); 880 sc->sc_vhe_count++; 881 882 return (0); 883} 884 885int 886carp_clone_destroy(struct ifnet *ifp) 887{ 888 struct carp_softc *sc = ifp->if_softc; 889 890 if_addrhook_del(&sc->sc_if, &sc->sc_atask); 891#ifdef INET6 892 if_linkstatehook_del(&sc->sc_if, &sc->sc_itask); 893#endif /* INET6 */ 894 895 NET_LOCK(); 896 carpdetach(sc); 897 NET_UNLOCK(); 898 899 ether_ifdetach(ifp); 900 if_detach(ifp); 901 carp_destroy_vhosts(ifp->if_softc); 902 refcnt_finalize(&sc->sc_refcnt, "carpdtor"); 903 free(sc->sc_imo.imo_membership, M_IPMOPTS, 904 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 905 free(sc, M_DEVBUF, sizeof(*sc)); 906 return (0); 907} 908 909void 910carp_del_all_timeouts(struct carp_softc *sc) 911{ 912 struct carp_vhost_entry *vhe; 913 914 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 915 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 916 timeout_del(&vhe->ad_tmo); 917 timeout_del(&vhe->md_tmo); 918 timeout_del(&vhe->md6_tmo); 919 } 920} 921 922void 923carpdetach(void *arg) 924{ 925 struct carp_softc *sc = arg; 926 struct ifnet *ifp0; 927 928 carp_del_all_timeouts(sc); 929 930 if (sc->sc_demote_cnt) 931 carp_group_demote_adj(&sc->sc_if, -sc->sc_demote_cnt, "detach"); 932 sc->sc_suppress = 0; 933 sc->sc_sendad_errors = 0; 934 935 carp_set_state_all(sc, INIT); 936 sc->sc_if.if_flags &= ~IFF_UP; 937 carp_setrun_all(sc, 0); 938 carp_multicast_cleanup(sc); 939 940 ifp0 = if_get(sc->sc_carpdevidx); 941 if (ifp0 == NULL) 942 return; 943 944 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 945 946 SMR_LIST_REMOVE_LOCKED(sc, sc_list); 947 sc->sc_carpdevidx = 0; 948 949 smr_barrier(); 950 refcnt_rele_wake(&sc->sc_refcnt); 951 952 if_linkstatehook_del(ifp0, &sc->sc_ltask); 953 if_detachhook_del(ifp0, &sc->sc_dtask); 954 ifpromisc(ifp0, 0); 955 if_put(ifp0); 956} 957 958void 959carp_destroy_vhosts(struct carp_softc *sc) 960{ 961 /* XXX bow out? */ 962 struct carp_vhost_entry *vhe, *nvhe; 963 964 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 965 nvhe = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 966 SMR_SLIST_INIT(&sc->carp_vhosts); 967 sc->sc_vhe_count = 0; 968 969 smr_barrier(); 970 971 while ((vhe = nvhe) != NULL) { 972 nvhe = SMR_SLIST_NEXT_LOCKED(vhe, vhost_entries); 973 refcnt_rele_wake(&vhe->parent_sc->sc_refcnt); 974 free(vhe, M_DEVBUF, sizeof(*vhe)); 975 } 976} 977 978void 979carp_prepare_ad(struct mbuf *m, struct carp_vhost_entry *vhe, 980 struct carp_header *ch) 981{ 982 if (!vhe->vhe_replay_cookie) { 983 arc4random_buf(&vhe->vhe_replay_cookie, 984 sizeof(vhe->vhe_replay_cookie)); 985 } 986 987 bcopy(&vhe->vhe_replay_cookie, ch->carp_counter, 988 sizeof(ch->carp_counter)); 989 990 /* 991 * For the time being, do not include the IPv6 linklayer addresses 992 * in the HMAC. 993 */ 994 carp_hmac_generate(vhe, ch->carp_counter, ch->carp_md, HMAC_NOV6LL); 995} 996 997void 998carp_send_ad_all(void) 999{ 1000 struct ifnet *ifp; 1001 struct carp_softc *vh; 1002 1003 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1004 1005 if (carp_send_all_recur > 0) 1006 return; 1007 ++carp_send_all_recur; 1008 TAILQ_FOREACH(ifp, &ifnetlist, if_list) { 1009 if (ifp->if_type != IFT_CARP) 1010 continue; 1011 1012 vh = ifp->if_softc; 1013 if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) == 1014 (IFF_UP|IFF_RUNNING)) { 1015 carp_vhe_send_ad_all(vh); 1016 } 1017 } 1018 --carp_send_all_recur; 1019} 1020 1021void 1022carp_vhe_send_ad_all(struct carp_softc *sc) 1023{ 1024 struct carp_vhost_entry *vhe; 1025 1026 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1027 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1028 if (vhe->state == MASTER) 1029 carp_send_ad(vhe); 1030 } 1031} 1032 1033void 1034carp_timer_ad(void *v) 1035{ 1036 NET_LOCK(); 1037 carp_send_ad(v); 1038 NET_UNLOCK(); 1039} 1040 1041void 1042carp_send_ad(struct carp_vhost_entry *vhe) 1043{ 1044 struct carp_header ch; 1045 uint64_t usec; 1046 struct carp_softc *sc = vhe->parent_sc; 1047 struct carp_header *ch_ptr; 1048 struct mbuf *m; 1049 int error, len, advbase, advskew; 1050 struct ifnet *ifp; 1051 struct ifaddr *ifa; 1052 struct sockaddr sa; 1053 1054 NET_ASSERT_LOCKED(); 1055 1056 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1057 sc->sc_if.if_oerrors++; 1058 return; 1059 } 1060 1061 /* bow out if we've gone to backup (the carp interface is going down) */ 1062 if (sc->sc_bow_out) { 1063 advbase = 255; 1064 advskew = 255; 1065 } else { 1066 advbase = sc->sc_advbase; 1067 advskew = vhe->advskew; 1068 usec = (uint64_t)advbase * 1000000; 1069 usec += (uint64_t)advskew * 1000000 / 256; 1070 if (usec == 0) 1071 usec = 1000000 / 256; 1072 } 1073 1074 ch.carp_version = CARP_VERSION; 1075 ch.carp_type = CARP_ADVERTISEMENT; 1076 ch.carp_vhid = vhe->vhid; 1077 ch.carp_demote = carp_group_demote_count(sc) & 0xff; 1078 ch.carp_advbase = advbase; 1079 ch.carp_advskew = advskew; 1080 ch.carp_authlen = 7; /* XXX DEFINE */ 1081 ch.carp_cksum = 0; 1082 1083 sc->cur_vhe = vhe; /* we need the vhe later on the output path */ 1084 1085 if (sc->sc_naddrs) { 1086 struct ip *ip; 1087 1088 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1089 if (m == NULL) { 1090 sc->sc_if.if_oerrors++; 1091 carpstat_inc(carps_onomem); 1092 /* XXX maybe less ? */ 1093 goto retry_later; 1094 } 1095 len = sizeof(*ip) + sizeof(ch); 1096 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1097 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1098 m->m_pkthdr.len = len; 1099 m->m_len = len; 1100 m_align(m, len); 1101 ip = mtod(m, struct ip *); 1102 ip->ip_v = IPVERSION; 1103 ip->ip_hl = sizeof(*ip) >> 2; 1104 ip->ip_tos = IPTOS_LOWDELAY; 1105 ip->ip_len = htons(len); 1106 ip->ip_id = htons(ip_randomid()); 1107 ip->ip_off = htons(IP_DF); 1108 ip->ip_ttl = CARP_DFLTTL; 1109 ip->ip_p = IPPROTO_CARP; 1110 ip->ip_sum = 0; 1111 1112 memset(&sa, 0, sizeof(sa)); 1113 sa.sa_family = AF_INET; 1114 /* Prefer addresses on the parent interface as source for AD. */ 1115 ifa = ifaof_ifpforaddr(&sa, ifp); 1116 if (ifa == NULL) 1117 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1118 KASSERT(ifa != NULL); 1119 ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1120 ip->ip_dst.s_addr = sc->sc_peer.s_addr; 1121 if (IN_MULTICAST(ip->ip_dst.s_addr)) 1122 m->m_flags |= M_MCAST; 1123 1124 ch_ptr = (struct carp_header *)(ip + 1); 1125 bcopy(&ch, ch_ptr, sizeof(ch)); 1126 carp_prepare_ad(m, vhe, ch_ptr); 1127 1128 m->m_data += sizeof(*ip); 1129 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip)); 1130 m->m_data -= sizeof(*ip); 1131 1132 getmicrotime(&sc->sc_if.if_lastchange); 1133 carpstat_inc(carps_opackets); 1134 1135 error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 1136 NULL, 0); 1137 if (error && 1138 /* when unicast, the peer's down is not our fault */ 1139 !(!IN_MULTICAST(sc->sc_peer.s_addr) && error == EHOSTDOWN)){ 1140 if (error == ENOBUFS) 1141 carpstat_inc(carps_onomem); 1142 else 1143 CARP_LOG(LOG_WARNING, sc, 1144 ("ip_output failed: %d", error)); 1145 sc->sc_if.if_oerrors++; 1146 if (sc->sc_sendad_errors < INT_MAX) 1147 sc->sc_sendad_errors++; 1148 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1149 carp_group_demote_adj(&sc->sc_if, 1, 1150 "> snderrors"); 1151 sc->sc_sendad_success = 0; 1152 } else { 1153 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1154 if (++sc->sc_sendad_success >= 1155 CARP_SENDAD_MIN_SUCCESS(sc)) { 1156 carp_group_demote_adj(&sc->sc_if, -1, 1157 "< snderrors"); 1158 sc->sc_sendad_errors = 0; 1159 } 1160 } else 1161 sc->sc_sendad_errors = 0; 1162 } 1163 if (vhe->vhe_leader) { 1164 if (sc->sc_delayed_arp > 0) 1165 sc->sc_delayed_arp--; 1166 if (sc->sc_delayed_arp == 0) { 1167 carp_send_arp(sc); 1168 sc->sc_delayed_arp = -1; 1169 } 1170 } 1171 } 1172#ifdef INET6 1173 if (sc->sc_naddrs6) { 1174 struct ip6_hdr *ip6; 1175 1176 MGETHDR(m, M_DONTWAIT, MT_HEADER); 1177 if (m == NULL) { 1178 sc->sc_if.if_oerrors++; 1179 carpstat_inc(carps_onomem); 1180 /* XXX maybe less ? */ 1181 goto retry_later; 1182 } 1183 len = sizeof(*ip6) + sizeof(ch); 1184 m->m_pkthdr.pf.prio = CARP_IFQ_PRIO; 1185 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1186 m->m_pkthdr.len = len; 1187 m->m_len = len; 1188 m_align(m, len); 1189 m->m_flags |= M_MCAST; 1190 ip6 = mtod(m, struct ip6_hdr *); 1191 memset(ip6, 0, sizeof(*ip6)); 1192 ip6->ip6_vfc |= IPV6_VERSION; 1193 ip6->ip6_hlim = CARP_DFLTTL; 1194 ip6->ip6_nxt = IPPROTO_CARP; 1195 1196 /* set the source address */ 1197 memset(&sa, 0, sizeof(sa)); 1198 sa.sa_family = AF_INET6; 1199 /* Prefer addresses on the parent interface as source for AD. */ 1200 ifa = ifaof_ifpforaddr(&sa, ifp); 1201 if (ifa == NULL) 1202 ifa = ifaof_ifpforaddr(&sa, &sc->sc_if); 1203 KASSERT(ifa != NULL); 1204 bcopy(ifatoia6(ifa)->ia_addr.sin6_addr.s6_addr, 1205 &ip6->ip6_src, sizeof(struct in6_addr)); 1206 /* set the multicast destination */ 1207 1208 ip6->ip6_dst.s6_addr16[0] = htons(0xff02); 1209 ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index); 1210 ip6->ip6_dst.s6_addr8[15] = 0x12; 1211 1212 ch_ptr = (struct carp_header *)(ip6 + 1); 1213 bcopy(&ch, ch_ptr, sizeof(ch)); 1214 carp_prepare_ad(m, vhe, ch_ptr); 1215 1216 m->m_data += sizeof(*ip6); 1217 ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6)); 1218 m->m_data -= sizeof(*ip6); 1219 1220 getmicrotime(&sc->sc_if.if_lastchange); 1221 carpstat_inc(carps_opackets6); 1222 1223 error = ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL); 1224 if (error) { 1225 if (error == ENOBUFS) 1226 carpstat_inc(carps_onomem); 1227 else 1228 CARP_LOG(LOG_WARNING, sc, 1229 ("ip6_output failed: %d", error)); 1230 sc->sc_if.if_oerrors++; 1231 if (sc->sc_sendad_errors < INT_MAX) 1232 sc->sc_sendad_errors++; 1233 if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS(sc)) 1234 carp_group_demote_adj(&sc->sc_if, 1, 1235 "> snd6errors"); 1236 sc->sc_sendad_success = 0; 1237 } else { 1238 if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS(sc)) { 1239 if (++sc->sc_sendad_success >= 1240 CARP_SENDAD_MIN_SUCCESS(sc)) { 1241 carp_group_demote_adj(&sc->sc_if, -1, 1242 "< snd6errors"); 1243 sc->sc_sendad_errors = 0; 1244 } 1245 } else 1246 sc->sc_sendad_errors = 0; 1247 } 1248 } 1249#endif /* INET6 */ 1250 1251retry_later: 1252 sc->cur_vhe = NULL; 1253 if (advbase != 255 || advskew != 255) 1254 timeout_add_usec(&vhe->ad_tmo, usec); 1255 if_put(ifp); 1256} 1257 1258/* 1259 * Broadcast a gratuitous ARP request containing 1260 * the virtual router MAC address for each IP address 1261 * associated with the virtual router. 1262 */ 1263void 1264carp_send_arp(struct carp_softc *sc) 1265{ 1266 struct ifaddr *ifa; 1267 in_addr_t in; 1268 1269 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1270 1271 if (ifa->ifa_addr->sa_family != AF_INET) 1272 continue; 1273 1274 in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; 1275 arprequest(&sc->sc_if, &in, &in, sc->sc_ac.ac_enaddr); 1276 } 1277} 1278 1279#ifdef INET6 1280void 1281carp_send_na(struct carp_softc *sc) 1282{ 1283 struct ifaddr *ifa; 1284 struct in6_addr *in6, mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; 1285 int i_am_router = (atomic_load_int(&ip6_forwarding) != 0); 1286 int flags = ND_NA_FLAG_OVERRIDE; 1287 1288 if (i_am_router) 1289 flags |= ND_NA_FLAG_ROUTER; 1290 mcast.s6_addr16[1] = htons(sc->sc_if.if_index); 1291 1292 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1293 1294 if (ifa->ifa_addr->sa_family != AF_INET6) 1295 continue; 1296 1297 in6 = &ifatoia6(ifa)->ia_addr.sin6_addr; 1298 nd6_na_output(&sc->sc_if, &mcast, in6, flags, 1, NULL); 1299 } 1300} 1301#endif /* INET6 */ 1302 1303void 1304carp_update_lsmask(struct carp_softc *sc) 1305{ 1306 struct carp_vhost_entry *vhe; 1307 int count; 1308 1309 if (sc->sc_balancing == CARP_BAL_NONE) 1310 return; 1311 1312 sc->sc_lsmask = 0; 1313 count = 0; 1314 1315 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1316 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1317 if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) 1318 sc->sc_lsmask |= 1 << count; 1319 count++; 1320 } 1321 sc->sc_lscount = count; 1322 CARP_LOG(LOG_DEBUG, sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); 1323} 1324 1325int 1326carp_iamatch(struct ifnet *ifp) 1327{ 1328 struct carp_softc *sc = ifp->if_softc; 1329 struct carp_vhost_entry *vhe; 1330 int match = 0; 1331 1332 smr_read_enter(); 1333 vhe = SMR_SLIST_FIRST(&sc->carp_vhosts); 1334 if (vhe->state == MASTER) 1335 match = 1; 1336 smr_read_leave(); 1337 1338 return (match); 1339} 1340 1341#if NBRIDGE > 0 1342int 1343carp_ourether(struct ifnet *ifp0, uint8_t *ena) 1344{ 1345 struct carp_softc *sc; 1346 uint64_t dst = ether_addr_to_e64((struct ether_addr *)ena); 1347 int match = 0; 1348 1349 KASSERT(ifp0->if_type == IFT_ETHER); 1350 1351 smr_read_enter(); 1352 SMR_LIST_FOREACH(sc, &ifp0->if_carp, sc_list) { 1353 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1354 (IFF_UP|IFF_RUNNING)) 1355 continue; 1356 if (carp_vhe_match(sc, dst)) { 1357 match = 1; 1358 break; 1359 } 1360 } 1361 smr_read_leave(); 1362 1363 return (match); 1364} 1365#endif /* NBRIDGE > 0 */ 1366 1367int 1368carp_vhe_match(struct carp_softc *sc, uint64_t dst) 1369{ 1370 struct carp_vhost_entry *vhe; 1371 int active = 0; 1372 1373 smr_read_enter(); 1374 vhe = SMR_SLIST_FIRST(&sc->carp_vhosts); 1375 active = (vhe->state == MASTER || sc->sc_balancing >= CARP_BAL_IP); 1376 smr_read_leave(); 1377 1378 return (active && (dst == 1379 ether_addr_to_e64((struct ether_addr *)sc->sc_ac.ac_enaddr))); 1380} 1381 1382struct mbuf * 1383carp_input(struct ifnet *ifp0, struct mbuf *m, uint64_t dst, 1384 struct netstack *ns) 1385{ 1386 struct carp_softc *sc; 1387 1388 KASSERT(ifp0->if_type == IFT_ETHER); 1389 1390 smr_read_enter(); 1391 SMR_LIST_FOREACH(sc, &ifp0->if_carp, sc_list) { 1392 if ((sc->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != 1393 (IFF_UP|IFF_RUNNING)) 1394 continue; 1395 1396 if (carp_vhe_match(sc, dst)) { 1397 /* 1398 * These packets look like layer 2 multicast but they 1399 * are unicast at layer 3. With help of the tag the 1400 * mbuf's M_MCAST flag can be removed by carp_lsdrop() 1401 * after we have passed layer 2. 1402 */ 1403 if (sc->sc_balancing == CARP_BAL_IP) { 1404 struct m_tag *mtag; 1405 mtag = m_tag_get(PACKET_TAG_CARP_BAL_IP, 0, 1406 M_NOWAIT); 1407 if (mtag == NULL) { 1408 m_freem(m); 1409 goto out; 1410 } 1411 m_tag_prepend(m, mtag); 1412 } 1413 break; 1414 } 1415 } 1416 1417 if (sc != NULL) { 1418 if_vinput(&sc->sc_if, m, ns); 1419 m = NULL; 1420 } else if (ETH64_IS_MULTICAST(dst)) { 1421 /* 1422 * XXX Should really check the list of multicast addresses 1423 * for each CARP interface _before_ copying. 1424 */ 1425 SMR_LIST_FOREACH(sc, &ifp0->if_carp, sc_list) { 1426 struct mbuf *m0; 1427 1428 if (!(sc->sc_if.if_flags & IFF_UP)) 1429 continue; 1430 1431 m0 = m_dup_pkt(m, ETHER_ALIGN, M_DONTWAIT); 1432 if (m0 == NULL) 1433 continue; 1434 1435 if_vinput(&sc->sc_if, m0, ns); 1436 } 1437 } 1438out: 1439 smr_read_leave(); 1440 1441 return (m); 1442} 1443 1444int 1445carp_lsdrop(struct ifnet *ifp, struct mbuf *m, sa_family_t af, u_int32_t *src, 1446 u_int32_t *dst, int drop) 1447{ 1448 struct carp_softc *sc; 1449 u_int32_t fold; 1450 struct m_tag *mtag; 1451 1452 if (ifp->if_type != IFT_CARP) 1453 return 0; 1454 sc = ifp->if_softc; 1455 if (sc->sc_balancing == CARP_BAL_NONE) 1456 return 0; 1457 1458 /* 1459 * Remove M_MCAST flag from mbuf of balancing ip traffic, since the fact 1460 * that it is layer 2 multicast does not implicate that it is also layer 1461 * 3 multicast. 1462 */ 1463 if (m->m_flags & M_MCAST && 1464 (mtag = m_tag_find(m, PACKET_TAG_CARP_BAL_IP, NULL))) { 1465 m_tag_delete(m, mtag); 1466 m->m_flags &= ~M_MCAST; 1467 } 1468 1469 /* 1470 * Return without making a drop decision. This allows to clear the 1471 * M_MCAST flag and do nothing else. 1472 */ 1473 if (!drop) 1474 return 0; 1475 1476 /* 1477 * Never drop carp advertisements. 1478 * XXX Bad idea to pass all broadcast / multicast traffic? 1479 */ 1480 if (m->m_flags & (M_BCAST|M_MCAST)) 1481 return 0; 1482 1483 fold = src[0] ^ dst[0]; 1484#ifdef INET6 1485 if (af == AF_INET6) { 1486 int i; 1487 for (i = 1; i < 4; i++) 1488 fold ^= src[i] ^ dst[i]; 1489 } 1490#endif 1491 if (sc->sc_lscount == 0) /* just to be safe */ 1492 return 1; 1493 1494 return ((1 << (ntohl(fold) % sc->sc_lscount)) & sc->sc_lsmask) == 0; 1495} 1496 1497void 1498carp_timer_down(void *v) 1499{ 1500 NET_LOCK(); 1501 carp_master_down(v); 1502 NET_UNLOCK(); 1503} 1504 1505void 1506carp_master_down(struct carp_vhost_entry *vhe) 1507{ 1508 struct carp_softc *sc = vhe->parent_sc; 1509 1510 NET_ASSERT_LOCKED(); 1511 1512 switch (vhe->state) { 1513 case INIT: 1514 printf("%s: master_down event in INIT state\n", 1515 sc->sc_if.if_xname); 1516 break; 1517 case MASTER: 1518 break; 1519 case BACKUP: 1520 carp_set_state(vhe, MASTER); 1521 carp_send_ad(vhe); 1522 if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { 1523 carp_send_arp(sc); 1524 /* Schedule a delayed ARP to deal w/ some L3 switches */ 1525 sc->sc_delayed_arp = 2; 1526#ifdef INET6 1527 /* routing entry is not ready yet. do it later */ 1528 sc->sc_send_na = 1; 1529#endif /* INET6 */ 1530 } 1531 carp_setrun(vhe, 0); 1532 carpstat_inc(carps_preempt); 1533 break; 1534 } 1535} 1536 1537void 1538carp_setrun_all(struct carp_softc *sc, sa_family_t af) 1539{ 1540 struct carp_vhost_entry *vhe; 1541 1542 KERNEL_ASSERT_LOCKED(); /* touching carp_vhost */ 1543 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 1544 carp_setrun(vhe, af); 1545 } 1546} 1547 1548/* 1549 * When in backup state, af indicates whether to reset the master down timer 1550 * for v4 or v6. If it's set to zero, reset the ones which are already pending. 1551 */ 1552void 1553carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) 1554{ 1555 struct ifnet *ifp; 1556 struct carp_softc *sc = vhe->parent_sc; 1557 uint64_t usec; 1558 1559 if ((ifp = if_get(sc->sc_carpdevidx)) == NULL) { 1560 sc->sc_if.if_flags &= ~IFF_RUNNING; 1561 carp_set_state_all(sc, INIT); 1562 return; 1563 } 1564 1565 if (memcmp(((struct arpcom *)ifp)->ac_enaddr, 1566 sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) == 0) 1567 sc->sc_realmac = 1; 1568 else 1569 sc->sc_realmac = 0; 1570 1571 if_put(ifp); 1572 1573 if (sc->sc_if.if_flags & IFF_UP && vhe->vhid > 0 && 1574 (sc->sc_naddrs || sc->sc_naddrs6) && !sc->sc_suppress) { 1575 sc->sc_if.if_flags |= IFF_RUNNING; 1576 } else { 1577 sc->sc_if.if_flags &= ~IFF_RUNNING; 1578 return; 1579 } 1580 1581 usec = (uint64_t)sc->sc_advbase * 1000000; 1582 usec += (uint64_t)vhe->advskew * 1000000 / 256; 1583 if (usec == 0) 1584 usec = 1000000 / 256; 1585 1586 switch (vhe->state) { 1587 case INIT: 1588 carp_set_state(vhe, BACKUP); 1589 carp_setrun(vhe, 0); 1590 break; 1591 case BACKUP: 1592 timeout_del(&vhe->ad_tmo); 1593 if (vhe->vhe_leader) 1594 sc->sc_delayed_arp = -1; 1595 switch (af) { 1596 case AF_INET: 1597 timeout_add_usec(&vhe->md_tmo, 3 * usec); 1598 break; 1599#ifdef INET6 1600 case AF_INET6: 1601 timeout_add_usec(&vhe->md6_tmo, 3 * usec); 1602 break; 1603#endif /* INET6 */ 1604 default: 1605 if (sc->sc_naddrs) 1606 timeout_add_usec(&vhe->md_tmo, 3 * usec); 1607 if (sc->sc_naddrs6) 1608 timeout_add_usec(&vhe->md6_tmo, 3 * usec); 1609 break; 1610 } 1611 break; 1612 case MASTER: 1613 timeout_add_usec(&vhe->ad_tmo, usec); 1614 break; 1615 } 1616} 1617 1618void 1619carp_multicast_cleanup(struct carp_softc *sc) 1620{ 1621 struct ip_moptions *imo = &sc->sc_imo; 1622#ifdef INET6 1623 struct ip6_moptions *im6o = &sc->sc_im6o; 1624#endif 1625 u_int16_t n = imo->imo_num_memberships; 1626 1627 /* Clean up our own multicast memberships */ 1628 while (n-- > 0) { 1629 if (imo->imo_membership[n] != NULL) { 1630 in_delmulti(imo->imo_membership[n]); 1631 imo->imo_membership[n] = NULL; 1632 } 1633 } 1634 imo->imo_num_memberships = 0; 1635 imo->imo_ifidx = 0; 1636 1637#ifdef INET6 1638 while (!LIST_EMPTY(&im6o->im6o_memberships)) { 1639 struct in6_multi_mship *imm = 1640 LIST_FIRST(&im6o->im6o_memberships); 1641 1642 LIST_REMOVE(imm, i6mm_chain); 1643 in6_leavegroup(imm); 1644 } 1645 im6o->im6o_ifidx = 0; 1646#endif 1647 1648 /* And any other multicast memberships */ 1649 carp_ether_purgemulti(sc); 1650} 1651 1652int 1653carp_set_ifp(struct carp_softc *sc, struct ifnet *ifp0) 1654{ 1655 struct carp_iflist *cif; 1656 struct carp_softc *vr, *last = NULL, *after = NULL; 1657 int myself = 0, error = 0; 1658 1659 KERNEL_ASSERT_LOCKED(); /* touching if_carp */ 1660 1661 if (ifp0->if_type != IFT_ETHER) 1662 return (EINVAL); 1663 1664 if ((ifp0->if_flags & IFF_MULTICAST) == 0) 1665 return (EADDRNOTAVAIL); 1666 1667 KASSERT(ifp0->if_index != sc->sc_carpdevidx); 1668 1669 cif = &ifp0->if_carp; 1670 if (carp_check_dup_vhids(sc, cif, NULL)) 1671 return (EINVAL); 1672 1673 if ((error = ifpromisc(ifp0, 1))) 1674 return (error); 1675 1676 /* detach from old interface */ 1677 if (sc->sc_carpdevidx != 0) 1678 carpdetach(sc); 1679 1680 /* attach carp interface to physical interface */ 1681 if_detachhook_add(ifp0, &sc->sc_dtask); 1682 if_linkstatehook_add(ifp0, &sc->sc_ltask); 1683 1684 sc->sc_carpdevidx = ifp0->if_index; 1685 sc->sc_if.if_capabilities = ifp0->if_capabilities & 1686 (IFCAP_CSUM_MASK | IFCAP_TSOv4 | IFCAP_TSOv6); 1687 1688 SMR_LIST_FOREACH_LOCKED(vr, cif, sc_list) { 1689 struct carp_vhost_entry *vrhead, *schead; 1690 last = vr; 1691 1692 if (vr == sc) 1693 myself = 1; 1694 1695 vrhead = SMR_SLIST_FIRST_LOCKED(&vr->carp_vhosts); 1696 schead = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 1697 if (vrhead->vhid < schead->vhid) 1698 after = vr; 1699 } 1700 1701 if (!myself) { 1702 refcnt_take(&sc->sc_refcnt); 1703 /* We're trying to keep things in order */ 1704 if (last == NULL) { 1705 SMR_LIST_INSERT_HEAD_LOCKED(cif, sc, sc_list); 1706 } else if (after == NULL) { 1707 SMR_LIST_INSERT_AFTER_LOCKED(last, sc, sc_list); 1708 } else { 1709 SMR_LIST_INSERT_AFTER_LOCKED(after, sc, sc_list); 1710 } 1711 } 1712 if (sc->sc_naddrs || sc->sc_naddrs6) 1713 sc->sc_if.if_flags |= IFF_UP; 1714 carp_set_enaddr(sc); 1715 1716 carp_carpdev_state(sc); 1717 1718 return (0); 1719} 1720 1721void 1722carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) 1723{ 1724 struct carp_softc *sc = vhe->parent_sc; 1725 1726 if (vhe->vhid != 0 && sc->sc_carpdevidx != 0) { 1727 if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) 1728 vhe->vhe_enaddr[0] = 1; 1729 else 1730 vhe->vhe_enaddr[0] = 0; 1731 vhe->vhe_enaddr[1] = 0; 1732 vhe->vhe_enaddr[2] = 0x5e; 1733 vhe->vhe_enaddr[3] = 0; 1734 vhe->vhe_enaddr[4] = 1; 1735 vhe->vhe_enaddr[5] = vhe->vhid; 1736 } else 1737 memset(vhe->vhe_enaddr, 0, ETHER_ADDR_LEN); 1738} 1739 1740void 1741carp_set_enaddr(struct carp_softc *sc) 1742{ 1743 struct carp_vhost_entry *vhe; 1744 1745 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1746 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) 1747 carp_set_vhe_enaddr(vhe); 1748 1749 vhe = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 1750 1751 /* 1752 * Use the carp lladdr if the running one isn't manually set. 1753 * Only compare static parts of the lladdr. 1754 */ 1755 if ((memcmp(sc->sc_ac.ac_enaddr + 1, vhe->vhe_enaddr + 1, 1756 ETHER_ADDR_LEN - 2) == 0) || 1757 (!sc->sc_ac.ac_enaddr[0] && !sc->sc_ac.ac_enaddr[1] && 1758 !sc->sc_ac.ac_enaddr[2] && !sc->sc_ac.ac_enaddr[3] && 1759 !sc->sc_ac.ac_enaddr[4] && !sc->sc_ac.ac_enaddr[5])) 1760 bcopy(vhe->vhe_enaddr, sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); 1761 1762 /* Make sure the enaddr has changed before further twiddling. */ 1763 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) { 1764 bcopy(sc->sc_ac.ac_enaddr, LLADDR(sc->sc_if.if_sadl), 1765 ETHER_ADDR_LEN); 1766 bcopy(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN); 1767#ifdef INET6 1768 /* 1769 * (re)attach a link-local address which matches 1770 * our new MAC address. 1771 */ 1772 if (sc->sc_naddrs6) 1773 in6_ifattach_linklocal(&sc->sc_if, NULL); 1774#endif 1775 carp_set_state_all(sc, INIT); 1776 carp_setrun_all(sc, 0); 1777 } 1778} 1779 1780void 1781carp_addr_updated(void *v) 1782{ 1783 struct carp_softc *sc = (struct carp_softc *) v; 1784 struct ifaddr *ifa; 1785 int new_naddrs = 0, new_naddrs6 = 0; 1786 1787 TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { 1788 if (ifa->ifa_addr->sa_family == AF_INET) 1789 new_naddrs++; 1790#ifdef INET6 1791 else if (ifa->ifa_addr->sa_family == AF_INET6) 1792 new_naddrs6++; 1793#endif /* INET6 */ 1794 } 1795 1796 /* We received address changes from if_addrhooks callback */ 1797 if (new_naddrs != sc->sc_naddrs || new_naddrs6 != sc->sc_naddrs6) { 1798 1799 sc->sc_naddrs = new_naddrs; 1800 sc->sc_naddrs6 = new_naddrs6; 1801 1802 /* Re-establish multicast membership removed by in_control */ 1803 if (IN_MULTICAST(sc->sc_peer.s_addr)) { 1804 if (!in_hasmulti(&sc->sc_peer, &sc->sc_if)) { 1805 struct in_multi **imm = 1806 sc->sc_imo.imo_membership; 1807 u_int16_t maxmem = 1808 sc->sc_imo.imo_max_memberships; 1809 1810 memset(&sc->sc_imo, 0, sizeof(sc->sc_imo)); 1811 sc->sc_imo.imo_membership = imm; 1812 sc->sc_imo.imo_max_memberships = maxmem; 1813 1814 if (sc->sc_carpdevidx != 0 && 1815 sc->sc_naddrs > 0) 1816 carp_join_multicast(sc); 1817 } 1818 } 1819 1820 if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) { 1821 sc->sc_if.if_flags &= ~IFF_UP; 1822 carp_set_state_all(sc, INIT); 1823 } else 1824 carp_hmac_prepare(sc); 1825 } 1826 1827 carp_setrun_all(sc, 0); 1828} 1829 1830int 1831carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin) 1832{ 1833 struct in_addr *in = &sin->sin_addr; 1834 int error; 1835 1836 KASSERT(sc->sc_carpdevidx != 0); 1837 1838 /* XXX is this necessary? */ 1839 if (in->s_addr == INADDR_ANY) { 1840 carp_setrun_all(sc, 0); 1841 return (0); 1842 } 1843 1844 if (sc->sc_naddrs == 0 && (error = carp_join_multicast(sc)) != 0) 1845 return (error); 1846 1847 carp_set_state_all(sc, INIT); 1848 1849 return (0); 1850} 1851 1852int 1853carp_join_multicast(struct carp_softc *sc) 1854{ 1855 struct ip_moptions *imo = &sc->sc_imo; 1856 struct in_multi *imm; 1857 struct in_addr addr; 1858 1859 if (!IN_MULTICAST(sc->sc_peer.s_addr)) 1860 return (0); 1861 1862 addr.s_addr = sc->sc_peer.s_addr; 1863 if ((imm = in_addmulti(&addr, &sc->sc_if)) == NULL) 1864 return (ENOBUFS); 1865 1866 imo->imo_membership[0] = imm; 1867 imo->imo_num_memberships = 1; 1868 imo->imo_ifidx = sc->sc_if.if_index; 1869 imo->imo_ttl = CARP_DFLTTL; 1870 imo->imo_loop = 0; 1871 return (0); 1872} 1873 1874 1875#ifdef INET6 1876int 1877carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6) 1878{ 1879 int error; 1880 1881 KASSERT(sc->sc_carpdevidx != 0); 1882 1883 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 1884 carp_setrun_all(sc, 0); 1885 return (0); 1886 } 1887 1888 if (sc->sc_naddrs6 == 0 && (error = carp_join_multicast6(sc)) != 0) 1889 return (error); 1890 1891 carp_set_state_all(sc, INIT); 1892 1893 return (0); 1894} 1895 1896int 1897carp_join_multicast6(struct carp_softc *sc) 1898{ 1899 struct in6_multi_mship *imm, *imm2; 1900 struct ip6_moptions *im6o = &sc->sc_im6o; 1901 struct sockaddr_in6 addr6; 1902 int error; 1903 1904 /* Join IPv6 CARP multicast group */ 1905 memset(&addr6, 0, sizeof(addr6)); 1906 addr6.sin6_family = AF_INET6; 1907 addr6.sin6_len = sizeof(addr6); 1908 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1909 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1910 addr6.sin6_addr.s6_addr8[15] = 0x12; 1911 if ((imm = in6_joingroup(&sc->sc_if, 1912 &addr6.sin6_addr, &error)) == NULL) { 1913 return (error); 1914 } 1915 /* join solicited multicast address */ 1916 memset(&addr6.sin6_addr, 0, sizeof(addr6.sin6_addr)); 1917 addr6.sin6_addr.s6_addr16[0] = htons(0xff02); 1918 addr6.sin6_addr.s6_addr16[1] = htons(sc->sc_if.if_index); 1919 addr6.sin6_addr.s6_addr32[1] = 0; 1920 addr6.sin6_addr.s6_addr32[2] = htonl(1); 1921 addr6.sin6_addr.s6_addr32[3] = 0; 1922 addr6.sin6_addr.s6_addr8[12] = 0xff; 1923 if ((imm2 = in6_joingroup(&sc->sc_if, 1924 &addr6.sin6_addr, &error)) == NULL) { 1925 in6_leavegroup(imm); 1926 return (error); 1927 } 1928 1929 /* apply v6 multicast membership */ 1930 im6o->im6o_ifidx = sc->sc_if.if_index; 1931 if (imm) 1932 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, 1933 i6mm_chain); 1934 if (imm2) 1935 LIST_INSERT_HEAD(&im6o->im6o_memberships, imm2, 1936 i6mm_chain); 1937 1938 return (0); 1939} 1940 1941void 1942carp_if_linkstate(void *v) 1943{ 1944 struct carp_softc *sc = v; 1945 1946 if (sc->sc_send_na) { 1947 if (sc->sc_if.if_link_state == LINK_STATE_UP) 1948 carp_send_na(sc); 1949 sc->sc_send_na = 0; 1950 } 1951} 1952#endif /* INET6 */ 1953 1954int 1955carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 1956{ 1957 struct proc *p = curproc; /* XXX */ 1958 struct carp_softc *sc = ifp->if_softc; 1959 struct carp_vhost_entry *vhe; 1960 struct carpreq carpr; 1961 struct ifaddr *ifa = (struct ifaddr *)addr; 1962 struct ifreq *ifr = (struct ifreq *)addr; 1963 struct ifnet *ifp0 = NULL; 1964 int i, error = 0; 1965 1966 switch (cmd) { 1967 case SIOCSIFADDR: 1968 if (sc->sc_carpdevidx == 0) 1969 return (EINVAL); 1970 1971 switch (ifa->ifa_addr->sa_family) { 1972 case AF_INET: 1973 sc->sc_if.if_flags |= IFF_UP; 1974 error = carp_set_addr(sc, satosin(ifa->ifa_addr)); 1975 break; 1976#ifdef INET6 1977 case AF_INET6: 1978 sc->sc_if.if_flags |= IFF_UP; 1979 error = carp_set_addr6(sc, satosin6(ifa->ifa_addr)); 1980 break; 1981#endif /* INET6 */ 1982 default: 1983 error = EAFNOSUPPORT; 1984 break; 1985 } 1986 break; 1987 1988 case SIOCSIFFLAGS: 1989 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 1990 vhe = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 1991 if (vhe->state != INIT && !(ifr->ifr_flags & IFF_UP)) { 1992 carp_del_all_timeouts(sc); 1993 1994 /* we need the interface up to bow out */ 1995 sc->sc_if.if_flags |= IFF_UP; 1996 sc->sc_bow_out = 1; 1997 carp_vhe_send_ad_all(sc); 1998 sc->sc_bow_out = 0; 1999 2000 sc->sc_if.if_flags &= ~IFF_UP; 2001 carp_set_state_all(sc, INIT); 2002 carp_setrun_all(sc, 0); 2003 } else if (vhe->state == INIT && (ifr->ifr_flags & IFF_UP)) { 2004 sc->sc_if.if_flags |= IFF_UP; 2005 carp_setrun_all(sc, 0); 2006 } 2007 break; 2008 2009 case SIOCSIFXFLAGS: 2010 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) { 2011 ifsetlro(ifp0, ISSET(ifr->ifr_flags, IFXF_LRO)); 2012 if_put(ifp0); 2013 } 2014 break; 2015 2016 case SIOCSVH: 2017 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2018 vhe = SMR_SLIST_FIRST_LOCKED(&sc->carp_vhosts); 2019 if ((error = suser(p)) != 0) 2020 break; 2021 if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) 2022 break; 2023 error = 1; 2024 if (carpr.carpr_carpdev[0] != '\0' && 2025 (ifp0 = if_unit(carpr.carpr_carpdev)) == NULL) 2026 return (EINVAL); 2027 if (carpr.carpr_peer.s_addr == 0) 2028 sc->sc_peer.s_addr = INADDR_CARP_GROUP; 2029 else 2030 sc->sc_peer.s_addr = carpr.carpr_peer.s_addr; 2031 if (ifp0 != NULL && ifp0->if_index != sc->sc_carpdevidx) { 2032 if ((error = carp_set_ifp(sc, ifp0))) { 2033 if_put(ifp0); 2034 return (error); 2035 } 2036 } 2037 if_put(ifp0); 2038 if (vhe->state != INIT && carpr.carpr_state != vhe->state) { 2039 switch (carpr.carpr_state) { 2040 case BACKUP: 2041 timeout_del(&vhe->ad_tmo); 2042 carp_set_state_all(sc, BACKUP); 2043 carp_setrun_all(sc, 0); 2044 break; 2045 case MASTER: 2046 KERNEL_ASSERT_LOCKED(); 2047 /* touching carp_vhosts */ 2048 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2049 vhost_entries) 2050 carp_master_down(vhe); 2051 break; 2052 default: 2053 break; 2054 } 2055 } 2056 if ((error = carp_vhids_ioctl(sc, &carpr))) 2057 return (error); 2058 if (carpr.carpr_advbase >= 0) { 2059 if (carpr.carpr_advbase > 255) { 2060 error = EINVAL; 2061 break; 2062 } 2063 sc->sc_advbase = carpr.carpr_advbase; 2064 error--; 2065 } 2066 if (memcmp(sc->sc_advskews, carpr.carpr_advskews, 2067 sizeof(sc->sc_advskews))) { 2068 i = 0; 2069 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2070 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, 2071 vhost_entries) 2072 vhe->advskew = carpr.carpr_advskews[i++]; 2073 bcopy(carpr.carpr_advskews, sc->sc_advskews, 2074 sizeof(sc->sc_advskews)); 2075 } 2076 if (sc->sc_balancing != carpr.carpr_balancing) { 2077 if (carpr.carpr_balancing > CARP_BAL_MAXID) { 2078 error = EINVAL; 2079 break; 2080 } 2081 sc->sc_balancing = carpr.carpr_balancing; 2082 carp_set_enaddr(sc); 2083 carp_update_lsmask(sc); 2084 } 2085 bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); 2086 if (error > 0) 2087 error = EINVAL; 2088 else { 2089 error = 0; 2090 carp_hmac_prepare(sc); 2091 carp_setrun_all(sc, 0); 2092 } 2093 break; 2094 2095 case SIOCGVH: 2096 memset(&carpr, 0, sizeof(carpr)); 2097 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) 2098 strlcpy(carpr.carpr_carpdev, ifp0->if_xname, IFNAMSIZ); 2099 if_put(ifp0); 2100 i = 0; 2101 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2102 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2103 carpr.carpr_vhids[i] = vhe->vhid; 2104 carpr.carpr_advskews[i] = vhe->advskew; 2105 carpr.carpr_states[i] = vhe->state; 2106 i++; 2107 } 2108 carpr.carpr_advbase = sc->sc_advbase; 2109 carpr.carpr_balancing = sc->sc_balancing; 2110 if (suser(p) == 0) 2111 bcopy(sc->sc_key, carpr.carpr_key, 2112 sizeof(carpr.carpr_key)); 2113 carpr.carpr_peer.s_addr = sc->sc_peer.s_addr; 2114 error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); 2115 break; 2116 2117 case SIOCADDMULTI: 2118 error = carp_ether_addmulti(sc, ifr); 2119 break; 2120 2121 case SIOCDELMULTI: 2122 error = carp_ether_delmulti(sc, ifr); 2123 break; 2124 case SIOCAIFGROUP: 2125 case SIOCDIFGROUP: 2126 if (sc->sc_demote_cnt) 2127 carp_ifgroup_ioctl(ifp, cmd, addr); 2128 break; 2129 case SIOCSIFGATTR: 2130 carp_ifgattr_ioctl(ifp, cmd, addr); 2131 break; 2132 default: 2133 error = ENOTTY; 2134 } 2135 2136 if (memcmp(sc->sc_ac.ac_enaddr, sc->sc_curlladdr, ETHER_ADDR_LEN) != 0) 2137 carp_set_enaddr(sc); 2138 return (error); 2139} 2140 2141int 2142carp_check_dup_vhids(struct carp_softc *sc, struct carp_iflist *cif, 2143 struct carpreq *carpr) 2144{ 2145 struct carp_softc *vr; 2146 struct carp_vhost_entry *vhe, *vhe0; 2147 int i; 2148 2149 KERNEL_ASSERT_LOCKED(); /* touching if_carp + carp_vhosts */ 2150 2151 SMR_LIST_FOREACH_LOCKED(vr, cif, sc_list) { 2152 if (vr == sc) 2153 continue; 2154 SMR_SLIST_FOREACH_LOCKED(vhe, &vr->carp_vhosts, 2155 vhost_entries) { 2156 if (carpr) { 2157 for (i = 0; carpr->carpr_vhids[i]; i++) { 2158 if (vhe->vhid == carpr->carpr_vhids[i]) 2159 return (EINVAL); 2160 } 2161 } 2162 SMR_SLIST_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, 2163 vhost_entries) { 2164 if (vhe->vhid == vhe0->vhid) 2165 return (EINVAL); 2166 } 2167 } 2168 } 2169 return (0); 2170} 2171 2172int 2173carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) 2174{ 2175 int i, j; 2176 u_int8_t taken_vhids[256]; 2177 2178 if (carpr->carpr_vhids[0] == 0 || 2179 !memcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) 2180 return (0); 2181 2182 memset(taken_vhids, 0, sizeof(taken_vhids)); 2183 for (i = 0; carpr->carpr_vhids[i]; i++) { 2184 struct ifnet *ifp0; 2185 2186 if (taken_vhids[carpr->carpr_vhids[i]]) 2187 return (EINVAL); 2188 taken_vhids[carpr->carpr_vhids[i]] = 1; 2189 2190 if ((ifp0 = if_get(sc->sc_carpdevidx)) != NULL) { 2191 if (carp_check_dup_vhids(sc, &ifp0->if_carp, carpr)) { 2192 if_put(ifp0); 2193 return (EINVAL); 2194 } 2195 } 2196 if_put(ifp0); 2197 if (carpr->carpr_advskews[i] >= 255) 2198 return (EINVAL); 2199 } 2200 /* set sane balancing defaults */ 2201 if (i <= 1) 2202 carpr->carpr_balancing = CARP_BAL_NONE; 2203 else if (carpr->carpr_balancing == CARP_BAL_NONE && 2204 sc->sc_balancing == CARP_BAL_NONE) 2205 carpr->carpr_balancing = CARP_BAL_IP; 2206 2207 /* destroy all */ 2208 carp_del_all_timeouts(sc); 2209 carp_destroy_vhosts(sc); 2210 memset(sc->sc_vhids, 0, sizeof(sc->sc_vhids)); 2211 2212 /* sort vhosts list by vhid */ 2213 for (j = 1; j <= 255; j++) { 2214 for (i = 0; carpr->carpr_vhids[i]; i++) { 2215 if (carpr->carpr_vhids[i] != j) 2216 continue; 2217 if (carp_new_vhost(sc, carpr->carpr_vhids[i], 2218 carpr->carpr_advskews[i])) 2219 return (ENOMEM); 2220 sc->sc_vhids[i] = carpr->carpr_vhids[i]; 2221 sc->sc_advskews[i] = carpr->carpr_advskews[i]; 2222 } 2223 } 2224 carp_set_enaddr(sc); 2225 carp_set_state_all(sc, INIT); 2226 return (0); 2227} 2228 2229void 2230carp_ifgroup_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2231{ 2232 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2233 struct ifg_list *ifgl; 2234 int *dm, adj; 2235 2236 if (!strcmp(ifgr->ifgr_group, IFG_ALL)) 2237 return; 2238 adj = ((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2239 if (cmd == SIOCDIFGROUP) 2240 adj = adj * -1; 2241 2242 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) 2243 if (!strcmp(ifgl->ifgl_group->ifg_group, ifgr->ifgr_group)) { 2244 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2245 if (*dm + adj >= 0) 2246 *dm += adj; 2247 else 2248 *dm = 0; 2249 } 2250} 2251 2252void 2253carp_ifgattr_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 2254{ 2255 struct ifgroupreq *ifgr = (struct ifgroupreq *)addr; 2256 struct carp_softc *sc = ifp->if_softc; 2257 2258 if (ifgr->ifgr_attrib.ifg_carp_demoted > 0 && (sc->sc_if.if_flags & 2259 (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING)) 2260 carp_vhe_send_ad_all(sc); 2261} 2262 2263void 2264carp_start(struct ifnet *ifp) 2265{ 2266 struct carp_softc *sc = ifp->if_softc; 2267 struct ifnet *ifp0; 2268 struct mbuf *m; 2269 2270 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2271 ifq_purge(&ifp->if_snd); 2272 return; 2273 } 2274 2275 while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) 2276 carp_transmit(sc, ifp0, m); 2277 if_put(ifp0); 2278} 2279 2280void 2281carp_transmit(struct carp_softc *sc, struct ifnet *ifp0, struct mbuf *m) 2282{ 2283 struct ifnet *ifp = &sc->sc_if; 2284 2285#if NBPFILTER > 0 2286 { 2287 caddr_t if_bpf = ifp->if_bpf; 2288 if (if_bpf) 2289 bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT); 2290 } 2291#endif /* NBPFILTER > 0 */ 2292 2293 if (!ISSET(ifp0->if_flags, IFF_RUNNING)) { 2294 counters_inc(ifp->if_counters, ifc_oerrors); 2295 m_freem(m); 2296 return; 2297 } 2298 2299 /* 2300 * Do not leak the multicast address when sending 2301 * advertisements in 'ip' and 'ip-stealth' balancing 2302 * modes. 2303 */ 2304 if (sc->sc_balancing == CARP_BAL_IP || 2305 sc->sc_balancing == CARP_BAL_IPSTEALTH) { 2306 struct ether_header *eh = mtod(m, struct ether_header *); 2307 memcpy(eh->ether_shost, sc->sc_ac.ac_enaddr, 2308 sizeof(eh->ether_shost)); 2309 } 2310 2311 if (if_enqueue(ifp0, m)) 2312 counters_inc(ifp->if_counters, ifc_oerrors); 2313} 2314 2315int 2316carp_enqueue(struct ifnet *ifp, struct mbuf *m) 2317{ 2318 struct carp_softc *sc = ifp->if_softc; 2319 struct ifnet *ifp0; 2320 2321 /* no ifq_is_priq, cos hfsc on carp doesn't make sense */ 2322 2323 /* 2324 * If the parent of this carp(4) got destroyed while 2325 * `m' was being processed, silently drop it. 2326 */ 2327 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) { 2328 m_freem(m); 2329 return (0); 2330 } 2331 2332 counters_pkt(ifp->if_counters, 2333 ifc_opackets, ifc_obytes, m->m_pkthdr.len); 2334 carp_transmit(sc, ifp0, m); 2335 if_put(ifp0); 2336 2337 return (0); 2338} 2339 2340int 2341carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, 2342 struct rtentry *rt) 2343{ 2344 struct carp_softc *sc = ((struct carp_softc *)ifp->if_softc); 2345 struct carp_vhost_entry *vhe; 2346 int ismaster; 2347 2348 if (sc->cur_vhe == NULL) { 2349 smr_read_enter(); 2350 vhe = SMR_SLIST_FIRST(&sc->carp_vhosts); 2351 ismaster = (vhe->state == MASTER); 2352 smr_read_leave(); 2353 } else { 2354 ismaster = (sc->cur_vhe->state == MASTER); 2355 } 2356 2357 if ((sc->sc_balancing == CARP_BAL_NONE && !ismaster)) { 2358 m_freem(m); 2359 return (ENETUNREACH); 2360 } 2361 2362 return (ether_output(ifp, m, sa, rt)); 2363} 2364 2365void 2366carp_set_state_all(struct carp_softc *sc, int state) 2367{ 2368 struct carp_vhost_entry *vhe; 2369 2370 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2371 SMR_SLIST_FOREACH_LOCKED(vhe, &sc->carp_vhosts, vhost_entries) { 2372 if (vhe->state == state) 2373 continue; 2374 2375 carp_set_state(vhe, state); 2376 } 2377} 2378 2379void 2380carp_set_state(struct carp_vhost_entry *vhe, int state) 2381{ 2382 struct carp_softc *sc = vhe->parent_sc; 2383 static const char *carp_states[] = { CARP_STATES }; 2384 int loglevel; 2385 struct carp_vhost_entry *vhe0; 2386 2387 KASSERT(vhe->state != state); 2388 2389 if (vhe->state == INIT || state == INIT) 2390 loglevel = LOG_WARNING; 2391 else 2392 loglevel = LOG_CRIT; 2393 2394 if (sc->sc_vhe_count > 1) 2395 CARP_LOG(loglevel, sc, 2396 ("state transition (vhid %d): %s -> %s", vhe->vhid, 2397 carp_states[vhe->state], carp_states[state])); 2398 else 2399 CARP_LOG(loglevel, sc, 2400 ("state transition: %s -> %s", 2401 carp_states[vhe->state], carp_states[state])); 2402 2403 vhe->state = state; 2404 carp_update_lsmask(sc); 2405 2406 KERNEL_ASSERT_LOCKED(); /* touching carp_vhosts */ 2407 2408 sc->sc_if.if_link_state = LINK_STATE_INVALID; 2409 SMR_SLIST_FOREACH_LOCKED(vhe0, &sc->carp_vhosts, vhost_entries) { 2410 /* 2411 * Link must be up if at least one vhe is in state MASTER to 2412 * bring or keep route up. 2413 */ 2414 if (vhe0->state == MASTER) { 2415 sc->sc_if.if_link_state = LINK_STATE_UP; 2416 break; 2417 } else if (vhe0->state == BACKUP) { 2418 sc->sc_if.if_link_state = LINK_STATE_DOWN; 2419 } 2420 } 2421 if_link_state_change(&sc->sc_if); 2422} 2423 2424void 2425carp_group_demote_adj(struct ifnet *ifp, int adj, char *reason) 2426{ 2427 struct ifg_list *ifgl; 2428 int *dm, need_ad; 2429 struct carp_softc *nil = NULL; 2430 2431 if (ifp->if_type == IFT_CARP) { 2432 dm = &((struct carp_softc *)ifp->if_softc)->sc_demote_cnt; 2433 if (*dm + adj >= 0) 2434 *dm += adj; 2435 else 2436 *dm = 0; 2437 } 2438 2439 need_ad = 0; 2440 TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { 2441 if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL)) 2442 continue; 2443 dm = &ifgl->ifgl_group->ifg_carp_demoted; 2444 2445 if (*dm + adj >= 0) 2446 *dm += adj; 2447 else 2448 *dm = 0; 2449 2450 if (adj > 0 && *dm == 1) 2451 need_ad = 1; 2452 CARP_LOG(LOG_ERR, nil, 2453 ("%s demoted group %s by %d to %d (%s)", 2454 ifp->if_xname, ifgl->ifgl_group->ifg_group, 2455 adj, *dm, reason)); 2456 } 2457 if (need_ad) 2458 carp_send_ad_all(); 2459} 2460 2461int 2462carp_group_demote_count(struct carp_softc *sc) 2463{ 2464 struct ifg_list *ifgl; 2465 int count = 0; 2466 2467 TAILQ_FOREACH(ifgl, &sc->sc_if.if_groups, ifgl_next) 2468 count += ifgl->ifgl_group->ifg_carp_demoted; 2469 2470 if (count == 0 && sc->sc_demote_cnt) 2471 count = sc->sc_demote_cnt; 2472 2473 return (count > 255 ? 255 : count); 2474} 2475 2476void 2477carp_carpdev_state(void *v) 2478{ 2479 struct carp_softc *sc = v; 2480 struct ifnet *ifp0; 2481 int suppressed = sc->sc_suppress; 2482 2483 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2484 return; 2485 2486 if (ifp0->if_link_state == LINK_STATE_DOWN || 2487 !(ifp0->if_flags & IFF_UP)) { 2488 sc->sc_if.if_flags &= ~IFF_RUNNING; 2489 carp_del_all_timeouts(sc); 2490 carp_set_state_all(sc, INIT); 2491 sc->sc_suppress = 1; 2492 carp_setrun_all(sc, 0); 2493 if (!suppressed) 2494 carp_group_demote_adj(&sc->sc_if, 1, "carpdev"); 2495 } else if (suppressed) { 2496 carp_set_state_all(sc, INIT); 2497 sc->sc_suppress = 0; 2498 carp_setrun_all(sc, 0); 2499 carp_group_demote_adj(&sc->sc_if, -1, "carpdev"); 2500 } 2501 2502 if_put(ifp0); 2503} 2504 2505int 2506carp_ether_addmulti(struct carp_softc *sc, struct ifreq *ifr) 2507{ 2508 struct ifnet *ifp0; 2509 struct carp_mc_entry *mc; 2510 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2511 int error; 2512 2513 ifp0 = if_get(sc->sc_carpdevidx); 2514 if (ifp0 == NULL) 2515 return (EINVAL); 2516 2517 error = ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2518 if (error != ENETRESET) { 2519 if_put(ifp0); 2520 return (error); 2521 } 2522 2523 /* 2524 * This is new multicast address. We have to tell parent 2525 * about it. Also, remember this multicast address so that 2526 * we can delete them on unconfigure. 2527 */ 2528 mc = malloc(sizeof(*mc), M_DEVBUF, M_NOWAIT); 2529 if (mc == NULL) { 2530 error = ENOMEM; 2531 goto alloc_failed; 2532 } 2533 2534 /* 2535 * As ether_addmulti() returns ENETRESET, following two 2536 * statement shouldn't fail. 2537 */ 2538 (void)ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi); 2539 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, mc->mc_enm); 2540 memcpy(&mc->mc_addr, &ifr->ifr_addr, ifr->ifr_addr.sa_len); 2541 LIST_INSERT_HEAD(&sc->carp_mc_listhead, mc, mc_entries); 2542 2543 error = (*ifp0->if_ioctl)(ifp0, SIOCADDMULTI, (caddr_t)ifr); 2544 if (error != 0) 2545 goto ioctl_failed; 2546 2547 if_put(ifp0); 2548 2549 return (error); 2550 2551 ioctl_failed: 2552 LIST_REMOVE(mc, mc_entries); 2553 free(mc, M_DEVBUF, sizeof(*mc)); 2554 alloc_failed: 2555 (void)ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2556 if_put(ifp0); 2557 2558 return (error); 2559} 2560 2561int 2562carp_ether_delmulti(struct carp_softc *sc, struct ifreq *ifr) 2563{ 2564 struct ifnet *ifp0; 2565 struct ether_multi *enm; 2566 struct carp_mc_entry *mc; 2567 u_int8_t addrlo[ETHER_ADDR_LEN], addrhi[ETHER_ADDR_LEN]; 2568 int error; 2569 2570 ifp0 = if_get(sc->sc_carpdevidx); 2571 if (ifp0 == NULL) 2572 return (EINVAL); 2573 2574 /* 2575 * Find a key to lookup carp_mc_entry. We have to do this 2576 * before calling ether_delmulti for obvious reason. 2577 */ 2578 if ((error = ether_multiaddr(&ifr->ifr_addr, addrlo, addrhi)) != 0) 2579 goto rele; 2580 ETHER_LOOKUP_MULTI(addrlo, addrhi, &sc->sc_ac, enm); 2581 if (enm == NULL) { 2582 error = EINVAL; 2583 goto rele; 2584 } 2585 2586 LIST_FOREACH(mc, &sc->carp_mc_listhead, mc_entries) 2587 if (mc->mc_enm == enm) 2588 break; 2589 2590 /* We won't delete entries we didn't add */ 2591 if (mc == NULL) { 2592 error = EINVAL; 2593 goto rele; 2594 } 2595 2596 error = ether_delmulti(ifr, (struct arpcom *)&sc->sc_ac); 2597 if (error != ENETRESET) 2598 goto rele; 2599 2600 /* We no longer use this multicast address. Tell parent so. */ 2601 error = (*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2602 if (error == 0) { 2603 /* And forget about this address. */ 2604 LIST_REMOVE(mc, mc_entries); 2605 free(mc, M_DEVBUF, sizeof(*mc)); 2606 } else 2607 (void)ether_addmulti(ifr, (struct arpcom *)&sc->sc_ac); 2608rele: 2609 if_put(ifp0); 2610 return (error); 2611} 2612 2613/* 2614 * Delete any multicast address we have asked to add from parent 2615 * interface. Called when the carp is being unconfigured. 2616 */ 2617void 2618carp_ether_purgemulti(struct carp_softc *sc) 2619{ 2620 struct ifnet *ifp0; /* Parent. */ 2621 struct carp_mc_entry *mc; 2622 union { 2623 struct ifreq ifreq; 2624 struct { 2625 char ifr_name[IFNAMSIZ]; 2626 struct sockaddr_storage ifr_ss; 2627 } ifreq_storage; 2628 } u; 2629 struct ifreq *ifr = &u.ifreq; 2630 2631 if ((ifp0 = if_get(sc->sc_carpdevidx)) == NULL) 2632 return; 2633 2634 memcpy(ifr->ifr_name, ifp0->if_xname, IFNAMSIZ); 2635 while ((mc = LIST_FIRST(&sc->carp_mc_listhead)) != NULL) { 2636 memcpy(&ifr->ifr_addr, &mc->mc_addr, mc->mc_addr.ss_len); 2637 (void)(*ifp0->if_ioctl)(ifp0, SIOCDELMULTI, (caddr_t)ifr); 2638 LIST_REMOVE(mc, mc_entries); 2639 free(mc, M_DEVBUF, sizeof(*mc)); 2640 } 2641 2642 if_put(ifp0); 2643}