jcs's openbsd hax
openbsd
1/* $OpenBSD: ip6_input.c,v 1.300 2025/09/16 09:19:16 florian Exp $ */
2/* $KAME: ip6_input.c,v 1.188 2001/03/29 05:34:31 itojun Exp $ */
3
4/*
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33/*
34 * Copyright (c) 1982, 1986, 1988, 1993
35 * The Regents of the University of California. All rights reserved.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. Neither the name of the University nor the names of its contributors
46 * may be used to endorse or promote products derived from this software
47 * without specific prior written permission.
48 *
49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59 * SUCH DAMAGE.
60 *
61 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
62 */
63
64#include "pf.h"
65#include "carp.h"
66
67#include <sys/param.h>
68#include <sys/systm.h>
69#include <sys/mbuf.h>
70#include <sys/domain.h>
71#include <sys/sysctl.h>
72#include <sys/protosw.h>
73#include <sys/socket.h>
74#include <sys/errno.h>
75#include <sys/time.h>
76#include <sys/task.h>
77
78#include <net/if.h>
79#include <net/if_var.h>
80#include <net/if_types.h>
81#include <net/route.h>
82#include <net/netisr.h>
83
84#include <netinet/in.h>
85
86#include <netinet/ip.h>
87
88#include <netinet/in_pcb.h>
89#include <netinet/ip_var.h>
90#include <netinet6/in6_var.h>
91#include <netinet/ip6.h>
92#include <netinet6/ip6_var.h>
93#include <netinet/icmp6.h>
94#include <netinet6/nd6.h>
95
96#ifdef MROUTING
97#include <netinet6/ip6_mroute.h>
98#endif
99
100#if NPF > 0
101#include <net/pfvar.h>
102#endif
103
104#if NCARP > 0
105#include <netinet/ip_carp.h>
106#endif
107
108struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IPV6);
109
110struct cpumem *ip6counters;
111
112int ip6_ours(struct mbuf **, int *, int, int, int, struct netstack *);
113int ip6_check_rh0hdr(struct mbuf *, int *);
114int ip6_hbhchcheck(struct mbuf **, int *, int *, int);
115int ip6_hopopts_input(struct mbuf **, int *, u_int32_t *, u_int32_t *);
116struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
117
118static struct mbuf_queue ip6send_mq;
119
120static void ip6_send_dispatch(void *);
121static struct task ip6send_task =
122 TASK_INITIALIZER(ip6_send_dispatch, &ip6send_mq);
123
124/*
125 * IP6 initialization: fill in IP6 protocol switch table.
126 * All protocols not implemented in kernel go to raw IP6 protocol handler.
127 */
128void
129ip6_init(void)
130{
131 const struct protosw *pr;
132 int i;
133
134 pr = pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
135 if (pr == NULL)
136 panic("%s", __func__);
137 for (i = 0; i < IPPROTO_MAX; i++)
138 ip6_protox[i] = pr - inet6sw;
139 for (pr = inet6domain.dom_protosw;
140 pr < inet6domain.dom_protoswNPROTOSW; pr++)
141 if (pr->pr_domain->dom_family == PF_INET6 &&
142 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW &&
143 pr->pr_protocol < IPPROTO_MAX)
144 ip6_protox[pr->pr_protocol] = pr - inet6sw;
145 ip6_randomid_init();
146 nd6_init();
147 frag6_init();
148
149 mq_init(&ip6send_mq, 64, IPL_SOFTNET);
150
151 ip6counters = counters_alloc(ip6s_ncounters);
152#ifdef MROUTING
153 mrt6_init();
154#endif
155}
156
157/*
158 * Enqueue packet for local delivery. Queuing is used as a boundary
159 * between the network layer (input/forward path) running with
160 * NET_LOCK_SHARED() and the transport layer needing it exclusively.
161 */
162int
163ip6_ours(struct mbuf **mp, int *offp, int nxt, int af, int flags,
164 struct netstack *ns)
165{
166 /* ip6_hbhchcheck() may be run before, then off and nxt are set */
167 if (*offp == 0) {
168 nxt = ip6_hbhchcheck(mp, offp, NULL, flags);
169 if (nxt == IPPROTO_DONE)
170 return IPPROTO_DONE;
171 }
172
173 /* We are already in a IPv4/IPv6 local deliver loop. */
174 if (af != AF_UNSPEC)
175 return nxt;
176
177 nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1, ns);
178 if (nxt == IPPROTO_DONE)
179 return IPPROTO_DONE;
180
181 return ip6_ours_enqueue(mp, offp, nxt);
182}
183
184int
185ip6_ours_enqueue(struct mbuf **mp, int *offp, int nxt)
186{
187 /* save values for later, use after dequeue */
188 if (*offp != sizeof(struct ip6_hdr)) {
189 struct m_tag *mtag;
190 struct ipoffnxt *ion;
191
192 /* mbuf tags are expensive, but only used for header options */
193 mtag = m_tag_get(PACKET_TAG_IP6_OFFNXT, sizeof(*ion),
194 M_NOWAIT);
195 if (mtag == NULL) {
196 ip6stat_inc(ip6s_idropped);
197 m_freemp(mp);
198 return IPPROTO_DONE;
199 }
200 ion = (struct ipoffnxt *)(mtag + 1);
201 ion->ion_off = *offp;
202 ion->ion_nxt = nxt;
203
204 m_tag_prepend(*mp, mtag);
205 }
206
207 niq_enqueue(&ip6intrq, *mp);
208 *mp = NULL;
209 return IPPROTO_DONE;
210}
211
212/*
213 * Dequeue and process locally delivered packets.
214 * This is called with exclusive NET_LOCK().
215 */
216void
217ip6intr(void)
218{
219 struct mbuf *m;
220
221 while ((m = niq_dequeue(&ip6intrq)) != NULL) {
222 struct m_tag *mtag;
223 int off, nxt;
224
225#ifdef DIAGNOSTIC
226 if ((m->m_flags & M_PKTHDR) == 0)
227 panic("ip6intr no HDR");
228#endif
229 mtag = m_tag_find(m, PACKET_TAG_IP6_OFFNXT, NULL);
230 if (mtag != NULL) {
231 struct ipoffnxt *ion;
232
233 ion = (struct ipoffnxt *)(mtag + 1);
234 off = ion->ion_off;
235 nxt = ion->ion_nxt;
236
237 m_tag_delete(m, mtag);
238 } else {
239 struct ip6_hdr *ip6;
240
241 ip6 = mtod(m, struct ip6_hdr *);
242 off = sizeof(struct ip6_hdr);
243 nxt = ip6->ip6_nxt;
244 }
245 nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0, NULL);
246 KASSERT(nxt == IPPROTO_DONE);
247 }
248}
249
250void
251ipv6_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns)
252{
253 int off, nxt;
254
255 off = 0;
256 nxt = ip6_input_if(&m, &off, IPPROTO_IPV6, AF_UNSPEC, ifp, ns);
257 KASSERT(nxt == IPPROTO_DONE);
258}
259
260struct mbuf *
261ipv6_check(struct ifnet *ifp, struct mbuf *m)
262{
263 struct ip6_hdr *ip6;
264
265 if (m->m_len < sizeof(*ip6)) {
266 m = m_pullup(m, sizeof(*ip6));
267 if (m == NULL) {
268 ip6stat_inc(ip6s_toosmall);
269 return (NULL);
270 }
271 }
272
273 ip6 = mtod(m, struct ip6_hdr *);
274
275 if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
276 ip6stat_inc(ip6s_badvers);
277 goto bad;
278 }
279
280 /*
281 * Check against address spoofing/corruption.
282 */
283 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
284 IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
285 /*
286 * XXX: "badscope" is not very suitable for a multicast source.
287 */
288 ip6stat_inc(ip6s_badscope);
289 goto bad;
290 }
291 if ((IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) ||
292 IN6_IS_ADDR_LOOPBACK(&ip6->ip6_dst)) &&
293 (ifp->if_flags & IFF_LOOPBACK) == 0) {
294 ip6stat_inc(ip6s_badscope);
295 goto bad;
296 }
297 /* Drop packets if interface ID portion is already filled. */
298 if (((IN6_IS_SCOPE_EMBED(&ip6->ip6_src) && ip6->ip6_src.s6_addr16[1]) ||
299 (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst) && ip6->ip6_dst.s6_addr16[1])) &&
300 (ifp->if_flags & IFF_LOOPBACK) == 0) {
301 ip6stat_inc(ip6s_badscope);
302 goto bad;
303 }
304 if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
305 !(m->m_flags & M_LOOP)) {
306 /*
307 * In this case, the packet should come from the loopback
308 * interface. However, we cannot just check the if_flags,
309 * because ip6_mloopback() passes the "actual" interface
310 * as the outgoing/incoming interface.
311 */
312 ip6stat_inc(ip6s_badscope);
313 goto bad;
314 }
315
316 /*
317 * The following check is not documented in specs. A malicious
318 * party may be able to use IPv4 mapped addr to confuse tcp/udp stack
319 * and bypass security checks (act as if it was from 127.0.0.1 by using
320 * IPv6 src ::ffff:127.0.0.1). Be cautious.
321 *
322 * This check chokes if we are in an SIIT cloud. As none of BSDs
323 * support IPv4-less kernel compilation, we cannot support SIIT
324 * environment at all. So, it makes more sense for us to reject any
325 * malicious packets for non-SIIT environment, than try to do a
326 * partial support for SIIT environment.
327 */
328 if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
329 IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
330 ip6stat_inc(ip6s_badscope);
331 goto bad;
332 }
333
334 /*
335 * Reject packets with IPv4 compatible addresses (auto tunnel).
336 *
337 * The code forbids automatic tunneling as per RFC4213.
338 */
339 if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
340 IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
341 ip6stat_inc(ip6s_badscope);
342 goto bad;
343 }
344
345 return (m);
346bad:
347 m_freem(m);
348 return (NULL);
349}
350
351int
352ip6_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp,
353 struct netstack *ns)
354{
355 struct route iproute, *ro = NULL;
356 struct mbuf *m;
357 struct ip6_hdr *ip6;
358 struct rtentry *rt;
359 int ours = 0;
360 u_int16_t src_scope, dst_scope;
361#if NPF > 0
362 struct in6_addr odst;
363#endif
364 int flags = 0;
365
366 KASSERT(*offp == 0);
367
368 ip6stat_inc(ip6s_total);
369 m = *mp = ipv6_check(ifp, *mp);
370 if (m == NULL)
371 goto bad;
372
373 ip6 = mtod(m, struct ip6_hdr *);
374
375#if NCARP > 0
376 if (carp_lsdrop(ifp, m, AF_INET6, ip6->ip6_src.s6_addr32,
377 ip6->ip6_dst.s6_addr32, (ip6->ip6_nxt == IPPROTO_ICMPV6 ? 0 : 1)))
378 goto bad;
379#endif
380 ip6stat_inc(ip6s_nxthist + ip6->ip6_nxt);
381
382 /*
383 * If the packet has been received on a loopback interface it
384 * can be destined to any local address, not necessarily to
385 * an address configured on `ifp'.
386 */
387 if (ifp->if_flags & IFF_LOOPBACK) {
388 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src)) {
389 src_scope = ip6->ip6_src.s6_addr16[1];
390 ip6->ip6_src.s6_addr16[1] = 0;
391 }
392 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst)) {
393 dst_scope = ip6->ip6_dst.s6_addr16[1];
394 ip6->ip6_dst.s6_addr16[1] = 0;
395 }
396 }
397
398#if NPF > 0
399 /*
400 * Packet filter
401 */
402 odst = ip6->ip6_dst;
403 if (pf_test(AF_INET6, PF_IN, ifp, mp) != PF_PASS)
404 goto bad;
405 m = *mp;
406 if (m == NULL)
407 goto bad;
408
409 ip6 = mtod(m, struct ip6_hdr *);
410 if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst))
411 SET(flags, IPV6_REDIRECT);
412#endif
413
414 switch (atomic_load_int(&ip6_forwarding)) {
415 case 2:
416 SET(flags, IPV6_FORWARDING_IPSEC);
417 /* FALLTHROUGH */
418 case 1:
419 SET(flags, IPV6_FORWARDING);
420 break;
421 }
422
423 /*
424 * Without embedded scope ID we cannot find link-local
425 * addresses in the routing table.
426 */
427 if (ifp->if_flags & IFF_LOOPBACK) {
428 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
429 ip6->ip6_src.s6_addr16[1] = src_scope;
430 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
431 ip6->ip6_dst.s6_addr16[1] = dst_scope;
432 } else {
433 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_src))
434 ip6->ip6_src.s6_addr16[1] = htons(ifp->if_index);
435 if (IN6_IS_SCOPE_EMBED(&ip6->ip6_dst))
436 ip6->ip6_dst.s6_addr16[1] = htons(ifp->if_index);
437 }
438
439 /*
440 * Be more secure than RFC5095 and scan for type 0 routing headers.
441 * If pf has already scanned the header chain, do not do it twice.
442 */
443 if (!(m->m_pkthdr.pf.flags & PF_TAG_PROCESSED) &&
444 ip6_check_rh0hdr(m, offp)) {
445 ip6stat_inc(ip6s_badoptions);
446 icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_HEADER, *offp);
447 m = *mp = NULL;
448 goto bad;
449 }
450
451#if NPF > 0
452 if (pf_ouraddr(m) == 1) {
453 nxt = ip6_ours(mp, offp, nxt, af, flags, ns);
454 goto out;
455 }
456#endif
457
458 /*
459 * Multicast check
460 */
461 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
462 /*
463 * Make sure M_MCAST is set. It should theoretically
464 * already be there, but let's play safe because upper
465 * layers check for this flag.
466 */
467 m->m_flags |= M_MCAST;
468
469 /*
470 * See if we belong to the destination multicast group on the
471 * arrival interface.
472 */
473 if (in6_hasmulti(&ip6->ip6_dst, ifp))
474 ours = 1;
475
476#ifdef MROUTING
477 if (atomic_load_int(&ip6_mforwarding) &&
478 ip6_mrouter[ifp->if_rdomain]) {
479 int error;
480
481 nxt = ip6_hbhchcheck(&m, offp, &ours, flags);
482 if (nxt == IPPROTO_DONE)
483 goto out;
484
485 ip6 = mtod(m, struct ip6_hdr *);
486
487 /*
488 * If we are acting as a multicast router, all
489 * incoming multicast packets are passed to the
490 * kernel-level multicast forwarding function.
491 * The packet is returned (relatively) intact; if
492 * ip6_mforward() returns a non-zero value, the packet
493 * must be discarded, else it may be accepted below.
494 */
495 KERNEL_LOCK();
496 error = ip6_mforward(ip6, ifp, m, flags);
497 KERNEL_UNLOCK();
498 if (error) {
499 ip6stat_inc(ip6s_cantforward);
500 goto bad;
501 }
502
503 if (ours) {
504 if (af == AF_UNSPEC)
505 nxt = ip6_ours(mp, offp, nxt, af,
506 flags, ns);
507 goto out;
508 }
509 goto bad;
510 }
511#endif
512 if (!ours) {
513 ip6stat_inc(ip6s_notmember);
514 if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
515 ip6stat_inc(ip6s_cantforward);
516 goto bad;
517 }
518 nxt = ip6_ours(mp, offp, nxt, af, flags, ns);
519 goto out;
520 }
521
522
523 /*
524 * Unicast check
525 */
526 if (ns == NULL) {
527 ro = &iproute;
528 ro->ro_rt = NULL;
529 } else {
530 ro = &ns->ns_route;
531 }
532 rt = route6_mpath(ro, &ip6->ip6_dst, &ip6->ip6_src,
533 m->m_pkthdr.ph_rtableid);
534
535 /*
536 * Accept the packet if the route to the destination is marked
537 * as local.
538 */
539 if (rt != NULL && ISSET(rt->rt_flags, RTF_LOCAL)) {
540 struct in6_ifaddr *ia6 = ifatoia6(rt->rt_ifa);
541
542 if (!ISSET(flags, IPV6_FORWARDING) &&
543 rt->rt_ifidx != ifp->if_index &&
544 !((ifp->if_flags & IFF_LOOPBACK) ||
545 (ifp->if_type == IFT_ENC) ||
546 (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST))) {
547 /* received on wrong interface */
548#if NCARP > 0
549 struct ifnet *out_if;
550
551 /*
552 * Virtual IPs on carp interfaces need to be checked
553 * also against the parent interface and other carp
554 * interfaces sharing the same parent.
555 */
556 out_if = if_get(rt->rt_ifidx);
557 if (!(out_if && carp_strict_addr_chk(out_if, ifp))) {
558 ip6stat_inc(ip6s_wrongif);
559 if_put(out_if);
560 goto bad;
561 }
562 if_put(out_if);
563#else
564 ip6stat_inc(ip6s_wrongif);
565 goto bad;
566#endif
567 }
568 /*
569 * packets to a tentative, duplicated, or somehow invalid
570 * address must not be accepted.
571 */
572 if ((ia6->ia6_flags & (IN6_IFF_TENTATIVE|IN6_IFF_DUPLICATED)))
573 goto bad;
574 else {
575 nxt = ip6_ours(mp, offp, nxt, af, flags, ns);
576 goto out;
577 }
578 }
579
580#if NCARP > 0
581 if (ip6->ip6_nxt == IPPROTO_ICMPV6 &&
582 carp_lsdrop(ifp, m, AF_INET6, ip6->ip6_src.s6_addr32,
583 ip6->ip6_dst.s6_addr32, 1))
584 goto bad;
585#endif
586 /*
587 * Now there is no reason to process the packet if it's not our own
588 * and we're not a router.
589 */
590 if (!ISSET(flags, IPV6_FORWARDING)) {
591 ip6stat_inc(ip6s_cantforward);
592 goto bad;
593 }
594
595 nxt = ip6_hbhchcheck(&m, offp, &ours, flags);
596 if (nxt == IPPROTO_DONE)
597 goto out;
598
599 if (ours) {
600 if (af == AF_UNSPEC)
601 nxt = ip6_ours(mp, offp, nxt, af, flags, ns);
602 goto out;
603 }
604
605#ifdef IPSEC
606 if (ipsec_in_use) {
607 int rv;
608
609 rv = ipsec_forward_check(m, *offp, AF_INET6);
610 if (rv != 0) {
611 ip6stat_inc(ip6s_cantforward);
612 goto bad;
613 }
614 /*
615 * Fall through, forward packet. Outbound IPsec policy
616 * checking will occur in ip6_forward().
617 */
618 }
619#endif /* IPSEC */
620
621 ip6_forward(m, ro, flags);
622 *mp = NULL;
623 if (ro == &iproute)
624 rtfree(ro->ro_rt);
625 return IPPROTO_DONE;
626 bad:
627 nxt = IPPROTO_DONE;
628 m_freemp(mp);
629 out:
630 if (ro == &iproute)
631 rtfree(ro->ro_rt);
632 return nxt;
633}
634
635/* On error free mbuf and return IPPROTO_DONE. */
636int
637ip6_hbhchcheck(struct mbuf **mp, int *offp, int *oursp, int flags)
638{
639 struct ip6_hdr *ip6;
640 u_int32_t plen, rtalert = ~0;
641 int nxt;
642
643 ip6 = mtod(*mp, struct ip6_hdr *);
644
645 /*
646 * Process Hop-by-Hop options header if it's contained.
647 * m may be modified in ip6_hopopts_input().
648 * If a JumboPayload option is included, plen will also be modified.
649 */
650 plen = (u_int32_t)ntohs(ip6->ip6_plen);
651 *offp = sizeof(struct ip6_hdr);
652 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
653 struct ip6_hbh *hbh;
654
655 if (ip6_hopopts_input(mp, offp, &plen, &rtalert))
656 goto bad; /* m have already been freed */
657
658 /* adjust pointer */
659 ip6 = mtod(*mp, struct ip6_hdr *);
660
661 /*
662 * if the payload length field is 0 and the next header field
663 * indicates Hop-by-Hop Options header, then a Jumbo Payload
664 * option MUST be included.
665 */
666 if (ip6->ip6_plen == 0 && plen == 0) {
667 /*
668 * Note that if a valid jumbo payload option is
669 * contained, ip6_hopopts_input() must set a valid
670 * (non-zero) payload length to the variable plen.
671 */
672 ip6stat_inc(ip6s_badoptions);
673 icmp6_error(*mp, ICMP6_PARAM_PROB,
674 ICMP6_PARAMPROB_HEADER,
675 (caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
676 goto bad;
677 }
678 hbh = ip6_exthdr_get(mp, sizeof(struct ip6_hdr),
679 sizeof(struct ip6_hbh));
680 if (hbh == NULL) {
681 ip6stat_inc(ip6s_tooshort);
682 goto bad;
683 }
684 nxt = hbh->ip6h_nxt;
685
686 /*
687 * accept the packet if a router alert option is included
688 * and we act as an IPv6 router.
689 */
690 if (rtalert != ~0 && ISSET(flags, IPV6_FORWARDING) &&
691 oursp != NULL)
692 *oursp = 1;
693 } else
694 nxt = ip6->ip6_nxt;
695
696 /*
697 * Check that the amount of data in the buffers
698 * is as at least much as the IPv6 header would have us expect.
699 * Trim mbufs if longer than we expect.
700 * Drop packet if shorter than we expect.
701 */
702 if ((*mp)->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
703 ip6stat_inc(ip6s_tooshort);
704 m_freemp(mp);
705 goto bad;
706 }
707 if ((*mp)->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
708 if ((*mp)->m_len == (*mp)->m_pkthdr.len) {
709 (*mp)->m_len = sizeof(struct ip6_hdr) + plen;
710 (*mp)->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
711 } else {
712 m_adj((*mp), sizeof(struct ip6_hdr) + plen -
713 (*mp)->m_pkthdr.len);
714 }
715 }
716
717 return nxt;
718 bad:
719 return IPPROTO_DONE;
720}
721
722/* scan packet for RH0 routing header. Mostly stolen from pf.c:pf_test() */
723int
724ip6_check_rh0hdr(struct mbuf *m, int *offp)
725{
726 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
727 struct ip6_rthdr rthdr;
728 struct ip6_ext opt6;
729 u_int8_t proto = ip6->ip6_nxt;
730 int done = 0, lim, off, rh_cnt = 0;
731
732 off = ((caddr_t)ip6 - m->m_data) + sizeof(struct ip6_hdr);
733 lim = min(m->m_pkthdr.len, ntohs(ip6->ip6_plen) + sizeof(*ip6));
734 do {
735 switch (proto) {
736 case IPPROTO_ROUTING:
737 if (rh_cnt++) {
738 /* more than one rh header present */
739 *offp = off;
740 return (1);
741 }
742
743 if (off + sizeof(rthdr) > lim) {
744 /* packet to short to make sense */
745 *offp = off;
746 return (1);
747 }
748
749 m_copydata(m, off, sizeof(rthdr), &rthdr);
750
751 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
752 *offp = off +
753 offsetof(struct ip6_rthdr, ip6r_type);
754 return (1);
755 }
756
757 off += (rthdr.ip6r_len + 1) * 8;
758 proto = rthdr.ip6r_nxt;
759 break;
760 case IPPROTO_AH:
761 case IPPROTO_HOPOPTS:
762 case IPPROTO_DSTOPTS:
763 /* get next header and header length */
764 if (off + sizeof(opt6) > lim) {
765 /*
766 * Packet to short to make sense, we could
767 * reject the packet but as a router we
768 * should not do that so forward it.
769 */
770 return (0);
771 }
772
773 m_copydata(m, off, sizeof(opt6), &opt6);
774
775 if (proto == IPPROTO_AH)
776 off += (opt6.ip6e_len + 2) * 4;
777 else
778 off += (opt6.ip6e_len + 1) * 8;
779 proto = opt6.ip6e_nxt;
780 break;
781 case IPPROTO_FRAGMENT:
782 default:
783 /* end of header stack */
784 done = 1;
785 break;
786 }
787 } while (!done);
788
789 return (0);
790}
791
792/*
793 * Hop-by-Hop options header processing. If a valid jumbo payload option is
794 * included, the real payload length will be stored in plenp.
795 * On error free mbuf and return -1.
796 *
797 * rtalertp - XXX: should be stored in a more smart way
798 */
799int
800ip6_hopopts_input(struct mbuf **mp, int *offp, u_int32_t *plenp,
801 u_int32_t *rtalertp)
802{
803 int off = *offp, hbhlen;
804 struct ip6_hbh *hbh;
805
806 /* validation of the length of the header */
807 hbh = ip6_exthdr_get(mp, sizeof(struct ip6_hdr),
808 sizeof(struct ip6_hbh));
809 if (hbh == NULL) {
810 ip6stat_inc(ip6s_tooshort);
811 return -1;
812 }
813 hbhlen = (hbh->ip6h_len + 1) << 3;
814 hbh = ip6_exthdr_get(mp, sizeof(struct ip6_hdr), hbhlen);
815 if (hbh == NULL) {
816 ip6stat_inc(ip6s_tooshort);
817 return -1;
818 }
819 off += hbhlen;
820 hbhlen -= sizeof(struct ip6_hbh);
821
822 if (ip6_process_hopopts(mp, (u_int8_t *)hbh + sizeof(struct ip6_hbh),
823 hbhlen, rtalertp, plenp) < 0)
824 return (-1);
825
826 *offp = off;
827 return (0);
828}
829
830/*
831 * Search header for all Hop-by-hop options and process each option.
832 * This function is separate from ip6_hopopts_input() in order to
833 * handle a case where the sending node itself process its hop-by-hop
834 * options header. In such a case, the function is called from ip6_output().
835 * On error free mbuf and return -1.
836 *
837 * The function assumes that hbh header is located right after the IPv6 header
838 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
839 * opthead + hbhlen is located in continuous memory region.
840 */
841int
842ip6_process_hopopts(struct mbuf **mp, u_int8_t *opthead, int hbhlen,
843 u_int32_t *rtalertp, u_int32_t *plenp)
844{
845 struct ip6_hdr *ip6;
846 int optlen = 0;
847 u_int8_t *opt = opthead;
848 u_int16_t rtalert_val;
849 u_int32_t jumboplen;
850 const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
851
852 for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
853 switch (*opt) {
854 case IP6OPT_PAD1:
855 optlen = 1;
856 break;
857 case IP6OPT_PADN:
858 if (hbhlen < IP6OPT_MINLEN) {
859 ip6stat_inc(ip6s_toosmall);
860 goto bad;
861 }
862 optlen = *(opt + 1) + 2;
863 break;
864 case IP6OPT_ROUTER_ALERT:
865 /* XXX may need check for alignment */
866 if (hbhlen < IP6OPT_RTALERT_LEN) {
867 ip6stat_inc(ip6s_toosmall);
868 goto bad;
869 }
870 if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
871 /* XXX stat */
872 icmp6_error(*mp, ICMP6_PARAM_PROB,
873 ICMP6_PARAMPROB_HEADER,
874 erroff + opt + 1 - opthead);
875 return (-1);
876 }
877 optlen = IP6OPT_RTALERT_LEN;
878 memcpy((caddr_t)&rtalert_val, (caddr_t)(opt + 2), 2);
879 *rtalertp = ntohs(rtalert_val);
880 break;
881 case IP6OPT_JUMBO:
882 /* XXX may need check for alignment */
883 if (hbhlen < IP6OPT_JUMBO_LEN) {
884 ip6stat_inc(ip6s_toosmall);
885 goto bad;
886 }
887 if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
888 /* XXX stat */
889 icmp6_error(*mp, ICMP6_PARAM_PROB,
890 ICMP6_PARAMPROB_HEADER,
891 erroff + opt + 1 - opthead);
892 return (-1);
893 }
894 optlen = IP6OPT_JUMBO_LEN;
895
896 /*
897 * IPv6 packets that have non 0 payload length
898 * must not contain a jumbo payload option.
899 */
900 ip6 = mtod(*mp, struct ip6_hdr *);
901 if (ip6->ip6_plen) {
902 ip6stat_inc(ip6s_badoptions);
903 icmp6_error(*mp, ICMP6_PARAM_PROB,
904 ICMP6_PARAMPROB_HEADER,
905 erroff + opt - opthead);
906 return (-1);
907 }
908
909 /*
910 * We may see jumbolen in unaligned location, so
911 * we'd need to perform memcpy().
912 */
913 memcpy(&jumboplen, opt + 2, sizeof(jumboplen));
914 jumboplen = (u_int32_t)htonl(jumboplen);
915
916#if 1
917 /*
918 * if there are multiple jumbo payload options,
919 * *plenp will be non-zero and the packet will be
920 * rejected.
921 * the behavior may need some debate in ipngwg -
922 * multiple options does not make sense, however,
923 * there's no explicit mention in specification.
924 */
925 if (*plenp != 0) {
926 ip6stat_inc(ip6s_badoptions);
927 icmp6_error(*mp, ICMP6_PARAM_PROB,
928 ICMP6_PARAMPROB_HEADER,
929 erroff + opt + 2 - opthead);
930 return (-1);
931 }
932#endif
933
934 /*
935 * jumbo payload length must be larger than 65535.
936 */
937 if (jumboplen <= IPV6_MAXPACKET) {
938 ip6stat_inc(ip6s_badoptions);
939 icmp6_error(*mp, ICMP6_PARAM_PROB,
940 ICMP6_PARAMPROB_HEADER,
941 erroff + opt + 2 - opthead);
942 return (-1);
943 }
944 *plenp = jumboplen;
945
946 break;
947 default: /* unknown option */
948 if (hbhlen < IP6OPT_MINLEN) {
949 ip6stat_inc(ip6s_toosmall);
950 goto bad;
951 }
952 optlen = ip6_unknown_opt(mp, opt,
953 erroff + opt - opthead);
954 if (optlen == -1)
955 return (-1);
956 optlen += 2;
957 break;
958 }
959 }
960
961 return (0);
962
963 bad:
964 m_freemp(mp);
965 return (-1);
966}
967
968/*
969 * Unknown option processing.
970 * The third argument `off' is the offset from the IPv6 header to the option,
971 * which allows returning an ICMPv6 error even if the IPv6 header and the
972 * option header are not continuous.
973 * On error free mbuf and return -1.
974 */
975int
976ip6_unknown_opt(struct mbuf **mp, u_int8_t *optp, int off)
977{
978 struct ip6_hdr *ip6;
979
980 switch (IP6OPT_TYPE(*optp)) {
981 case IP6OPT_TYPE_SKIP: /* ignore the option */
982 return ((int)*(optp + 1));
983 case IP6OPT_TYPE_DISCARD: /* silently discard */
984 m_freemp(mp);
985 return (-1);
986 case IP6OPT_TYPE_FORCEICMP: /* send ICMP even if multicasted */
987 ip6stat_inc(ip6s_badoptions);
988 icmp6_error(*mp, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
989 return (-1);
990 case IP6OPT_TYPE_ICMP: /* send ICMP if not multicasted */
991 ip6stat_inc(ip6s_badoptions);
992 ip6 = mtod(*mp, struct ip6_hdr *);
993 if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
994 ((*mp)->m_flags & (M_BCAST|M_MCAST)))
995 m_freemp(mp);
996 else
997 icmp6_error(*mp, ICMP6_PARAM_PROB,
998 ICMP6_PARAMPROB_OPTION, off);
999 return (-1);
1000 }
1001
1002 m_freemp(mp); /* XXX: NOTREACHED */
1003 return (-1);
1004}
1005
1006/*
1007 * Create the "control" list for this pcb.
1008 *
1009 * The routine will be called from upper layer handlers like udp_input().
1010 * Thus the routine assumes that the caller (udp_input) have already
1011 * called IP6_EXTHDR_CHECK() and all the extension headers are located in the
1012 * very first mbuf on the mbuf chain.
1013 * We may want to add some infinite loop prevention or sanity checks for safety.
1014 * (This applies only when you are using KAME mbuf chain restriction, i.e.
1015 * you are using IP6_EXTHDR_CHECK() not m_pulldown())
1016 */
1017void
1018ip6_savecontrol(struct inpcb *inp, struct mbuf *m, struct mbuf **mp)
1019{
1020 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1021
1022 if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1023 struct timeval tv;
1024
1025 m_microtime(m, &tv);
1026 *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1027 SCM_TIMESTAMP, SOL_SOCKET);
1028 if (*mp)
1029 mp = &(*mp)->m_next;
1030 }
1031
1032 /* RFC 2292 sec. 5 */
1033 if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
1034 struct in6_pktinfo pi6;
1035 memcpy(&pi6.ipi6_addr, &ip6->ip6_dst, sizeof(struct in6_addr));
1036 if (IN6_IS_SCOPE_EMBED(&pi6.ipi6_addr))
1037 pi6.ipi6_addr.s6_addr16[1] = 0;
1038 pi6.ipi6_ifindex = m ? m->m_pkthdr.ph_ifidx : 0;
1039 *mp = sbcreatecontrol((caddr_t) &pi6,
1040 sizeof(struct in6_pktinfo),
1041 IPV6_PKTINFO, IPPROTO_IPV6);
1042 if (*mp)
1043 mp = &(*mp)->m_next;
1044 }
1045
1046 if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
1047 int hlim = ip6->ip6_hlim & 0xff;
1048 *mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
1049 IPV6_HOPLIMIT, IPPROTO_IPV6);
1050 if (*mp)
1051 mp = &(*mp)->m_next;
1052 }
1053
1054 if ((inp->inp_flags & IN6P_TCLASS) != 0) {
1055 u_int32_t flowinfo;
1056 int tclass;
1057
1058 flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
1059 flowinfo >>= 20;
1060
1061 tclass = flowinfo & 0xff;
1062 *mp = sbcreatecontrol((caddr_t)&tclass, sizeof(tclass),
1063 IPV6_TCLASS, IPPROTO_IPV6);
1064 if (*mp)
1065 mp = &(*mp)->m_next;
1066 }
1067
1068 /*
1069 * IPV6_HOPOPTS socket option. Recall that we required super-user
1070 * privilege for the option (see ip6_ctloutput), but it might be too
1071 * strict, since there might be some hop-by-hop options which can be
1072 * returned to normal user.
1073 * See also RFC 2292 section 6 (or RFC 3542 section 8).
1074 */
1075 if ((inp->inp_flags & IN6P_HOPOPTS) != 0) {
1076 /*
1077 * Check if a hop-by-hop options header is contained in the
1078 * received packet, and if so, store the options as ancillary
1079 * data. Note that a hop-by-hop options header must be
1080 * just after the IPv6 header, which is assured through the
1081 * IPv6 input processing.
1082 */
1083 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1084 if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
1085 struct ip6_hbh *hbh;
1086 int hbhlen = 0;
1087 struct mbuf *ext;
1088
1089 ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
1090 ip6->ip6_nxt);
1091 if (ext == NULL) {
1092 ip6stat_inc(ip6s_tooshort);
1093 return;
1094 }
1095 hbh = mtod(ext, struct ip6_hbh *);
1096 hbhlen = (hbh->ip6h_len + 1) << 3;
1097 if (hbhlen != ext->m_len) {
1098 m_freem(ext);
1099 ip6stat_inc(ip6s_tooshort);
1100 return;
1101 }
1102
1103 /*
1104 * XXX: We copy the whole header even if a
1105 * jumbo payload option is included, the option which
1106 * is to be removed before returning according to
1107 * RFC2292.
1108 * Note: this constraint is removed in RFC3542.
1109 */
1110 *mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
1111 IPV6_HOPOPTS,
1112 IPPROTO_IPV6);
1113 if (*mp)
1114 mp = &(*mp)->m_next;
1115 m_freem(ext);
1116 }
1117 }
1118
1119 /* IPV6_DSTOPTS and IPV6_RTHDR socket options */
1120 if ((inp->inp_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
1121 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1122 int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);
1123
1124 /*
1125 * Search for destination options headers or routing
1126 * header(s) through the header chain, and stores each
1127 * header as ancillary data.
1128 * Note that the order of the headers remains in
1129 * the chain of ancillary data.
1130 */
1131 while (1) { /* is explicit loop prevention necessary? */
1132 struct ip6_ext *ip6e = NULL;
1133 int elen;
1134 struct mbuf *ext = NULL;
1135
1136 /*
1137 * if it is not an extension header, don't try to
1138 * pull it from the chain.
1139 */
1140 switch (nxt) {
1141 case IPPROTO_DSTOPTS:
1142 case IPPROTO_ROUTING:
1143 case IPPROTO_HOPOPTS:
1144 case IPPROTO_AH: /* is it possible? */
1145 break;
1146 default:
1147 goto loopend;
1148 }
1149
1150 ext = ip6_pullexthdr(m, off, nxt);
1151 if (ext == NULL) {
1152 ip6stat_inc(ip6s_tooshort);
1153 return;
1154 }
1155 ip6e = mtod(ext, struct ip6_ext *);
1156 if (nxt == IPPROTO_AH)
1157 elen = (ip6e->ip6e_len + 2) << 2;
1158 else
1159 elen = (ip6e->ip6e_len + 1) << 3;
1160 if (elen != ext->m_len) {
1161 m_freem(ext);
1162 ip6stat_inc(ip6s_tooshort);
1163 return;
1164 }
1165
1166 switch (nxt) {
1167 case IPPROTO_DSTOPTS:
1168 if (!(inp->inp_flags & IN6P_DSTOPTS))
1169 break;
1170
1171 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1172 IPV6_DSTOPTS,
1173 IPPROTO_IPV6);
1174 if (*mp)
1175 mp = &(*mp)->m_next;
1176 break;
1177
1178 case IPPROTO_ROUTING:
1179 if (!(inp->inp_flags & IN6P_RTHDR))
1180 break;
1181
1182 *mp = sbcreatecontrol((caddr_t)ip6e, elen,
1183 IPV6_RTHDR,
1184 IPPROTO_IPV6);
1185 if (*mp)
1186 mp = &(*mp)->m_next;
1187 break;
1188
1189 case IPPROTO_HOPOPTS:
1190 case IPPROTO_AH: /* is it possible? */
1191 break;
1192
1193 default:
1194 /*
1195 * other cases have been filtered in the above.
1196 * none will visit this case. here we supply
1197 * the code just in case (nxt overwritten or
1198 * other cases).
1199 */
1200 m_freem(ext);
1201 goto loopend;
1202
1203 }
1204
1205 /* proceed with the next header. */
1206 off += elen;
1207 nxt = ip6e->ip6e_nxt;
1208 ip6e = NULL;
1209 m_freem(ext);
1210 ext = NULL;
1211 }
1212loopend:
1213 ;
1214 }
1215}
1216
1217/*
1218 * pull single extension header from mbuf chain. returns single mbuf that
1219 * contains the result, or NULL on error.
1220 */
1221struct mbuf *
1222ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
1223{
1224 struct ip6_ext ip6e;
1225 size_t elen;
1226 struct mbuf *n;
1227
1228#ifdef DIAGNOSTIC
1229 switch (nxt) {
1230 case IPPROTO_DSTOPTS:
1231 case IPPROTO_ROUTING:
1232 case IPPROTO_HOPOPTS:
1233 case IPPROTO_AH: /* is it possible? */
1234 break;
1235 default:
1236 printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
1237 }
1238#endif
1239
1240 if (off + sizeof(ip6e) > m->m_pkthdr.len)
1241 return NULL;
1242
1243 m_copydata(m, off, sizeof(ip6e), &ip6e);
1244 if (nxt == IPPROTO_AH)
1245 elen = (ip6e.ip6e_len + 2) << 2;
1246 else
1247 elen = (ip6e.ip6e_len + 1) << 3;
1248
1249 if (off + elen > m->m_pkthdr.len)
1250 return NULL;
1251
1252 MGET(n, M_DONTWAIT, MT_DATA);
1253 if (n && elen >= MLEN) {
1254 MCLGET(n, M_DONTWAIT);
1255 if ((n->m_flags & M_EXT) == 0) {
1256 m_free(n);
1257 n = NULL;
1258 }
1259 }
1260 if (n == NULL) {
1261 ip6stat_inc(ip6s_idropped);
1262 return NULL;
1263 }
1264
1265 n->m_len = 0;
1266 if (elen >= m_trailingspace(n)) {
1267 m_free(n);
1268 return NULL;
1269 }
1270
1271 m_copydata(m, off, elen, mtod(n, caddr_t));
1272 n->m_len = elen;
1273 return n;
1274}
1275
1276/*
1277 * Get offset to the previous header followed by the header
1278 * currently processed.
1279 */
1280int
1281ip6_get_prevhdr(struct mbuf *m, int off)
1282{
1283 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
1284
1285 if (off == sizeof(struct ip6_hdr)) {
1286 return offsetof(struct ip6_hdr, ip6_nxt);
1287 } else if (off < sizeof(struct ip6_hdr)) {
1288 panic("%s: off < sizeof(struct ip6_hdr)", __func__);
1289 } else {
1290 int len, nlen, nxt;
1291 struct ip6_ext ip6e;
1292
1293 nxt = ip6->ip6_nxt;
1294 len = sizeof(struct ip6_hdr);
1295 nlen = 0;
1296 while (len < off) {
1297 m_copydata(m, len, sizeof(ip6e), &ip6e);
1298
1299 switch (nxt) {
1300 case IPPROTO_FRAGMENT:
1301 nlen = sizeof(struct ip6_frag);
1302 break;
1303 case IPPROTO_AH:
1304 nlen = (ip6e.ip6e_len + 2) << 2;
1305 break;
1306 default:
1307 nlen = (ip6e.ip6e_len + 1) << 3;
1308 break;
1309 }
1310 len += nlen;
1311 nxt = ip6e.ip6e_nxt;
1312 }
1313
1314 return (len - nlen);
1315 }
1316}
1317
1318/*
1319 * get next header offset. m will be retained.
1320 */
1321int
1322ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
1323{
1324 struct ip6_hdr ip6;
1325 struct ip6_ext ip6e;
1326 struct ip6_frag fh;
1327
1328 /* just in case */
1329 if (m == NULL)
1330 panic("%s: m == NULL", __func__);
1331 if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
1332 return -1;
1333
1334 switch (proto) {
1335 case IPPROTO_IPV6:
1336 if (m->m_pkthdr.len < off + sizeof(ip6))
1337 return -1;
1338 m_copydata(m, off, sizeof(ip6), &ip6);
1339 if (nxtp)
1340 *nxtp = ip6.ip6_nxt;
1341 off += sizeof(ip6);
1342 return off;
1343
1344 case IPPROTO_FRAGMENT:
1345 /*
1346 * terminate parsing if it is not the first fragment,
1347 * it does not make sense to parse through it.
1348 */
1349 if (m->m_pkthdr.len < off + sizeof(fh))
1350 return -1;
1351 m_copydata(m, off, sizeof(fh), &fh);
1352 if ((fh.ip6f_offlg & IP6F_OFF_MASK) != 0)
1353 return -1;
1354 if (nxtp)
1355 *nxtp = fh.ip6f_nxt;
1356 off += sizeof(struct ip6_frag);
1357 return off;
1358
1359 case IPPROTO_AH:
1360 if (m->m_pkthdr.len < off + sizeof(ip6e))
1361 return -1;
1362 m_copydata(m, off, sizeof(ip6e), &ip6e);
1363 if (nxtp)
1364 *nxtp = ip6e.ip6e_nxt;
1365 off += (ip6e.ip6e_len + 2) << 2;
1366 if (m->m_pkthdr.len < off)
1367 return -1;
1368 return off;
1369
1370 case IPPROTO_HOPOPTS:
1371 case IPPROTO_ROUTING:
1372 case IPPROTO_DSTOPTS:
1373 if (m->m_pkthdr.len < off + sizeof(ip6e))
1374 return -1;
1375 m_copydata(m, off, sizeof(ip6e), &ip6e);
1376 if (nxtp)
1377 *nxtp = ip6e.ip6e_nxt;
1378 off += (ip6e.ip6e_len + 1) << 3;
1379 if (m->m_pkthdr.len < off)
1380 return -1;
1381 return off;
1382
1383 case IPPROTO_NONE:
1384 case IPPROTO_ESP:
1385 case IPPROTO_IPCOMP:
1386 /* give up */
1387 return -1;
1388
1389 default:
1390 return -1;
1391 }
1392
1393 return -1;
1394}
1395
/*
 * Follow the header chain as far as ip6_nexthdr() can step.  m is only
 * handed to ip6_nexthdr() and is not modified here.
 *
 * Starting with the header of type proto at offset off, ip6_nexthdr()
 * is called repeatedly.  The offset of the header it refuses to step
 * over (negative return) is the result.  If it ever reports an offset
 * smaller than the current one the result is -1; if it reports the same
 * offset, that offset is returned.
 *
 * nxtp may be NULL; a local initialised to -1 is used in its place.
 * Otherwise *nxtp is left holding whatever ip6_nexthdr() stored last.
 */
int
ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
	int scratch, advanced;

	if (nxtp == NULL) {
		scratch = -1;
		nxtp = &scratch;
	}

	for (;;) {
		advanced = ip6_nexthdr(m, off, proto, nxtp);
		if (advanced < 0)
			return off;
		if (advanced < off)
			return -1;	/* invalid */
		if (advanced == off)
			return advanced;

		off = advanced;
		proto = *nxtp;
	}
}
1422
1423/*
1424 * System control for IP6
1425 */
1426
1427const u_char inet6ctlerrmap[PRC_NCMDS] = {
1428 0, 0, 0, 0,
1429 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1430 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1431 EMSGSIZE, EHOSTUNREACH, 0, 0,
1432 0, 0, 0, 0,
1433 ENOPROTOOPT
1434};
1435
1436#ifdef MROUTING
1437extern int ip6_mrtproto;
1438#endif
1439
1440#ifndef SMALL_KERNEL
1441const struct sysctl_bounded_args ipv6ctl_vars[] = {
1442 { IPV6CTL_FORWARDING, &ip6_forwarding, 0, 2 },
1443 { IPV6CTL_SENDREDIRECTS, &ip6_sendredirects, 0, 1 },
1444 { IPV6CTL_DAD_PENDING, &ip6_dad_pending, SYSCTL_INT_READONLY },
1445#ifdef MROUTING
1446 { IPV6CTL_MRTPROTO, &ip6_mrtproto, SYSCTL_INT_READONLY },
1447#endif
1448 { IPV6CTL_DEFHLIM, &ip6_defhlim, 0, 255 },
1449 { IPV6CTL_MAXFRAGPACKETS, &ip6_maxfragpackets, 0, 1000 },
1450 { IPV6CTL_HDRNESTLIMIT, &ip6_hdrnestlimit, 0, 100 },
1451 { IPV6CTL_DAD_COUNT, &ip6_dad_count, 0, 10 },
1452 { IPV6CTL_DEFMCASTHLIM, &ip6_defmcasthlim, 0, 255 },
1453 { IPV6CTL_MAXFRAGS, &ip6_maxfrags, 0, 1000 },
1454 { IPV6CTL_MFORWARDING, &ip6_mforwarding, 0, 1 },
1455 { IPV6CTL_MCAST_PMTU, &ip6_mcast_pmtu, 0, 1 },
1456 { IPV6CTL_NEIGHBORGCTHRESH, &ip6_neighborgcthresh, 0, 5 * 2048 },
1457 { IPV6CTL_MAXDYNROUTES, &ip6_maxdynroutes, 0, 5 * 4096 },
1458};
1459
1460int
1461ip6_sysctl_ip6stat(void *oldp, size_t *oldlenp, void *newp)
1462{
1463 struct ip6stat *ip6stat;
1464 int ret;
1465
1466 CTASSERT(sizeof(*ip6stat) == (ip6s_ncounters * sizeof(uint64_t)));
1467
1468 ip6stat = malloc(sizeof(*ip6stat), M_TEMP, M_WAITOK);
1469 counters_read(ip6counters, (uint64_t *)ip6stat, ip6s_ncounters, NULL);
1470 ret = sysctl_rdstruct(oldp, oldlenp, newp,
1471 ip6stat, sizeof(*ip6stat));
1472 free(ip6stat, M_TEMP, sizeof(*ip6stat));
1473
1474 return (ret);
1475}
1476#endif /* SMALL_KERNEL */
1477
1478int
1479ip6_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
1480 void *newp, size_t newlen)
1481{
1482 /* Almost all sysctl names at this level are terminal. */
1483 if (namelen != 1 && name[0] != IPV6CTL_IFQUEUE)
1484 return (ENOTDIR);
1485
1486 switch (name[0]) {
1487#ifndef SMALL_KERNEL
1488 case IPV6CTL_STATS:
1489 return (ip6_sysctl_ip6stat(oldp, oldlenp, newp));
1490#ifdef MROUTING
1491 case IPV6CTL_MRTSTATS:
1492 return mrt6_sysctl_mrt6stat(oldp, oldlenp, newp);
1493 case IPV6CTL_MRTMIF:
1494 if (newp)
1495 return (EPERM);
1496 return (mrt6_sysctl_mif(oldp, oldlenp));
1497 case IPV6CTL_MRTMFC:
1498 if (newp)
1499 return (EPERM);
1500 return (mrt6_sysctl_mfc(oldp, oldlenp));
1501#else
1502 case IPV6CTL_MRTSTATS:
1503 case IPV6CTL_MRTPROTO:
1504 case IPV6CTL_MRTMIF:
1505 case IPV6CTL_MRTMFC:
1506 return (EOPNOTSUPP);
1507#endif
1508 case IPV6CTL_MTUDISCTIMEOUT: {
1509 int oldval, newval, error;
1510
1511 oldval = newval = atomic_load_int(&ip6_mtudisc_timeout);
1512 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1513 &newval, 0, INT_MAX);
1514 if (error == 0 && oldval != newval) {
1515 rw_enter_write(&sysctl_lock);
1516 atomic_store_int(&ip6_mtudisc_timeout, newval);
1517 rt_timer_queue_change(&icmp6_mtudisc_timeout_q, newval);
1518 rw_exit_write(&sysctl_lock);
1519 }
1520
1521 return (error);
1522 }
1523 case IPV6CTL_IFQUEUE:
1524 return (sysctl_niq(name + 1, namelen - 1,
1525 oldp, oldlenp, newp, newlen, &ip6intrq));
1526 case IPV6CTL_MULTIPATH: {
1527 int oldval, newval, error;
1528
1529 oldval = newval = atomic_load_int(&ip6_multipath);
1530 error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
1531 &newval, 0, 1);
1532 if (error == 0 && oldval != newval) {
1533 atomic_store_int(&ip6_multipath, newval);
1534 membar_producer();
1535 atomic_inc_long(&rtgeneration);
1536 }
1537
1538 return (error);
1539 }
1540 default:
1541 return (sysctl_bounded_arr(ipv6ctl_vars, nitems(ipv6ctl_vars),
1542 name, namelen, oldp, oldlenp, newp, newlen));
1543#else
1544 default:
1545 return (EOPNOTSUPP);
1546#endif /* SMALL_KERNEL */
1547 }
1548 /* NOTREACHED */
1549}
1550
1551void
1552ip6_send_dispatch(void *xmq)
1553{
1554 struct mbuf_queue *mq = xmq;
1555 struct mbuf *m;
1556 struct mbuf_list ml;
1557
1558 mq_delist(mq, &ml);
1559 if (ml_empty(&ml))
1560 return;
1561
1562 NET_LOCK_SHARED();
1563 while ((m = ml_dequeue(&ml)) != NULL) {
1564 ip6_output(m, NULL, NULL, 0, NULL, NULL);
1565 }
1566 NET_UNLOCK_SHARED();
1567}
1568
1569void
1570ip6_send(struct mbuf *m)
1571{
1572 mq_enqueue(&ip6send_mq, m);
1573 task_add(net_tq(0), &ip6send_task);
1574}