Local working copy of OpenBSD sys/netinet/ip_input.c (IPv4 input path),
kept for patching; upstream revision is identified by the $OpenBSD$ tag below.
1/* $OpenBSD: ip_input.c,v 1.426 2025/11/12 10:00:27 hshoexer Exp $ */
2/* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */
3
4/*
5 * Copyright (c) 1982, 1986, 1988, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
33 */
34
35#include "pf.h"
36#include "carp.h"
37#include "ether.h"
38
39#include <sys/param.h>
40#include <sys/systm.h>
41#include <sys/mbuf.h>
42#include <sys/domain.h>
43#include <sys/mutex.h>
44#include <sys/protosw.h>
45#include <sys/socket.h>
46#include <sys/sysctl.h>
47#include <sys/pool.h>
48#include <sys/task.h>
49
50#include <net/if.h>
51#include <net/if_var.h>
52#include <net/if_dl.h>
53#include <net/route.h>
54#include <net/netisr.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/if_ether.h>
59#include <netinet/ip.h>
60#include <netinet/in_pcb.h>
61#include <netinet/in_var.h>
62#include <netinet/ip_var.h>
63#include <netinet/ip_icmp.h>
64#include <net/if_types.h>
65
66#ifdef INET6
67#include <netinet6/ip6_var.h>
68#endif
69
70#if NPF > 0
71#include <net/pfvar.h>
72#endif
73
74#ifdef MROUTING
75#include <netinet/ip_mroute.h>
76#endif
77
78#ifdef IPSEC
79#include <netinet/ip_ipsp.h>
80#endif /* IPSEC */
81
82#if NCARP > 0
83#include <netinet/ip_carp.h>
84#endif
85
86/*
87 * Locks used to protect global variables in this file:
88 * I immutable after creation
89 * N net lock
90 * Q ipq_mutex
91 * a atomic operations
92 */
93
94/* values controllable via sysctl */
95int ip_forwarding = 0; /* [a] */
96int ipmforwarding = 0; /* [a] */
97int ipmultipath = 0; /* [a] */
98int ip_sendredirects = 1; /* [a] */
99int ip_dosourceroute = 0; /* [a] */
100int ip_defttl = IPDEFTTL; /* [a] */
101int ip_mtudisc = 1; /* [a] */
102int ip_mtudisc_timeout = IPMTUDISCTIMEOUT; /* [a] */
103int ip_directedbcast = 0; /* [a] */
104
/* Protects the reassembly queue ipq and the ip_frags counter ([Q] above). */
105struct mutex ipq_mutex = MUTEX_INITIALIZER(IPL_SOFTNET);
106
107/* IP reassembly queue */
108LIST_HEAD(, ipq) ipq; /* [Q] */
109
110/* Keep track of memory used for reassembly */
111int ip_maxqueue = 300; /* [a] */
112int ip_frags = 0; /* [Q] */
113
114#ifndef SMALL_KERNEL
/* sysctl name -> variable map with min/max bounds for the integer knobs */
115const struct sysctl_bounded_args ipctl_vars[] = {
116 { IPCTL_FORWARDING, &ip_forwarding, 0, 2 },
117 { IPCTL_SENDREDIRECTS, &ip_sendredirects, 0, 1 },
118 { IPCTL_DIRECTEDBCAST, &ip_directedbcast, 0, 1 },
119#ifdef MROUTING
120 { IPCTL_MRTPROTO, &ip_mrtproto, SYSCTL_INT_READONLY },
121#endif
122 { IPCTL_DEFTTL, &ip_defttl, 0, 255 },
123 { IPCTL_IPPORT_FIRSTAUTO, &ipport_firstauto, 0, 65535 },
124 { IPCTL_IPPORT_LASTAUTO, &ipport_lastauto, 0, 65535 },
125 { IPCTL_IPPORT_HIFIRSTAUTO, &ipport_hifirstauto, 0, 65535 },
126 { IPCTL_IPPORT_HILASTAUTO, &ipport_hilastauto, 0, 65535 },
127 { IPCTL_IPPORT_MAXQUEUE, &ip_maxqueue, 0, 10000 },
128 { IPCTL_MFORWARDING, &ipmforwarding, 0, 1 },
129 { IPCTL_ARPTIMEOUT, &arpt_keep, 0, INT_MAX },
130 { IPCTL_ARPDOWN, &arpt_down, 0, INT_MAX },
131};
132#endif /* SMALL_KERNEL */
133
/* Input queue of locally destined packets, drained by ipintr() (NETISR_IP). */
134struct niqueue ipintrq = NIQUEUE_INITIALIZER(IPQ_MAXLEN, NETISR_IP);
135
/* Pools for reassembly queue entries and reassembly headers. */
136struct pool ipqent_pool;
137struct pool ipq_pool;
138
/* Per-CPU IP statistics counters, allocated in ip_init(). */
139struct cpumem *ipcounters;
140
141int ip_sysctl_ipstat(void *, size_t *, void *);
142
143static struct mbuf_queue ipsend_mq;
144static struct mbuf_queue ipsendraw_mq;
145
146extern struct niqueue arpinq;
147
/* Forward declarations for the input path below. */
148int ip_ours(struct mbuf **, int *, int, int, struct netstack *);
149int ip_ours_enqueue(struct mbuf **mp, int *offp, int nxt);
150int ip_dooptions(struct mbuf *, struct ifnet *, int);
151int in_ouraddr(struct mbuf *, struct ifnet *, struct route *, int);
152
153int ip_fragcheck(struct mbuf **, int *);
154struct mbuf * ip_reass(struct ipqent *, struct ipq *);
155void ip_freef(struct ipq *);
156void ip_flush(int);
157
/* Tasks that drain ipsend_mq / ipsendraw_mq. */
158static void ip_send_dispatch(void *);
159static void ip_sendraw_dispatch(void *);
160static struct task ipsend_task = TASK_INITIALIZER(ip_send_dispatch, &ipsend_mq);
161static struct task ipsendraw_task =
162 TASK_INITIALIZER(ip_sendraw_dispatch, &ipsendraw_mq);
163
164/*
165 * Used to save the IP options in case a protocol wants to respond
166 * to an incoming packet over the same route if the packet got here
167 * using IP source routing. This allows connection establishment and
168 * maintenance when the remote end is on a network that is not known
169 * to us.
170 */
171struct ip_srcrt {
172 int isr_nhops; /* number of hops */
173 struct in_addr isr_dst; /* final destination */
174 char isr_nop; /* one NOP to align */
175 char isr_hdr[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN & OFFSET */
176 struct in_addr isr_routes[MAX_IPOPTLEN/sizeof(struct in_addr)];
177};
178
/* Save an incoming source route (option bytes + original source) for replies. */
179void save_rte(struct mbuf *, u_char *, struct in_addr);
180
181/*
182 * IP initialization: fill in IP protocol switch table.
183 * All protocols not implemented in kernel go to raw IP protocol handler.
184 */
185void
186ip_init(void)
187{
188 const struct protosw *pr;
189 int i;
190 const u_int16_t defbaddynamicports_tcp[] = DEFBADDYNAMICPORTS_TCP;
191 const u_int16_t defbaddynamicports_udp[] = DEFBADDYNAMICPORTS_UDP;
192 const u_int16_t defrootonlyports_tcp[] = DEFROOTONLYPORTS_TCP;
193 const u_int16_t defrootonlyports_udp[] = DEFROOTONLYPORTS_UDP;
194
195 ipcounters = counters_alloc(ips_ncounters);
196
197 pool_init(&ipqent_pool, sizeof(struct ipqent), 0,
198 IPL_SOFTNET, 0, "ipqe", NULL);
199 pool_init(&ipq_pool, sizeof(struct ipq), 0,
200 IPL_SOFTNET, 0, "ipq", NULL);
201
	/* First default every protocol slot to the raw IP handler... */
202 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
203 if (pr == NULL)
204 panic("ip_init");
205 for (i = 0; i < IPPROTO_MAX; i++)
206 ip_protox[i] = pr - inetsw;
	/* ...then point each implemented protocol at its own entry. */
207 for (pr = inetdomain.dom_protosw;
208 pr < inetdomain.dom_protoswNPROTOSW; pr++)
209 if (pr->pr_domain->dom_family == PF_INET &&
210 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW &&
211 pr->pr_protocol < IPPROTO_MAX)
212 ip_protox[pr->pr_protocol] = pr - inetsw;
213 LIST_INIT(&ipq);
214
215 /* Fill in list of ports not to allocate dynamically. */
216 memset(&baddynamicports, 0, sizeof(baddynamicports));
217 for (i = 0; defbaddynamicports_tcp[i] != 0; i++)
218 DP_SET(baddynamicports.tcp, defbaddynamicports_tcp[i]);
219 for (i = 0; defbaddynamicports_udp[i] != 0; i++)
220 DP_SET(baddynamicports.udp, defbaddynamicports_udp[i]);
221
222 /* Fill in list of ports only root can bind to. */
223 memset(&rootonlyports, 0, sizeof(rootonlyports));
224 for (i = 0; defrootonlyports_tcp[i] != 0; i++)
225 DP_SET(rootonlyports.tcp, defrootonlyports_tcp[i]);
226 for (i = 0; defrootonlyports_udp[i] != 0; i++)
227 DP_SET(rootonlyports.udp, defrootonlyports_udp[i]);
228
229 mq_init(&ipsend_mq, 64, IPL_SOFTNET);
230 mq_init(&ipsendraw_mq, 64, IPL_SOFTNET);
231
232#if NETHER > 0
233 arpinit();
234#endif
235#ifdef IPSEC
236 ipsec_init();
237#endif
238#ifdef MROUTING
239 mrt_init();
240#endif
241}
242
243/*
244 * Enqueue packet for local delivery. Queuing is used as a boundary
245 * between the network layer (input/forward path) running with
246 * NET_LOCK_SHARED() and the transport layer needing it exclusively.
247 */
248int
249ip_ours(struct mbuf **mp, int *offp, int nxt, int af, struct netstack *ns)
250{
	/* Reassemble fragments first; this may consume the mbuf. */
251 nxt = ip_fragcheck(mp, offp);
252 if (nxt == IPPROTO_DONE)
253 return IPPROTO_DONE;
254
255 /* We are already in a IPv4/IPv6 local deliver loop. */
256 if (af != AF_UNSPEC)
257 return nxt;
258
	/* Attempt delivery under the shared net lock... */
259 nxt = ip_deliver(mp, offp, nxt, AF_INET, 1, ns);
260 if (nxt == IPPROTO_DONE)
261 return IPPROTO_DONE;
262
	/* ...otherwise queue for ipintr(), which runs with it exclusive. */
263 return ip_ours_enqueue(mp, offp, nxt);
264}
265
/*
 * Queue a locally destined packet on ipintrq for later delivery by
 * ipintr().  Always consumes *mp and returns IPPROTO_DONE.
 */
266int
267ip_ours_enqueue(struct mbuf **mp, int *offp, int nxt)
268{
269 /* save values for later, use after dequeue */
270 if (*offp != sizeof(struct ip)) {
271 struct m_tag *mtag;
272 struct ipoffnxt *ion;
273
274 /* mbuf tags are expensive, but only used for header options */
275 mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion),
276 M_NOWAIT);
277 if (mtag == NULL) {
278 ipstat_inc(ips_idropped);
279 m_freemp(mp);
280 return IPPROTO_DONE;
281 }
		/* Tag payload lives directly after the m_tag header. */
282 ion = (struct ipoffnxt *)(mtag + 1);
283 ion->ion_off = *offp;
284 ion->ion_nxt = nxt;
285
286 m_tag_prepend(*mp, mtag);
287 }
288
289 niq_enqueue(&ipintrq, *mp);
290 *mp = NULL;
291 return IPPROTO_DONE;
292}
293
294/*
295 * Dequeue and process locally delivered packets.
296 * This is called with exclusive NET_LOCK().
297 */
298void
299ipintr(void)
300{
301 struct mbuf *m;
302
303 while ((m = niq_dequeue(&ipintrq)) != NULL) {
304 struct m_tag *mtag;
305 int off, nxt;
306
307#ifdef DIAGNOSTIC
308 if ((m->m_flags & M_PKTHDR) == 0)
309 panic("ipintr no HDR");
310#endif
		/* Recover offset/protocol stashed by ip_ours_enqueue(). */
311 mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL);
312 if (mtag != NULL) {
313 struct ipoffnxt *ion;
314
315 ion = (struct ipoffnxt *)(mtag + 1);
316 off = ion->ion_off;
317 nxt = ion->ion_nxt;
318
319 m_tag_delete(m, mtag);
320 } else {
			/* No tag: plain header, read the fields directly. */
321 struct ip *ip;
322
323 ip = mtod(m, struct ip *);
324 off = ip->ip_hl << 2;
325 nxt = ip->ip_p;
326 }
327
		/* shared == 0: we hold the net lock exclusively here. */
328 nxt = ip_deliver(&m, &off, nxt, AF_INET, 0, NULL);
329 KASSERT(nxt == IPPROTO_DONE);
330 }
331}
332
333/*
334 * IPv4 input routine.
335 *
336 * Checksum and byte swap header. Process options. Forward or deliver.
337 */
338void
339ipv4_input(struct ifnet *ifp, struct mbuf *m, struct netstack *ns)
340{
341 int off, nxt;
342
343 off = 0;
344 nxt = ip_input_if(&m, &off, IPPROTO_IPV4, AF_UNSPEC, ifp, ns);
345 KASSERT(nxt == IPPROTO_DONE);
346}
347
/*
 * Sanity check an incoming IPv4 packet: pull up the header, verify
 * version, header length, checksum (hardware-verified or software)
 * and total length, and trim any link-layer padding.  Returns the
 * (possibly replaced) mbuf, or NULL if the packet was bad and freed.
 */
348struct mbuf *
349ipv4_check(struct ifnet *ifp, struct mbuf *m)
350{
351 struct ip *ip;
352 int hlen, len;
353
354 if (m->m_len < sizeof(*ip)) {
355 m = m_pullup(m, sizeof(*ip));
356 if (m == NULL) {
357 ipstat_inc(ips_toosmall);
358 return (NULL);
359 }
360 }
361
362 ip = mtod(m, struct ip *);
363 if (ip->ip_v != IPVERSION) {
364 ipstat_inc(ips_badvers);
365 goto bad;
366 }
367
368 hlen = ip->ip_hl << 2;
369 if (hlen < sizeof(*ip)) { /* minimum header length */
370 ipstat_inc(ips_badhlen);
371 goto bad;
372 }
373 if (hlen > m->m_len) {
374 m = m_pullup(m, hlen);
375 if (m == NULL) {
376 ipstat_inc(ips_badhlen);
377 return (NULL);
378 }
379 ip = mtod(m, struct ip *);
380 }
381
382 /* 127/8 must not appear on wire - RFC1122 */
383 if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
384 (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
385 if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
386 ipstat_inc(ips_badaddr);
387 goto bad;
388 }
389 }
390
391 if (!ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK)) {
392 if (ISSET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_BAD)) {
393 ipstat_inc(ips_badsum);
394 goto bad;
395 }
396
		/* Hardware did not verify; fall back to software checksum. */
397 ipstat_inc(ips_inswcsum);
398 if (in_cksum(m, hlen) != 0) {
399 ipstat_inc(ips_badsum);
400 goto bad;
401 }
402
403 SET(m->m_pkthdr.csum_flags, M_IPV4_CSUM_IN_OK);
404 }
405
406 /* Retrieve the packet length. */
407 len = ntohs(ip->ip_len);
408
409 /*
410 * Convert fields to host representation.
411 */
412 if (len < hlen) {
413 ipstat_inc(ips_badlen);
414 goto bad;
415 }
416
417 /*
418 * Check that the amount of data in the buffers
419 * is at least as much as the IP header would have us expect.
420 * Trim mbufs if longer than we expect.
421 * Drop packet if shorter than we expect.
422 */
423 if (m->m_pkthdr.len < len) {
424 ipstat_inc(ips_tooshort);
425 goto bad;
426 }
427 if (m->m_pkthdr.len > len) {
428 if (m->m_len == m->m_pkthdr.len) {
429 m->m_len = len;
430 m->m_pkthdr.len = len;
431 } else
432 m_adj(m, len - m->m_pkthdr.len);
433 }
434
435 return (m);
436bad:
437 m_freem(m);
438 return (NULL);
439}
440
/*
 * Main IPv4 input switch for a packet received on ifp: validate it,
 * run pf and option processing, then either deliver it locally via
 * ip_ours(), hand it to multicast forwarding, or forward it.
 * Always consumes *mp (or passes ownership on) and returns the next
 * protocol for the deliver loop, IPPROTO_DONE when finished.
 */
441int
442ip_input_if(struct mbuf **mp, int *offp, int nxt, int af, struct ifnet *ifp,
443 struct netstack *ns)
444{
445 struct route iproute, *ro = NULL;
446 struct mbuf *m;
447 struct ip *ip;
448 int hlen;
449#if NPF > 0
450 struct in_addr odst;
451#endif
452 int flags = 0;
453
454 KASSERT(*offp == 0);
455
456 ipstat_inc(ips_total);
457 m = *mp = ipv4_check(ifp, *mp);
458 if (m == NULL)
459 goto bad;
460
461 ip = mtod(m, struct ip *);
462
463#if NCARP > 0
464 if (carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
465 &ip->ip_dst.s_addr, (ip->ip_p == IPPROTO_ICMP ? 0 : 1)))
466 goto bad;
467#endif
468
469#if NPF > 0
470 /*
471 * Packet filter
472 */
473 odst = ip->ip_dst;
474 if (pf_test(AF_INET, PF_IN, ifp, mp) != PF_PASS)
475 goto bad;
476 m = *mp;
477 if (m == NULL)
478 goto bad;
479
480 ip = mtod(m, struct ip *);
	/* pf rewrote the destination (rdr/route-to): remember for ICMP. */
481 if (odst.s_addr != ip->ip_dst.s_addr)
482 SET(flags, IP_REDIRECT);
483#endif
484
	/* Snapshot the forwarding sysctls into per-packet flags. */
485 switch (atomic_load_int(&ip_forwarding)) {
486 case 2:
487 SET(flags, IP_FORWARDING_IPSEC);
488 /* FALLTHROUGH */
489 case 1:
490 SET(flags, IP_FORWARDING);
491 break;
492 }
493 if (atomic_load_int(&ip_directedbcast))
494 SET(flags, IP_ALLOWBROADCAST);
495
496 hlen = ip->ip_hl << 2;
497
498 /*
499 * Process options and, if not destined for us,
500 * ship it on. ip_dooptions returns 1 when an
501 * error was detected (causing an icmp message
502 * to be sent and the original packet to be freed).
503 */
504 if (hlen > sizeof (struct ip) && ip_dooptions(m, ifp, flags)) {
505 m = *mp = NULL;
506 goto bad;
507 }
508
	/* Use the caller's cached route when available, else a local one. */
509 if (ns == NULL) {
510 ro = &iproute;
511 ro->ro_rt = NULL;
512 } else {
513 ro = &ns->ns_route;
514 }
515 switch (in_ouraddr(m, ifp, ro, flags)) {
516 case 2:
517 goto bad;
518 case 1:
519 nxt = ip_ours(mp, offp, nxt, af, ns);
520 goto out;
521 }
522
523 if (IN_MULTICAST(ip->ip_dst.s_addr)) {
524 /*
525 * Make sure M_MCAST is set. It should theoretically
526 * already be there, but let's play safe because upper
527 * layers check for this flag.
528 */
529 m->m_flags |= M_MCAST;
530
531#ifdef MROUTING
532 if (atomic_load_int(&ipmforwarding) &&
533 ip_mrouter[ifp->if_rdomain]) {
534 int error;
535
536 if (m->m_flags & M_EXT) {
537 if ((m = *mp = m_pullup(m, hlen)) == NULL) {
538 ipstat_inc(ips_toosmall);
539 goto bad;
540 }
541 ip = mtod(m, struct ip *);
542 }
543 /*
544 * If we are acting as a multicast router, all
545 * incoming multicast packets are passed to the
546 * kernel-level multicast forwarding function.
547 * The packet is returned (relatively) intact; if
548 * ip_mforward() returns a non-zero value, the packet
549 * must be discarded, else it may be accepted below.
550 *
551 * (The IP ident field is put in the same byte order
552 * as expected when ip_mforward() is called from
553 * ip_output().)
554 */
555 KERNEL_LOCK();
556 error = ip_mforward(m, ifp, flags);
557 KERNEL_UNLOCK();
558 if (error) {
559 ipstat_inc(ips_cantforward);
560 goto bad;
561 }
562
563 /*
564 * The process-level routing daemon needs to receive
565 * all multicast IGMP packets, whether or not this
566 * host belongs to their destination groups.
567 */
568 if (ip->ip_p == IPPROTO_IGMP) {
569 nxt = ip_ours(mp, offp, nxt, af, ns);
570 goto out;
571 }
572 ipstat_inc(ips_forward);
573 }
574#endif
575 /*
576 * See if we belong to the destination multicast group on the
577 * arrival interface.
578 */
579 if (!in_hasmulti(&ip->ip_dst, ifp)) {
580 ipstat_inc(ips_notmember);
581 if (!IN_LOCAL_GROUP(ip->ip_dst.s_addr))
582 ipstat_inc(ips_cantforward);
583 goto bad;
584 }
585 nxt = ip_ours(mp, offp, nxt, af, ns);
586 goto out;
587 }
588
589#if NCARP > 0
590 if (ip->ip_p == IPPROTO_ICMP &&
591 carp_lsdrop(ifp, m, AF_INET, &ip->ip_src.s_addr,
592 &ip->ip_dst.s_addr, 1))
593 goto bad;
594#endif
595 /*
596 * Not for us; forward if possible and desirable.
597 */
598 if (!ISSET(flags, IP_FORWARDING)) {
599 ipstat_inc(ips_cantforward);
600 goto bad;
601 }
602#ifdef IPSEC
603 if (ipsec_in_use) {
604 int rv;
605
606 rv = ipsec_forward_check(m, hlen, AF_INET);
607 if (rv != 0) {
608 ipstat_inc(ips_cantforward);
609 goto bad;
610 }
611 /*
612 * Fall through, forward packet. Outbound IPsec policy
613 * checking will occur in ip_output().
614 */
615 }
616#endif /* IPSEC */
617
	/* ip_forward() takes over the mbuf; only a local route is freed. */
618 ip_forward(m, ifp, ro, flags);
619 *mp = NULL;
620 if (ro == &iproute)
621 rtfree(ro->ro_rt);
622 return IPPROTO_DONE;
623 bad:
624 nxt = IPPROTO_DONE;
625 m_freemp(mp);
626 out:
627 if (ro == &iproute)
628 rtfree(ro->ro_rt);
629 return nxt;
630}
631
/*
 * Handle fragment reassembly for a locally destined packet.
 * On return *mp holds either the original packet or a reassembled
 * datagram, *offp is the header length, and the IP protocol is
 * returned; IPPROTO_DONE means the mbuf was queued or dropped.
 */
632int
633ip_fragcheck(struct mbuf **mp, int *offp)
634{
635 struct ip *ip;
636 struct ipq *fp;
637 struct ipqent *ipqe;
638 int hlen;
639 uint16_t mff;
640
641 ip = mtod(*mp, struct ip *);
642 hlen = ip->ip_hl << 2;
643
644 /*
645 * If offset or more fragments are set, must reassemble.
646 * Otherwise, nothing need be done.
647 * (We could look in the reassembly queue to see
648 * if the packet was previously fragmented,
649 * but it's not worth the time; just let them time out.)
650 */
651 if (ISSET(ip->ip_off, htons(IP_OFFMASK | IP_MF))) {
652 if ((*mp)->m_flags & M_EXT) { /* XXX */
653 if ((*mp = m_pullup(*mp, hlen)) == NULL) {
654 ipstat_inc(ips_toosmall);
655 return IPPROTO_DONE;
656 }
657 ip = mtod(*mp, struct ip *);
658 }
659
660 /*
661 * Adjust ip_len to not reflect header,
662 * set ipqe_mff if more fragments are expected,
663 * convert offset of this to bytes.
664 */
665 ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
666 mff = ISSET(ip->ip_off, htons(IP_MF));
667 if (mff) {
668 /*
669 * Make sure that fragments have a data length
670 * that's a non-zero multiple of 8 bytes.
671 */
672 if (ntohs(ip->ip_len) == 0 ||
673 (ntohs(ip->ip_len) & 0x7) != 0) {
674 ipstat_inc(ips_badfrags);
675 m_freemp(mp);
676 return IPPROTO_DONE;
677 }
678 }
		/* Fragment offset field is in 8-byte units; scale to bytes. */
679 ip->ip_off = htons(ntohs(ip->ip_off) << 3);
680
681 mtx_enter(&ipq_mutex);
682
683 /*
684 * Look for queue of fragments
685 * of this datagram.
686 */
687 LIST_FOREACH(fp, &ipq, ipq_q) {
688 if (ip->ip_id == fp->ipq_id &&
689 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
690 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
691 ip->ip_p == fp->ipq_p)
692 break;
693 }
694
695 /*
696 * If datagram marked as having more fragments
697 * or if this is not the first fragment,
698 * attempt reassembly; if it succeeds, proceed.
699 */
700 if (mff || ip->ip_off) {
701 int ip_maxqueue_local = atomic_load_int(&ip_maxqueue);
702
703 ipstat_inc(ips_fragments);
704 if (ip_frags + 1 > ip_maxqueue_local) {
705 ip_flush(ip_maxqueue_local);
706 ipstat_inc(ips_rcvmemdrop);
707 goto bad;
708 }
709
710 ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
711 if (ipqe == NULL) {
712 ipstat_inc(ips_rcvmemdrop);
713 goto bad;
714 }
715 ip_frags++;
716 ipqe->ipqe_mff = mff;
717 ipqe->ipqe_m = *mp;
718 ipqe->ipqe_ip = ip;
719 *mp = ip_reass(ipqe, fp);
720 if (*mp == NULL)
721 goto bad;
722 ipstat_inc(ips_reassembled);
723 ip = mtod(*mp, struct ip *);
724 hlen = ip->ip_hl << 2;
725 ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
726 } else {
			/* Unfragmented after all: discard any stale queue. */
727 if (fp != NULL)
728 ip_freef(fp);
729 }
730
731 mtx_leave(&ipq_mutex);
732 }
733
734 *offp = hlen;
735 return ip->ip_p;
736
737 bad:
738 mtx_leave(&ipq_mutex);
739 m_freemp(mp);
740 return IPPROTO_DONE;
741}
742
/* Bump the per-family input statistic; requires `af' in scope. */
743#ifndef INET6
744#define IPSTAT_INC(name) ipstat_inc(ips_##name)
745#else
746#define IPSTAT_INC(name) (af == AF_INET ? \
747 ipstat_inc(ips_##name) : ip6stat_inc(ip6s_##name))
748#endif
749
/*
 * Deliver a local packet up the protocol chain, starting at header
 * offset *offp with protocol nxt.  With `shared' set the caller holds
 * only a shared net lock; protocols without PR_MPINPUT are then
 * re-queued for exclusive processing instead of being called here.
 * Returns IPPROTO_DONE once the packet has been consumed.
 */
750int
751ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared,
752 struct netstack *ns)
753{
754#ifdef INET6
755 int nest = 0;
756#endif
757
758 /*
759 * Tell launch routine the next header
760 */
761 IPSTAT_INC(delivered);
762
763 while (nxt != IPPROTO_DONE) {
764 const struct protosw *psw;
765 int naf;
766
		/*
		 * NOTE(review): no default case; af must be AF_INET or
		 * AF_INET6 here, otherwise psw is used uninitialized.
		 */
767 switch (af) {
768 case AF_INET:
769 psw = &inetsw[ip_protox[nxt]];
770 break;
771#ifdef INET6
772 case AF_INET6:
773 psw = &inet6sw[ip6_protox[nxt]];
774 break;
775#endif
776 }
777 if (shared && !ISSET(psw->pr_flags, PR_MPINPUT)) {
778 /* delivery not finished, decrement counter, queue */
779 switch (af) {
780 case AF_INET:
781 counters_dec(ipcounters, ips_delivered);
782 return ip_ours_enqueue(mp, offp, nxt);
783#ifdef INET6
784 case AF_INET6:
785 counters_dec(ip6counters, ip6s_delivered);
786 return ip6_ours_enqueue(mp, offp, nxt);
787#endif
788 }
789 break;
790 }
791
792#ifdef INET6
		/* Bound the number of chained IPv6 extension headers. */
793 if (af == AF_INET6 &&
794 (++nest > atomic_load_int(&ip6_hdrnestlimit))) {
795 ip6stat_inc(ip6s_toomanyhdr);
796 goto bad;
797 }
798#endif
799
800 /*
801 * protection against faulty packet - there should be
802 * more sanity checks in header chain processing.
803 */
804 if ((*mp)->m_pkthdr.len < *offp) {
805 IPSTAT_INC(tooshort);
806 goto bad;
807 }
808
809#ifdef IPSEC
810 if (ipsec_in_use) {
811 if (ipsec_local_check(*mp, *offp, nxt, af) != 0) {
812 IPSTAT_INC(cantforward);
813 goto bad;
814 }
815 }
816 /* Otherwise, just fall through and deliver the packet */
817#endif
818
		/* Tunnelled IPv4/IPv6 switches the family for the next pass. */
819 switch (nxt) {
820 case IPPROTO_IPV4:
821 naf = AF_INET;
822 ipstat_inc(ips_delivered);
823 break;
824#ifdef INET6
825 case IPPROTO_IPV6:
826 naf = AF_INET6;
827 ip6stat_inc(ip6s_delivered);
828 break;
829#endif
830 default:
831 naf = af;
832 break;
833 }
834 nxt = (*psw->pr_input)(mp, offp, nxt, af, ns);
835 af = naf;
836 }
837 return nxt;
838 bad:
839 m_freemp(mp);
840 return IPPROTO_DONE;
841}
842#undef IPSTAT_INC
843
/*
 * Decide whether a packet is addressed to this host.
 * Returns 0 if not ours, 1 if ours (local address or broadcast),
 * and 2 if it must be dropped (received on the wrong interface).
 */
844int
845in_ouraddr(struct mbuf *m, struct ifnet *ifp, struct route *ro, int flags)
846{
847 struct rtentry *rt;
848 struct ip *ip;
849 int match = 0;
850
851#if NPF > 0
	/* pf may already have decided (e.g. via state lookup). */
852 switch (pf_ouraddr(m)) {
853 case 0:
854 return (0);
855 case 1:
856 return (1);
857 default:
858 /* pf does not know it */
859 break;
860 }
861#endif
862
863 ip = mtod(m, struct ip *);
864
865 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
866 ip->ip_dst.s_addr == INADDR_ANY) {
867 m->m_flags |= M_BCAST;
868 return (1);
869 }
870
871 rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, m->m_pkthdr.ph_rtableid);
872 if (rt != NULL) {
873 if (ISSET(rt->rt_flags, RTF_LOCAL))
874 match = 1;
875
876 /*
877 * If directedbcast is enabled we only consider it local
878 * if it is received on the interface with that address.
879 */
880 if (ISSET(rt->rt_flags, RTF_BROADCAST) &&
881 (!ISSET(flags, IP_ALLOWBROADCAST) ||
882 rt->rt_ifidx == ifp->if_index)) {
883 match = 1;
884
885 /* Make sure M_BCAST is set */
886 m->m_flags |= M_BCAST;
887 }
888 }
889
890 if (!match) {
891 struct ifaddr *ifa;
892
893 /*
894 * No local address or broadcast address found, so check for
895 * ancient classful broadcast addresses.
896 * It must have been broadcast on the link layer, and for an
897 * address on the interface it was received on.
898 */
899 if (!ISSET(m->m_flags, M_BCAST) ||
900 !IN_CLASSFULBROADCAST(ip->ip_dst.s_addr, ip->ip_dst.s_addr))
901 return (0);
902
903 if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid))
904 return (0);
905 /*
906 * The check in the loop assumes you only rx a packet on an UP
907 * interface, and that M_BCAST will only be set on a BROADCAST
908 * interface.
909 */
910 NET_ASSERT_LOCKED();
911 TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
912 if (ifa->ifa_addr->sa_family != AF_INET)
913 continue;
914
915 if (IN_CLASSFULBROADCAST(ip->ip_dst.s_addr,
916 ifatoia(ifa)->ia_addr.sin_addr.s_addr)) {
917 match = 1;
918 break;
919 }
920 }
921 } else if (!ISSET(flags, IP_FORWARDING) &&
922 rt->rt_ifidx != ifp->if_index &&
923 !((ifp->if_flags & IFF_LOOPBACK) || (ifp->if_type == IFT_ENC) ||
924 (m->m_pkthdr.pf.flags & PF_TAG_TRANSLATE_LOCALHOST))) {
925 /* received on wrong interface. */
926#if NCARP > 0
927 struct ifnet *out_if;
928
929 /*
930 * Virtual IPs on carp interfaces need to be checked also
931 * against the parent interface and other carp interfaces
932 * sharing the same parent.
933 */
934 out_if = if_get(rt->rt_ifidx);
935 if (!(out_if && carp_strict_addr_chk(out_if, ifp))) {
936 ipstat_inc(ips_wrongif);
937 match = 2;
938 }
939 if_put(out_if);
940#else
941 ipstat_inc(ips_wrongif);
942 match = 2;
943#endif
944 }
945
946 return (match);
947}
948
949/*
950 * Take incoming datagram fragment and try to
951 * reassemble it into whole datagram. If a chain for
952 * reassembly of this datagram already exists, then it
953 * is given as fp; otherwise have to make a chain.
954 */
955struct mbuf *
956ip_reass(struct ipqent *ipqe, struct ipq *fp)
957{
958 struct mbuf *m = ipqe->ipqe_m;
959 struct ipqent *nq, *p, *q;
960 struct ip *ip;
961 struct mbuf *t;
962 int hlen = ipqe->ipqe_ip->ip_hl << 2;
963 int i, next;
964 u_int8_t ecn, ecn0;
965
966 MUTEX_ASSERT_LOCKED(&ipq_mutex);
967
968 /*
969 * Presence of header sizes in mbufs
970 * would confuse code below.
971 */
972 m->m_data += hlen;
973 m->m_len -= hlen;
974
975 /*
976 * If first fragment to arrive, create a reassembly queue.
977 */
978 if (fp == NULL) {
979 fp = pool_get(&ipq_pool, PR_NOWAIT);
980 if (fp == NULL)
981 goto dropfrag;
982 LIST_INSERT_HEAD(&ipq, fp, ipq_q);
983 fp->ipq_ttl = IPFRAGTTL;
984 fp->ipq_p = ipqe->ipqe_ip->ip_p;
985 fp->ipq_id = ipqe->ipqe_ip->ip_id;
986 LIST_INIT(&fp->ipq_fragq);
987 fp->ipq_src = ipqe->ipqe_ip->ip_src;
988 fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
989 p = NULL;
990 goto insert;
991 }
992
993 /*
994 * Handle ECN by comparing this segment with the first one;
995 * if CE is set, do not lose CE.
996 * drop if CE and not-ECT are mixed for the same packet.
997 */
998 ecn = ipqe->ipqe_ip->ip_tos & IPTOS_ECN_MASK;
999 ecn0 = LIST_FIRST(&fp->ipq_fragq)->ipqe_ip->ip_tos & IPTOS_ECN_MASK;
1000 if (ecn == IPTOS_ECN_CE) {
1001 if (ecn0 == IPTOS_ECN_NOTECT)
1002 goto dropfrag;
1003 if (ecn0 != IPTOS_ECN_CE)
1004 LIST_FIRST(&fp->ipq_fragq)->ipqe_ip->ip_tos |=
1005 IPTOS_ECN_CE;
1006 }
1007 if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
1008 goto dropfrag;
1009
1010 /*
1011 * Find a segment which begins after this one does.
1012 */
1013 for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
1014 p = q, q = LIST_NEXT(q, ipqe_q))
1015 if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
1016 break;
1017
1018 /*
1019 * If there is a preceding segment, it may provide some of
1020 * our data already. If so, drop the data from the incoming
1021 * segment. If it provides all of our data, drop us.
1022 */
1023 if (p != NULL) {
1024 i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
1025 ntohs(ipqe->ipqe_ip->ip_off);
1026 if (i > 0) {
1027 if (i >= ntohs(ipqe->ipqe_ip->ip_len))
1028 goto dropfrag;
1029 m_adj(ipqe->ipqe_m, i);
1030 ipqe->ipqe_ip->ip_off =
1031 htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
1032 ipqe->ipqe_ip->ip_len =
1033 htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
1034 }
1035 }
1036
1037 /*
1038 * While we overlap succeeding segments trim them or,
1039 * if they are completely covered, dequeue them.
1040 */
1041 for (; q != NULL &&
1042 ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
1043 ntohs(q->ipqe_ip->ip_off); q = nq) {
1044 i = (ntohs(ipqe->ipqe_ip->ip_off) +
1045 ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
1046 if (i < ntohs(q->ipqe_ip->ip_len)) {
1047 q->ipqe_ip->ip_len =
1048 htons(ntohs(q->ipqe_ip->ip_len) - i);
1049 q->ipqe_ip->ip_off =
1050 htons(ntohs(q->ipqe_ip->ip_off) + i);
1051 m_adj(q->ipqe_m, i);
1052 break;
1053 }
1054 nq = LIST_NEXT(q, ipqe_q);
1055 m_freem(q->ipqe_m);
1056 LIST_REMOVE(q, ipqe_q);
1057 pool_put(&ipqent_pool, q);
1058 ip_frags--;
1059 }
1060
1061insert:
1062 /*
1063 * Stick new segment in its place;
1064 * check for complete reassembly.
1065 */
1066 if (p == NULL) {
1067 LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
1068 } else {
1069 LIST_INSERT_AFTER(p, ipqe, ipqe_q);
1070 }
	/* Walk the chain; a gap means reassembly is not yet complete. */
1071 next = 0;
1072 for (p = NULL, q = LIST_FIRST(&fp->ipq_fragq); q != NULL;
1073 p = q, q = LIST_NEXT(q, ipqe_q)) {
1074 if (ntohs(q->ipqe_ip->ip_off) != next)
1075 return (0);
1076 next += ntohs(q->ipqe_ip->ip_len);
1077 }
	/* Last fragment still expects more: keep waiting. */
1078 if (p->ipqe_mff)
1079 return (0);
1080
1081 /*
1082 * Reassembly is complete. Check for a bogus message size and
1083 * concatenate fragments.
1084 */
1085 q = LIST_FIRST(&fp->ipq_fragq);
1086 ip = q->ipqe_ip;
1087 if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
1088 ipstat_inc(ips_toolong);
1089 ip_freef(fp);
1090 return (0);
1091 }
1092 m = q->ipqe_m;
1093 t = m->m_next;
1094 m->m_next = 0;
1095 m_cat(m, t);
1096 nq = LIST_NEXT(q, ipqe_q);
1097 pool_put(&ipqent_pool, q);
1098 ip_frags--;
1099 for (q = nq; q != NULL; q = nq) {
1100 t = q->ipqe_m;
1101 nq = LIST_NEXT(q, ipqe_q);
1102 pool_put(&ipqent_pool, q);
1103 ip_frags--;
1104 m_removehdr(t);
1105 m_cat(m, t);
1106 }
1107
1108 /*
1109 * Create header for new ip packet by
1110 * modifying header of first packet;
1111 * dequeue and discard fragment reassembly header.
1112 * Make header visible.
1113 */
1114 ip->ip_len = htons(next);
1115 ip->ip_src = fp->ipq_src;
1116 ip->ip_dst = fp->ipq_dst;
1117 LIST_REMOVE(fp, ipq_q);
1118 pool_put(&ipq_pool, fp);
1119 m->m_len += (ip->ip_hl << 2);
1120 m->m_data -= (ip->ip_hl << 2);
1121 m_calchdrlen(m);
1122 return (m);
1123
1124dropfrag:
1125 ipstat_inc(ips_fragdropped);
1126 m_freem(m);
1127 pool_put(&ipqent_pool, ipqe);
1128 ip_frags--;
1129 return (NULL);
1130}
1131
1132/*
1133 * Free a fragment reassembly header and all
1134 * associated datagrams.
1135 */
1136void
1137ip_freef(struct ipq *fp)
1138{
1139 struct ipqent *q;
1140
1141 MUTEX_ASSERT_LOCKED(&ipq_mutex);
1142
1143 while ((q = LIST_FIRST(&fp->ipq_fragq)) != NULL) {
1144 LIST_REMOVE(q, ipqe_q);
1145 m_freem(q->ipqe_m);
1146 pool_put(&ipqent_pool, q);
1147 ip_frags--;
1148 }
1149 LIST_REMOVE(fp, ipq_q);
1150 pool_put(&ipq_pool, fp);
1151}
1152
1153/*
1154 * IP timer processing;
1155 * if a timer expires on a reassembly queue, discard it.
1156 */
1157void
1158ip_slowtimo(void)
1159{
1160 struct ipq *fp, *nfp;
1161
1162 mtx_enter(&ipq_mutex);
1163 LIST_FOREACH_SAFE(fp, &ipq, ipq_q, nfp) {
1164 if (--fp->ipq_ttl == 0) {
1165 ipstat_inc(ips_fragtimeout);
1166 ip_freef(fp);
1167 }
1168 }
1169 mtx_leave(&ipq_mutex);
1170}
1171
1172/*
1173 * Flush a bunch of datagram fragments, till we are down to 75%.
1174 */
1175void
1176ip_flush(int maxqueue)
1177{
1178 int max = 50;
1179
1180 MUTEX_ASSERT_LOCKED(&ipq_mutex);
1181
1182 while (!LIST_EMPTY(&ipq) && ip_frags > maxqueue * 3 / 4 && --max) {
1183 ipstat_inc(ips_fragdropped);
1184 ip_freef(LIST_FIRST(&ipq));
1185 }
1186}
1187
1188/*
1189 * Do option processing on a datagram,
1190 * possibly discarding it if bad options are encountered,
1191 * or forwarding it if source-routed.
1192 * Returns 1 if packet has been forwarded/freed,
1193 * 0 if the packet should be processed further.
1194 */
int
ip_dooptions(struct mbuf *m, struct ifnet *ifp, int flags)
{
	struct ip *ip = mtod(m, struct ip *);
	unsigned int rtableid = m->m_pkthdr.ph_rtableid;
	struct rtentry *rt;
	struct sockaddr_in ipaddr;
	struct ip_timestamp ipt;	/* aligned local copy of the TS option */
	u_char *cp;
	struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr sin, dst;
	u_int32_t ntime;

	/* Remember the original destination; TSANDADDR stamps use it. */
	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);

	/*
	 * Walk the option list.  On a malformed option, `code' is set to
	 * the offset of the offending octet for the ICMP_PARAMPROB error
	 * sent at `bad'.
	 */
	KERNEL_LOCK();
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/* Validate the option length before trusting it. */
			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			optlen = cp[IPOPT_OLEN];
			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}

		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
			if (atomic_load_int(&ip_dosourceroute) == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			memset(&ipaddr, 0, sizeof(ipaddr));
			ipaddr.sin_family = AF_INET;
			ipaddr.sin_len = sizeof(ipaddr);
			ipaddr.sin_addr = ip->ip_dst;
			ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr),
			    m->m_pkthdr.ph_rtableid));
			if (ia == NULL) {
				/* Not addressed to us: fatal only if strict. */
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;	/* 0 origin */
			if ((off + sizeof(struct in_addr)) > optlen) {
				/*
				 * End of source route.  Should be for us.
				 * Stash the route for ip_srcroute() replies.
				 */
				save_rte(m, cp, ip->ip_src);
				break;
			}

			/*
			 * locate outgoing interface
			 */
			memset(&ipaddr, 0, sizeof(ipaddr));
			ipaddr.sin_family = AF_INET;
			ipaddr.sin_len = sizeof(ipaddr);
			memcpy(&ipaddr.sin_addr, cp + off,
			    sizeof(ipaddr.sin_addr));
			/* keep packet in the virtual instance */
			rt = rtalloc(sintosa(&ipaddr), RT_RESOLVE, rtableid);
			if (!rtisvalid(rt) || ((opt == IPOPT_SSRR) &&
			    ISSET(rt->rt_flags, RTF_GATEWAY))) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				rtfree(rt);
				goto bad;
			}
			/* Record our outgoing address, advance the pointer. */
			ia = ifatoia(rt->rt_ifa);
			memcpy(cp + off, &ia->ia_addr.sin_addr,
			    sizeof(struct in_addr));
			rtfree(rt);
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			ip->ip_dst = ipaddr.sin_addr;
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ip->ip_dst.s_addr);
			break;

		case IPOPT_RR:
			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}

			/*
			 * If no space remains, ignore.
			 */
			off--;	/* 0 origin */
			if ((off + sizeof(struct in_addr)) > optlen)
				break;
			memset(&ipaddr, 0, sizeof(ipaddr));
			ipaddr.sin_family = AF_INET;
			ipaddr.sin_len = sizeof(ipaddr);
			ipaddr.sin_addr = ip->ip_dst;
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 * Again keep the packet inside the virtual instance.
			 */
			rt = rtalloc(sintosa(&ipaddr), RT_RESOLVE, rtableid);
			if (!rtisvalid(rt)) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				rtfree(rt);
				goto bad;
			}
			ia = ifatoia(rt->rt_ifa);
			memcpy(cp + off, &ia->ia_addr.sin_addr,
			    sizeof(struct in_addr));
			rtfree(rt);
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
			code = cp - (u_char *)ip;
			if (optlen < sizeof(struct ip_timestamp))
				goto bad;
			/* Copy out for alignment; cp may be unaligned. */
			memcpy(&ipt, cp, sizeof(struct ip_timestamp));
			if (ipt.ipt_ptr < 5 || ipt.ipt_len < 5)
				goto bad;
			if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) > ipt.ipt_len) {
				/* No room left: count the overflow instead. */
				if (++ipt.ipt_oflw == 0)
					goto bad;
				break;
			}
			memcpy(&sin, cp + ipt.ipt_ptr - 1, sizeof sin);
			switch (ipt.ipt_flg) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) +
				    sizeof(struct in_addr) > ipt.ipt_len)
					goto bad;
				memset(&ipaddr, 0, sizeof(ipaddr));
				ipaddr.sin_family = AF_INET;
				ipaddr.sin_len = sizeof(ipaddr);
				ipaddr.sin_addr = dst;
				ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
				    ifp));
				if (ia == NULL)
					continue;
				memcpy(&sin, &ia->ia_addr.sin_addr,
				    sizeof(struct in_addr));
				ipt.ipt_ptr += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (ipt.ipt_ptr - 1 + sizeof(u_int32_t) +
				    sizeof(struct in_addr) > ipt.ipt_len)
					goto bad;
				memset(&ipaddr, 0, sizeof(ipaddr));
				ipaddr.sin_family = AF_INET;
				ipaddr.sin_len = sizeof(ipaddr);
				ipaddr.sin_addr = sin;
				if (ifa_ifwithaddr(sintosa(&ipaddr),
				    m->m_pkthdr.ph_rtableid) == NULL)
					continue;
				ipt.ipt_ptr += sizeof(struct in_addr);
				break;

			default:
				/* XXX can't take &ipt->ipt_flg */
				code = (u_char *)&ipt.ipt_ptr -
				    (u_char *)ip + 1;
				goto bad;
			}
			/*
			 * NOTE(review): the updated ipt_ptr/ipt_oflw in the
			 * local copy are never written back to the option in
			 * the packet (only the timestamp word is); confirm
			 * this is intentional.
			 */
			ntime = iptime();
			memcpy(cp + ipt.ipt_ptr - 1, &ntime, sizeof(u_int32_t));
			ipt.ipt_ptr += sizeof(u_int32_t);
		}
	}
	KERNEL_UNLOCK();
	/* A rewritten source-route destination may need forwarding now. */
	if (forward && ISSET(flags, IP_FORWARDING)) {
		ip_forward(m, ifp, NULL, flags | IP_REDIRECT);
		return (1);
	}
	return (0);
bad:
	KERNEL_UNLOCK();
	icmp_error(m, type, code, 0, 0);
	ipstat_inc(ips_badoptions);
	return (1);
}
1428
1429/*
1430 * Save incoming source route for use in replies,
1431 * to be picked up later by ip_srcroute if the receiver is interested.
1432 */
1433void
1434save_rte(struct mbuf *m, u_char *option, struct in_addr dst)
1435{
1436 struct ip_srcrt *isr;
1437 struct m_tag *mtag;
1438 unsigned olen;
1439
1440 olen = option[IPOPT_OLEN];
1441 if (olen > sizeof(isr->isr_hdr) + sizeof(isr->isr_routes))
1442 return;
1443
1444 mtag = m_tag_get(PACKET_TAG_SRCROUTE, sizeof(*isr), M_NOWAIT);
1445 if (mtag == NULL) {
1446 ipstat_inc(ips_idropped);
1447 return;
1448 }
1449 isr = (struct ip_srcrt *)(mtag + 1);
1450
1451 memcpy(isr->isr_hdr, option, olen);
1452 isr->isr_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1453 isr->isr_dst = dst;
1454 m_tag_prepend(m, mtag);
1455}
1456
1457/*
1458 * Retrieve incoming source route for use in replies,
1459 * in the same form used by setsockopt.
1460 * The first hop is placed before the options, will be removed later.
1461 */
1462struct mbuf *
1463ip_srcroute(struct mbuf *m0)
1464{
1465 struct in_addr *p, *q;
1466 struct mbuf *m;
1467 struct ip_srcrt *isr;
1468 struct m_tag *mtag;
1469
1470 if (atomic_load_int(&ip_dosourceroute) == 0)
1471 return (NULL);
1472
1473 mtag = m_tag_find(m0, PACKET_TAG_SRCROUTE, NULL);
1474 if (mtag == NULL)
1475 return (NULL);
1476 isr = (struct ip_srcrt *)(mtag + 1);
1477
1478 if (isr->isr_nhops == 0)
1479 return (NULL);
1480 m = m_get(M_DONTWAIT, MT_SOOPTS);
1481 if (m == NULL) {
1482 ipstat_inc(ips_idropped);
1483 return (NULL);
1484 }
1485
1486#define OPTSIZ (sizeof(isr->isr_nop) + sizeof(isr->isr_hdr))
1487
1488 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + header) */
1489 m->m_len = (isr->isr_nhops + 1) * sizeof(struct in_addr) + OPTSIZ;
1490
1491 /*
1492 * First save first hop for return route
1493 */
1494 p = &(isr->isr_routes[isr->isr_nhops - 1]);
1495 *(mtod(m, struct in_addr *)) = *p--;
1496
1497 /*
1498 * Copy option fields and padding (nop) to mbuf.
1499 */
1500 isr->isr_nop = IPOPT_NOP;
1501 isr->isr_hdr[IPOPT_OFFSET] = IPOPT_MINOFF;
1502 memcpy(mtod(m, caddr_t) + sizeof(struct in_addr), &isr->isr_nop,
1503 OPTSIZ);
1504 q = (struct in_addr *)(mtod(m, caddr_t) +
1505 sizeof(struct in_addr) + OPTSIZ);
1506#undef OPTSIZ
1507 /*
1508 * Record return path as an IP source route,
1509 * reversing the path (pointers are now aligned).
1510 */
1511 while (p >= isr->isr_routes) {
1512 *q++ = *p--;
1513 }
1514 /*
1515 * Last hop goes to final destination.
1516 */
1517 *q = isr->isr_dst;
1518 m_tag_delete(m0, (struct m_tag *)isr);
1519 return (m);
1520}
1521
1522/*
1523 * Strip out IP options, at higher level protocol in the kernel.
1524 */
1525void
1526ip_stripoptions(struct mbuf *m)
1527{
1528 int i;
1529 struct ip *ip = mtod(m, struct ip *);
1530 caddr_t opts;
1531 int olen;
1532
1533 olen = (ip->ip_hl<<2) - sizeof (struct ip);
1534 opts = (caddr_t)(ip + 1);
1535 i = m->m_len - (sizeof (struct ip) + olen);
1536 memmove(opts, opts + olen, i);
1537 m->m_len -= olen;
1538 if (m->m_flags & M_PKTHDR)
1539 m->m_pkthdr.len -= olen;
1540 ip->ip_hl = sizeof(struct ip) >> 2;
1541 ip->ip_len = htons(ntohs(ip->ip_len) - olen);
1542}
1543
/*
 * Map PRC_* protocol control input codes to the errno handed to the
 * transport layer; a zero entry means the event produces no error.
 */
const u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};
1552
1553/*
1554 * Forward a packet. If some error occurs return the sender
1555 * an icmp packet. Note we can't always generate a meaningful
1556 * icmp message because icmp doesn't have a large enough repertoire
1557 * of codes and types.
1558 *
1559 * If not forwarding, just drop the packet. This could be confusing
1560 * if ip_forwarding was zero but some routing protocol was advancing
1561 * us as a gateway to somewhere. However, we must let the routing
1562 * protocol deal with that.
1563 *
1564 * The srcrt parameter indicates whether the packet is being forwarded
1565 * via a source route.
1566 */
void
ip_forward(struct mbuf *m, struct ifnet *ifp, struct route *ro, int flags)
{
	struct ip *ip = mtod(m, struct ip *);
	struct route iproute;
	struct rtentry *rt;
	u_int rtableid = m->m_pkthdr.ph_rtableid;
	u_int8_t loopcnt = m->m_pkthdr.ph_loopcnt;
	u_int icmp_len;
	/* 68 = max IP header (60) plus the 8 payload bytes an ICMP error quotes */
	char icmp_buf[68];
	CTASSERT(sizeof(icmp_buf) <= MHLEN);
	u_short mflags, pfflags;
	struct mbuf *mcopy;
	int error = 0, type = 0, code = 0, destmtu = 0;
	u_int32_t dest;

	dest = 0;
	/* Never forward link broadcasts/multicasts or unforwardable dsts. */
	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		ipstat_inc(ips_cantforward);
		m_freem(m);
		goto done;
	}
	/* TTL would hit zero after the decrement below: time exceeded. */
	if (ip->ip_ttl <= IPTTLDEC) {
		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
		goto done;
	}

	/* Use a local route cache if the caller did not pass one in. */
	if (ro == NULL) {
		ro = &iproute;
		ro->ro_rt = NULL;
	}
	rt = route_mpath(ro, &ip->ip_dst, &ip->ip_src, rtableid);
	if (rt == NULL) {
		ipstat_inc(ips_noroute);
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
		goto done;
	}

	/*
	 * Save at most 68 bytes of the packet in case we need to generate
	 * an ICMP message to the src.  The data is saved on the stack.
	 * A new mbuf is only allocated when ICMP is actually created.
	 */
	icmp_len = min(sizeof(icmp_buf), ntohs(ip->ip_len));
	mflags = m->m_flags;
	pfflags = m->m_pkthdr.pf.flags;
	m_copydata(m, 0, icmp_len, icmp_buf);

	/* TTL > IPTTLDEC was verified above, so this cannot underflow. */
	ip->ip_ttl -= IPTTLDEC;

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 * Don't send redirect if we advertise destination's arp address
	 * as ours (proxy arp).
	 */
	if (rt->rt_ifidx == ifp->if_index &&
	    !ISSET(rt->rt_flags, RTF_DYNAMIC|RTF_MODIFIED) &&
	    satosin(rt_key(rt))->sin_addr.s_addr != INADDR_ANY &&
	    !ISSET(flags, IP_REDIRECT) &&
#if NETHER > 0
	    !arpproxy(satosin(rt_key(rt))->sin_addr, rtableid) &&
#endif
	    atomic_load_int(&ip_sendredirects)) {
		if ((ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_netmask) ==
		    ifatoia(rt->rt_ifa)->ia_net) {
			if (rt->rt_flags & RTF_GATEWAY)
				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
			else
				dest = ip->ip_dst.s_addr;
			/* Router requirements says to only send host redirects */
			type = ICMP_REDIRECT;
			code = ICMP_REDIRECT_HOST;
		}
	}

	/* ip_output() consumes m in all cases; only icmp_buf survives. */
	error = ip_output(m, NULL, ro, flags | IP_FORWARDING, NULL, NULL, 0);
	rt = ro->ro_rt;
	if (error)
		ipstat_inc(ips_cantforward);
	else {
		ipstat_inc(ips_forward);
		if (type)
			ipstat_inc(ips_redirectsent);
		else
			goto done;
	}
	/* Translate the ip_output() error into an ICMP type/code pair. */
	switch (error) {
	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
		/* Report the next-hop MTU so the sender can adapt (PMTUD). */
		if (rt != NULL) {
			u_int rtmtu;

			rtmtu = atomic_load_int(&rt->rt_mtu);
			if (rtmtu != 0) {
				destmtu = rtmtu;
			} else {
				struct ifnet *destifp;

				destifp = if_get(rt->rt_ifidx);
				if (destifp != NULL)
					destmtu = destifp->if_mtu;
				if_put(destifp);
			}
		}
		ipstat_inc(ips_cantfrag);
		if (destmtu == 0)
			goto done;
		break;

	case EACCES:
		/*
		 * pf(4) blocked the packet.  There is no need to send an ICMP
		 * packet back since pf(4) takes care of it.
		 */
		goto done;

	case ENOBUFS:
		/*
		 * a router should not generate ICMP_SOURCEQUENCH as
		 * required in RFC1812 Requirements for IP Version 4 Routers.
		 * source quench could be a big problem under DoS attacks,
		 * or the underlying interface is rate-limited.
		 */
		goto done;

	case ENETUNREACH:		/* shouldn't happen, checked above */
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;
	}

	/* Rebuild the saved leading bytes in a fresh mbuf for icmp_error(). */
	mcopy = m_gethdr(M_DONTWAIT, MT_DATA);
	if (mcopy == NULL)
		goto done;
	mcopy->m_len = mcopy->m_pkthdr.len = icmp_len;
	mcopy->m_flags |= (mflags & M_COPYFLAGS);
	mcopy->m_pkthdr.ph_rtableid = rtableid;
	mcopy->m_pkthdr.ph_ifidx = ifp->if_index;
	mcopy->m_pkthdr.ph_loopcnt = loopcnt;
	mcopy->m_pkthdr.pf.flags |= (pfflags & PF_TAG_GENERATED);
	memcpy(mcopy->m_data, icmp_buf, icmp_len);
	icmp_error(mcopy, type, code, dest, destmtu);

 done:
	/* Only release the route if we own the local cache. */
	if (ro == &iproute)
		rtfree(ro->ro_rt);
}
1728
1729#ifndef SMALL_KERNEL
1730
int
ip_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	int oldval, newval, error;

	/* Almost all sysctl names at this level are terminal. */
	/* IFQUEUE and ARPQUEUE take a second-level name, hence the exception. */
	if (namelen != 1 && name[0] != IPCTL_IFQUEUE &&
	    name[0] != IPCTL_ARPQUEUE)
		return (ENOTDIR);

	switch (name[0]) {
	case IPCTL_SOURCEROUTE:
		return (sysctl_securelevel_int(oldp, oldlenp, newp, newlen,
		    &ip_dosourceroute));
	case IPCTL_MTUDISC:
		oldval = newval = atomic_load_int(&ip_mtudisc);
		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &newval, 0, 1);
		/*
		 * CAS so that only the updater that actually flipped the
		 * value (and turned PMTU discovery off) flushes the
		 * timeout queue.
		 */
		if (error == 0 && oldval != newval &&
		    oldval == atomic_cas_uint(&ip_mtudisc, oldval, newval) &&
		    newval == 0) {
			NET_LOCK();
			rt_timer_queue_flush(&ip_mtudisc_timeout_q);
			NET_UNLOCK();
		}

		return (error);
	case IPCTL_MTUDISCTIMEOUT:
		oldval = newval = atomic_load_int(&ip_mtudisc_timeout);
		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &newval, 0, INT_MAX);
		/* Keep variable and timer queue update atomic wrt. sysctl. */
		if (error == 0 && oldval != newval) {
			rw_enter_write(&sysctl_lock);
			atomic_store_int(&ip_mtudisc_timeout, newval);
			rt_timer_queue_change(&ip_mtudisc_timeout_q, newval);
			rw_exit_write(&sysctl_lock);
		}

		return (error);
#ifdef IPSEC
	case IPCTL_ENCDEBUG:
	case IPCTL_IPSEC_STATS:
	case IPCTL_IPSEC_EXPIRE_ACQUIRE:
	case IPCTL_IPSEC_EMBRYONIC_SA_TIMEOUT:
	case IPCTL_IPSEC_REQUIRE_PFS:
	case IPCTL_IPSEC_SOFT_ALLOCATIONS:
	case IPCTL_IPSEC_ALLOCATIONS:
	case IPCTL_IPSEC_SOFT_BYTES:
	case IPCTL_IPSEC_BYTES:
	case IPCTL_IPSEC_TIMEOUT:
	case IPCTL_IPSEC_SOFT_TIMEOUT:
	case IPCTL_IPSEC_SOFT_FIRSTUSE:
	case IPCTL_IPSEC_FIRSTUSE:
	case IPCTL_IPSEC_ENC_ALGORITHM:
	case IPCTL_IPSEC_AUTH_ALGORITHM:
	case IPCTL_IPSEC_IPCOMP_ALGORITHM:
		/* All IPsec knobs are handled by their own subsystem. */
		return (ipsec_sysctl(name, namelen, oldp, oldlenp, newp,
		    newlen));
#endif
	case IPCTL_IFQUEUE:
		return (sysctl_niq(name + 1, namelen - 1,
		    oldp, oldlenp, newp, newlen, &ipintrq));
	case IPCTL_ARPQUEUE:
		return (sysctl_niq(name + 1, namelen - 1,
		    oldp, oldlenp, newp, newlen, &arpinq));
	case IPCTL_ARPQUEUED:
		return (sysctl_rdint(oldp, oldlenp, newp,
		    atomic_load_int(&la_hold_total)));
	case IPCTL_STATS:
		return (ip_sysctl_ipstat(oldp, oldlenp, newp));
#ifdef MROUTING
	case IPCTL_MRTSTATS:
		return (mrt_sysctl_mrtstat(oldp, oldlenp, newp));
	case IPCTL_MRTMFC:
		if (newp)
			return (EPERM);
		return (mrt_sysctl_mfc(oldp, oldlenp));
	case IPCTL_MRTVIF:
		if (newp)
			return (EPERM);
		return (mrt_sysctl_vif(oldp, oldlenp));
#else
	case IPCTL_MRTPROTO:
	case IPCTL_MRTSTATS:
	case IPCTL_MRTMFC:
	case IPCTL_MRTVIF:
		return (EOPNOTSUPP);
#endif
	case IPCTL_MULTIPATH:
		oldval = newval = atomic_load_int(&ipmultipath);
		error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &newval, 0, 1);
		/* Publish the new value before bumping the route generation. */
		if (error == 0 && oldval != newval) {
			atomic_store_int(&ipmultipath, newval);
			membar_producer();
			atomic_inc_long(&rtgeneration);
		}

		return (error);
	default:
		/* Everything else is a plain bounded integer variable. */
		return (sysctl_bounded_arr(ipctl_vars, nitems(ipctl_vars),
		    name, namelen, oldp, oldlenp, newp, newlen));
	}
	/* NOTREACHED */
}
1837
1838int
1839ip_sysctl_ipstat(void *oldp, size_t *oldlenp, void *newp)
1840{
1841 uint64_t counters[ips_ncounters];
1842 struct ipstat ipstat;
1843 u_long *words = (u_long *)&ipstat;
1844 int i;
1845
1846 CTASSERT(sizeof(ipstat) == (nitems(counters) * sizeof(u_long)));
1847 memset(&ipstat, 0, sizeof ipstat);
1848 counters_read(ipcounters, counters, nitems(counters), NULL);
1849
1850 for (i = 0; i < nitems(counters); i++)
1851 words[i] = (u_long)counters[i];
1852
1853 return (sysctl_rdstruct(oldp, oldlenp, newp, &ipstat, sizeof(ipstat)));
1854}
1855#endif /* SMALL_KERNEL */
1856
void
ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
    struct mbuf *m)
{
	/* Each enabled option appends one control mbuf to the *mp chain. */
	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
		struct timeval tv;

		m_microtime(m, &tv);
		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
		    SCM_TIMESTAMP, SOL_SOCKET);
		if (*mp)
			mp = &(*mp)->m_next;
	}

	if (inp->inp_flags & INP_RECVDSTADDR) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#ifdef notyet
	/* this code is broken and will probably never be fixed. */
	/* options were tossed already */
	if (inp->inp_flags & INP_RECVOPTS) {
		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	/* ip_srcroute doesn't do what we want here, need to fix */
	if (inp->inp_flags & INP_RECVRETOPTS) {
		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
#endif
	if (inp->inp_flags & INP_RECVIF) {
		struct sockaddr_dl sdl;
		struct ifnet *ifp;

		ifp = if_get(m->m_pkthdr.ph_ifidx);
		if (ifp == NULL || ifp->if_sadl == NULL) {
			/* No link-layer address: report index (if any) only. */
			memset(&sdl, 0, sizeof(sdl));
			sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
			sdl.sdl_family = AF_LINK;
			sdl.sdl_index = ifp != NULL ? ifp->if_index : 0;
			sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
			*mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
			    IP_RECVIF, IPPROTO_IP);
		} else {
			*mp = sbcreatecontrol((caddr_t) ifp->if_sadl,
			    ifp->if_sadl->sdl_len, IP_RECVIF, IPPROTO_IP);
		}
		if (*mp)
			mp = &(*mp)->m_next;
		if_put(ifp);
	}
	if (inp->inp_flags & INP_RECVTTL) {
		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
		    sizeof(u_int8_t), IP_RECVTTL, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
	if (inp->inp_flags & INP_RECVRTABLE) {
		u_int rtableid = inp->inp_rtableid;

#if NPF > 0
		/* A diverted packet reports the divert rule's rdomain. */
		if (m && m->m_pkthdr.pf.flags & PF_TAG_DIVERTED) {
			struct pf_divert *divert;

			divert = pf_find_divert(m);
			KASSERT(divert != NULL);
			rtableid = divert->rdomain;
		}
#endif

		*mp = sbcreatecontrol((caddr_t) &rtableid,
		    sizeof(u_int), IP_RECVRTABLE, IPPROTO_IP);
		if (*mp)
			mp = &(*mp)->m_next;
	}
}
1940
1941void
1942ip_send_do_dispatch(void *xmq, int flags)
1943{
1944 struct mbuf_queue *mq = xmq;
1945 struct mbuf *m;
1946 struct mbuf_list ml;
1947 struct m_tag *mtag;
1948
1949 mq_delist(mq, &ml);
1950 if (ml_empty(&ml))
1951 return;
1952
1953 NET_LOCK_SHARED();
1954 while ((m = ml_dequeue(&ml)) != NULL) {
1955 u_int32_t ipsecflowinfo = 0;
1956
1957 if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_FLOWINFO, NULL))
1958 != NULL) {
1959 ipsecflowinfo = *(u_int32_t *)(mtag + 1);
1960 m_tag_delete(m, mtag);
1961 }
1962 ip_output(m, NULL, NULL, flags, NULL, NULL, ipsecflowinfo);
1963 }
1964 NET_UNLOCK_SHARED();
1965}
1966
/* Task callback: drain the raw-IP send queue through ip_output(). */
void
ip_sendraw_dispatch(void *xmq)
{
	ip_send_do_dispatch(xmq, IP_RAWOUTPUT);
}
1972
/* Task callback: drain the regular IP send queue through ip_output(). */
void
ip_send_dispatch(void *xmq)
{
	ip_send_do_dispatch(xmq, 0);
}
1978
/* Queue a packet for asynchronous transmission via the ipsend task. */
void
ip_send(struct mbuf *m)
{
	mq_enqueue(&ipsend_mq, m);
	task_add(net_tq(0), &ipsend_task);
}
1985
/* Queue a raw packet (header already built) for asynchronous transmission. */
void
ip_send_raw(struct mbuf *m)
{
	mq_enqueue(&ipsendraw_mq, m);
	task_add(net_tq(0), &ipsendraw_task);
}