/* jcs's local copy of OpenBSD sys/net/bpf.c (hacks tree) */
1/* $OpenBSD: bpf.c,v 1.235 2025/11/13 10:53:25 deraadt Exp $ */
2/* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */
3
4/*
5 * Copyright (c) 1990, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org>
8 *
9 * This code is derived from the Stanford/CMU enet packet filter,
10 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
11 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
12 * Berkeley Laboratory.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)bpf.c 8.2 (Berkeley) 3/28/94
39 */
40
41#include <sys/param.h>
42#include <sys/systm.h>
43#include <sys/mbuf.h>
44#include <sys/timeout.h>
45#include <sys/signalvar.h>
46#include <sys/ioctl.h>
47#include <sys/conf.h>
48#include <sys/vnode.h>
49#include <sys/socket.h>
50#include <sys/sysctl.h>
51#include <sys/atomic.h>
52#include <sys/event.h>
53#include <sys/mutex.h>
54#include <sys/refcnt.h>
55#include <sys/smr.h>
56#include <sys/specdev.h>
57#include <sys/sigio.h>
58#include <sys/task.h>
59#include <sys/time.h>
60
61#include <net/if.h>
62#include <net/bpf.h>
63#include <net/bpfdesc.h>
64
65#include <netinet/in.h>
66#include <netinet/if_ether.h>
67
68#include "vlan.h"
69
#define BPF_BUFSIZE 32768

/*
 * Read-side store buffer states (bd_state); transitions happen in
 * bpf_wait_cb() (WAIT -> DONE) and ROTATE_BUFFERS() (-> IDLE).
 */
#define BPF_S_IDLE 0	/* nothing pending */
#define BPF_S_WAIT 1	/* wait timeout is running */
#define BPF_S_DONE 2	/* store buffer may be handed to readers */

#define PRINET 26 /* interruptible */

/*
 * Locks used to protect data:
 *	a	atomic
 */

/*
 * The default read buffer size is patchable.
 */
int bpf_bufsize = BPF_BUFSIZE;		/* [a] */
int bpf_maxbufsize = BPF_MAXBUFSIZE;	/* [a] */

/*
 * bpf_iflist is the list of interfaces; each corresponds to an ifnet
 * bpf_d_list is the list of descriptors
 */
TAILQ_HEAD(, bpf_if) bpf_iflist = TAILQ_HEAD_INITIALIZER(bpf_iflist);
LIST_HEAD(, bpf_d) bpf_d_list = LIST_HEAD_INITIALIZER(bpf_d_list);

/* File-local helper prototypes. */
int	bpf_allocbufs(struct bpf_d *);
void	bpf_ifname(struct bpf_if*, struct ifreq *);
void	bpf_mcopy(const void *, void *, size_t);
int	bpf_movein(struct uio *, struct bpf_d *, struct mbuf **,
	    struct sockaddr *);
int	bpf_setif(struct bpf_d *, struct ifreq *);
int	bpfkqfilter(dev_t, struct knote *);
void	bpf_wakeup(struct bpf_d *);
void	bpf_wakeup_cb(void *);
void	bpf_wait_cb(void *);
int	_bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int);
void	bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t,
	    const struct bpf_hdr *);
int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
int	bpf_setdlt(struct bpf_d *, u_int);

/* kqueue(2) EVFILT_READ glue; see bpfread_filtops below. */
void	filt_bpfrdetach(struct knote *);
int	filt_bpfread(struct knote *, long);
int	filt_bpfreadmodify(struct kevent *, struct knote *);
int	filt_bpfreadprocess(struct knote *, struct kevent *);

struct bpf_d *bpfilter_lookup(int);

/*
 * Called holding ``bd_mtx''.
 */
void	bpf_attachd(struct bpf_d *, struct bpf_if *);
void	bpf_detachd(struct bpf_d *);
void	bpf_resetd(struct bpf_d *);

/* SMR destructors for filter programs and descriptors. */
void	bpf_prog_smr(void *);
void	bpf_d_smr(void *);

/*
 * Reference count access to descriptor buffers
 */
void	bpf_get(struct bpf_d *);
void	bpf_put(struct bpf_d *);
134
/*
 * Copy a packet written by userland (bpfwrite()) into a fresh mbuf and
 * build the sockaddr that if_output() expects, based on the link type.
 * The packet must pass the descriptor's write filter in full (EPERM
 * otherwise).  On success *mp owns the mbuf; on failure it is freed
 * here.  Returns 0 or an errno.
 */
int
bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp,
    struct sockaddr *sockp)
{
	struct bpf_program_smr *bps;
	struct bpf_insn *fcode = NULL;
	struct mbuf *m;
	struct m_tag *mtag;
	int error;
	u_int hlen, alen, mlen;
	u_int len;
	u_int linktype;
	u_int slen;

	/*
	 * Build a sockaddr based on the data link layer type.
	 * We do this at this level because the ethernet header
	 * is copied directly into the data field of the sockaddr.
	 * In the case of SLIP, there is no header and the packet
	 * is forwarded as is.
	 * Also, we are careful to leave room at the front of the mbuf
	 * for the link level header.
	 */
	linktype = d->bd_bif->bif_dlt;
	switch (linktype) {

	case DLT_SLIP:
		sockp->sa_family = AF_INET;
		hlen = 0;
		break;

	case DLT_PPP:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_EN10MB:
		sockp->sa_family = AF_UNSPEC;
		/* XXX Would MAXLINKHDR be better? */
		hlen = ETHER_HDR_LEN;
		break;

	case DLT_IEEE802_11:
	case DLT_IEEE802_11_RADIO:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_RAW:
	case DLT_NULL:
		sockp->sa_family = AF_UNSPEC;
		hlen = 0;
		break;

	case DLT_LOOP:
		sockp->sa_family = AF_UNSPEC;
		hlen = sizeof(u_int32_t);
		break;

	default:
		/* Unsupported link type for writes. */
		return (EIO);
	}

	if (uio->uio_resid > MAXMCLBYTES)
		return (EMSGSIZE);
	len = uio->uio_resid;
	/* The write must at least contain the link header. */
	if (len < hlen)
		return (EINVAL);

	/*
	 * Get the length of the payload so we can align it properly.
	 */
	alen = len - hlen;

	/*
	 * Allocate enough space for headers and the aligned payload.
	 */
	mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long));
	if (mlen > MAXMCLBYTES)
		return (EMSGSIZE);

	MGETHDR(m, M_WAIT, MT_DATA);
	if (mlen > MHLEN) {
		/* Payload does not fit in the header mbuf: add a cluster. */
		MCLGETL(m, M_WAIT, mlen);
		if ((m->m_flags & M_EXT) == 0) {
			error = ENOBUFS;
			goto bad;
		}
	}

	m_align(m, alen);	/* Align the payload. */
	m->m_data -= hlen;

	m->m_pkthdr.ph_ifidx = 0;
	m->m_pkthdr.len = len;
	m->m_len = len;

	error = uiomove(mtod(m, caddr_t), len, uio);
	if (error)
		goto bad;

	/* Run the packet through the write filter under an SMR section. */
	smr_read_enter();
	bps = SMR_PTR_GET(&d->bd_wfilter);
	if (bps != NULL)
		fcode = bps->bps_bf.bf_insns;
	slen = bpf_filter(fcode, mtod(m, u_char *), len, len);
	smr_read_leave();

	/* The write filter must accept the entire packet. */
	if (slen < len) {
		error = EPERM;
		goto bad;
	}

	/*
	 * Make room for link header, and copy it to sockaddr
	 */
	if (hlen != 0) {
		if (linktype == DLT_LOOP) {
			u_int32_t af;

			/* the link header indicates the address family */
			KASSERT(hlen == sizeof(u_int32_t));
			memcpy(&af, m->m_data, hlen);
			sockp->sa_family = ntohl(af);
		} else
			memcpy(sockp->sa_data, m->m_data, hlen);

		/* Strip the link header from the mbuf payload. */
		m->m_pkthdr.len -= hlen;
		m->m_len -= hlen;
		m->m_data += hlen;
	}

	/*
	 * Prepend the data link type as a mbuf tag
	 */
	mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT);
	*(u_int *)(mtag + 1) = linktype;
	m_tag_prepend(m, mtag);

	*mp = m;
	return (0);
 bad:
	m_freem(m);
	return (error);
}
280
/*
 * Attach file to the bpf interface, i.e. make d listen on bp.
 * Called holding ``bd_mtx''; list insertion additionally relies on
 * the kernel lock as the SMR writer-side serialization.
 */
void
bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	/*
	 * Point d at bp, and add d to the interface's list of listeners.
	 * Finally, point the driver's bpf cookie at the interface so
	 * it will divert packets to bpf.
	 */

	d->bd_bif = bp;

	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next);

	/* A non-NULL cookie makes the driver hand packets to bpf. */
	*bp->bif_driverp = bp;
}
302
/*
 * Detach a file from its interface.  Called holding ``bd_mtx''; the
 * mutex is temporarily released (with a reference held) to turn
 * promiscuous mode back off.
 */
void
bpf_detachd(struct bpf_d *d)
{
	struct bpf_if *bp;

	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	bp = d->bd_bif;
	/* Not attached. */
	if (bp == NULL)
		return;

	/* Remove ``d'' from the interface's descriptor list. */
	KERNEL_ASSERT_LOCKED();
	SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next);

	if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) {
		/*
		 * Let the driver know that there are no more listeners.
		 */
		*bp->bif_driverp = NULL;
	}

	d->bd_bif = NULL;

	/*
	 * Check if this descriptor had requested promiscuous mode.
	 * If so, turn it off.
	 */
	if (d->bd_promisc) {
		int error;

		KASSERT(bp->bif_ifp != NULL);

		/* Clear the flag before sleeping in ifpromisc(). */
		d->bd_promisc = 0;

		/* Keep ``d'' alive across the mutex/NET_LOCK dance. */
		bpf_get(d);
		mtx_leave(&d->bd_mtx);
		NET_LOCK();
		error = ifpromisc(bp->bif_ifp, 0);
		NET_UNLOCK();
		mtx_enter(&d->bd_mtx);
		bpf_put(d);

		if (error && !(error == EINVAL || error == ENODEV ||
		    error == ENXIO))
			/*
			 * Something is really wrong if we were able to put
			 * the driver into promiscuous mode, but can't
			 * take it out.
			 */
			panic("bpf: ifpromisc failed");
	}
}
360
/*
 * Pseudo-device attach routine.  Descriptors are created lazily in
 * bpfopen(), so there is nothing to set up here.
 */
void
bpfilterattach(int n)
{
}
365
/*
 * Open bpf device.  Returns ENXIO for illegal minor device number,
 * EBUSY if the descriptor could not be allocated.
 */
int
bpfopen(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *bd;
	int unit = minor(dev);

	/* Only cloning minors (multiples of 1 << CLONE_SHIFT) are valid. */
	if (unit & ((1 << CLONE_SHIFT) - 1))
		return (ENXIO);

	KASSERT(bpfilter_lookup(unit) == NULL);

	/* create on demand */
	if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL)
		return (EBUSY);

	/* Mark "free" and do most initialization. */
	bd->bd_unit = unit;
	bd->bd_bufsize = atomic_load_int(&bpf_bufsize);
	bd->bd_sig = SIGIO;
	mtx_init(&bd->bd_mtx, IPL_NET);
	task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd);
	timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd);
	smr_init(&bd->bd_smr);
	sigio_init(&bd->bd_sigio);
	/* knotes on this descriptor are serialized by bd_mtx. */
	klist_init_mutex(&bd->bd_klist, &bd->bd_mtx);

	bd->bd_rtout = 0;	/* no timeout by default */
	bd->bd_wtout = INFSLP;	/* wait for the buffer to fill by default */

	/* The bpf_d_list entry holds the initial reference. */
	refcnt_init(&bd->bd_refcnt);
	LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list);

	return (0);
}
404
/*
 * Close the descriptor by detaching it from its interface,
 * deallocating its buffers, and marking it free.
 * Sleeping readers are woken first; the final bpf_put() drops the
 * bpf_d_list reference (presumably freeing via bpf_d_smr() once the
 * refcount hits zero -- see bpf_put()).
 */
int
bpfclose(dev_t dev, int flag, int mode, struct proc *p)
{
	struct bpf_d *d;

	d = bpfilter_lookup(minor(dev));
	mtx_enter(&d->bd_mtx);
	bpf_detachd(d);
	bpf_wakeup(d);
	LIST_REMOVE(d, bd_list);
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (0);
}
424
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * Wrapped in do { } while (0) so the multi-statement body behaves as
 * a single statement at every use site, and the ``d'' argument is
 * consistently parenthesized (it was bare in the assertions before).
 * Must be called with bd_mtx held and no reader inside uiomove.
 */
#define ROTATE_BUFFERS(d)						\
do {									\
	KASSERT((d)->bd_in_uiomove == 0);				\
	MUTEX_ASSERT_LOCKED(&(d)->bd_mtx);				\
	(d)->bd_hbuf = (d)->bd_sbuf;					\
	(d)->bd_hlen = (d)->bd_slen;					\
	(d)->bd_sbuf = (d)->bd_fbuf;					\
	(d)->bd_state = BPF_S_IDLE;					\
	(d)->bd_slen = 0;						\
	(d)->bd_fbuf = NULL;						\
} while (0)
439
/*
 * bpfread - read next chunk of packets from buffers
 *
 * Blocks until the hold buffer has data, the read timeout (bd_rtout)
 * expires, or the attached interface disappears.  The caller's buffer
 * must be exactly bd_bufsize bytes.
 */
int
bpfread(dev_t dev, struct uio *uio, int ioflag)
{
	uint64_t end, now;
	struct bpf_d *d;
	caddr_t hbuf;
	int error, hlen;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	mtx_enter(&d->bd_mtx);

	/*
	 * Restrict application to use a buffer the same size as
	 * the kernel buffers.
	 */
	if (uio->uio_resid != d->bd_bufsize) {
		error = EINVAL;
		goto out;
	}

	/*
	 * If there's a timeout, mark when the read should end.
	 */
	if (d->bd_rtout != 0) {
		now = nsecuptime();
		end = now + d->bd_rtout;
		if (end < now)	/* clamp on uint64 overflow */
			end = UINT64_MAX;
	}

	/*
	 * If the hold buffer is empty, then do a timed sleep, which
	 * ends when the timeout expires or when enough packets
	 * have arrived to fill the store buffer.
	 */
	while (d->bd_hbuf == NULL) {
		if (d->bd_bif == NULL) {
			/* interface is gone; drain what is left */
			if (d->bd_slen == 0) {
				error = EIO;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
		if (d->bd_state == BPF_S_DONE) {
			/*
			 * A packet(s) either arrived since the previous
			 * read or arrived while we were asleep.
			 * Rotate the buffers and return what's here.
			 */
			ROTATE_BUFFERS(d);
			break;
		}
		if (ISSET(ioflag, IO_NDELAY)) {
			/* User requested non-blocking I/O */
			error = EWOULDBLOCK;
		} else if (d->bd_rtout == 0) {
			/* No read timeout set. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", INFSLP);
			d->bd_nreaders--;
		} else if ((now = nsecuptime()) < end) {
			/* Read timeout has not expired yet. */
			d->bd_nreaders++;
			error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH,
			    "bpf", end - now);
			d->bd_nreaders--;
		} else {
			/* Read timeout has expired. */
			error = EWOULDBLOCK;
		}
		if (error == EINTR || error == ERESTART)
			goto out;
		if (error == EWOULDBLOCK) {
			/*
			 * On a timeout, return what's in the buffer,
			 * which may be nothing. If there is something
			 * in the store buffer, we can rotate the buffers.
			 */
			if (d->bd_hbuf != NULL)
				/*
				 * We filled up the buffer in between
				 * getting the timeout and arriving
				 * here, so we don't need to rotate.
				 */
				break;

			if (d->bd_slen == 0) {
				/* Nothing captured: a zero-length read. */
				error = 0;
				goto out;
			}
			ROTATE_BUFFERS(d);
			break;
		}
	}
	/*
	 * At this point, we know we have something in the hold slot.
	 */
	hbuf = d->bd_hbuf;
	hlen = d->bd_hlen;
	d->bd_hbuf = NULL;
	d->bd_hlen = 0;
	d->bd_fbuf = NULL;
	d->bd_in_uiomove = 1;

	/*
	 * Move data from hold buffer into user space.
	 * We know the entire buffer is transferred since
	 * we checked above that the read buffer is bpf_bufsize bytes.
	 */
	mtx_leave(&d->bd_mtx);
	error = uiomove(hbuf, hlen, uio);
	mtx_enter(&d->bd_mtx);

	/* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */
	KASSERT(d->bd_fbuf == NULL);
	KASSERT(d->bd_hbuf == NULL);
	d->bd_fbuf = hbuf;
	d->bd_in_uiomove = 0;
out:
	mtx_leave(&d->bd_mtx);
	bpf_put(d);

	return (error);
}
576
/*
 * If there are processes sleeping on this descriptor, wake them up.
 * Called holding ``bd_mtx''.  Also activates kqueue notes and, for
 * async descriptors, schedules SIGIO delivery on the system taskq.
 */
void
bpf_wakeup(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);

	if (d->bd_nreaders)
		wakeup(d);

	knote_locked(&d->bd_klist, 0);

	/*
	 * As long as pgsigio() needs to be protected
	 * by the KERNEL_LOCK() we have to delay the wakeup to
	 * another context to keep the hot path KERNEL_LOCK()-free.
	 */
	if (d->bd_async && d->bd_sig) {
		/* Reference for the task; released in bpf_wakeup_cb(). */
		bpf_get(d);
		if (!task_add(systq, &d->bd_wake_task))
			bpf_put(d);	/* task already queued */
	}
}
601
602void
603bpf_wakeup_cb(void *xd)
604{
605 struct bpf_d *d = xd;
606
607 if (d->bd_async && d->bd_sig)
608 pgsigio(&d->bd_sigio, d->bd_sig, 0);
609
610 bpf_put(d);
611}
612
613void
614bpf_wait_cb(void *xd)
615{
616 struct bpf_d *d = xd;
617
618 mtx_enter(&d->bd_mtx);
619 if (d->bd_state == BPF_S_WAIT) {
620 d->bd_state = BPF_S_DONE;
621 bpf_wakeup(d);
622 }
623 mtx_leave(&d->bd_mtx);
624
625 bpf_put(d);
626}
627
/*
 * Inject a packet from userland directly out of the attached interface.
 * The payload is validated and filtered in bpf_movein() and must fit
 * within the interface MTU.  Returns 0 or an errno.
 */
int
bpfwrite(dev_t dev, struct uio *uio, int ioflag)
{
	struct bpf_d *d;
	struct ifnet *ifp;
	struct mbuf *m;
	int error;
	struct sockaddr_storage dst;

	KERNEL_ASSERT_LOCKED();

	d = bpfilter_lookup(minor(dev));
	if (d->bd_bif == NULL)
		return (ENXIO);

	bpf_get(d);
	ifp = d->bd_bif->bif_ifp;

	if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) {
		error = ENETDOWN;
		goto out;
	}

	if (uio->uio_resid == 0) {
		/* Nothing to send. */
		error = 0;
		goto out;
	}

	error = bpf_movein(uio, d, &m, sstosa(&dst));
	if (error)
		goto out;

	if (m->m_pkthdr.len > ifp->if_mtu) {
		m_freem(m);
		error = EMSGSIZE;
		goto out;
	}

	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
	m->m_pkthdr.pf.prio = ifp->if_llprio;

	/* Tell the link layer the caller supplied a complete header. */
	if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC)
		dst.ss_family = pseudo_AF_HDRCMPLT;

	NET_LOCK();
	error = ifp->if_output(ifp, m, sstosa(&dst), NULL);
	NET_UNLOCK();

out:
	bpf_put(d);
	return (error);
}
680
/*
 * Reset a descriptor by flushing its packet buffer and clearing the
 * receive and drop counts.  Called holding ``bd_mtx''; must not run
 * while a reader is inside uiomove (bd_in_uiomove).
 */
void
bpf_resetd(struct bpf_d *d)
{
	MUTEX_ASSERT_LOCKED(&d->bd_mtx);
	KASSERT(d->bd_in_uiomove == 0);

	/*
	 * Cancel a pending wait timeout; if it was armed, drop the
	 * reference presumably taken when it was scheduled.
	 */
	if (timeout_del(&d->bd_wait_tmo))
		bpf_put(d);

	if (d->bd_hbuf != NULL) {
		/* Free the hold buffer. */
		d->bd_fbuf = d->bd_hbuf;
		d->bd_hbuf = NULL;
	}
	d->bd_state = BPF_S_IDLE;
	d->bd_slen = 0;
	d->bd_hlen = 0;
	d->bd_rcount = 0;
	d->bd_dcount = 0;
}
705
706static int
707bpf_set_wtout(struct bpf_d *d, uint64_t wtout)
708{
709 mtx_enter(&d->bd_mtx);
710 d->bd_wtout = wtout;
711 mtx_leave(&d->bd_mtx);
712
713 return (0);
714}
715
716static int
717bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv)
718{
719 uint64_t nsec;
720
721 if (tv->tv_sec < 0 || !timerisvalid(tv))
722 return (EINVAL);
723
724 nsec = TIMEVAL_TO_NSEC(tv);
725 if (nsec > SEC_TO_NSEC(300))
726 return (EINVAL);
727 if (nsec > MAXTSLP)
728 return (EOVERFLOW);
729
730 return (bpf_set_wtout(d, nsec));
731}
732
733static int
734bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv)
735{
736 uint64_t nsec;
737
738 mtx_enter(&d->bd_mtx);
739 nsec = d->bd_wtout;
740 mtx_leave(&d->bd_mtx);
741
742 if (nsec == INFSLP)
743 return (ENXIO);
744
745 memset(tv, 0, sizeof(*tv));
746 NSEC_TO_TIMEVAL(nsec, tv);
747
748 return (0);
749}
750
/*
 * FIONREAD		Check for read packet available.
 * BIOCGBLEN		Get buffer len [for read()].
 * BIOCSETF		Set read filter.
 * BIOCSETFNR		Set read filter without resetting descriptor.
 * BIOCFLUSH		Flush read packet buffer.
 * BIOCPROMISC		Put interface into promiscuous mode.
 * BIOCGDLTLIST		Get supported link layer types.
 * BIOCGDLT		Get link layer type.
 * BIOCSDLT		Set link layer type.
 * BIOCGETIF		Get interface name.
 * BIOCSETIF		Set interface.
 * BIOCSRTIMEOUT	Set read timeout.
 * BIOCGRTIMEOUT	Get read timeout.
 * BIOCSWTIMEOUT	Set wait timeout.
 * BIOCGWTIMEOUT	Get wait timeout.
 * BIOCDWTIMEOUT	Del wait timeout.
 * BIOCGSTATS		Get packet stats.
 * BIOCIMMEDIATE	Set immediate mode.
 * BIOCVERSION		Get filter language version.
 * BIOCGHDRCMPLT	Get "header already complete" flag
 * BIOCSHDRCMPLT	Set "header already complete" flag
 */
int
bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct bpf_d *d;
	int error = 0;

	d = bpfilter_lookup(minor(dev));
	if (d->bd_locked) {
		/* list of allowed ioctls when locked */
		switch (cmd) {
		case BIOCGBLEN:
		case BIOCFLUSH:
		case BIOCGDLT:
		case BIOCGDLTLIST:
		case BIOCGETIF:
		case BIOCGRTIMEOUT:
		case BIOCGWTIMEOUT:
		case BIOCGSTATS:
		case BIOCVERSION:
		case BIOCGRSIG:
		case BIOCGHDRCMPLT:
		case FIONREAD:
		case BIOCLOCK:
		case BIOCSRTIMEOUT:
		case BIOCSWTIMEOUT:
		case BIOCDWTIMEOUT:
		case BIOCIMMEDIATE:
		case TIOCGPGRP:
		case BIOCGDIRFILT:
			break;
		default:
			return (EPERM);
		}
	}

	bpf_get(d);

	switch (cmd) {
	default:
		error = EINVAL;
		break;

	/*
	 * Check for read packet available.
	 */
	case FIONREAD:
		{
			int n;

			/* Bytes buffered = store buffer + held hold buffer. */
			mtx_enter(&d->bd_mtx);
			n = d->bd_slen;
			if (d->bd_hbuf != NULL)
				n += d->bd_hlen;
			mtx_leave(&d->bd_mtx);

			*(int *)addr = n;
			break;
		}

	/*
	 * Get buffer len [for read()].
	 */
	case BIOCGBLEN:
		*(u_int *)addr = d->bd_bufsize;
		break;

	/*
	 * Set buffer length.  Only allowed before an interface is
	 * attached (the buffers are sized at attach time); the value
	 * is clamped to [BPF_MINBUFSIZE, bpf_maxbufsize] and the
	 * clamped result is written back to userland.
	 */
	case BIOCSBLEN:
		if (d->bd_bif != NULL)
			error = EINVAL;
		else {
			u_int size = *(u_int *)addr;
			int bpf_maxbufsize_local =
			    atomic_load_int(&bpf_maxbufsize);

			if (size > bpf_maxbufsize_local)
				*(u_int *)addr = size = bpf_maxbufsize_local;
			else if (size < BPF_MINBUFSIZE)
				*(u_int *)addr = size = BPF_MINBUFSIZE;
			mtx_enter(&d->bd_mtx);
			d->bd_bufsize = size;
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Set link layer read/write filter.
	 */
	case BIOCSETF:
	case BIOCSETFNR:
	case BIOCSETWF:
		error = bpf_setf(d, (struct bpf_program *)addr, cmd);
		break;

	/*
	 * Flush read packet buffer.
	 */
	case BIOCFLUSH:
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
		break;

	/*
	 * Put interface into promiscuous mode.
	 */
	case BIOCPROMISC:
		if (d->bd_bif == NULL) {
			/*
			 * No interface attached yet.
			 */
			error = EINVAL;
		} else if (d->bd_bif->bif_ifp != NULL) {
			/* Idempotent: only the first request calls down. */
			if (d->bd_promisc == 0) {
				MUTEX_ASSERT_UNLOCKED(&d->bd_mtx);
				NET_LOCK();
				error = ifpromisc(d->bd_bif->bif_ifp, 1);
				NET_UNLOCK();
				if (error == 0)
					d->bd_promisc = 1;
			}
		}
		break;

	/*
	 * Get a list of supported device parameters.
	 */
	case BIOCGDLTLIST:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			error = bpf_getdltlist(d, (struct bpf_dltlist *)addr);
		break;

	/*
	 * Get device parameters.
	 */
	case BIOCGDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			*(u_int *)addr = d->bd_bif->bif_dlt;
		break;

	/*
	 * Set device parameters.
	 */
	case BIOCSDLT:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else {
			mtx_enter(&d->bd_mtx);
			error = bpf_setdlt(d, *(u_int *)addr);
			mtx_leave(&d->bd_mtx);
		}
		break;

	/*
	 * Get interface name.
	 */
	case BIOCGETIF:
		if (d->bd_bif == NULL)
			error = EINVAL;
		else
			bpf_ifname(d->bd_bif, (struct ifreq *)addr);
		break;

	/*
	 * Set interface.
	 */
	case BIOCSETIF:
		error = bpf_setif(d, (struct ifreq *)addr);
		break;

	/*
	 * Set read timeout.
	 */
	case BIOCSRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;
			uint64_t rtout;

			if (tv->tv_sec < 0 || !timerisvalid(tv)) {
				error = EINVAL;
				break;
			}
			rtout = TIMEVAL_TO_NSEC(tv);
			if (rtout > MAXTSLP) {
				error = EOVERFLOW;
				break;
			}
			mtx_enter(&d->bd_mtx);
			d->bd_rtout = rtout;
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get read timeout.
	 */
	case BIOCGRTIMEOUT:
		{
			struct timeval *tv = (struct timeval *)addr;

			memset(tv, 0, sizeof(*tv));
			mtx_enter(&d->bd_mtx);
			NSEC_TO_TIMEVAL(d->bd_rtout, tv);
			mtx_leave(&d->bd_mtx);
			break;
		}

	/*
	 * Get packet stats.
	 */
	case BIOCGSTATS:
		{
			struct bpf_stat *bs = (struct bpf_stat *)addr;

			bs->bs_recv = d->bd_rcount;
			bs->bs_drop = d->bd_dcount;
			break;
		}

	/*
	 * Set immediate mode: a zero wait timeout delivers packets as
	 * they arrive; clearing it restores wait-until-full (INFSLP).
	 */
	case BIOCIMMEDIATE:
		error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP);
		break;

	/*
	 * Wait timeout.
	 */
	case BIOCSWTIMEOUT:
		error = bpf_set_wtimeout(d, (const struct timeval *)addr);
		break;
	case BIOCGWTIMEOUT:
		error = bpf_get_wtimeout(d, (struct timeval *)addr);
		break;
	case BIOCDWTIMEOUT:
		error = bpf_set_wtout(d, INFSLP);
		break;

	case BIOCVERSION:
		{
			struct bpf_version *bv = (struct bpf_version *)addr;

			bv->bv_major = BPF_MAJOR_VERSION;
			bv->bv_minor = BPF_MINOR_VERSION;
			break;
		}

	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
		*(u_int *)addr = d->bd_hdrcmplt;
		break;

	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
		d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0;
		break;

	case BIOCLOCK:		/* set "locked" flag (no reset) */
		d->bd_locked = 1;
		break;

	case BIOCGFILDROP:	/* get "filter-drop" flag */
		*(u_int *)addr = d->bd_fildrop;
		break;

	case BIOCSFILDROP: {	/* set "filter-drop" flag */
		unsigned int fildrop = *(u_int *)addr;
		switch (fildrop) {
		case BPF_FILDROP_PASS:
		case BPF_FILDROP_CAPTURE:
		case BPF_FILDROP_DROP:
			d->bd_fildrop = fildrop;
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	}

	case BIOCGDIRFILT:	/* get direction filter */
		*(u_int *)addr = d->bd_dirfilt;
		break;

	case BIOCSDIRFILT:	/* set direction filter */
		d->bd_dirfilt = (*(u_int *)addr) &
		    (BPF_DIRECTION_IN|BPF_DIRECTION_OUT);
		break;

	case FIOASYNC:		/* Send signal on receive packets */
		d->bd_async = *(int *)addr;
		break;

	case FIOSETOWN:		/* Process or group to send signals to */
	case TIOCSPGRP:
		error = sigio_setown(&d->bd_sigio, cmd, addr);
		break;

	case FIOGETOWN:
	case TIOCGPGRP:
		sigio_getown(&d->bd_sigio, cmd, addr);
		break;

	case BIOCSRSIG:		/* Set receive signal */
		{
			u_int sig;

			sig = *(u_int *)addr;

			if (sig >= NSIG)
				error = EINVAL;
			else
				d->bd_sig = sig;
			break;
		}
	case BIOCGRSIG:
		*(u_int *)addr = d->bd_sig;
		break;
	}

	bpf_put(d);
	return (error);
}
1102
/*
 * Set d's packet filter program to fp.  If this file already has a filter,
 * free it and replace it.  Returns EINVAL for bogus requests.
 * The replaced program is released via smr_call() once concurrent
 * SMR readers (_bpf_mtap(), bpf_movein()) are guaranteed to be done.
 */
int
bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd)
{
	struct bpf_program_smr *bps, *old_bps;
	struct bpf_insn *fcode;
	u_int flen, size;

	KERNEL_ASSERT_LOCKED();

	if (fp->bf_insns == 0) {
		/* No instructions: remove the current filter, if any. */
		if (fp->bf_len != 0)
			return (EINVAL);
		bps = NULL;
	} else {
		flen = fp->bf_len;
		if (flen > BPF_MAXINSNS)
			return (EINVAL);

		fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF,
		    M_WAITOK | M_CANFAIL);
		if (fcode == NULL)
			return (ENOMEM);

		size = flen * sizeof(*fp->bf_insns);
		/* Copy the program in and make sure it is safe to run. */
		if (copyin(fp->bf_insns, fcode, size) != 0 ||
		    bpf_validate(fcode, (int)flen) == 0) {
			free(fcode, M_DEVBUF, size);
			return (EINVAL);
		}

		bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK);
		smr_init(&bps->bps_smr);
		bps->bps_bf.bf_len = flen;
		bps->bps_bf.bf_insns = fcode;
	}

	if (cmd != BIOCSETWF) {
		/* BIOCSETF and BIOCSETFNR install the read filter. */
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter);
		SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps);
	} else {
		old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter);
		SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps);
	}

	if (cmd == BIOCSETF) {
		/* Only BIOCSETF also flushes buffered packets. */
		mtx_enter(&d->bd_mtx);
		bpf_resetd(d);
		mtx_leave(&d->bd_mtx);
	}

	if (old_bps != NULL)
		smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps);

	return (0);
}
1162
/*
 * Detach a file from its current interface (if attached at all) and attach
 * to the interface indicated by the name stored in ifr.
 * Return an errno or 0.
 */
int
bpf_setif(struct bpf_d *d, struct ifreq *ifr)
{
	struct bpf_if *bp;
	int error = 0;

	/*
	 * Look through attached interfaces for the named one.
	 */
	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
		if (strcmp(bp->bif_name, ifr->ifr_name) == 0)
			break;
	}

	/* Not found. */
	if (bp == NULL)
		return (ENXIO);

	/*
	 * Allocate the packet buffers if we need to.
	 * If we're already attached to requested interface,
	 * just flush the buffer.
	 */
	mtx_enter(&d->bd_mtx);
	if (d->bd_sbuf == NULL) {
		if ((error = bpf_allocbufs(d)))
			goto out;
	}
	if (bp != d->bd_bif) {
		/*
		 * Detach if attached to something else.
		 */
		bpf_detachd(d);
		bpf_attachd(d, bp);
	}
	/* Discard any capture state from the previous attachment. */
	bpf_resetd(d);
out:
	mtx_leave(&d->bd_mtx);
	return (error);
}
1208
1209/*
1210 * Copy the interface name to the ifreq.
1211 */
1212void
1213bpf_ifname(struct bpf_if *bif, struct ifreq *ifr)
1214{
1215 bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name));
1216}
1217
/* kqueue(2) EVFILT_READ hooks; knotes are serialized by bd_mtx. */
const struct filterops bpfread_filtops = {
	.f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE,
	.f_attach = NULL,	/* attachment is done in bpfkqfilter() */
	.f_detach = filt_bpfrdetach,
	.f_event = filt_bpfread,
	.f_modify = filt_bpfreadmodify,
	.f_process = filt_bpfreadprocess,
};
1226
1227int
1228bpfkqfilter(dev_t dev, struct knote *kn)
1229{
1230 struct bpf_d *d;
1231 struct klist *klist;
1232
1233 KERNEL_ASSERT_LOCKED();
1234
1235 d = bpfilter_lookup(minor(dev));
1236 if (d == NULL)
1237 return (ENXIO);
1238
1239 switch (kn->kn_filter) {
1240 case EVFILT_READ:
1241 klist = &d->bd_klist;
1242 kn->kn_fop = &bpfread_filtops;
1243 break;
1244 default:
1245 return (EINVAL);
1246 }
1247
1248 bpf_get(d);
1249 kn->kn_hook = d;
1250 klist_insert(klist, kn);
1251
1252 return (0);
1253}
1254
1255void
1256filt_bpfrdetach(struct knote *kn)
1257{
1258 struct bpf_d *d = kn->kn_hook;
1259
1260 klist_remove(&d->bd_klist, kn);
1261 bpf_put(d);
1262}
1263
1264int
1265filt_bpfread(struct knote *kn, long hint)
1266{
1267 struct bpf_d *d = kn->kn_hook;
1268
1269 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1270
1271 kn->kn_data = d->bd_hlen;
1272 if (d->bd_state == BPF_S_DONE)
1273 kn->kn_data += d->bd_slen;
1274
1275 return (kn->kn_data > 0);
1276}
1277
1278int
1279filt_bpfreadmodify(struct kevent *kev, struct knote *kn)
1280{
1281 struct bpf_d *d = kn->kn_hook;
1282 int active;
1283
1284 mtx_enter(&d->bd_mtx);
1285 active = knote_modify_fn(kev, kn, filt_bpfread);
1286 mtx_leave(&d->bd_mtx);
1287
1288 return (active);
1289}
1290
1291int
1292filt_bpfreadprocess(struct knote *kn, struct kevent *kev)
1293{
1294 struct bpf_d *d = kn->kn_hook;
1295 int active;
1296
1297 mtx_enter(&d->bd_mtx);
1298 active = knote_process_fn(kn, kev, filt_bpfread);
1299 mtx_leave(&d->bd_mtx);
1300
1301 return (active);
1302}
1303
1304/*
1305 * Copy data from an mbuf chain into a buffer. This code is derived
1306 * from m_copydata in sys/uipc_mbuf.c.
1307 */
1308void
1309bpf_mcopy(const void *src_arg, void *dst_arg, size_t len)
1310{
1311 const struct mbuf *m;
1312 u_int count;
1313 u_char *dst;
1314
1315 m = src_arg;
1316 dst = dst_arg;
1317 while (len > 0) {
1318 if (m == NULL)
1319 panic("bpf_mcopy");
1320 count = min(m->m_len, len);
1321 bcopy(mtod(m, caddr_t), (caddr_t)dst, count);
1322 m = m->m_next;
1323 dst += count;
1324 len -= count;
1325 }
1326}
1327
1328int
1329bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction)
1330{
1331 return _bpf_mtap(arg, m, m, direction);
1332}
1333
/*
 * Run a packet past every listener on this attachment point and capture
 * it where the read filter matches.  ``mp'' supplies the packet-header
 * metadata (timestamp, flowid, priority); ``m'' is the chain the filter
 * and capture run on.  Executes inside an SMR read section, so it never
 * sleeps.  Returns nonzero if any matching listener asked for the
 * packet to be dropped (bd_fildrop != BPF_FILDROP_PASS).
 */
int
_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m,
    u_int direction)
{
	struct bpf_if *bp = (struct bpf_if *)arg;
	struct bpf_d *d;
	size_t pktlen, slen;
	const struct mbuf *m0;
	struct bpf_hdr tbh;
	int gothdr = 0;
	int drop = 0;

	if (m == NULL)
		return (0);

	if (bp == NULL)
		return (0);

	/* Total packet length across the whole chain. */
	pktlen = 0;
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		pktlen += m0->m_len;

	smr_read_enter();
	SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) {
		struct bpf_program_smr *bps;
		struct bpf_insn *fcode = NULL;

		atomic_inc_long(&d->bd_rcount);

		/* Skip listeners filtering out this direction. */
		if (ISSET(d->bd_dirfilt, direction))
			continue;

		/* No filter program means accept the whole packet. */
		bps = SMR_PTR_GET(&d->bd_rfilter);
		if (bps != NULL)
			fcode = bps->bps_bf.bf_insns;
		slen = bpf_mfilter(fcode, m, pktlen);

		if (slen == 0)
			continue;
		if (d->bd_fildrop != BPF_FILDROP_PASS)
			drop = 1;
		if (d->bd_fildrop != BPF_FILDROP_DROP) {
			if (!gothdr) {
				/*
				 * Build the capture header lazily, and
				 * only once for all matching listeners.
				 */
				struct timeval tv;
				memset(&tbh, 0, sizeof(tbh));

				if (ISSET(mp->m_flags, M_PKTHDR)) {
					tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx;
					tbh.bh_flowid = mp->m_pkthdr.ph_flowid;
					tbh.bh_flags = mp->m_pkthdr.pf.prio;
					if (ISSET(mp->m_pkthdr.csum_flags,
					    M_FLOWID))
						SET(tbh.bh_flags, BPF_F_FLOWID);
					tbh.bh_csumflags =
					    mp->m_pkthdr.csum_flags;

					m_microtime(mp, &tv);
				} else
					microtime(&tv);

				tbh.bh_tstamp.tv_sec = tv.tv_sec;
				tbh.bh_tstamp.tv_usec = tv.tv_usec;
				SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT);

				gothdr = 1;
			}

			mtx_enter(&d->bd_mtx);
			bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh);
			mtx_leave(&d->bd_mtx);
		}
	}
	smr_read_leave();

	return (drop);
}
1410
1411/*
1412 * Incoming linkage from device drivers, where a data buffer should be
1413 * prepended by an arbitrary header. In this situation we already have a
1414 * way of representing a chain of memory buffers, ie, mbufs, so reuse
1415 * the existing functionality by attaching the buffers to mbufs.
1416 *
1417 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a
1418 * struct m_hdr each for the header and data on the stack.
1419 */
1420int
1421bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen,
1422 const void *buf, unsigned int buflen, u_int direction)
1423{
1424 struct m_hdr mh, md;
1425 struct mbuf *m0 = NULL;
1426 struct mbuf **mp = &m0;
1427
1428 if (hdr != NULL) {
1429 mh.mh_flags = 0;
1430 mh.mh_next = NULL;
1431 mh.mh_len = hdrlen;
1432 mh.mh_data = (void *)hdr;
1433
1434 *mp = (struct mbuf *)&mh;
1435 mp = &mh.mh_next;
1436 }
1437
1438 if (buf != NULL) {
1439 md.mh_flags = 0;
1440 md.mh_next = NULL;
1441 md.mh_len = buflen;
1442 md.mh_data = (void *)buf;
1443
1444 *mp = (struct mbuf *)&md;
1445 }
1446
1447 return bpf_mtap(arg, m0, direction);
1448}
1449
1450/*
1451 * Incoming linkage from device drivers, where we have a mbuf chain
1452 * but need to prepend some arbitrary header from a linear buffer.
1453 *
1454 * Con up a minimal dummy header to pacify bpf. Allocate (only) a
1455 * struct m_hdr on the stack. This is safe as bpf only reads from the
1456 * fields in this header that we initialize, and will not try to free
1457 * it or keep a pointer to it.
1458 */
1459int
1460bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m,
1461 u_int direction)
1462{
1463 struct m_hdr mh;
1464 const struct mbuf *m0;
1465
1466 if (dlen > 0) {
1467 mh.mh_flags = 0;
1468 mh.mh_next = (struct mbuf *)m;
1469 mh.mh_len = dlen;
1470 mh.mh_data = (void *)data;
1471 m0 = (struct mbuf *)&mh;
1472 } else
1473 m0 = m;
1474
1475 return _bpf_mtap(arg, m, m0, direction);
1476}
1477
1478/*
1479 * Incoming linkage from device drivers, where we have a mbuf chain
1480 * but need to prepend the address family.
1481 *
1482 * Con up a minimal dummy header to pacify bpf. We allocate (only) a
1483 * struct m_hdr on the stack. This is safe as bpf only reads from the
1484 * fields in this header that we initialize, and will not try to free
1485 * it or keep a pointer to it.
1486 */
1487int
1488bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction)
1489{
1490 u_int32_t afh;
1491
1492 afh = htonl(af);
1493
1494 return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction);
1495}
1496
1497/*
1498 * Incoming linkage from device drivers, where we have a mbuf chain
1499 * but need to prepend a VLAN encapsulation header.
1500 *
1501 * Con up a minimal dummy header to pacify bpf. Allocate (only) a
1502 * struct m_hdr on the stack. This is safe as bpf only reads from the
1503 * fields in this header that we initialize, and will not try to free
1504 * it or keep a pointer to it.
1505 */
1506int
1507bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction)
1508{
1509#if NVLAN > 0
1510 struct ether_vlan_header evh;
1511 struct m_hdr mh, md;
1512
1513 if ((m->m_flags & M_VLANTAG) == 0)
1514#endif
1515 {
1516 return _bpf_mtap(arg, m, m, direction);
1517 }
1518
1519#if NVLAN > 0
1520 KASSERT(m->m_len >= ETHER_HDR_LEN);
1521
1522 memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN);
1523 evh.evl_proto = evh.evl_encap_proto;
1524 evh.evl_encap_proto = htons(ETHERTYPE_VLAN);
1525 evh.evl_tag = htons(m->m_pkthdr.ether_vtag);
1526
1527 mh.mh_flags = 0;
1528 mh.mh_data = (caddr_t)&evh;
1529 mh.mh_len = sizeof(evh);
1530 mh.mh_next = (struct mbuf *)&md;
1531
1532 md.mh_flags = 0;
1533 md.mh_data = m->m_data + ETHER_HDR_LEN;
1534 md.mh_len = m->m_len - ETHER_HDR_LEN;
1535 md.mh_next = m->m_next;
1536
1537 return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction);
1538#endif
1539}
1540
1541/*
1542 * Move the packet data from interface memory (pkt) into the
1543 * store buffer. Wake up listeners if needed.
1544 * "copy" is the routine called to do the actual data
1545 * transfer. bcopy is passed in to copy contiguous chunks, while
1546 * bpf_mcopy is passed in to copy mbuf chains. In the latter case,
1547 * pkt is really an mbuf.
1548 */
1549void
1550bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen,
1551 const struct bpf_hdr *tbh)
1552{
1553 struct bpf_hdr *bh;
1554 int totlen, curlen;
1555 int hdrlen, do_wakeup = 0;
1556
1557 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1558 if (d->bd_bif == NULL)
1559 return;
1560
1561 hdrlen = d->bd_bif->bif_hdrlen;
1562
1563 /*
1564 * Figure out how many bytes to move. If the packet is
1565 * greater or equal to the snapshot length, transfer that
1566 * much. Otherwise, transfer the whole packet (unless
1567 * we hit the buffer size limit).
1568 */
1569 totlen = hdrlen + min(snaplen, pktlen);
1570 if (totlen > d->bd_bufsize)
1571 totlen = d->bd_bufsize;
1572
1573 /*
1574 * Round up the end of the previous packet to the next longword.
1575 */
1576 curlen = BPF_WORDALIGN(d->bd_slen);
1577 if (curlen + totlen > d->bd_bufsize) {
1578 /*
1579 * This packet will overflow the storage buffer.
1580 * Rotate the buffers if we can, then wakeup any
1581 * pending reads.
1582 */
1583 if (d->bd_fbuf == NULL) {
1584 /*
1585 * We haven't completed the previous read yet,
1586 * so drop the packet.
1587 */
1588 ++d->bd_dcount;
1589 return;
1590 }
1591
1592 /* cancel pending wtime */
1593 if (timeout_del(&d->bd_wait_tmo))
1594 bpf_put(d);
1595
1596 ROTATE_BUFFERS(d);
1597 do_wakeup = 1;
1598 curlen = 0;
1599 }
1600
1601 /*
1602 * Append the bpf header.
1603 */
1604 bh = (struct bpf_hdr *)(d->bd_sbuf + curlen);
1605 *bh = *tbh;
1606 bh->bh_datalen = pktlen;
1607 bh->bh_hdrlen = hdrlen;
1608 bh->bh_caplen = totlen - hdrlen;
1609
1610 /*
1611 * Copy the packet data into the store buffer and update its length.
1612 */
1613 bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen);
1614 d->bd_slen = curlen + totlen;
1615
1616 switch (d->bd_wtout) {
1617 case 0:
1618 /*
1619 * Immediate mode is set. A packet arrived so any
1620 * reads should be woken up.
1621 */
1622 if (d->bd_state == BPF_S_IDLE)
1623 d->bd_state = BPF_S_DONE;
1624 do_wakeup = 1;
1625 break;
1626 case INFSLP:
1627 break;
1628 default:
1629 if (d->bd_state == BPF_S_IDLE) {
1630 d->bd_state = BPF_S_WAIT;
1631
1632 bpf_get(d);
1633 if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout))
1634 bpf_put(d);
1635 }
1636 break;
1637 }
1638
1639 if (do_wakeup)
1640 bpf_wakeup(d);
1641}
1642
1643/*
1644 * Initialize all nonzero fields of a descriptor.
1645 */
1646int
1647bpf_allocbufs(struct bpf_d *d)
1648{
1649 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1650
1651 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1652 if (d->bd_fbuf == NULL)
1653 return (ENOMEM);
1654
1655 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT);
1656 if (d->bd_sbuf == NULL) {
1657 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize);
1658 d->bd_fbuf = NULL;
1659 return (ENOMEM);
1660 }
1661
1662 d->bd_slen = 0;
1663 d->bd_hlen = 0;
1664
1665 return (0);
1666}
1667
1668void
1669bpf_prog_smr(void *bps_arg)
1670{
1671 struct bpf_program_smr *bps = bps_arg;
1672
1673 free(bps->bps_bf.bf_insns, M_DEVBUF,
1674 bps->bps_bf.bf_len * sizeof(struct bpf_insn));
1675 free(bps, M_DEVBUF, sizeof(struct bpf_program_smr));
1676}
1677
/*
 * SMR callback: release everything attached to a descriptor once no
 * lockless reader can still reference it.
 */
void
bpf_d_smr(void *smr)
{
	struct bpf_d *bd = smr;

	sigio_free(&bd->bd_sigio);
	/* All three capture buffers share bd_bufsize. */
	free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize);
	free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize);

	if (bd->bd_rfilter != NULL)
		bpf_prog_smr(bd->bd_rfilter);
	if (bd->bd_wfilter != NULL)
		bpf_prog_smr(bd->bd_wfilter);

	klist_free(&bd->bd_klist);
	free(bd, M_DEVBUF, sizeof(*bd));
}
1696
/* Take a reference on a descriptor. */
void
bpf_get(struct bpf_d *bd)
{
	refcnt_take(&bd->bd_refcnt);
}
1702
1703/*
1704 * Free buffers currently in use by a descriptor
1705 * when the reference count drops to zero.
1706 */
1707void
1708bpf_put(struct bpf_d *bd)
1709{
1710 if (refcnt_rele(&bd->bd_refcnt) == 0)
1711 return;
1712
1713 smr_call(&bd->bd_smr, bpf_d_smr, bd);
1714}
1715
1716void *
1717bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen)
1718{
1719 struct bpf_if *bp;
1720
1721 if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL)
1722 panic("bpfattach");
1723 SMR_SLIST_INIT(&bp->bif_dlist);
1724 bp->bif_driverp = (struct bpf_if **)bpfp;
1725 bp->bif_name = name;
1726 bp->bif_ifp = NULL;
1727 bp->bif_dlt = dlt;
1728
1729 TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next);
1730
1731 *bp->bif_driverp = NULL;
1732
1733 /*
1734 * Compute the length of the bpf header. This is not necessarily
1735 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1736 * that the network layer header begins on a longword boundary (for
1737 * performance reasons and to alleviate alignment restrictions).
1738 */
1739 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1740
1741 return (bp);
1742}
1743
1744void *
1745bpfxattach(caddr_t *driverp, const char *name, struct ifnet *ifp,
1746 u_int dlt, u_int hdrlen)
1747{
1748 struct bpf_if *bp;
1749
1750 bp = bpfsattach(driverp, name, dlt, hdrlen);
1751 bp->bif_ifp = ifp;
1752
1753 return (bp);
1754}
1755
/* Attach an interface's bpf tap, named after the interface itself. */
void
bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen)
{
	bpfxattach(driverp, ifp->if_xname, ifp, dlt, hdrlen);
}
1761
/* Detach an interface from its attached bpf device. */
void
bpfdetach(struct ifnet *ifp)
{
	struct bpf_if *bp, *nbp;

	KERNEL_ASSERT_LOCKED();

	/* Tear down every tap registered for this interface. */
	TAILQ_FOREACH_SAFE(bp, &bpf_iflist, bif_next, nbp) {
		if (bp->bif_ifp == ifp)
			bpfsdetach(bp);
	}
	ifp->if_bpf = NULL;
}
1776
/*
 * Tear down a single bpf tap: revoke every open descriptor attached
 * to it, then unlink and free the bpf_if itself.
 */
void
bpfsdetach(void *p)
{
	struct bpf_if *bp = p;
	struct bpf_d *bd;
	int maj;

	/* Locate the major number. */
	KERNEL_ASSERT_LOCKED();

	for (maj = 0; maj < nchrdev; maj++)
		if (cdevsw[maj].d_open == bpfopen)
			break;

	/* Revoke open vnodes and pending knotes for each descriptor. */
	while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) {
		/* Hold a reference across vdevgone/klist_invalidate. */
		bpf_get(bd);
		vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR);
		klist_invalidate(&bd->bd_klist);
		bpf_put(bd);
	}

	TAILQ_REMOVE(&bpf_iflist, bp, bif_next);

	free(bp, M_DEVBUF, sizeof(*bp));
}
1802
1803#ifndef SMALL_KERNEL
/* Handle the net.bpf sysctl subtree (leaf nodes only). */
int
bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case NET_BPF_BUFSIZE:
		/* Default buffer size, bounded above by the current max. */
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_bufsize, BPF_MINBUFSIZE,
		    atomic_load_int(&bpf_maxbufsize));
	case NET_BPF_MAXBUFSIZE:
		return sysctl_int_bounded(oldp, oldlenp, newp, newlen,
		    &bpf_maxbufsize, BPF_MINBUFSIZE, MALLOC_MAX);
	default:
		return (EOPNOTSUPP);
	}

	/* NOTREACHED */
}
1825#endif /* SMALL_KERNEL */
1826
1827struct bpf_d *
1828bpfilter_lookup(int unit)
1829{
1830 struct bpf_d *bd;
1831
1832 KERNEL_ASSERT_LOCKED();
1833
1834 LIST_FOREACH(bd, &bpf_d_list, bd_list)
1835 if (bd->bd_unit == unit)
1836 return (bd);
1837 return (NULL);
1838}
1839
1840/*
1841 * Get a list of available data link type of the interface.
1842 */
1843int
1844bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl)
1845{
1846 int n, error;
1847 struct bpf_if *bp;
1848 const char *name;
1849
1850 name = d->bd_bif->bif_name;
1851 n = 0;
1852 error = 0;
1853 TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
1854 if (strcmp(name, bp->bif_name) != 0)
1855 continue;
1856 if (bfl->bfl_list != NULL) {
1857 if (n >= bfl->bfl_len)
1858 return (ENOMEM);
1859 error = copyout(&bp->bif_dlt,
1860 bfl->bfl_list + n, sizeof(u_int));
1861 if (error)
1862 break;
1863 }
1864 n++;
1865 }
1866
1867 bfl->bfl_len = n;
1868 return (error);
1869}
1870
1871/*
1872 * Set the data link type of a BPF instance.
1873 */
1874int
1875bpf_setdlt(struct bpf_d *d, u_int dlt)
1876{
1877 const char *name;
1878 struct bpf_if *bp;
1879
1880 MUTEX_ASSERT_LOCKED(&d->bd_mtx);
1881 if (d->bd_bif->bif_dlt == dlt)
1882 return (0);
1883 name = d->bd_bif->bif_name;
1884 TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
1885 if (strcmp(name, bp->bif_name) != 0)
1886 continue;
1887 if (bp->bif_dlt == dlt)
1888 break;
1889 }
1890 if (bp == NULL)
1891 return (EINVAL);
1892 bpf_detachd(d);
1893 bpf_attachd(d, bp);
1894 bpf_resetd(d);
1895 return (0);
1896}
1897
1898u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *);
1899u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *);
1900u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *);
1901
1902int bpf_mbuf_copy(const struct mbuf *, u_int32_t,
1903 void *, u_int32_t);
1904
/* Load helpers used by _bpf_filter() to read packet bytes from mbufs. */
const struct bpf_ops bpf_mbuf_ops = {
	bpf_mbuf_ldw,
	bpf_mbuf_ldh,
	bpf_mbuf_ldb,
};
1910
/*
 * Copy "len" bytes starting at chain offset "off" out of mbuf chain
 * "m" into "buf".  Returns 0 on success or -1 if the chain is shorter
 * than off + len bytes.
 */
int
bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len)
{
	u_int8_t *cp = buf;
	u_int32_t count;

	/* Skip whole mbufs until "off" lands inside one. */
	while (off >= m->m_len) {
		off -= m->m_len;

		m = m->m_next;
		if (m == NULL)
			return (-1);
	}

	for (;;) {
		count = min(m->m_len - off, len);

		memcpy(cp, m->m_data + off, count);
		len -= count;

		if (len == 0)
			return (0);

		m = m->m_next;
		if (m == NULL)
			break;

		cp += count;
		off = 0;
	}

	/* Ran off the end of the chain before copying "len" bytes. */
	return (-1);
}
1944
1945u_int32_t
1946bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err)
1947{
1948 u_int32_t v;
1949
1950 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
1951 *err = 1;
1952 return (0);
1953 }
1954
1955 *err = 0;
1956 return ntohl(v);
1957}
1958
1959u_int32_t
1960bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err)
1961{
1962 u_int16_t v;
1963
1964 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) {
1965 *err = 1;
1966 return (0);
1967 }
1968
1969 *err = 0;
1970 return ntohs(v);
1971}
1972
1973u_int32_t
1974bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err)
1975{
1976 const struct mbuf *m = m0;
1977 u_int8_t v;
1978
1979 while (k >= m->m_len) {
1980 k -= m->m_len;
1981
1982 m = m->m_next;
1983 if (m == NULL) {
1984 *err = 1;
1985 return (0);
1986 }
1987 }
1988 v = m->m_data[k];
1989
1990 *err = 0;
1991 return v;
1992}
1993
/*
 * Run bpf program "pc" over mbuf chain "m"; "wirelen" is the original
 * packet length.  Returns the snapshot length accepted by the filter
 * (0 rejects the packet).
 */
u_int
bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen)
{
	return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen);
}