jcs's openbsd hax
openbsd
at jcs 1998 lines 42 kB view raw
1/* $OpenBSD: bpf.c,v 1.235 2025/11/13 10:53:25 deraadt Exp $ */ 2/* $NetBSD: bpf.c,v 1.33 1997/02/21 23:59:35 thorpej Exp $ */ 3 4/* 5 * Copyright (c) 1990, 1991, 1993 6 * The Regents of the University of California. All rights reserved. 7 * Copyright (c) 2010, 2014 Henning Brauer <henning@openbsd.org> 8 * 9 * This code is derived from the Stanford/CMU enet packet filter, 10 * (net/enet.c) distributed as part of 4.3BSD, and code contributed 11 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence 12 * Berkeley Laboratory. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)bpf.c 8.2 (Berkeley) 3/28/94 39 */ 40 41#include <sys/param.h> 42#include <sys/systm.h> 43#include <sys/mbuf.h> 44#include <sys/timeout.h> 45#include <sys/signalvar.h> 46#include <sys/ioctl.h> 47#include <sys/conf.h> 48#include <sys/vnode.h> 49#include <sys/socket.h> 50#include <sys/sysctl.h> 51#include <sys/atomic.h> 52#include <sys/event.h> 53#include <sys/mutex.h> 54#include <sys/refcnt.h> 55#include <sys/smr.h> 56#include <sys/specdev.h> 57#include <sys/sigio.h> 58#include <sys/task.h> 59#include <sys/time.h> 60 61#include <net/if.h> 62#include <net/bpf.h> 63#include <net/bpfdesc.h> 64 65#include <netinet/in.h> 66#include <netinet/if_ether.h> 67 68#include "vlan.h" 69 70#define BPF_BUFSIZE 32768 71 72#define BPF_S_IDLE 0 73#define BPF_S_WAIT 1 74#define BPF_S_DONE 2 75 76#define PRINET 26 /* interruptible */ 77 78/* 79 * Locks used to protect data: 80 * a atomic 81 */ 82 83/* 84 * The default read buffer size is patchable. 85 */ 86int bpf_bufsize = BPF_BUFSIZE; /* [a] */ 87int bpf_maxbufsize = BPF_MAXBUFSIZE; /* [a] */ 88 89/* 90 * bpf_iflist is the list of interfaces; each corresponds to an ifnet 91 * bpf_d_list is the list of descriptors 92 */ 93TAILQ_HEAD(, bpf_if) bpf_iflist = TAILQ_HEAD_INITIALIZER(bpf_iflist); 94LIST_HEAD(, bpf_d) bpf_d_list = LIST_HEAD_INITIALIZER(bpf_d_list); 95 96int bpf_allocbufs(struct bpf_d *); 97void bpf_ifname(struct bpf_if*, struct ifreq *); 98void bpf_mcopy(const void *, void *, size_t); 99int bpf_movein(struct uio *, struct bpf_d *, struct mbuf **, 100 struct sockaddr *); 101int bpf_setif(struct bpf_d *, struct ifreq *); 102int bpfkqfilter(dev_t, struct knote *); 103void bpf_wakeup(struct bpf_d *); 104void bpf_wakeup_cb(void *); 105void bpf_wait_cb(void *); 106int _bpf_mtap(caddr_t, const struct mbuf *, const struct mbuf *, u_int); 107void bpf_catchpacket(struct bpf_d *, u_char *, size_t, size_t, 108 const struct bpf_hdr *); 109int bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *); 110int bpf_setdlt(struct bpf_d *, u_int); 111 112void filt_bpfrdetach(struct knote *); 113int filt_bpfread(struct knote *, long); 114int filt_bpfreadmodify(struct kevent *, struct knote *); 115int filt_bpfreadprocess(struct knote *, struct kevent *); 116 117struct bpf_d *bpfilter_lookup(int); 118 119/* 120 * Called holding ``bd_mtx''. 121 */ 122void bpf_attachd(struct bpf_d *, struct bpf_if *); 123void bpf_detachd(struct bpf_d *); 124void bpf_resetd(struct bpf_d *); 125 126void bpf_prog_smr(void *); 127void bpf_d_smr(void *); 128 129/* 130 * Reference count access to descriptor buffers 131 */ 132void bpf_get(struct bpf_d *); 133void bpf_put(struct bpf_d *); 134 135int 136bpf_movein(struct uio *uio, struct bpf_d *d, struct mbuf **mp, 137 struct sockaddr *sockp) 138{ 139 struct bpf_program_smr *bps; 140 struct bpf_insn *fcode = NULL; 141 struct mbuf *m; 142 struct m_tag *mtag; 143 int error; 144 u_int hlen, alen, mlen; 145 u_int len; 146 u_int linktype; 147 u_int slen; 148 149 /* 150 * Build a sockaddr based on the data link layer type. 151 * We do this at this level because the ethernet header 152 * is copied directly into the data field of the sockaddr. 153 * In the case of SLIP, there is no header and the packet 154 * is forwarded as is. 155 * Also, we are careful to leave room at the front of the mbuf 156 * for the link level header. 157 */ 158 linktype = d->bd_bif->bif_dlt; 159 switch (linktype) { 160 161 case DLT_SLIP: 162 sockp->sa_family = AF_INET; 163 hlen = 0; 164 break; 165 166 case DLT_PPP: 167 sockp->sa_family = AF_UNSPEC; 168 hlen = 0; 169 break; 170 171 case DLT_EN10MB: 172 sockp->sa_family = AF_UNSPEC; 173 /* XXX Would MAXLINKHDR be better? */ 174 hlen = ETHER_HDR_LEN; 175 break; 176 177 case DLT_IEEE802_11: 178 case DLT_IEEE802_11_RADIO: 179 sockp->sa_family = AF_UNSPEC; 180 hlen = 0; 181 break; 182 183 case DLT_RAW: 184 case DLT_NULL: 185 sockp->sa_family = AF_UNSPEC; 186 hlen = 0; 187 break; 188 189 case DLT_LOOP: 190 sockp->sa_family = AF_UNSPEC; 191 hlen = sizeof(u_int32_t); 192 break; 193 194 default: 195 return (EIO); 196 } 197 198 if (uio->uio_resid > MAXMCLBYTES) 199 return (EMSGSIZE); 200 len = uio->uio_resid; 201 if (len < hlen) 202 return (EINVAL); 203 204 /* 205 * Get the length of the payload so we can align it properly. 206 */ 207 alen = len - hlen; 208 209 /* 210 * Allocate enough space for headers and the aligned payload. 211 */ 212 mlen = max(max_linkhdr, hlen) + roundup(alen, sizeof(long)); 213 if (mlen > MAXMCLBYTES) 214 return (EMSGSIZE); 215 216 MGETHDR(m, M_WAIT, MT_DATA); 217 if (mlen > MHLEN) { 218 MCLGETL(m, M_WAIT, mlen); 219 if ((m->m_flags & M_EXT) == 0) { 220 error = ENOBUFS; 221 goto bad; 222 } 223 } 224 225 m_align(m, alen); /* Align the payload. */ 226 m->m_data -= hlen; 227 228 m->m_pkthdr.ph_ifidx = 0; 229 m->m_pkthdr.len = len; 230 m->m_len = len; 231 232 error = uiomove(mtod(m, caddr_t), len, uio); 233 if (error) 234 goto bad; 235 236 smr_read_enter(); 237 bps = SMR_PTR_GET(&d->bd_wfilter); 238 if (bps != NULL) 239 fcode = bps->bps_bf.bf_insns; 240 slen = bpf_filter(fcode, mtod(m, u_char *), len, len); 241 smr_read_leave(); 242 243 if (slen < len) { 244 error = EPERM; 245 goto bad; 246 } 247 248 /* 249 * Make room for link header, and copy it to sockaddr 250 */ 251 if (hlen != 0) { 252 if (linktype == DLT_LOOP) { 253 u_int32_t af; 254 255 /* the link header indicates the address family */ 256 KASSERT(hlen == sizeof(u_int32_t)); 257 memcpy(&af, m->m_data, hlen); 258 sockp->sa_family = ntohl(af); 259 } else 260 memcpy(sockp->sa_data, m->m_data, hlen); 261 262 m->m_pkthdr.len -= hlen; 263 m->m_len -= hlen; 264 m->m_data += hlen; 265 } 266 267 /* 268 * Prepend the data link type as a mbuf tag 269 */ 270 mtag = m_tag_get(PACKET_TAG_DLT, sizeof(u_int), M_WAIT); 271 *(u_int *)(mtag + 1) = linktype; 272 m_tag_prepend(m, mtag); 273 274 *mp = m; 275 return (0); 276 bad: 277 m_freem(m); 278 return (error); 279} 280 281/* 282 * Attach file to the bpf interface, i.e. make d listen on bp. 283 */ 284void 285bpf_attachd(struct bpf_d *d, struct bpf_if *bp) 286{ 287 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 288 289 /* 290 * Point d at bp, and add d to the interface's list of listeners. 291 * Finally, point the driver's bpf cookie at the interface so 292 * it will divert packets to bpf. 293 */ 294 295 d->bd_bif = bp; 296 297 KERNEL_ASSERT_LOCKED(); 298 SMR_SLIST_INSERT_HEAD_LOCKED(&bp->bif_dlist, d, bd_next); 299 300 *bp->bif_driverp = bp; 301} 302 303/* 304 * Detach a file from its interface. 305 */ 306void 307bpf_detachd(struct bpf_d *d) 308{ 309 struct bpf_if *bp; 310 311 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 312 313 bp = d->bd_bif; 314 /* Not attached. */ 315 if (bp == NULL) 316 return; 317 318 /* Remove ``d'' from the interface's descriptor list. */ 319 KERNEL_ASSERT_LOCKED(); 320 SMR_SLIST_REMOVE_LOCKED(&bp->bif_dlist, d, bpf_d, bd_next); 321 322 if (SMR_SLIST_EMPTY_LOCKED(&bp->bif_dlist)) { 323 /* 324 * Let the driver know that there are no more listeners. 325 */ 326 *bp->bif_driverp = NULL; 327 } 328 329 d->bd_bif = NULL; 330 331 /* 332 * Check if this descriptor had requested promiscuous mode. 333 * If so, turn it off. 334 */ 335 if (d->bd_promisc) { 336 int error; 337 338 KASSERT(bp->bif_ifp != NULL); 339 340 d->bd_promisc = 0; 341 342 bpf_get(d); 343 mtx_leave(&d->bd_mtx); 344 NET_LOCK(); 345 error = ifpromisc(bp->bif_ifp, 0); 346 NET_UNLOCK(); 347 mtx_enter(&d->bd_mtx); 348 bpf_put(d); 349 350 if (error && !(error == EINVAL || error == ENODEV || 351 error == ENXIO)) 352 /* 353 * Something is really wrong if we were able to put 354 * the driver into promiscuous mode, but can't 355 * take it out. 356 */ 357 panic("bpf: ifpromisc failed"); 358 } 359} 360 361void 362bpfilterattach(int n) 363{ 364} 365 366/* 367 * Open ethernet device. Returns ENXIO for illegal minor device number, 368 * EBUSY if file is open by another process. 369 */ 370int 371bpfopen(dev_t dev, int flag, int mode, struct proc *p) 372{ 373 struct bpf_d *bd; 374 int unit = minor(dev); 375 376 if (unit & ((1 << CLONE_SHIFT) - 1)) 377 return (ENXIO); 378 379 KASSERT(bpfilter_lookup(unit) == NULL); 380 381 /* create on demand */ 382 if ((bd = malloc(sizeof(*bd), M_DEVBUF, M_NOWAIT|M_ZERO)) == NULL) 383 return (EBUSY); 384 385 /* Mark "free" and do most initialization. */ 386 bd->bd_unit = unit; 387 bd->bd_bufsize = atomic_load_int(&bpf_bufsize); 388 bd->bd_sig = SIGIO; 389 mtx_init(&bd->bd_mtx, IPL_NET); 390 task_set(&bd->bd_wake_task, bpf_wakeup_cb, bd); 391 timeout_set(&bd->bd_wait_tmo, bpf_wait_cb, bd); 392 smr_init(&bd->bd_smr); 393 sigio_init(&bd->bd_sigio); 394 klist_init_mutex(&bd->bd_klist, &bd->bd_mtx); 395 396 bd->bd_rtout = 0; /* no timeout by default */ 397 bd->bd_wtout = INFSLP; /* wait for the buffer to fill by default */ 398 399 refcnt_init(&bd->bd_refcnt); 400 LIST_INSERT_HEAD(&bpf_d_list, bd, bd_list); 401 402 return (0); 403} 404 405/* 406 * Close the descriptor by detaching it from its interface, 407 * deallocating its buffers, and marking it free. 408 */ 409int 410bpfclose(dev_t dev, int flag, int mode, struct proc *p) 411{ 412 struct bpf_d *d; 413 414 d = bpfilter_lookup(minor(dev)); 415 mtx_enter(&d->bd_mtx); 416 bpf_detachd(d); 417 bpf_wakeup(d); 418 LIST_REMOVE(d, bd_list); 419 mtx_leave(&d->bd_mtx); 420 bpf_put(d); 421 422 return (0); 423} 424 425/* 426 * Rotate the packet buffers in descriptor d. Move the store buffer 427 * into the hold slot, and the free buffer into the store slot. 428 * Zero the length of the new store buffer. 429 */ 430#define ROTATE_BUFFERS(d) \ 431 KASSERT(d->bd_in_uiomove == 0); \ 432 MUTEX_ASSERT_LOCKED(&d->bd_mtx); \ 433 (d)->bd_hbuf = (d)->bd_sbuf; \ 434 (d)->bd_hlen = (d)->bd_slen; \ 435 (d)->bd_sbuf = (d)->bd_fbuf; \ 436 (d)->bd_state = BPF_S_IDLE; \ 437 (d)->bd_slen = 0; \ 438 (d)->bd_fbuf = NULL; 439 440/* 441 * bpfread - read next chunk of packets from buffers 442 */ 443int 444bpfread(dev_t dev, struct uio *uio, int ioflag) 445{ 446 uint64_t end, now; 447 struct bpf_d *d; 448 caddr_t hbuf; 449 int error, hlen; 450 451 KERNEL_ASSERT_LOCKED(); 452 453 d = bpfilter_lookup(minor(dev)); 454 if (d->bd_bif == NULL) 455 return (ENXIO); 456 457 bpf_get(d); 458 mtx_enter(&d->bd_mtx); 459 460 /* 461 * Restrict application to use a buffer the same size as 462 * as kernel buffers. 463 */ 464 if (uio->uio_resid != d->bd_bufsize) { 465 error = EINVAL; 466 goto out; 467 } 468 469 /* 470 * If there's a timeout, mark when the read should end. 471 */ 472 if (d->bd_rtout != 0) { 473 now = nsecuptime(); 474 end = now + d->bd_rtout; 475 if (end < now) 476 end = UINT64_MAX; 477 } 478 479 /* 480 * If the hold buffer is empty, then do a timed sleep, which 481 * ends when the timeout expires or when enough packets 482 * have arrived to fill the store buffer. 483 */ 484 while (d->bd_hbuf == NULL) { 485 if (d->bd_bif == NULL) { 486 /* interface is gone */ 487 if (d->bd_slen == 0) { 488 error = EIO; 489 goto out; 490 } 491 ROTATE_BUFFERS(d); 492 break; 493 } 494 if (d->bd_state == BPF_S_DONE) { 495 /* 496 * A packet(s) either arrived since the previous 497 * read or arrived while we were asleep. 498 * Rotate the buffers and return what's here. 499 */ 500 ROTATE_BUFFERS(d); 501 break; 502 } 503 if (ISSET(ioflag, IO_NDELAY)) { 504 /* User requested non-blocking I/O */ 505 error = EWOULDBLOCK; 506 } else if (d->bd_rtout == 0) { 507 /* No read timeout set. */ 508 d->bd_nreaders++; 509 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 510 "bpf", INFSLP); 511 d->bd_nreaders--; 512 } else if ((now = nsecuptime()) < end) { 513 /* Read timeout has not expired yet. */ 514 d->bd_nreaders++; 515 error = msleep_nsec(d, &d->bd_mtx, PRINET|PCATCH, 516 "bpf", end - now); 517 d->bd_nreaders--; 518 } else { 519 /* Read timeout has expired. */ 520 error = EWOULDBLOCK; 521 } 522 if (error == EINTR || error == ERESTART) 523 goto out; 524 if (error == EWOULDBLOCK) { 525 /* 526 * On a timeout, return what's in the buffer, 527 * which may be nothing. If there is something 528 * in the store buffer, we can rotate the buffers. 529 */ 530 if (d->bd_hbuf != NULL) 531 /* 532 * We filled up the buffer in between 533 * getting the timeout and arriving 534 * here, so we don't need to rotate. 535 */ 536 break; 537 538 if (d->bd_slen == 0) { 539 error = 0; 540 goto out; 541 } 542 ROTATE_BUFFERS(d); 543 break; 544 } 545 } 546 /* 547 * At this point, we know we have something in the hold slot. 548 */ 549 hbuf = d->bd_hbuf; 550 hlen = d->bd_hlen; 551 d->bd_hbuf = NULL; 552 d->bd_hlen = 0; 553 d->bd_fbuf = NULL; 554 d->bd_in_uiomove = 1; 555 556 /* 557 * Move data from hold buffer into user space. 558 * We know the entire buffer is transferred since 559 * we checked above that the read buffer is bpf_bufsize bytes. 560 */ 561 mtx_leave(&d->bd_mtx); 562 error = uiomove(hbuf, hlen, uio); 563 mtx_enter(&d->bd_mtx); 564 565 /* Ensure that bpf_resetd() or ROTATE_BUFFERS() haven't been called. */ 566 KASSERT(d->bd_fbuf == NULL); 567 KASSERT(d->bd_hbuf == NULL); 568 d->bd_fbuf = hbuf; 569 d->bd_in_uiomove = 0; 570out: 571 mtx_leave(&d->bd_mtx); 572 bpf_put(d); 573 574 return (error); 575} 576 577/* 578 * If there are processes sleeping on this descriptor, wake them up. 579 */ 580void 581bpf_wakeup(struct bpf_d *d) 582{ 583 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 584 585 if (d->bd_nreaders) 586 wakeup(d); 587 588 knote_locked(&d->bd_klist, 0); 589 590 /* 591 * As long as pgsigio() needs to be protected 592 * by the KERNEL_LOCK() we have to delay the wakeup to 593 * another context to keep the hot path KERNEL_LOCK()-free. 594 */ 595 if (d->bd_async && d->bd_sig) { 596 bpf_get(d); 597 if (!task_add(systq, &d->bd_wake_task)) 598 bpf_put(d); 599 } 600} 601 602void 603bpf_wakeup_cb(void *xd) 604{ 605 struct bpf_d *d = xd; 606 607 if (d->bd_async && d->bd_sig) 608 pgsigio(&d->bd_sigio, d->bd_sig, 0); 609 610 bpf_put(d); 611} 612 613void 614bpf_wait_cb(void *xd) 615{ 616 struct bpf_d *d = xd; 617 618 mtx_enter(&d->bd_mtx); 619 if (d->bd_state == BPF_S_WAIT) { 620 d->bd_state = BPF_S_DONE; 621 bpf_wakeup(d); 622 } 623 mtx_leave(&d->bd_mtx); 624 625 bpf_put(d); 626} 627 628int 629bpfwrite(dev_t dev, struct uio *uio, int ioflag) 630{ 631 struct bpf_d *d; 632 struct ifnet *ifp; 633 struct mbuf *m; 634 int error; 635 struct sockaddr_storage dst; 636 637 KERNEL_ASSERT_LOCKED(); 638 639 d = bpfilter_lookup(minor(dev)); 640 if (d->bd_bif == NULL) 641 return (ENXIO); 642 643 bpf_get(d); 644 ifp = d->bd_bif->bif_ifp; 645 646 if (ifp == NULL || (ifp->if_flags & IFF_UP) == 0) { 647 error = ENETDOWN; 648 goto out; 649 } 650 651 if (uio->uio_resid == 0) { 652 error = 0; 653 goto out; 654 } 655 656 error = bpf_movein(uio, d, &m, sstosa(&dst)); 657 if (error) 658 goto out; 659 660 if (m->m_pkthdr.len > ifp->if_mtu) { 661 m_freem(m); 662 error = EMSGSIZE; 663 goto out; 664 } 665 666 m->m_pkthdr.ph_rtableid = ifp->if_rdomain; 667 m->m_pkthdr.pf.prio = ifp->if_llprio; 668 669 if (d->bd_hdrcmplt && dst.ss_family == AF_UNSPEC) 670 dst.ss_family = pseudo_AF_HDRCMPLT; 671 672 NET_LOCK(); 673 error = ifp->if_output(ifp, m, sstosa(&dst), NULL); 674 NET_UNLOCK(); 675 676out: 677 bpf_put(d); 678 return (error); 679} 680 681/* 682 * Reset a descriptor by flushing its packet buffer and clearing the 683 * receive and drop counts. 684 */ 685void 686bpf_resetd(struct bpf_d *d) 687{ 688 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 689 KASSERT(d->bd_in_uiomove == 0); 690 691 if (timeout_del(&d->bd_wait_tmo)) 692 bpf_put(d); 693 694 if (d->bd_hbuf != NULL) { 695 /* Free the hold buffer. */ 696 d->bd_fbuf = d->bd_hbuf; 697 d->bd_hbuf = NULL; 698 } 699 d->bd_state = BPF_S_IDLE; 700 d->bd_slen = 0; 701 d->bd_hlen = 0; 702 d->bd_rcount = 0; 703 d->bd_dcount = 0; 704} 705 706static int 707bpf_set_wtout(struct bpf_d *d, uint64_t wtout) 708{ 709 mtx_enter(&d->bd_mtx); 710 d->bd_wtout = wtout; 711 mtx_leave(&d->bd_mtx); 712 713 return (0); 714} 715 716static int 717bpf_set_wtimeout(struct bpf_d *d, const struct timeval *tv) 718{ 719 uint64_t nsec; 720 721 if (tv->tv_sec < 0 || !timerisvalid(tv)) 722 return (EINVAL); 723 724 nsec = TIMEVAL_TO_NSEC(tv); 725 if (nsec > SEC_TO_NSEC(300)) 726 return (EINVAL); 727 if (nsec > MAXTSLP) 728 return (EOVERFLOW); 729 730 return (bpf_set_wtout(d, nsec)); 731} 732 733static int 734bpf_get_wtimeout(struct bpf_d *d, struct timeval *tv) 735{ 736 uint64_t nsec; 737 738 mtx_enter(&d->bd_mtx); 739 nsec = d->bd_wtout; 740 mtx_leave(&d->bd_mtx); 741 742 if (nsec == INFSLP) 743 return (ENXIO); 744 745 memset(tv, 0, sizeof(*tv)); 746 NSEC_TO_TIMEVAL(nsec, tv); 747 748 return (0); 749} 750 751/* 752 * FIONREAD Check for read packet available. 753 * BIOCGBLEN Get buffer len [for read()]. 754 * BIOCSETF Set read filter. 755 * BIOCSETFNR Set read filter without resetting descriptor. 756 * BIOCFLUSH Flush read packet buffer. 757 * BIOCPROMISC Put interface into promiscuous mode. 758 * BIOCGDLTLIST Get supported link layer types. 759 * BIOCGDLT Get link layer type. 760 * BIOCSDLT Set link layer type. 761 * BIOCGETIF Get interface name. 762 * BIOCSETIF Set interface. 763 * BIOCSRTIMEOUT Set read timeout. 764 * BIOCGRTIMEOUT Get read timeout. 765 * BIOCSWTIMEOUT Set wait timeout. 766 * BIOCGWTIMEOUT Get wait timeout. 767 * BIOCDWTIMEOUT Del wait timeout. 768 * BIOCGSTATS Get packet stats. 769 * BIOCIMMEDIATE Set immediate mode. 770 * BIOCVERSION Get filter language version. 771 * BIOCGHDRCMPLT Get "header already complete" flag 772 * BIOCSHDRCMPLT Set "header already complete" flag 773 */ 774int 775bpfioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) 776{ 777 struct bpf_d *d; 778 int error = 0; 779 780 d = bpfilter_lookup(minor(dev)); 781 if (d->bd_locked) { 782 /* list of allowed ioctls when locked */ 783 switch (cmd) { 784 case BIOCGBLEN: 785 case BIOCFLUSH: 786 case BIOCGDLT: 787 case BIOCGDLTLIST: 788 case BIOCGETIF: 789 case BIOCGRTIMEOUT: 790 case BIOCGWTIMEOUT: 791 case BIOCGSTATS: 792 case BIOCVERSION: 793 case BIOCGRSIG: 794 case BIOCGHDRCMPLT: 795 case FIONREAD: 796 case BIOCLOCK: 797 case BIOCSRTIMEOUT: 798 case BIOCSWTIMEOUT: 799 case BIOCDWTIMEOUT: 800 case BIOCIMMEDIATE: 801 case TIOCGPGRP: 802 case BIOCGDIRFILT: 803 break; 804 default: 805 return (EPERM); 806 } 807 } 808 809 bpf_get(d); 810 811 switch (cmd) { 812 default: 813 error = EINVAL; 814 break; 815 816 /* 817 * Check for read packet available. 818 */ 819 case FIONREAD: 820 { 821 int n; 822 823 mtx_enter(&d->bd_mtx); 824 n = d->bd_slen; 825 if (d->bd_hbuf != NULL) 826 n += d->bd_hlen; 827 mtx_leave(&d->bd_mtx); 828 829 *(int *)addr = n; 830 break; 831 } 832 833 /* 834 * Get buffer len [for read()]. 835 */ 836 case BIOCGBLEN: 837 *(u_int *)addr = d->bd_bufsize; 838 break; 839 840 /* 841 * Set buffer length. 842 */ 843 case BIOCSBLEN: 844 if (d->bd_bif != NULL) 845 error = EINVAL; 846 else { 847 u_int size = *(u_int *)addr; 848 int bpf_maxbufsize_local = 849 atomic_load_int(&bpf_maxbufsize); 850 851 if (size > bpf_maxbufsize_local) 852 *(u_int *)addr = size = bpf_maxbufsize_local; 853 else if (size < BPF_MINBUFSIZE) 854 *(u_int *)addr = size = BPF_MINBUFSIZE; 855 mtx_enter(&d->bd_mtx); 856 d->bd_bufsize = size; 857 mtx_leave(&d->bd_mtx); 858 } 859 break; 860 861 /* 862 * Set link layer read/write filter. 863 */ 864 case BIOCSETF: 865 case BIOCSETFNR: 866 case BIOCSETWF: 867 error = bpf_setf(d, (struct bpf_program *)addr, cmd); 868 break; 869 870 /* 871 * Flush read packet buffer. 872 */ 873 case BIOCFLUSH: 874 mtx_enter(&d->bd_mtx); 875 bpf_resetd(d); 876 mtx_leave(&d->bd_mtx); 877 break; 878 879 /* 880 * Put interface into promiscuous mode. 881 */ 882 case BIOCPROMISC: 883 if (d->bd_bif == NULL) { 884 /* 885 * No interface attached yet. 886 */ 887 error = EINVAL; 888 } else if (d->bd_bif->bif_ifp != NULL) { 889 if (d->bd_promisc == 0) { 890 MUTEX_ASSERT_UNLOCKED(&d->bd_mtx); 891 NET_LOCK(); 892 error = ifpromisc(d->bd_bif->bif_ifp, 1); 893 NET_UNLOCK(); 894 if (error == 0) 895 d->bd_promisc = 1; 896 } 897 } 898 break; 899 900 /* 901 * Get a list of supported device parameters. 902 */ 903 case BIOCGDLTLIST: 904 if (d->bd_bif == NULL) 905 error = EINVAL; 906 else 907 error = bpf_getdltlist(d, (struct bpf_dltlist *)addr); 908 break; 909 910 /* 911 * Get device parameters. 912 */ 913 case BIOCGDLT: 914 if (d->bd_bif == NULL) 915 error = EINVAL; 916 else 917 *(u_int *)addr = d->bd_bif->bif_dlt; 918 break; 919 920 /* 921 * Set device parameters. 922 */ 923 case BIOCSDLT: 924 if (d->bd_bif == NULL) 925 error = EINVAL; 926 else { 927 mtx_enter(&d->bd_mtx); 928 error = bpf_setdlt(d, *(u_int *)addr); 929 mtx_leave(&d->bd_mtx); 930 } 931 break; 932 933 /* 934 * Set interface name. 935 */ 936 case BIOCGETIF: 937 if (d->bd_bif == NULL) 938 error = EINVAL; 939 else 940 bpf_ifname(d->bd_bif, (struct ifreq *)addr); 941 break; 942 943 /* 944 * Set interface. 945 */ 946 case BIOCSETIF: 947 error = bpf_setif(d, (struct ifreq *)addr); 948 break; 949 950 /* 951 * Set read timeout. 952 */ 953 case BIOCSRTIMEOUT: 954 { 955 struct timeval *tv = (struct timeval *)addr; 956 uint64_t rtout; 957 958 if (tv->tv_sec < 0 || !timerisvalid(tv)) { 959 error = EINVAL; 960 break; 961 } 962 rtout = TIMEVAL_TO_NSEC(tv); 963 if (rtout > MAXTSLP) { 964 error = EOVERFLOW; 965 break; 966 } 967 mtx_enter(&d->bd_mtx); 968 d->bd_rtout = rtout; 969 mtx_leave(&d->bd_mtx); 970 break; 971 } 972 973 /* 974 * Get read timeout. 975 */ 976 case BIOCGRTIMEOUT: 977 { 978 struct timeval *tv = (struct timeval *)addr; 979 980 memset(tv, 0, sizeof(*tv)); 981 mtx_enter(&d->bd_mtx); 982 NSEC_TO_TIMEVAL(d->bd_rtout, tv); 983 mtx_leave(&d->bd_mtx); 984 break; 985 } 986 987 /* 988 * Get packet stats. 989 */ 990 case BIOCGSTATS: 991 { 992 struct bpf_stat *bs = (struct bpf_stat *)addr; 993 994 bs->bs_recv = d->bd_rcount; 995 bs->bs_drop = d->bd_dcount; 996 break; 997 } 998 999 /* 1000 * Set immediate mode. 1001 */ 1002 case BIOCIMMEDIATE: 1003 error = bpf_set_wtout(d, *(int *)addr ? 0 : INFSLP); 1004 break; 1005 1006 /* 1007 * Wait timeout. 1008 */ 1009 case BIOCSWTIMEOUT: 1010 error = bpf_set_wtimeout(d, (const struct timeval *)addr); 1011 break; 1012 case BIOCGWTIMEOUT: 1013 error = bpf_get_wtimeout(d, (struct timeval *)addr); 1014 break; 1015 case BIOCDWTIMEOUT: 1016 error = bpf_set_wtout(d, INFSLP); 1017 break; 1018 1019 case BIOCVERSION: 1020 { 1021 struct bpf_version *bv = (struct bpf_version *)addr; 1022 1023 bv->bv_major = BPF_MAJOR_VERSION; 1024 bv->bv_minor = BPF_MINOR_VERSION; 1025 break; 1026 } 1027 1028 case BIOCGHDRCMPLT: /* get "header already complete" flag */ 1029 *(u_int *)addr = d->bd_hdrcmplt; 1030 break; 1031 1032 case BIOCSHDRCMPLT: /* set "header already complete" flag */ 1033 d->bd_hdrcmplt = *(u_int *)addr ? 1 : 0; 1034 break; 1035 1036 case BIOCLOCK: /* set "locked" flag (no reset) */ 1037 d->bd_locked = 1; 1038 break; 1039 1040 case BIOCGFILDROP: /* get "filter-drop" flag */ 1041 *(u_int *)addr = d->bd_fildrop; 1042 break; 1043 1044 case BIOCSFILDROP: { /* set "filter-drop" flag */ 1045 unsigned int fildrop = *(u_int *)addr; 1046 switch (fildrop) { 1047 case BPF_FILDROP_PASS: 1048 case BPF_FILDROP_CAPTURE: 1049 case BPF_FILDROP_DROP: 1050 d->bd_fildrop = fildrop; 1051 break; 1052 default: 1053 error = EINVAL; 1054 break; 1055 } 1056 break; 1057 } 1058 1059 case BIOCGDIRFILT: /* get direction filter */ 1060 *(u_int *)addr = d->bd_dirfilt; 1061 break; 1062 1063 case BIOCSDIRFILT: /* set direction filter */ 1064 d->bd_dirfilt = (*(u_int *)addr) & 1065 (BPF_DIRECTION_IN|BPF_DIRECTION_OUT); 1066 break; 1067 1068 case FIOASYNC: /* Send signal on receive packets */ 1069 d->bd_async = *(int *)addr; 1070 break; 1071 1072 case FIOSETOWN: /* Process or group to send signals to */ 1073 case TIOCSPGRP: 1074 error = sigio_setown(&d->bd_sigio, cmd, addr); 1075 break; 1076 1077 case FIOGETOWN: 1078 case TIOCGPGRP: 1079 sigio_getown(&d->bd_sigio, cmd, addr); 1080 break; 1081 1082 case BIOCSRSIG: /* Set receive signal */ 1083 { 1084 u_int sig; 1085 1086 sig = *(u_int *)addr; 1087 1088 if (sig >= NSIG) 1089 error = EINVAL; 1090 else 1091 d->bd_sig = sig; 1092 break; 1093 } 1094 case BIOCGRSIG: 1095 *(u_int *)addr = d->bd_sig; 1096 break; 1097 } 1098 1099 bpf_put(d); 1100 return (error); 1101} 1102 1103/* 1104 * Set d's packet filter program to fp. If this file already has a filter, 1105 * free it and replace it. Returns EINVAL for bogus requests. 1106 */ 1107int 1108bpf_setf(struct bpf_d *d, struct bpf_program *fp, u_long cmd) 1109{ 1110 struct bpf_program_smr *bps, *old_bps; 1111 struct bpf_insn *fcode; 1112 u_int flen, size; 1113 1114 KERNEL_ASSERT_LOCKED(); 1115 1116 if (fp->bf_insns == 0) { 1117 if (fp->bf_len != 0) 1118 return (EINVAL); 1119 bps = NULL; 1120 } else { 1121 flen = fp->bf_len; 1122 if (flen > BPF_MAXINSNS) 1123 return (EINVAL); 1124 1125 fcode = mallocarray(flen, sizeof(*fp->bf_insns), M_DEVBUF, 1126 M_WAITOK | M_CANFAIL); 1127 if (fcode == NULL) 1128 return (ENOMEM); 1129 1130 size = flen * sizeof(*fp->bf_insns); 1131 if (copyin(fp->bf_insns, fcode, size) != 0 || 1132 bpf_validate(fcode, (int)flen) == 0) { 1133 free(fcode, M_DEVBUF, size); 1134 return (EINVAL); 1135 } 1136 1137 bps = malloc(sizeof(*bps), M_DEVBUF, M_WAITOK); 1138 smr_init(&bps->bps_smr); 1139 bps->bps_bf.bf_len = flen; 1140 bps->bps_bf.bf_insns = fcode; 1141 } 1142 1143 if (cmd != BIOCSETWF) { 1144 old_bps = SMR_PTR_GET_LOCKED(&d->bd_rfilter); 1145 SMR_PTR_SET_LOCKED(&d->bd_rfilter, bps); 1146 } else { 1147 old_bps = SMR_PTR_GET_LOCKED(&d->bd_wfilter); 1148 SMR_PTR_SET_LOCKED(&d->bd_wfilter, bps); 1149 } 1150 1151 if (cmd == BIOCSETF) { 1152 mtx_enter(&d->bd_mtx); 1153 bpf_resetd(d); 1154 mtx_leave(&d->bd_mtx); 1155 } 1156 1157 if (old_bps != NULL) 1158 smr_call(&old_bps->bps_smr, bpf_prog_smr, old_bps); 1159 1160 return (0); 1161} 1162 1163/* 1164 * Detach a file from its current interface (if attached at all) and attach 1165 * to the interface indicated by the name stored in ifr. 1166 * Return an errno or 0. 1167 */ 1168int 1169bpf_setif(struct bpf_d *d, struct ifreq *ifr) 1170{ 1171 struct bpf_if *bp; 1172 int error = 0; 1173 1174 /* 1175 * Look through attached interfaces for the named one. 1176 */ 1177 TAILQ_FOREACH(bp, &bpf_iflist, bif_next) { 1178 if (strcmp(bp->bif_name, ifr->ifr_name) == 0) 1179 break; 1180 } 1181 1182 /* Not found. */ 1183 if (bp == NULL) 1184 return (ENXIO); 1185 1186 /* 1187 * Allocate the packet buffers if we need to. 1188 * If we're already attached to requested interface, 1189 * just flush the buffer. 1190 */ 1191 mtx_enter(&d->bd_mtx); 1192 if (d->bd_sbuf == NULL) { 1193 if ((error = bpf_allocbufs(d))) 1194 goto out; 1195 } 1196 if (bp != d->bd_bif) { 1197 /* 1198 * Detach if attached to something else. 1199 */ 1200 bpf_detachd(d); 1201 bpf_attachd(d, bp); 1202 } 1203 bpf_resetd(d); 1204out: 1205 mtx_leave(&d->bd_mtx); 1206 return (error); 1207} 1208 1209/* 1210 * Copy the interface name to the ifreq. 1211 */ 1212void 1213bpf_ifname(struct bpf_if *bif, struct ifreq *ifr) 1214{ 1215 bcopy(bif->bif_name, ifr->ifr_name, sizeof(ifr->ifr_name)); 1216} 1217 1218const struct filterops bpfread_filtops = { 1219 .f_flags = FILTEROP_ISFD | FILTEROP_MPSAFE, 1220 .f_attach = NULL, 1221 .f_detach = filt_bpfrdetach, 1222 .f_event = filt_bpfread, 1223 .f_modify = filt_bpfreadmodify, 1224 .f_process = filt_bpfreadprocess, 1225}; 1226 1227int 1228bpfkqfilter(dev_t dev, struct knote *kn) 1229{ 1230 struct bpf_d *d; 1231 struct klist *klist; 1232 1233 KERNEL_ASSERT_LOCKED(); 1234 1235 d = bpfilter_lookup(minor(dev)); 1236 if (d == NULL) 1237 return (ENXIO); 1238 1239 switch (kn->kn_filter) { 1240 case EVFILT_READ: 1241 klist = &d->bd_klist; 1242 kn->kn_fop = &bpfread_filtops; 1243 break; 1244 default: 1245 return (EINVAL); 1246 } 1247 1248 bpf_get(d); 1249 kn->kn_hook = d; 1250 klist_insert(klist, kn); 1251 1252 return (0); 1253} 1254 1255void 1256filt_bpfrdetach(struct knote *kn) 1257{ 1258 struct bpf_d *d = kn->kn_hook; 1259 1260 klist_remove(&d->bd_klist, kn); 1261 bpf_put(d); 1262} 1263 1264int 1265filt_bpfread(struct knote *kn, long hint) 1266{ 1267 struct bpf_d *d = kn->kn_hook; 1268 1269 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1270 1271 kn->kn_data = d->bd_hlen; 1272 if (d->bd_state == BPF_S_DONE) 1273 kn->kn_data += d->bd_slen; 1274 1275 return (kn->kn_data > 0); 1276} 1277 1278int 1279filt_bpfreadmodify(struct kevent *kev, struct knote *kn) 1280{ 1281 struct bpf_d *d = kn->kn_hook; 1282 int active; 1283 1284 mtx_enter(&d->bd_mtx); 1285 active = knote_modify_fn(kev, kn, filt_bpfread); 1286 mtx_leave(&d->bd_mtx); 1287 1288 return (active); 1289} 1290 1291int 1292filt_bpfreadprocess(struct knote *kn, struct kevent *kev) 1293{ 1294 struct bpf_d *d = kn->kn_hook; 1295 int active; 1296 1297 mtx_enter(&d->bd_mtx); 1298 active = knote_process_fn(kn, kev, filt_bpfread); 1299 mtx_leave(&d->bd_mtx); 1300 1301 return (active); 1302} 1303 1304/* 1305 * Copy data from an mbuf chain into a buffer. This code is derived 1306 * from m_copydata in sys/uipc_mbuf.c. 1307 */ 1308void 1309bpf_mcopy(const void *src_arg, void *dst_arg, size_t len) 1310{ 1311 const struct mbuf *m; 1312 u_int count; 1313 u_char *dst; 1314 1315 m = src_arg; 1316 dst = dst_arg; 1317 while (len > 0) { 1318 if (m == NULL) 1319 panic("bpf_mcopy"); 1320 count = min(m->m_len, len); 1321 bcopy(mtod(m, caddr_t), (caddr_t)dst, count); 1322 m = m->m_next; 1323 dst += count; 1324 len -= count; 1325 } 1326} 1327 1328int 1329bpf_mtap(caddr_t arg, const struct mbuf *m, u_int direction) 1330{ 1331 return _bpf_mtap(arg, m, m, direction); 1332} 1333 1334int 1335_bpf_mtap(caddr_t arg, const struct mbuf *mp, const struct mbuf *m, 1336 u_int direction) 1337{ 1338 struct bpf_if *bp = (struct bpf_if *)arg; 1339 struct bpf_d *d; 1340 size_t pktlen, slen; 1341 const struct mbuf *m0; 1342 struct bpf_hdr tbh; 1343 int gothdr = 0; 1344 int drop = 0; 1345 1346 if (m == NULL) 1347 return (0); 1348 1349 if (bp == NULL) 1350 return (0); 1351 1352 pktlen = 0; 1353 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1354 pktlen += m0->m_len; 1355 1356 smr_read_enter(); 1357 SMR_SLIST_FOREACH(d, &bp->bif_dlist, bd_next) { 1358 struct bpf_program_smr *bps; 1359 struct bpf_insn *fcode = NULL; 1360 1361 atomic_inc_long(&d->bd_rcount); 1362 1363 if (ISSET(d->bd_dirfilt, direction)) 1364 continue; 1365 1366 bps = SMR_PTR_GET(&d->bd_rfilter); 1367 if (bps != NULL) 1368 fcode = bps->bps_bf.bf_insns; 1369 slen = bpf_mfilter(fcode, m, pktlen); 1370 1371 if (slen == 0) 1372 continue; 1373 if (d->bd_fildrop != BPF_FILDROP_PASS) 1374 drop = 1; 1375 if (d->bd_fildrop != BPF_FILDROP_DROP) { 1376 if (!gothdr) { 1377 struct timeval tv; 1378 memset(&tbh, 0, sizeof(tbh)); 1379 1380 if (ISSET(mp->m_flags, M_PKTHDR)) { 1381 tbh.bh_ifidx = mp->m_pkthdr.ph_ifidx; 1382 tbh.bh_flowid = mp->m_pkthdr.ph_flowid; 1383 tbh.bh_flags = mp->m_pkthdr.pf.prio; 1384 if (ISSET(mp->m_pkthdr.csum_flags, 1385 M_FLOWID)) 1386 SET(tbh.bh_flags, BPF_F_FLOWID); 1387 tbh.bh_csumflags = 1388 mp->m_pkthdr.csum_flags; 1389 1390 m_microtime(mp, &tv); 1391 } else 1392 microtime(&tv); 1393 1394 tbh.bh_tstamp.tv_sec = tv.tv_sec; 1395 tbh.bh_tstamp.tv_usec = tv.tv_usec; 1396 SET(tbh.bh_flags, direction << BPF_F_DIR_SHIFT); 1397 1398 gothdr = 1; 1399 } 1400 1401 mtx_enter(&d->bd_mtx); 1402 bpf_catchpacket(d, (u_char *)m, pktlen, slen, &tbh); 1403 mtx_leave(&d->bd_mtx); 1404 } 1405 } 1406 smr_read_leave(); 1407 1408 return (drop); 1409} 1410 1411/* 1412 * Incoming linkage from device drivers, where a data buffer should be 1413 * prepended by an arbitrary header. In this situation we already have a 1414 * way of representing a chain of memory buffers, ie, mbufs, so reuse 1415 * the existing functionality by attaching the buffers to mbufs. 1416 * 1417 * Con up a minimal mbuf chain to pacify bpf by allocating (only) a 1418 * struct m_hdr each for the header and data on the stack. 1419 */ 1420int 1421bpf_tap_hdr(caddr_t arg, const void *hdr, unsigned int hdrlen, 1422 const void *buf, unsigned int buflen, u_int direction) 1423{ 1424 struct m_hdr mh, md; 1425 struct mbuf *m0 = NULL; 1426 struct mbuf **mp = &m0; 1427 1428 if (hdr != NULL) { 1429 mh.mh_flags = 0; 1430 mh.mh_next = NULL; 1431 mh.mh_len = hdrlen; 1432 mh.mh_data = (void *)hdr; 1433 1434 *mp = (struct mbuf *)&mh; 1435 mp = &mh.mh_next; 1436 } 1437 1438 if (buf != NULL) { 1439 md.mh_flags = 0; 1440 md.mh_next = NULL; 1441 md.mh_len = buflen; 1442 md.mh_data = (void *)buf; 1443 1444 *mp = (struct mbuf *)&md; 1445 } 1446 1447 return bpf_mtap(arg, m0, direction); 1448} 1449 1450/* 1451 * Incoming linkage from device drivers, where we have a mbuf chain 1452 * but need to prepend some arbitrary header from a linear buffer. 1453 * 1454 * Con up a minimal dummy header to pacify bpf. Allocate (only) a 1455 * struct m_hdr on the stack. This is safe as bpf only reads from the 1456 * fields in this header that we initialize, and will not try to free 1457 * it or keep a pointer to it. 1458 */ 1459int 1460bpf_mtap_hdr(caddr_t arg, const void *data, u_int dlen, const struct mbuf *m, 1461 u_int direction) 1462{ 1463 struct m_hdr mh; 1464 const struct mbuf *m0; 1465 1466 if (dlen > 0) { 1467 mh.mh_flags = 0; 1468 mh.mh_next = (struct mbuf *)m; 1469 mh.mh_len = dlen; 1470 mh.mh_data = (void *)data; 1471 m0 = (struct mbuf *)&mh; 1472 } else 1473 m0 = m; 1474 1475 return _bpf_mtap(arg, m, m0, direction); 1476} 1477 1478/* 1479 * Incoming linkage from device drivers, where we have a mbuf chain 1480 * but need to prepend the address family. 1481 * 1482 * Con up a minimal dummy header to pacify bpf. We allocate (only) a 1483 * struct m_hdr on the stack. This is safe as bpf only reads from the 1484 * fields in this header that we initialize, and will not try to free 1485 * it or keep a pointer to it. 1486 */ 1487int 1488bpf_mtap_af(caddr_t arg, u_int32_t af, const struct mbuf *m, u_int direction) 1489{ 1490 u_int32_t afh; 1491 1492 afh = htonl(af); 1493 1494 return bpf_mtap_hdr(arg, &afh, sizeof(afh), m, direction); 1495} 1496 1497/* 1498 * Incoming linkage from device drivers, where we have a mbuf chain 1499 * but need to prepend a VLAN encapsulation header. 1500 * 1501 * Con up a minimal dummy header to pacify bpf. Allocate (only) a 1502 * struct m_hdr on the stack. This is safe as bpf only reads from the 1503 * fields in this header that we initialize, and will not try to free 1504 * it or keep a pointer to it. 1505 */ 1506int 1507bpf_mtap_ether(caddr_t arg, const struct mbuf *m, u_int direction) 1508{ 1509#if NVLAN > 0 1510 struct ether_vlan_header evh; 1511 struct m_hdr mh, md; 1512 1513 if ((m->m_flags & M_VLANTAG) == 0) 1514#endif 1515 { 1516 return _bpf_mtap(arg, m, m, direction); 1517 } 1518 1519#if NVLAN > 0 1520 KASSERT(m->m_len >= ETHER_HDR_LEN); 1521 1522 memcpy(&evh, mtod(m, char *), ETHER_HDR_LEN); 1523 evh.evl_proto = evh.evl_encap_proto; 1524 evh.evl_encap_proto = htons(ETHERTYPE_VLAN); 1525 evh.evl_tag = htons(m->m_pkthdr.ether_vtag); 1526 1527 mh.mh_flags = 0; 1528 mh.mh_data = (caddr_t)&evh; 1529 mh.mh_len = sizeof(evh); 1530 mh.mh_next = (struct mbuf *)&md; 1531 1532 md.mh_flags = 0; 1533 md.mh_data = m->m_data + ETHER_HDR_LEN; 1534 md.mh_len = m->m_len - ETHER_HDR_LEN; 1535 md.mh_next = m->m_next; 1536 1537 return _bpf_mtap(arg, m, (struct mbuf *)&mh, direction); 1538#endif 1539} 1540 1541/* 1542 * Move the packet data from interface memory (pkt) into the 1543 * store buffer. Wake up listeners if needed. 1544 * "copy" is the routine called to do the actual data 1545 * transfer. bcopy is passed in to copy contiguous chunks, while 1546 * bpf_mcopy is passed in to copy mbuf chains. In the latter case, 1547 * pkt is really an mbuf. 1548 */ 1549void 1550bpf_catchpacket(struct bpf_d *d, u_char *pkt, size_t pktlen, size_t snaplen, 1551 const struct bpf_hdr *tbh) 1552{ 1553 struct bpf_hdr *bh; 1554 int totlen, curlen; 1555 int hdrlen, do_wakeup = 0; 1556 1557 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1558 if (d->bd_bif == NULL) 1559 return; 1560 1561 hdrlen = d->bd_bif->bif_hdrlen; 1562 1563 /* 1564 * Figure out how many bytes to move. If the packet is 1565 * greater or equal to the snapshot length, transfer that 1566 * much. Otherwise, transfer the whole packet (unless 1567 * we hit the buffer size limit). 1568 */ 1569 totlen = hdrlen + min(snaplen, pktlen); 1570 if (totlen > d->bd_bufsize) 1571 totlen = d->bd_bufsize; 1572 1573 /* 1574 * Round up the end of the previous packet to the next longword. 1575 */ 1576 curlen = BPF_WORDALIGN(d->bd_slen); 1577 if (curlen + totlen > d->bd_bufsize) { 1578 /* 1579 * This packet will overflow the storage buffer. 1580 * Rotate the buffers if we can, then wakeup any 1581 * pending reads. 1582 */ 1583 if (d->bd_fbuf == NULL) { 1584 /* 1585 * We haven't completed the previous read yet, 1586 * so drop the packet. 1587 */ 1588 ++d->bd_dcount; 1589 return; 1590 } 1591 1592 /* cancel pending wtime */ 1593 if (timeout_del(&d->bd_wait_tmo)) 1594 bpf_put(d); 1595 1596 ROTATE_BUFFERS(d); 1597 do_wakeup = 1; 1598 curlen = 0; 1599 } 1600 1601 /* 1602 * Append the bpf header. 1603 */ 1604 bh = (struct bpf_hdr *)(d->bd_sbuf + curlen); 1605 *bh = *tbh; 1606 bh->bh_datalen = pktlen; 1607 bh->bh_hdrlen = hdrlen; 1608 bh->bh_caplen = totlen - hdrlen; 1609 1610 /* 1611 * Copy the packet data into the store buffer and update its length. 1612 */ 1613 bpf_mcopy(pkt, (u_char *)bh + hdrlen, bh->bh_caplen); 1614 d->bd_slen = curlen + totlen; 1615 1616 switch (d->bd_wtout) { 1617 case 0: 1618 /* 1619 * Immediate mode is set. A packet arrived so any 1620 * reads should be woken up. 1621 */ 1622 if (d->bd_state == BPF_S_IDLE) 1623 d->bd_state = BPF_S_DONE; 1624 do_wakeup = 1; 1625 break; 1626 case INFSLP: 1627 break; 1628 default: 1629 if (d->bd_state == BPF_S_IDLE) { 1630 d->bd_state = BPF_S_WAIT; 1631 1632 bpf_get(d); 1633 if (!timeout_add_nsec(&d->bd_wait_tmo, d->bd_wtout)) 1634 bpf_put(d); 1635 } 1636 break; 1637 } 1638 1639 if (do_wakeup) 1640 bpf_wakeup(d); 1641} 1642 1643/* 1644 * Initialize all nonzero fields of a descriptor. 1645 */ 1646int 1647bpf_allocbufs(struct bpf_d *d) 1648{ 1649 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1650 1651 d->bd_fbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1652 if (d->bd_fbuf == NULL) 1653 return (ENOMEM); 1654 1655 d->bd_sbuf = malloc(d->bd_bufsize, M_DEVBUF, M_NOWAIT); 1656 if (d->bd_sbuf == NULL) { 1657 free(d->bd_fbuf, M_DEVBUF, d->bd_bufsize); 1658 d->bd_fbuf = NULL; 1659 return (ENOMEM); 1660 } 1661 1662 d->bd_slen = 0; 1663 d->bd_hlen = 0; 1664 1665 return (0); 1666} 1667 1668void 1669bpf_prog_smr(void *bps_arg) 1670{ 1671 struct bpf_program_smr *bps = bps_arg; 1672 1673 free(bps->bps_bf.bf_insns, M_DEVBUF, 1674 bps->bps_bf.bf_len * sizeof(struct bpf_insn)); 1675 free(bps, M_DEVBUF, sizeof(struct bpf_program_smr)); 1676} 1677 1678void 1679bpf_d_smr(void *smr) 1680{ 1681 struct bpf_d *bd = smr; 1682 1683 sigio_free(&bd->bd_sigio); 1684 free(bd->bd_sbuf, M_DEVBUF, bd->bd_bufsize); 1685 free(bd->bd_hbuf, M_DEVBUF, bd->bd_bufsize); 1686 free(bd->bd_fbuf, M_DEVBUF, bd->bd_bufsize); 1687 1688 if (bd->bd_rfilter != NULL) 1689 bpf_prog_smr(bd->bd_rfilter); 1690 if (bd->bd_wfilter != NULL) 1691 bpf_prog_smr(bd->bd_wfilter); 1692 1693 klist_free(&bd->bd_klist); 1694 free(bd, M_DEVBUF, sizeof(*bd)); 1695} 1696 1697void 1698bpf_get(struct bpf_d *bd) 1699{ 1700 refcnt_take(&bd->bd_refcnt); 1701} 1702 1703/* 1704 * Free buffers currently in use by a descriptor 1705 * when the reference count drops to zero. 1706 */ 1707void 1708bpf_put(struct bpf_d *bd) 1709{ 1710 if (refcnt_rele(&bd->bd_refcnt) == 0) 1711 return; 1712 1713 smr_call(&bd->bd_smr, bpf_d_smr, bd); 1714} 1715 1716void * 1717bpfsattach(caddr_t *bpfp, const char *name, u_int dlt, u_int hdrlen) 1718{ 1719 struct bpf_if *bp; 1720 1721 if ((bp = malloc(sizeof(*bp), M_DEVBUF, M_NOWAIT)) == NULL) 1722 panic("bpfattach"); 1723 SMR_SLIST_INIT(&bp->bif_dlist); 1724 bp->bif_driverp = (struct bpf_if **)bpfp; 1725 bp->bif_name = name; 1726 bp->bif_ifp = NULL; 1727 bp->bif_dlt = dlt; 1728 1729 TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next); 1730 1731 *bp->bif_driverp = NULL; 1732 1733 /* 1734 * Compute the length of the bpf header. This is not necessarily 1735 * equal to SIZEOF_BPF_HDR because we want to insert spacing such 1736 * that the network layer header begins on a longword boundary (for 1737 * performance reasons and to alleviate alignment restrictions). 1738 */ 1739 bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen; 1740 1741 return (bp); 1742} 1743 1744void * 1745bpfxattach(caddr_t *driverp, const char *name, struct ifnet *ifp, 1746 u_int dlt, u_int hdrlen) 1747{ 1748 struct bpf_if *bp; 1749 1750 bp = bpfsattach(driverp, name, dlt, hdrlen); 1751 bp->bif_ifp = ifp; 1752 1753 return (bp); 1754} 1755 1756void 1757bpfattach(caddr_t *driverp, struct ifnet *ifp, u_int dlt, u_int hdrlen) 1758{ 1759 bpfxattach(driverp, ifp->if_xname, ifp, dlt, hdrlen); 1760} 1761 1762/* Detach an interface from its attached bpf device. */ 1763void 1764bpfdetach(struct ifnet *ifp) 1765{ 1766 struct bpf_if *bp, *nbp; 1767 1768 KERNEL_ASSERT_LOCKED(); 1769 1770 TAILQ_FOREACH_SAFE(bp, &bpf_iflist, bif_next, nbp) { 1771 if (bp->bif_ifp == ifp) 1772 bpfsdetach(bp); 1773 } 1774 ifp->if_bpf = NULL; 1775} 1776 1777void 1778bpfsdetach(void *p) 1779{ 1780 struct bpf_if *bp = p; 1781 struct bpf_d *bd; 1782 int maj; 1783 1784 KERNEL_ASSERT_LOCKED(); 1785 1786 /* Locate the major number. */ 1787 for (maj = 0; maj < nchrdev; maj++) 1788 if (cdevsw[maj].d_open == bpfopen) 1789 break; 1790 1791 while ((bd = SMR_SLIST_FIRST_LOCKED(&bp->bif_dlist))) { 1792 bpf_get(bd); 1793 vdevgone(maj, bd->bd_unit, bd->bd_unit, VCHR); 1794 klist_invalidate(&bd->bd_klist); 1795 bpf_put(bd); 1796 } 1797 1798 TAILQ_REMOVE(&bpf_iflist, bp, bif_next); 1799 1800 free(bp, M_DEVBUF, sizeof(*bp)); 1801} 1802 1803#ifndef SMALL_KERNEL 1804int 1805bpf_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 1806 size_t newlen) 1807{ 1808 if (namelen != 1) 1809 return (ENOTDIR); 1810 1811 switch (name[0]) { 1812 case NET_BPF_BUFSIZE: 1813 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1814 &bpf_bufsize, BPF_MINBUFSIZE, 1815 atomic_load_int(&bpf_maxbufsize)); 1816 case NET_BPF_MAXBUFSIZE: 1817 return sysctl_int_bounded(oldp, oldlenp, newp, newlen, 1818 &bpf_maxbufsize, BPF_MINBUFSIZE, MALLOC_MAX); 1819 default: 1820 return (EOPNOTSUPP); 1821 } 1822 1823 /* NOTREACHED */ 1824} 1825#endif /* SMALL_KERNEL */ 1826 1827struct bpf_d * 1828bpfilter_lookup(int unit) 1829{ 1830 struct bpf_d *bd; 1831 1832 KERNEL_ASSERT_LOCKED(); 1833 1834 LIST_FOREACH(bd, &bpf_d_list, bd_list) 1835 if (bd->bd_unit == unit) 1836 return (bd); 1837 return (NULL); 1838} 1839 1840/* 1841 * Get a list of available data link type of the interface. 1842 */ 1843int 1844bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *bfl) 1845{ 1846 int n, error; 1847 struct bpf_if *bp; 1848 const char *name; 1849 1850 name = d->bd_bif->bif_name; 1851 n = 0; 1852 error = 0; 1853 TAILQ_FOREACH(bp, &bpf_iflist, bif_next) { 1854 if (strcmp(name, bp->bif_name) != 0) 1855 continue; 1856 if (bfl->bfl_list != NULL) { 1857 if (n >= bfl->bfl_len) 1858 return (ENOMEM); 1859 error = copyout(&bp->bif_dlt, 1860 bfl->bfl_list + n, sizeof(u_int)); 1861 if (error) 1862 break; 1863 } 1864 n++; 1865 } 1866 1867 bfl->bfl_len = n; 1868 return (error); 1869} 1870 1871/* 1872 * Set the data link type of a BPF instance. 1873 */ 1874int 1875bpf_setdlt(struct bpf_d *d, u_int dlt) 1876{ 1877 const char *name; 1878 struct bpf_if *bp; 1879 1880 MUTEX_ASSERT_LOCKED(&d->bd_mtx); 1881 if (d->bd_bif->bif_dlt == dlt) 1882 return (0); 1883 name = d->bd_bif->bif_name; 1884 TAILQ_FOREACH(bp, &bpf_iflist, bif_next) { 1885 if (strcmp(name, bp->bif_name) != 0) 1886 continue; 1887 if (bp->bif_dlt == dlt) 1888 break; 1889 } 1890 if (bp == NULL) 1891 return (EINVAL); 1892 bpf_detachd(d); 1893 bpf_attachd(d, bp); 1894 bpf_resetd(d); 1895 return (0); 1896} 1897 1898u_int32_t bpf_mbuf_ldw(const void *, u_int32_t, int *); 1899u_int32_t bpf_mbuf_ldh(const void *, u_int32_t, int *); 1900u_int32_t bpf_mbuf_ldb(const void *, u_int32_t, int *); 1901 1902int bpf_mbuf_copy(const struct mbuf *, u_int32_t, 1903 void *, u_int32_t); 1904 1905const struct bpf_ops bpf_mbuf_ops = { 1906 bpf_mbuf_ldw, 1907 bpf_mbuf_ldh, 1908 bpf_mbuf_ldb, 1909}; 1910 1911int 1912bpf_mbuf_copy(const struct mbuf *m, u_int32_t off, void *buf, u_int32_t len) 1913{ 1914 u_int8_t *cp = buf; 1915 u_int32_t count; 1916 1917 while (off >= m->m_len) { 1918 off -= m->m_len; 1919 1920 m = m->m_next; 1921 if (m == NULL) 1922 return (-1); 1923 } 1924 1925 for (;;) { 1926 count = min(m->m_len - off, len); 1927 1928 memcpy(cp, m->m_data + off, count); 1929 len -= count; 1930 1931 if (len == 0) 1932 return (0); 1933 1934 m = m->m_next; 1935 if (m == NULL) 1936 break; 1937 1938 cp += count; 1939 off = 0; 1940 } 1941 1942 return (-1); 1943} 1944 1945u_int32_t 1946bpf_mbuf_ldw(const void *m0, u_int32_t k, int *err) 1947{ 1948 u_int32_t v; 1949 1950 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1951 *err = 1; 1952 return (0); 1953 } 1954 1955 *err = 0; 1956 return ntohl(v); 1957} 1958 1959u_int32_t 1960bpf_mbuf_ldh(const void *m0, u_int32_t k, int *err) 1961{ 1962 u_int16_t v; 1963 1964 if (bpf_mbuf_copy(m0, k, &v, sizeof(v)) != 0) { 1965 *err = 1; 1966 return (0); 1967 } 1968 1969 *err = 0; 1970 return ntohs(v); 1971} 1972 1973u_int32_t 1974bpf_mbuf_ldb(const void *m0, u_int32_t k, int *err) 1975{ 1976 const struct mbuf *m = m0; 1977 u_int8_t v; 1978 1979 while (k >= m->m_len) { 1980 k -= m->m_len; 1981 1982 m = m->m_next; 1983 if (m == NULL) { 1984 *err = 1; 1985 return (0); 1986 } 1987 } 1988 v = m->m_data[k]; 1989 1990 *err = 0; 1991 return v; 1992} 1993 1994u_int 1995bpf_mfilter(const struct bpf_insn *pc, const struct mbuf *m, u_int wirelen) 1996{ 1997 return _bpf_filter(pc, &bpf_mbuf_ops, m, wirelen); 1998}