/* jcs's openbsd hax — mirror of OpenBSD sys/dev/dt/dt_dev.c (950 lines, 21 kB; see raw). */
1/* $OpenBSD: dt_dev.c,v 1.47 2025/12/10 09:38:41 mpi Exp $ */ 2 3/* 4 * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/types.h> 20#include <sys/systm.h> 21#include <sys/param.h> 22#include <sys/clockintr.h> 23#include <sys/device.h> 24#include <sys/exec_elf.h> 25#include <sys/malloc.h> 26#include <sys/proc.h> 27#include <sys/ptrace.h> 28#include <sys/vnode.h> 29#include <uvm/uvm.h> 30#include <uvm/uvm_map.h> 31#include <uvm/uvm_vnode.h> 32#include <sys/file.h> 33#include <sys/filedesc.h> 34#include <sys/fcntl.h> 35 36#include <machine/intr.h> 37 38#include <dev/dt/dtvar.h> 39 40/* 41 * Number of frames to skip in stack traces. 42 * 43 * The number of frames required to execute dt(4) profiling code 44 * depends on the probe, context, architecture and possibly the 45 * compiler. 46 * 47 * Static probes (tracepoints) are executed in the context of the 48 * current thread and only need to skip frames up to the recording 49 * function. 
For example the syscall provider: 50 * 51 * dt_prov_syscall_entry+0x141 52 * syscall+0x205 <--- start here 53 * Xsyscall+0x128 54 * 55 * Probes executed in their own context, like the profile provider, 56 * need to skip the frames of that context which are different for 57 * every architecture. For example the profile provider executed 58 * from hardclock(9) on amd64: 59 * 60 * dt_prov_profile_enter+0x6e 61 * hardclock+0x1a9 62 * lapic_clockintr+0x3f 63 * Xresume_lapic_ltimer+0x26 64 * acpicpu_idle+0x1d2 <---- start here. 65 * sched_idle+0x225 66 * proc_trampoline+0x1c 67 */ 68#if defined(__amd64__) 69#define DT_FA_PROFILE 5 70#define DT_FA_STATIC 2 71#elif defined(__i386__) 72#define DT_FA_PROFILE 5 73#define DT_FA_STATIC 2 74#elif defined(__macppc__) 75#define DT_FA_PROFILE 5 76#define DT_FA_STATIC 2 77#elif defined(__octeon__) 78#define DT_FA_PROFILE 6 79#define DT_FA_STATIC 2 80#elif defined(__powerpc64__) 81#define DT_FA_PROFILE 6 82#define DT_FA_STATIC 2 83#elif defined(__sparc64__) 84#define DT_FA_PROFILE 7 85#define DT_FA_STATIC 1 86#else 87#define DT_FA_STATIC 0 88#define DT_FA_PROFILE 0 89#endif 90 91#define DT_EVTRING_SIZE 16 /* # of slots in per PCB event ring */ 92 93#define DPRINTF(x...) /* nothing */ 94 95/* 96 * Locks used to protect struct members and variables in this file: 97 * a atomic 98 * I invariant after initialization 99 * K kernel lock 100 * D dtrace rw-lock dt_lock 101 * r owned by thread doing read(2) 102 * c owned by CPU 103 * s sliced ownership, based on read/write indexes 104 * p written by CPU, read by thread doing read(2) 105 */ 106 107/* 108 * Per-CPU Event States 109 */ 110struct dt_cpubuf { 111 unsigned int dc_prod; /* [r] read index */ 112 unsigned int dc_cons; /* [c] write index */ 113 struct dt_evt *dc_ring; /* [s] ring of event states */ 114 unsigned int dc_inevt; /* [c] in event already? 
*/ 115 116 /* Counters */ 117 unsigned int dc_dropevt; /* [p] # of events dropped */ 118 unsigned int dc_skiptick; /* [p] # of ticks skipped */ 119 unsigned int dc_recurevt; /* [p] # of recursive events */ 120 unsigned int dc_readevt; /* [r] # of events read */ 121}; 122 123/* 124 * Descriptor associated with each program opening /dev/dt. It is used 125 * to keep track of enabled PCBs. 126 */ 127struct dt_softc { 128 SLIST_ENTRY(dt_softc) ds_next; /* [K] descriptor list */ 129 int ds_unit; /* [I] D_CLONE unique unit */ 130 pid_t ds_pid; /* [I] PID of tracing program */ 131 void *ds_si; /* [I] to defer wakeup(9) */ 132 133 struct dt_pcb_list ds_pcbs; /* [K] list of enabled PCBs */ 134 int ds_recording; /* [D] currently recording? */ 135 unsigned int ds_evtcnt; /* [a] # of readable evts */ 136 137 struct dt_cpubuf ds_cpu[MAXCPUS]; /* [I] Per-cpu event states */ 138 unsigned int ds_lastcpu; /* [r] last CPU ring read(2). */ 139}; 140 141SLIST_HEAD(, dt_softc) dtdev_list; /* [K] list of open /dev/dt nodes */ 142 143/* 144 * Probes are created during dt_attach() and never modified/freed during 145 * the lifetime of the system. That's why we consider them as [I]mmutable. 
146 */ 147unsigned int dt_nprobes; /* [I] # of probes available */ 148SIMPLEQ_HEAD(, dt_probe) dt_probe_list; /* [I] list of probes */ 149 150struct rwlock dt_lock = RWLOCK_INITIALIZER("dtlk"); 151volatile uint32_t dt_tracing = 0; /* [D] # of processes tracing */ 152 153int allowdt; /* [a] */ 154 155void dtattach(struct device *, struct device *, void *); 156int dtopen(dev_t, int, int, struct proc *); 157int dtclose(dev_t, int, int, struct proc *); 158int dtread(dev_t, struct uio *, int); 159int dtioctl(dev_t, u_long, caddr_t, int, struct proc *); 160 161struct dt_softc *dtlookup(int); 162struct dt_softc *dtalloc(void); 163void dtfree(struct dt_softc *); 164 165int dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *); 166int dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *); 167int dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *); 168int dt_ioctl_record_start(struct dt_softc *); 169void dt_ioctl_record_stop(struct dt_softc *); 170int dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *); 171int dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *); 172int dt_ioctl_rd_vnode(struct dt_softc *, struct dtioc_rdvn *); 173 174int dt_ring_copy(struct dt_cpubuf *, struct uio *, size_t, size_t *); 175 176void dt_wakeup(struct dt_softc *); 177void dt_deferred_wakeup(void *); 178 179void 180dtattach(struct device *parent, struct device *self, void *aux) 181{ 182 SLIST_INIT(&dtdev_list); 183 SIMPLEQ_INIT(&dt_probe_list); 184 185 /* Init providers */ 186 dt_nprobes += dt_prov_profile_init(); 187 dt_nprobes += dt_prov_syscall_init(); 188 dt_nprobes += dt_prov_static_init(); 189#ifdef DDBPROF 190 dt_nprobes += dt_prov_kprobe_init(); 191#endif 192} 193 194int 195dtopen(dev_t dev, int flags, int mode, struct proc *p) 196{ 197 struct dt_softc *sc; 198 int unit = minor(dev); 199 200 if (atomic_load_int(&allowdt) == 0) 201 return EPERM; 202 203 sc = dtalloc(); 204 if (sc == NULL) 205 return ENOMEM; 206 207 /* no sleep after this point */ 208 if 
(dtlookup(unit) != NULL) { 209 dtfree(sc); 210 return EBUSY; 211 } 212 213 sc->ds_unit = unit; 214 sc->ds_pid = p->p_p->ps_pid; 215 TAILQ_INIT(&sc->ds_pcbs); 216 sc->ds_lastcpu = 0; 217 sc->ds_evtcnt = 0; 218 219 SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next); 220 221 DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid); 222 223 return 0; 224} 225 226int 227dtclose(dev_t dev, int flags, int mode, struct proc *p) 228{ 229 struct dt_softc *sc; 230 int unit = minor(dev); 231 232 sc = dtlookup(unit); 233 KASSERT(sc != NULL); 234 235 DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid); 236 237 SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next); 238 dt_ioctl_record_stop(sc); 239 dt_pcb_purge(&sc->ds_pcbs); 240 dtfree(sc); 241 242 return 0; 243} 244 245int 246dtread(dev_t dev, struct uio *uio, int flags) 247{ 248 struct dt_softc *sc; 249 struct dt_cpubuf *dc; 250 int i, error = 0, unit = minor(dev); 251 size_t count, max, read = 0; 252 253 sc = dtlookup(unit); 254 KASSERT(sc != NULL); 255 256 max = howmany(uio->uio_resid, sizeof(struct dt_evt)); 257 if (max < 1) 258 return (EMSGSIZE); 259 260 while (!atomic_load_int(&sc->ds_evtcnt)) { 261 sleep_setup(sc, PWAIT | PCATCH, "dtread"); 262 error = sleep_finish(INFSLP, !atomic_load_int(&sc->ds_evtcnt)); 263 if (error == EINTR || error == ERESTART) 264 break; 265 } 266 if (error) 267 return error; 268 269 KERNEL_ASSERT_LOCKED(); 270 for (i = 0; i < ncpusfound; i++) { 271 count = 0; 272 dc = &sc->ds_cpu[(sc->ds_lastcpu + i) % ncpusfound]; 273 error = dt_ring_copy(dc, uio, max, &count); 274 if (error && count == 0) 275 break; 276 277 read += count; 278 max -= count; 279 if (max == 0) 280 break; 281 } 282 sc->ds_lastcpu += i % ncpusfound; 283 284 atomic_sub_int(&sc->ds_evtcnt, read); 285 286 return error; 287} 288 289int 290dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p) 291{ 292 struct dt_softc *sc; 293 int unit = minor(dev); 294 int on, error = 0; 295 296 sc = dtlookup(unit); 297 KASSERT(sc != NULL); 298 
299 switch (cmd) { 300 case DTIOCGPLIST: 301 return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr); 302 case DTIOCGARGS: 303 return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr); 304 case DTIOCGSTATS: 305 return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr); 306 case DTIOCRECORD: 307 case DTIOCPRBENABLE: 308 case DTIOCPRBDISABLE: 309 case DTIOCRDVNODE: 310 /* root only ioctl(2) */ 311 break; 312 default: 313 return ENOTTY; 314 } 315 316 if ((error = suser(p)) != 0) 317 return error; 318 319 switch (cmd) { 320 case DTIOCRECORD: 321 on = *(int *)addr; 322 if (on) 323 error = dt_ioctl_record_start(sc); 324 else 325 dt_ioctl_record_stop(sc); 326 break; 327 case DTIOCPRBENABLE: 328 error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr); 329 break; 330 case DTIOCPRBDISABLE: 331 error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr); 332 break; 333 case DTIOCRDVNODE: 334 error = dt_ioctl_rd_vnode(sc, (struct dtioc_rdvn *)addr); 335 break; 336 default: 337 KASSERT(0); 338 } 339 340 return error; 341} 342 343struct dt_softc * 344dtlookup(int unit) 345{ 346 struct dt_softc *sc; 347 348 KERNEL_ASSERT_LOCKED(); 349 350 SLIST_FOREACH(sc, &dtdev_list, ds_next) { 351 if (sc->ds_unit == unit) 352 break; 353 } 354 355 return sc; 356} 357 358struct dt_softc * 359dtalloc(void) 360{ 361 struct dt_softc *sc; 362 struct dt_evt *dtev; 363 int i; 364 365 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO); 366 if (sc == NULL) 367 return NULL; 368 369 for (i = 0; i < ncpusfound; i++) { 370 dtev = mallocarray(DT_EVTRING_SIZE, sizeof(*dtev), M_DEVBUF, 371 M_WAITOK|M_CANFAIL|M_ZERO); 372 if (dtev == NULL) 373 break; 374 sc->ds_cpu[i].dc_ring = dtev; 375 } 376 if (i < ncpusfound) { 377 dtfree(sc); 378 return NULL; 379 } 380 381 sc->ds_si = softintr_establish(IPL_SOFTCLOCK | IPL_MPSAFE, 382 dt_deferred_wakeup, sc); 383 if (sc->ds_si == NULL) { 384 dtfree(sc); 385 return NULL; 386 } 387 388 return sc; 389} 390 391void 392dtfree(struct dt_softc *sc) 393{ 394 
struct dt_evt *dtev; 395 int i; 396 397 if (sc->ds_si != NULL) 398 softintr_disestablish(sc->ds_si); 399 400 for (i = 0; i < ncpusfound; i++) { 401 dtev = sc->ds_cpu[i].dc_ring; 402 free(dtev, M_DEVBUF, DT_EVTRING_SIZE * sizeof(*dtev)); 403 } 404 free(sc, M_DEVBUF, sizeof(*sc)); 405} 406 407int 408dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr) 409{ 410 struct dtioc_probe_info info, *dtpi; 411 struct dt_probe *dtp; 412 size_t size; 413 int error = 0; 414 415 size = dtpr->dtpr_size; 416 dtpr->dtpr_size = dt_nprobes * sizeof(*dtpi); 417 if (size == 0) 418 return 0; 419 420 dtpi = dtpr->dtpr_probes; 421 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 422 if (size < sizeof(*dtpi)) { 423 error = ENOSPC; 424 break; 425 } 426 memset(&info, 0, sizeof(info)); 427 info.dtpi_pbn = dtp->dtp_pbn; 428 info.dtpi_nargs = dtp->dtp_nargs; 429 strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name, 430 sizeof(info.dtpi_prov)); 431 strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func)); 432 strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name)); 433 error = copyout(&info, dtpi, sizeof(*dtpi)); 434 if (error) 435 break; 436 size -= sizeof(*dtpi); 437 dtpi++; 438 } 439 440 return error; 441} 442 443int 444dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar) 445{ 446 struct dtioc_arg_info info, *dtai; 447 struct dt_probe *dtp; 448 size_t size, n, t; 449 uint32_t pbn; 450 int error = 0; 451 452 pbn = dtar->dtar_pbn; 453 if (pbn == 0 || pbn > dt_nprobes) 454 return EINVAL; 455 456 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 457 if (pbn == dtp->dtp_pbn) 458 break; 459 } 460 if (dtp == NULL) 461 return EINVAL; 462 463 if (dtp->dtp_sysnum != 0) { 464 /* currently not supported for system calls */ 465 dtar->dtar_size = 0; 466 return 0; 467 } 468 469 size = dtar->dtar_size; 470 dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtar); 471 if (size == 0) 472 return 0; 473 474 t = 0; 475 dtai = dtar->dtar_args; 476 for (n = 0; n < dtp->dtp_nargs; n++) { 477 
if (size < sizeof(*dtai)) { 478 error = ENOSPC; 479 break; 480 } 481 if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL) 482 continue; 483 memset(&info, 0, sizeof(info)); 484 info.dtai_pbn = dtp->dtp_pbn; 485 info.dtai_argn = t++; 486 strlcpy(info.dtai_argtype, dtp->dtp_argtype[n], 487 sizeof(info.dtai_argtype)); 488 error = copyout(&info, dtai, sizeof(*dtai)); 489 if (error) 490 break; 491 size -= sizeof(*dtai); 492 dtai++; 493 } 494 dtar->dtar_size = t * sizeof(*dtar); 495 496 return error; 497} 498 499int 500dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst) 501{ 502 struct dt_cpubuf *dc; 503 uint64_t readevt, dropevt, skiptick, recurevt; 504 int i; 505 506 readevt = dropevt = skiptick = 0; 507 for (i = 0; i < ncpusfound; i++) { 508 dc = &sc->ds_cpu[i]; 509 510 membar_consumer(); 511 dropevt += dc->dc_dropevt; 512 skiptick = dc->dc_skiptick; 513 recurevt = dc->dc_recurevt; 514 readevt += dc->dc_readevt; 515 } 516 517 dtst->dtst_readevt = readevt; 518 dtst->dtst_dropevt = dropevt; 519 dtst->dtst_skiptick = skiptick; 520 dtst->dtst_recurevt = recurevt; 521 return 0; 522} 523 524int 525dt_ioctl_record_start(struct dt_softc *sc) 526{ 527 uint64_t now; 528 struct dt_pcb *dp; 529 int error = 0; 530 531 rw_enter_write(&dt_lock); 532 if (sc->ds_recording) { 533 error = EBUSY; 534 goto out; 535 } 536 537 KERNEL_ASSERT_LOCKED(); 538 if (TAILQ_EMPTY(&sc->ds_pcbs)) { 539 error = ENOENT; 540 goto out; 541 } 542 543 now = nsecuptime(); 544 TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { 545 struct dt_probe *dtp = dp->dp_dtp; 546 547 SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext); 548 dtp->dtp_recording++; 549 dtp->dtp_prov->dtpv_recording++; 550 551 if (dp->dp_nsecs != 0) { 552 clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock, 553 dp); 554 clockintr_schedule(&dp->dp_clockintr, 555 now + dp->dp_nsecs); 556 } 557 } 558 sc->ds_recording = 1; 559 dt_tracing++; 560 561 out: 562 rw_exit_write(&dt_lock); 563 return error; 564} 565 566void 
567dt_ioctl_record_stop(struct dt_softc *sc) 568{ 569 struct dt_pcb *dp; 570 571 rw_enter_write(&dt_lock); 572 if (!sc->ds_recording) { 573 rw_exit_write(&dt_lock); 574 return; 575 } 576 577 DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid); 578 579 dt_tracing--; 580 sc->ds_recording = 0; 581 TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { 582 struct dt_probe *dtp = dp->dp_dtp; 583 584 /* 585 * Set an execution barrier to ensure the shared 586 * reference to dp is inactive. 587 */ 588 if (dp->dp_nsecs != 0) 589 clockintr_unbind(&dp->dp_clockintr, CL_BARRIER); 590 591 dtp->dtp_recording--; 592 dtp->dtp_prov->dtpv_recording--; 593 SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext); 594 } 595 rw_exit_write(&dt_lock); 596 597 /* Wait until readers cannot access the PCBs. */ 598 smr_barrier(); 599} 600 601int 602dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq) 603{ 604 struct dt_pcb_list plist; 605 struct dt_probe *dtp; 606 struct dt_pcb *dp; 607 int error; 608 609 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 610 if (dtp->dtp_pbn == dtrq->dtrq_pbn) 611 break; 612 } 613 if (dtp == NULL) 614 return ENOENT; 615 616 /* Only allow one probe of each type. 
*/ 617 TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) { 618 if (dp->dp_dtp->dtp_pbn == dtrq->dtrq_pbn) 619 return EEXIST; 620 } 621 622 TAILQ_INIT(&plist); 623 error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq); 624 if (error) 625 return error; 626 627 DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid, 628 dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS); 629 630 /* Append all PCBs to this instance */ 631 TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext); 632 633 return 0; 634} 635 636int 637dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq) 638{ 639 struct dt_probe *dtp; 640 int error; 641 642 SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) { 643 if (dtp->dtp_pbn == dtrq->dtrq_pbn) 644 break; 645 } 646 if (dtp == NULL) 647 return ENOENT; 648 649 if (dtp->dtp_prov->dtpv_dealloc) { 650 error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq); 651 if (error) 652 return error; 653 } 654 655 DPRINTF("dt%d: pid %d dealloc\n", sc->ds_unit, sc->ds_pid, 656 dtrq->dtrq_pbn); 657 658 return 0; 659} 660 661int 662dt_ioctl_rd_vnode(struct dt_softc *sc, struct dtioc_rdvn *dtrv) 663{ 664 struct process *ps; 665 struct proc *p = curproc; 666 boolean_t ok; 667 struct vm_map_entry *e; 668 int err = 0; 669 int fd; 670 struct uvm_vnode *uvn; 671 struct vnode *vn; 672 struct file *fp; 673 674 if ((ps = prfind(dtrv->dtrv_pid)) == NULL) 675 return ESRCH; 676 677 vm_map_lock_read(&ps->ps_vmspace->vm_map); 678 679 ok = uvm_map_lookup_entry(&ps->ps_vmspace->vm_map, 680 (vaddr_t)dtrv->dtrv_va, &e); 681 if (ok == 0 || (e->etype & UVM_ET_OBJ) == 0 || 682 (e->protection & PROT_EXEC) == 0 || 683 !UVM_OBJ_IS_VNODE(e->object.uvm_obj)) { 684 err = ENOENT; 685 vn = NULL; 686 DPRINTF("%s no mapping for %p\n", __func__, dtrv->dtrv_va); 687 } else { 688 uvn = (struct uvm_vnode *)e->object.uvm_obj; 689 vn = uvn->u_vnode; 690 vref(vn); 691 692 dtrv->dtrv_len = (size_t)uvn->u_size; 693 dtrv->dtrv_start = (caddr_t)e->start; 694 dtrv->dtrv_offset = 
(caddr_t)e->offset; 695 } 696 697 vm_map_unlock_read(&ps->ps_vmspace->vm_map); 698 699 if (vn != NULL) { 700 fdplock(p->p_fd); 701 err = falloc(p, &fp, &fd); 702 fdpunlock(p->p_fd); 703 if (err != 0) { 704 vrele(vn); 705 DPRINTF("%s fdopen failed (%d)\n", __func__, err); 706 return err; 707 } 708 err = VOP_OPEN(vn, O_RDONLY, p->p_p->ps_ucred, p); 709 if (err == 0) { 710 fp->f_flag = FREAD; 711 fp->f_type = DTYPE_VNODE; 712 fp->f_ops = &vnops; 713 fp->f_data = vn; 714 dtrv->dtrv_fd = fd; 715 fdplock(p->p_fd); 716 fdinsert(p->p_fd, fd, UF_EXCLOSE, fp); 717 fdpunlock(p->p_fd); 718 FRELE(fp, p); 719 } else { 720 DPRINTF("%s vopen() failed (%d)\n", __func__, 721 err); 722 vrele(vn); 723 fdplock(p->p_fd); 724 fdremove(p->p_fd, fd); 725 fdpunlock(p->p_fd); 726 FRELE(fp, p); 727 } 728 } 729 730 return err; 731} 732 733struct dt_probe * 734dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv) 735{ 736 struct dt_probe *dtp; 737 738 dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO); 739 if (dtp == NULL) 740 return NULL; 741 742 SMR_SLIST_INIT(&dtp->dtp_pcbs); 743 dtp->dtp_prov = dtpv; 744 dtp->dtp_func = func; 745 dtp->dtp_name = name; 746 dtp->dtp_sysnum = -1; 747 dtp->dtp_ref = 0; 748 749 return dtp; 750} 751 752void 753dt_dev_register_probe(struct dt_probe *dtp) 754{ 755 static uint64_t probe_nb; 756 757 dtp->dtp_pbn = ++probe_nb; 758 SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next); 759} 760 761struct dt_pcb * 762dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc) 763{ 764 struct dt_pcb *dp; 765 766 dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO); 767 if (dp == NULL) 768 return NULL; 769 770 dp->dp_sc = sc; 771 dp->dp_dtp = dtp; 772 return dp; 773} 774 775void 776dt_pcb_free(struct dt_pcb *dp) 777{ 778 free(dp, M_DT, sizeof(*dp)); 779} 780 781void 782dt_pcb_purge(struct dt_pcb_list *plist) 783{ 784 struct dt_pcb *dp; 785 786 while ((dp = TAILQ_FIRST(plist)) != NULL) { 787 TAILQ_REMOVE(plist, dp, dp_snext); 788 dt_pcb_free(dp); 
789 } 790} 791 792void 793dt_pcb_ring_skiptick(struct dt_pcb *dp, unsigned int skip) 794{ 795 struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()]; 796 797 dc->dc_skiptick += skip; 798 membar_producer(); 799} 800 801/* 802 * Get a reference to the next free event state from the ring. 803 */ 804struct dt_evt * 805dt_pcb_ring_get(struct dt_pcb *dp, int profiling) 806{ 807 struct proc *p = curproc; 808 struct dt_evt *dtev; 809 int prod, cons, distance; 810 struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()]; 811 812 if (dc->dc_inevt == 1) { 813 dc->dc_recurevt++; 814 membar_producer(); 815 return NULL; 816 } 817 818 dc->dc_inevt = 1; 819 820 membar_consumer(); 821 prod = dc->dc_prod; 822 cons = dc->dc_cons; 823 distance = prod - cons; 824 if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) { 825 /* read(2) isn't finished */ 826 dc->dc_dropevt++; 827 membar_producer(); 828 829 dc->dc_inevt = 0; 830 return NULL; 831 } 832 833 /* 834 * Save states in next free event slot. 835 */ 836 dtev = &dc->dc_ring[cons]; 837 memset(dtev, 0, sizeof(*dtev)); 838 839 dtev->dtev_pbn = dp->dp_dtp->dtp_pbn; 840 dtev->dtev_cpu = cpu_number(); 841 dtev->dtev_pid = p->p_p->ps_pid; 842 dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET; 843 nanotime(&dtev->dtev_tsp); 844 845 if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME)) 846 strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm)); 847 848 if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) { 849 if (profiling) 850 stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE); 851 else 852 stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC); 853 } 854 if (ISSET(dp->dp_evtflags, DTEVT_USTACK)) 855 stacktrace_save_utrace(&dtev->dtev_ustack); 856 857 return dtev; 858} 859 860void 861dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev) 862{ 863 struct dt_cpubuf *dc = &dp->dp_sc->ds_cpu[cpu_number()]; 864 865 KASSERT(dtev == &dc->dc_ring[dc->dc_cons]); 866 867 dc->dc_cons = (dc->dc_cons + 1) % DT_EVTRING_SIZE; 868 membar_producer(); 869 870 
atomic_inc_int(&dp->dp_sc->ds_evtcnt); 871 dc->dc_inevt = 0; 872 873 dt_wakeup(dp->dp_sc); 874} 875 876/* 877 * Copy at most `max' events from `dc', producing the same amount 878 * of free slots. 879 */ 880int 881dt_ring_copy(struct dt_cpubuf *dc, struct uio *uio, size_t max, size_t *rcvd) 882{ 883 size_t count, copied = 0; 884 unsigned int cons, prod; 885 int error = 0; 886 887 KASSERT(max > 0); 888 889 membar_consumer(); 890 cons = dc->dc_cons; 891 prod = dc->dc_prod; 892 893 if (cons < prod) 894 count = DT_EVTRING_SIZE - prod; 895 else 896 count = cons - prod; 897 898 if (count == 0) 899 return 0; 900 901 count = MIN(count, max); 902 error = uiomove(&dc->dc_ring[prod], count * sizeof(struct dt_evt), uio); 903 if (error) 904 return error; 905 copied += count; 906 907 /* Produce */ 908 prod = (prod + count) % DT_EVTRING_SIZE; 909 910 /* If the ring didn't wrap, stop here. */ 911 if (max == copied || prod != 0 || cons == 0) 912 goto out; 913 914 count = MIN(cons, (max - copied)); 915 error = uiomove(&dc->dc_ring[0], count * sizeof(struct dt_evt), uio); 916 if (error) 917 goto out; 918 919 copied += count; 920 prod += count; 921 922out: 923 dc->dc_readevt += copied; 924 dc->dc_prod = prod; 925 membar_producer(); 926 927 *rcvd = copied; 928 return error; 929} 930 931void 932dt_wakeup(struct dt_softc *sc) 933{ 934 /* 935 * It is not always safe or possible to call wakeup(9) and grab 936 * the SCHED_LOCK() from a given tracepoint. This is true for 937 * any tracepoint that might trigger inside the scheduler or at 938 * any IPL higher than IPL_SCHED. For this reason use a soft- 939 * interrupt to defer the wakeup. 940 */ 941 softintr_schedule(sc->ds_si); 942} 943 944void 945dt_deferred_wakeup(void *arg) 946{ 947 struct dt_softc *sc = arg; 948 949 wakeup(sc); 950}