Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86, ptrace: new ptrace BTS API

This patch introduces the new ptrace BTS API. It supports two overflow handling mechanisms (wrap-around and buffer-full signal) to serve two use cases (debugging and profiling).

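It also combines buffer allocation and configuration into a single request (PTRACE_BTS_CONFIG): a new buffer is allocated whenever the requested size differs from the current one.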

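Open issues:
- memory rlimit
- overflow signal (not implemented yet; requesting it currently fails with EOPNOTSUPP)

What would be the right signal to use for the overflow notification?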

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Authored by Markus Metzger and committed by Ingo Molnar
a95d67f8 e4811f25

+243 -148
+51 -5
arch/x86/kernel/ds.c
··· 177 177 } 178 178 179 179 180 - int ds_allocate(void **dsp, size_t bts_size_in_records) 180 + int ds_allocate(void **dsp, size_t bts_size_in_bytes) 181 181 { 182 - size_t bts_size_in_bytes = 0; 183 - void *bts = 0; 184 - void *ds = 0; 182 + size_t bts_size_in_records; 183 + void *bts; 184 + void *ds; 185 185 186 186 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 187 187 return -EOPNOTSUPP; 188 188 189 - if (bts_size_in_records < 0) 189 + if (bts_size_in_bytes < 0) 190 190 return -EINVAL; 191 191 192 + bts_size_in_records = 193 + bts_size_in_bytes / ds_cfg.sizeof_bts; 192 194 bts_size_in_bytes = 193 195 bts_size_in_records * ds_cfg.sizeof_bts; 194 196 ··· 235 233 if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) 236 234 return -EOPNOTSUPP; 237 235 236 + if (!ds) 237 + return 0; 238 + 238 239 size_in_bytes = 239 240 get_bts_absolute_maximum(ds) - 240 241 get_bts_buffer_base(ds); 242 + return size_in_bytes; 243 + } 244 + 245 + int ds_get_bts_end(void *ds) 246 + { 247 + size_t size_in_bytes = ds_get_bts_size(ds); 248 + 249 + if (size_in_bytes <= 0) 250 + return size_in_bytes; 241 251 242 252 return size_in_bytes / ds_cfg.sizeof_bts; 243 253 } ··· 266 252 get_bts_buffer_base(ds); 267 253 268 254 return index_offset_in_bytes / ds_cfg.sizeof_bts; 255 + } 256 + 257 + int ds_set_overflow(void *ds, int method) 258 + { 259 + switch (method) { 260 + case DS_O_SIGNAL: 261 + return -EOPNOTSUPP; 262 + case DS_O_WRAP: 263 + return 0; 264 + default: 265 + return -EINVAL; 266 + } 267 + } 268 + 269 + int ds_get_overflow(void *ds) 270 + { 271 + return DS_O_WRAP; 272 + } 273 + 274 + int ds_clear(void *ds) 275 + { 276 + int bts_size = ds_get_bts_size(ds); 277 + void *bts_base; 278 + 279 + if (bts_size <= 0) 280 + return bts_size; 281 + 282 + bts_base = get_bts_buffer_base(ds); 283 + memset(bts_base, 0, bts_size); 284 + 285 + set_bts_index(ds, bts_base); 286 + return 0; 269 287 } 270 288 271 289 int ds_read_bts(void *ds, size_t index, struct bts_struct *out)
+138 -99
arch/x86/kernel/ptrace.c
··· 33 33 34 34 35 35 /* 36 - * The maximal size of a BTS buffer per traced task in number of BTS 37 - * records. 38 - */ 39 - #define PTRACE_BTS_BUFFER_MAX 4000 40 - 41 - /* 42 36 * does not yet catch signals sent when the child dies. 43 37 * in exit.c or in signal.c. 44 38 */ ··· 460 466 return 0; 461 467 } 462 468 463 - static int ptrace_bts_max_buffer_size(void) 464 - { 465 - return PTRACE_BTS_BUFFER_MAX; 466 - } 467 - 468 - static int ptrace_bts_get_buffer_size(struct task_struct *child) 469 + static int ptrace_bts_get_size(struct task_struct *child) 469 470 { 470 471 if (!child->thread.ds_area_msr) 471 472 return -ENXIO; 472 473 473 - return ds_get_bts_size((void *)child->thread.ds_area_msr); 474 + return ds_get_bts_index((void *)child->thread.ds_area_msr); 474 475 } 475 476 476 477 static int ptrace_bts_read_record(struct task_struct *child, ··· 474 485 { 475 486 struct bts_struct ret; 476 487 int retval; 477 - int bts_size; 488 + int bts_end; 478 489 int bts_index; 479 490 480 491 if (!child->thread.ds_area_msr) ··· 483 494 if (index < 0) 484 495 return -EINVAL; 485 496 486 - bts_size = ds_get_bts_size((void *)child->thread.ds_area_msr); 487 - if (bts_size <= index) 497 + bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); 498 + if (bts_end <= index) 488 499 return -EINVAL; 489 500 490 501 /* translate the ptrace bts index into the ds bts index */ 491 502 bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); 492 503 bts_index -= (index + 1); 493 504 if (bts_index < 0) 494 - bts_index += bts_size; 505 + bts_index += bts_end; 495 506 496 507 retval = ds_read_bts((void *)child->thread.ds_area_msr, 497 508 bts_index, &ret); ··· 519 530 return sizeof(*in); 520 531 } 521 532 522 - static int ptrace_bts_config(struct task_struct *child, 523 - unsigned long options) 533 + static int ptrace_bts_clear(struct task_struct *child) 524 534 { 525 - unsigned long debugctl_mask = ds_debugctl_mask(); 526 - int retval; 527 - 528 - retval = 
ptrace_bts_get_buffer_size(child); 529 - if (retval < 0) 530 - return retval; 531 - if (retval == 0) 535 + if (!child->thread.ds_area_msr) 532 536 return -ENXIO; 533 537 534 - if (options & PTRACE_BTS_O_TRACE_TASK) { 538 + return ds_clear((void *)child->thread.ds_area_msr); 539 + } 540 + 541 + static int ptrace_bts_drain(struct task_struct *child, 542 + struct bts_struct __user *out) 543 + { 544 + int end, i; 545 + void *ds = (void *)child->thread.ds_area_msr; 546 + 547 + if (!ds) 548 + return -ENXIO; 549 + 550 + end = ds_get_bts_index(ds); 551 + if (end <= 0) 552 + return end; 553 + 554 + for (i = 0; i < end; i++, out++) { 555 + struct bts_struct ret; 556 + int retval; 557 + 558 + retval = ds_read_bts(ds, i, &ret); 559 + if (retval < 0) 560 + return retval; 561 + 562 + if (copy_to_user(out, &ret, sizeof(ret))) 563 + return -EFAULT; 564 + } 565 + 566 + ds_clear(ds); 567 + 568 + return i; 569 + } 570 + 571 + static int ptrace_bts_config(struct task_struct *child, 572 + const struct ptrace_bts_config __user *ucfg) 573 + { 574 + struct ptrace_bts_config cfg; 575 + unsigned long debugctl_mask; 576 + int bts_size, ret; 577 + void *ds; 578 + 579 + if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 580 + return -EFAULT; 581 + 582 + bts_size = 0; 583 + ds = (void *)child->thread.ds_area_msr; 584 + if (ds) { 585 + bts_size = ds_get_bts_size(ds); 586 + if (bts_size < 0) 587 + return bts_size; 588 + } 589 + 590 + if (bts_size != cfg.size) { 591 + ret = ds_free((void **)&child->thread.ds_area_msr); 592 + if (ret < 0) 593 + return ret; 594 + 595 + if (cfg.size > 0) 596 + ret = ds_allocate((void **)&child->thread.ds_area_msr, 597 + cfg.size); 598 + ds = (void *)child->thread.ds_area_msr; 599 + if (ds) 600 + set_tsk_thread_flag(child, TIF_DS_AREA_MSR); 601 + else 602 + clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); 603 + 604 + if (ret < 0) 605 + return ret; 606 + 607 + bts_size = ds_get_bts_size(ds); 608 + if (bts_size <= 0) 609 + return bts_size; 610 + } 611 + 612 + if (ds) { 613 + if 
(cfg.flags & PTRACE_BTS_O_SIGNAL) { 614 + ret = ds_set_overflow(ds, DS_O_SIGNAL); 615 + } else { 616 + ret = ds_set_overflow(ds, DS_O_WRAP); 617 + } 618 + if (ret < 0) 619 + return ret; 620 + } 621 + 622 + debugctl_mask = ds_debugctl_mask(); 623 + if (ds && (cfg.flags & PTRACE_BTS_O_TRACE)) { 535 624 child->thread.debugctlmsr |= debugctl_mask; 536 625 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 537 626 } else { ··· 622 555 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 623 556 } 624 557 625 - if (options & PTRACE_BTS_O_TIMESTAMPS) 558 + if (ds && (cfg.flags & PTRACE_BTS_O_SCHED)) 626 559 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 627 560 else 628 561 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); ··· 630 563 return 0; 631 564 } 632 565 633 - static int ptrace_bts_status(struct task_struct *child) 566 + static int ptrace_bts_status(struct task_struct *child, 567 + struct ptrace_bts_config __user *ucfg) 634 568 { 635 - unsigned long debugctl_mask = ds_debugctl_mask(); 636 - int retval, status = 0; 569 + void *ds = (void *)child->thread.ds_area_msr; 570 + struct ptrace_bts_config cfg; 637 571 638 - retval = ptrace_bts_get_buffer_size(child); 639 - if (retval < 0) 640 - return retval; 641 - if (retval == 0) 642 - return -ENXIO; 572 + memset(&cfg, 0, sizeof(cfg)); 643 573 644 - if (ptrace_bts_get_buffer_size(child) <= 0) 645 - return -ENXIO; 574 + if (ds) { 575 + cfg.size = ds_get_bts_size(ds); 646 576 647 - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 648 - child->thread.debugctlmsr & debugctl_mask) 649 - status |= PTRACE_BTS_O_TRACE_TASK; 650 - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 651 - status |= PTRACE_BTS_O_TIMESTAMPS; 577 + if (ds_get_overflow(ds) == DS_O_SIGNAL) 578 + cfg.flags |= PTRACE_BTS_O_SIGNAL; 652 579 653 - return status; 654 - } 580 + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 581 + child->thread.debugctlmsr & ds_debugctl_mask()) 582 + cfg.flags |= PTRACE_BTS_O_TRACE; 655 583 656 - static int 
ptrace_bts_allocate_bts(struct task_struct *child, 657 - int size_in_records) 658 - { 659 - int retval = 0; 660 - void *ds; 661 - 662 - if (size_in_records < 0) 663 - return -EINVAL; 664 - 665 - if (size_in_records > ptrace_bts_max_buffer_size()) 666 - return -EINVAL; 667 - 668 - if (size_in_records == 0) { 669 - ptrace_bts_config(child, /* options = */ 0); 670 - } else { 671 - retval = ds_allocate(&ds, size_in_records); 672 - if (retval) 673 - return retval; 584 + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 585 + cfg.flags |= PTRACE_BTS_O_SCHED; 674 586 } 675 587 676 - if (child->thread.ds_area_msr) 677 - ds_free((void **)&child->thread.ds_area_msr); 588 + if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 589 + return -EFAULT; 678 590 679 - child->thread.ds_area_msr = (unsigned long)ds; 680 - if (child->thread.ds_area_msr) 681 - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); 682 - else 683 - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); 684 - 685 - return retval; 591 + return sizeof(cfg); 686 592 } 687 593 688 594 void ptrace_bts_take_timestamp(struct task_struct *tsk, ··· 665 625 .qualifier = qualifier, 666 626 .variant.jiffies = jiffies 667 627 }; 668 - 669 - if (ptrace_bts_get_buffer_size(tsk) <= 0) 670 - return; 671 628 672 629 ptrace_bts_write_record(tsk, &rec); 673 630 } ··· 845 808 break; 846 809 #endif 847 810 848 - case PTRACE_BTS_MAX_BUFFER_SIZE: 849 - ret = ptrace_bts_max_buffer_size(); 850 - break; 851 - 852 - case PTRACE_BTS_ALLOCATE_BUFFER: 853 - ret = ptrace_bts_allocate_bts(child, data); 854 - break; 855 - 856 - case PTRACE_BTS_GET_BUFFER_SIZE: 857 - ret = ptrace_bts_get_buffer_size(child); 858 - break; 859 - 860 - case PTRACE_BTS_READ_RECORD: 861 - ret = ptrace_bts_read_record 862 - (child, data, 863 - (struct bts_struct __user *) addr); 864 - break; 865 - 866 811 case PTRACE_BTS_CONFIG: 867 - ret = ptrace_bts_config(child, data); 812 + ret = ptrace_bts_config 813 + (child, (struct ptrace_bts_config __user *)addr); 868 814 break; 869 815 870 816 
case PTRACE_BTS_STATUS: 871 - ret = ptrace_bts_status(child); 817 + ret = ptrace_bts_status 818 + (child, (struct ptrace_bts_config __user *)addr); 819 + break; 820 + 821 + case PTRACE_BTS_SIZE: 822 + ret = ptrace_bts_get_size(child); 823 + break; 824 + 825 + case PTRACE_BTS_GET: 826 + ret = ptrace_bts_read_record 827 + (child, data, (struct bts_struct __user *) addr); 828 + break; 829 + 830 + case PTRACE_BTS_CLEAR: 831 + ret = ptrace_bts_clear(child); 832 + break; 833 + 834 + case PTRACE_BTS_DRAIN: 835 + ret = ptrace_bts_drain 836 + (child, (struct bts_struct __user *) addr); 872 837 break; 873 838 874 839 default: ··· 1056 1017 case PTRACE_SETOPTIONS: 1057 1018 case PTRACE_SET_THREAD_AREA: 1058 1019 case PTRACE_GET_THREAD_AREA: 1059 - case PTRACE_BTS_MAX_BUFFER_SIZE: 1060 - case PTRACE_BTS_ALLOCATE_BUFFER: 1061 - case PTRACE_BTS_GET_BUFFER_SIZE: 1062 - case PTRACE_BTS_READ_RECORD: 1063 1020 case PTRACE_BTS_CONFIG: 1064 1021 case PTRACE_BTS_STATUS: 1022 + case PTRACE_BTS_SIZE: 1023 + case PTRACE_BTS_GET: 1024 + case PTRACE_BTS_CLEAR: 1025 + case PTRACE_BTS_DRAIN: 1065 1026 return sys_ptrace(request, pid, addr, data); 1066 1027 1067 1028 default:
+7
include/asm-x86/ds.h
··· 52 52 } variant; 53 53 }; 54 54 55 + /* Overflow handling mechanisms */ 56 + #define DS_O_SIGNAL 1 /* send overflow signal */ 57 + #define DS_O_WRAP 2 /* wrap around */ 55 58 56 59 extern int ds_allocate(void **, size_t); 57 60 extern int ds_free(void **); 58 61 extern int ds_get_bts_size(void *); 62 + extern int ds_get_bts_end(void *); 59 63 extern int ds_get_bts_index(void *); 64 + extern int ds_set_overflow(void *, int); 65 + extern int ds_get_overflow(void *); 66 + extern int ds_clear(void *); 60 67 extern int ds_read_bts(void *, size_t, struct bts_struct *); 61 68 extern int ds_write_bts(void *, const struct bts_struct *); 62 69 extern unsigned long ds_debugctl_mask(void);
+46 -44
include/asm-x86/ptrace-abi.h
··· 80 80 81 81 #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ 82 82 83 - /* Return maximal BTS buffer size in number of records, 84 - if successuf; -1, otherwise. 85 - EOPNOTSUPP...processor does not support bts tracing */ 86 - #define PTRACE_BTS_MAX_BUFFER_SIZE 40 83 + /* configuration/status structure used in PTRACE_BTS_CONFIG and 84 + PTRACE_BTS_STATUS commands. 85 + */ 86 + struct ptrace_bts_config { 87 + /* requested or actual size of BTS buffer in bytes */ 88 + unsigned long size; 89 + /* bitmask of below flags */ 90 + unsigned long flags; 91 + }; 87 92 88 - /* Allocate new bts buffer (free old one, if exists) of size DATA bts records; 89 - parameter ADDR is ignored. 90 - Return 0, if successful; -1, otherwise. 91 - EOPNOTSUPP...processor does not support bts tracing 92 - EINVAL.......invalid size in records 93 - ENOMEM.......out of memory */ 94 - #define PTRACE_BTS_ALLOCATE_BUFFER 41 93 + #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ 94 + #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ 95 + #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG? on buffer overflow 96 + instead of wrapping around */ 97 + #define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available 98 + instead of failing */ 95 99 96 - /* Return the size of the bts buffer in number of bts records, 97 - if successful; -1, otherwise. 98 - EOPNOTSUPP...processor does not support bts tracing 99 - ENXIO........no buffer allocated */ 100 - #define PTRACE_BTS_GET_BUFFER_SIZE 42 101 - 102 - /* Read the DATA'th bts record into a ptrace_bts_record buffer 103 - provided in ADDR. 104 - Records are ordered from newest to oldest. 
105 - Return 0, if successful; -1, otherwise 106 - EOPNOTSUPP...processor does not support bts tracing 107 - ENXIO........no buffer allocated 108 - EINVAL.......invalid index */ 109 - #define PTRACE_BTS_READ_RECORD 43 110 - 111 - /* Configure last branch trace; the configuration is given as a bit-mask of 112 - PTRACE_BTS_O_* options in DATA; parameter ADDR is ignored. 113 - Return 0, if successful; -1, otherwise 114 - EOPNOTSUPP...processor does not support bts tracing 115 - ENXIO........no buffer allocated */ 116 - #define PTRACE_BTS_CONFIG 44 117 - 118 - /* Return the configuration as bit-mask of PTRACE_BTS_O_* options 119 - if successful; -1, otherwise. 120 - EOPNOTSUPP...processor does not support bts tracing 121 - ENXIO........no buffer allocated */ 122 - #define PTRACE_BTS_STATUS 45 123 - 124 - /* Trace configuration options */ 125 - /* Collect last branch trace */ 126 - #define PTRACE_BTS_O_TRACE_TASK 0x1 127 - /* Take timestamps when the task arrives and departs */ 128 - #define PTRACE_BTS_O_TIMESTAMPS 0x2 100 + #define PTRACE_BTS_CONFIG 40 101 + /* Configure branch trace recording. 102 + DATA is ignored, ADDR points to a struct ptrace_bts_config. 103 + A new buffer is allocated, iff the size changes. 104 + */ 105 + #define PTRACE_BTS_STATUS 41 106 + /* Return the current configuration. 107 + DATA is ignored, ADDR points to a struct ptrace_bts_config 108 + that will contain the result. 109 + */ 110 + #define PTRACE_BTS_SIZE 42 111 + /* Return the number of available BTS records. 112 + DATA and ADDR are ignored. 113 + */ 114 + #define PTRACE_BTS_GET 43 115 + /* Get a single BTS record. 116 + DATA defines the index into the BTS array, where 0 is the newest 117 + entry, and higher indices refer to older entries. 118 + ADDR is pointing to struct bts_struct (see asm/ds.h). 119 + */ 120 + #define PTRACE_BTS_CLEAR 44 121 + /* Clear the BTS buffer. 122 + DATA and ADDR are ignored. 
123 + */ 124 + #define PTRACE_BTS_DRAIN 45 125 + /* Read all available BTS records and clear the buffer. 126 + DATA is ignored. ADDR points to an array of struct bts_struct of 127 + suitable size. 128 + BTS records are read from oldest to newest. 129 + Returns number of BTS records drained. 130 + */ 129 131 130 132 #endif
+1
include/asm-x86/ptrace.h
··· 9 9 10 10 #ifdef __KERNEL__ 11 11 12 + /* the DS BTS struct is used for ptrace as well */ 12 13 #include <asm/ds.h> 13 14 14 15 struct task_struct;