Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/cpum_sf: Add raw data sampling to support the diagnostic-sampling function

Support the diagnostic-sampling function in addition to the basic-sampling
function. Diagnostic-sampling data entries contain hardware-model-specific
sampling data, and additional programs are required to analyze the data.

To deliver diagnostic-sampling as well as basic-sampling data entries to user
space, introduce support for sampling "raw data". If this particular perf
sampling type (PERF_SAMPLE_RAW) is used, sampling data entries are copied
to user space. External programs can then analyze these data.

Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>

authored by

Hendrik Brueckner and committed by
Martin Schwidefsky
7e75fc3f dd127b3b

+377 -96
+27 -13
arch/s390/include/asm/cpu_mf.h
··· 59 59 /* QUERY SAMPLING INFORMATION block */ 60 60 struct hws_qsi_info_block { /* Bit(s) */ 61 61 unsigned int b0_13:14; /* 0-13: zeros */ 62 - unsigned int as:1; /* 14: sampling authorisation control*/ 63 - unsigned int b15_21:7; /* 15-21: zeros */ 64 - unsigned int es:1; /* 22: sampling enable control */ 65 - unsigned int b23_29:7; /* 23-29: zeros */ 66 - unsigned int cs:1; /* 30: sampling activation control */ 67 - unsigned int:1; /* 31: reserved */ 68 - unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ 62 + unsigned int as:1; /* 14: basic-sampling authorization */ 63 + unsigned int ad:1; /* 15: diag-sampling authorization */ 64 + unsigned int b16_21:6; /* 16-21: zeros */ 65 + unsigned int es:1; /* 22: basic-sampling enable control */ 66 + unsigned int ed:1; /* 23: diag-sampling enable control */ 67 + unsigned int b24_29:6; /* 24-29: zeros */ 68 + unsigned int cs:1; /* 30: basic-sampling activation control */ 69 + unsigned int cd:1; /* 31: diag-sampling activation control */ 70 + unsigned int bsdes:16; /* 4-5: size of basic sampling entry */ 69 71 unsigned int dsdes:16; /* 6-7: size of diagnostic sampling entry */ 70 72 unsigned long min_sampl_rate; /* 8-15: minimum sampling interval */ 71 73 unsigned long max_sampl_rate; /* 16-23: maximum sampling interval*/ ··· 84 82 unsigned int s:1; /* 0: maximum buffer indicator */ 85 83 unsigned int h:1; /* 1: part. 
level reserved for VM use*/ 86 84 unsigned long long b2_53:52;/* 2-53: zeros */ 87 - unsigned int es:1; /* 54: sampling enable control */ 88 - unsigned int b55_61:7; /* 55-61: - zeros */ 89 - unsigned int cs:1; /* 62: sampling activation control */ 90 - unsigned int b63:1; /* 63: zero */ 85 + unsigned int es:1; /* 54: basic-sampling enable control */ 86 + unsigned int ed:1; /* 55: diag-sampling enable control */ 87 + unsigned int b56_61:6; /* 56-61: - zeros */ 88 + unsigned int cs:1; /* 62: basic-sampling activation control */ 89 + unsigned int cd:1; /* 63: diag-sampling activation control */ 91 90 unsigned long interval; /* 8-15: sampling interval */ 92 91 unsigned long tear; /* 16-23: TEAR contents */ 93 92 unsigned long dear; /* 24-31: DEAR contents */ ··· 99 96 unsigned long rsvrd4; /* reserved */ 100 97 } __packed; 101 98 102 - 103 - struct hws_data_entry { 99 + struct hws_basic_entry { 104 100 unsigned int def:16; /* 0-15 Data Entry Format */ 105 101 unsigned int R:4; /* 16-19 reserved */ 106 102 unsigned int U:4; /* 20-23 Number of unique instruct. */ ··· 114 112 unsigned long long ia; /* Instruction Address */ 115 113 unsigned long long gpp; /* Guest Program Parameter */ 116 114 unsigned long long hpp; /* Host Program Parameter */ 115 + } __packed; 116 + 117 + struct hws_diag_entry { 118 + unsigned int def:16; /* 0-15 Data Entry Format */ 119 + unsigned int R:14; /* 16-19 and 20-30 reserved */ 120 + unsigned int I:1; /* 31 entry valid or invalid */ 121 + u8 data[]; /* Machine-dependent sample data */ 122 + } __packed; 123 + 124 + struct hws_combined_entry { 125 + struct hws_basic_entry basic; /* Basic-sampling data entry */ 126 + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ 117 127 } __packed; 118 128 119 129 struct hws_trailer_entry {
+26 -2
arch/s390/include/asm/perf_event.h
··· 52 52 #define PERF_CPUM_CF_MAX_CTR 256 53 53 54 54 /* Perf PMU definitions for the sampling facility */ 55 - #define PERF_CPUM_SF_MAX_CTR 1 56 - #define PERF_EVENT_CPUM_SF 0xB0000UL /* Raw event ID */ 55 + #define PERF_CPUM_SF_MAX_CTR 2 56 + #define PERF_EVENT_CPUM_SF 0xB0000UL /* Event: Basic-sampling */ 57 + #define PERF_EVENT_CPUM_SF_DIAG 0xBD000UL /* Event: Combined-sampling */ 58 + #define PERF_CPUM_SF_BASIC_MODE 0x0001 /* Basic-sampling flag */ 59 + #define PERF_CPUM_SF_DIAG_MODE 0x0002 /* Diagnostic-sampling flag */ 60 + #define PERF_CPUM_SF_MODE_MASK (PERF_CPUM_SF_BASIC_MODE| \ 61 + PERF_CPUM_SF_DIAG_MODE) 57 62 58 63 #define REG_NONE 0 59 64 #define REG_OVERFLOW 1 60 65 #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) 61 66 #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) 67 + #define RAWSAMPLE_REG(hwc) ((hwc)->config) 62 68 #define TEAR_REG(hwc) ((hwc)->last_tag) 63 69 #define SAMPL_RATE(hwc) ((hwc)->event_base) 70 + #define SAMPL_FLAGS(hwc) ((hwc)->config_base) 71 + #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) 72 + 73 + /* Structure for sampling data entries to be passed as perf raw sample data 74 + * to user space. Note that raw sample data must be aligned and, thus, might 75 + * be padded with zeros. 76 + */ 77 + struct sf_raw_sample { 78 + #define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE 79 + #define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE 80 + u64 format; 81 + u32 size; /* Size of sf_raw_sample */ 82 + u16 bsdes; /* Basic-sampling data entry size */ 83 + u16 dsdes; /* Diagnostic-sampling data entry size */ 84 + struct hws_basic_entry basic; /* Basic-sampling data entry */ 85 + struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ 86 + u8 padding[]; /* Padding to next multiple of 8 */ 87 + } __packed; 64 88 65 89 /* Perf hardware reserve and release functions */ 66 90 int perf_reserve_sampling(void);
+309 -71
arch/s390/kernel/perf_cpum_sf.c
··· 17 17 #include <linux/percpu.h> 18 18 #include <linux/notifier.h> 19 19 #include <linux/export.h> 20 + #include <linux/slab.h> 20 21 #include <linux/mm.h> 21 22 #include <linux/moduleparam.h> 22 23 #include <asm/cpu_mf.h> ··· 32 31 #define CPUM_SF_MIN_SDBT 1 33 32 34 33 /* Number of sample-data-blocks per sample-data-block-table (SDBT): 35 - * The table contains SDB origin (8 bytes) and one SDBT origin that 36 - * points to the next table. 34 + * A table contains SDB pointers (8 bytes) and one table-link entry 35 + * that points to the origin of the next SDBT. 37 36 */ 38 37 #define CPUM_SF_SDB_PER_TABLE ((PAGE_SIZE - 8) / 8) 39 38 ··· 49 48 50 49 /* Minimum and maximum sampling buffer sizes: 51 50 * 52 - * This number represents the maximum size of the sampling buffer 53 - * taking the number of sample-data-block-tables into account. 51 + * This number represents the maximum size of the sampling buffer taking 52 + * the number of sample-data-block-tables into account. Note that these 53 + * numbers apply to the basic-sampling function only. 54 + * The maximum number of SDBs is increased by CPUM_SF_SDB_DIAG_FACTOR if 55 + * the diagnostic-sampling function is active. 
54 56 * 55 57 * Sampling buffer size Buffer characteristics 56 58 * --------------------------------------------------- ··· 67 63 */ 68 64 static unsigned long __read_mostly CPUM_SF_MIN_SDB = 15; 69 65 static unsigned long __read_mostly CPUM_SF_MAX_SDB = 8176; 66 + static unsigned long __read_mostly CPUM_SF_SDB_DIAG_FACTOR = 1; 70 67 71 68 struct sf_buffer { 72 69 unsigned long *sdbt; /* Sample-data-block-table origin */ ··· 295 290 296 291 static void sfb_set_limits(unsigned long min, unsigned long max) 297 292 { 293 + struct hws_qsi_info_block si; 294 + 298 295 CPUM_SF_MIN_SDB = min; 299 296 CPUM_SF_MAX_SDB = max; 297 + 298 + memset(&si, 0, sizeof(si)); 299 + if (!qsi(&si)) 300 + CPUM_SF_SDB_DIAG_FACTOR = DIV_ROUND_UP(si.dsdes, si.bsdes); 301 + } 302 + 303 + static unsigned long sfb_max_limit(struct hw_perf_event *hwc) 304 + { 305 + return SAMPL_DIAG_MODE(hwc) ? CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR 306 + : CPUM_SF_MAX_SDB; 300 307 } 301 308 302 309 static unsigned long sfb_pending_allocs(struct sf_buffer *sfb, ··· 329 312 330 313 static void sfb_account_allocs(unsigned long num, struct hw_perf_event *hwc) 331 314 { 332 - /* Limit the number SDBs to not exceed the maximum */ 333 - num = min_t(unsigned long, num, CPUM_SF_MAX_SDB - SFB_ALLOC_REG(hwc)); 315 + /* Limit the number of SDBs to not exceed the maximum */ 316 + num = min_t(unsigned long, num, sfb_max_limit(hwc) - SFB_ALLOC_REG(hwc)); 334 317 if (num) 335 318 SFB_ALLOC_REG(hwc) += num; 336 319 } ··· 341 324 sfb_account_allocs(num, hwc); 342 325 } 343 326 344 - static int allocate_sdbt(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) 327 + static size_t event_sample_size(struct hw_perf_event *hwc) 345 328 { 346 - unsigned long n_sdb, freq; 347 - unsigned long factor; 329 + struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); 330 + size_t sample_size; 331 + 332 + /* The sample size depends on the sampling function: The basic-sampling 333 + * function must be always enabled, 
diagnostic-sampling function is 334 + * optional. 335 + */ 336 + sample_size = sfr->bsdes; 337 + if (SAMPL_DIAG_MODE(hwc)) 338 + sample_size += sfr->dsdes; 339 + 340 + return sample_size; 341 + } 342 + 343 + static void deallocate_buffers(struct cpu_hw_sf *cpuhw) 344 + { 345 + if (cpuhw->sfb.sdbt) 346 + free_sampling_buffer(&cpuhw->sfb); 347 + } 348 + 349 + static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) 350 + { 351 + unsigned long n_sdb, freq, factor; 352 + size_t sfr_size, sample_size; 353 + struct sf_raw_sample *sfr; 354 + 355 + /* Allocate raw sample buffer 356 + * 357 + * The raw sample buffer is used to temporarily store sampling data 358 + * entries for perf raw sample processing. The buffer size mainly 359 + * depends on the size of diagnostic-sampling data entries which is 360 + * machine-specific. The exact size calculation includes: 361 + * 1. The first 4 bytes of diagnostic-sampling data entries are 362 + * already reflected in the sf_raw_sample structure. Subtract 363 + * these bytes. 364 + * 2. The perf raw sample data must be 8-byte aligned (u64) and 365 + * perf's internal data size must be considered too. So add 366 + * an additional u32 for correct alignment and subtract before 367 + * allocating the buffer. 368 + * 3. Store the raw sample buffer pointer in the perf event 369 + * hardware structure. 370 + */ 371 + sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + 372 + sizeof(u32), sizeof(u64)); 373 + sfr_size -= sizeof(u32); 374 + sfr = kzalloc(sfr_size, GFP_KERNEL); 375 + if (!sfr) 376 + return -ENOMEM; 377 + sfr->size = sfr_size; 378 + sfr->bsdes = cpuhw->qsi.bsdes; 379 + sfr->dsdes = cpuhw->qsi.dsdes; 380 + RAWSAMPLE_REG(hwc) = (unsigned long) sfr; 348 381 349 382 /* Calculate sampling buffers using 4K pages 350 383 * 351 - * 1. Use frequency as input. The samping buffer is designed for 352 - * a complete second. This can be adjusted through the "factor" 353 - * variable. 384 + * 1. 
Determine the sample data size which depends on the used 385 + * sampling functions, for example, basic-sampling or 386 + * basic-sampling with diagnostic-sampling. 387 + * 388 + * 2. Use the sampling frequency as input. The sampling buffer is 389 + * designed for almost one second. This can be adjusted through 390 + * the "factor" variable. 354 391 * In any case, alloc_sampling_buffer() sets the Alert Request 355 - * Control indicator to trigger measurement-alert to harvest 392 + * Control indicator to trigger a measurement-alert to harvest 356 393 * sample-data-blocks (sdb). 357 394 * 358 - * 2. Compute the number of sample-data-blocks and ensure a minimum 395 + * 3. Compute the number of sample-data-blocks and ensure a minimum 359 396 * of CPUM_SF_MIN_SDB. Also ensure the upper limit does not 360 - * exceed CPUM_SF_MAX_SDB. See also the remarks for these 361 - * symbolic constants. 397 + * exceed a "calculated" maximum. The symbolic maximum is 398 + * designed for basic-sampling only and needs to be increased if 399 + * diagnostic-sampling is active. 400 + * See also the remarks for these symbolic constants. 362 401 * 363 - * 3. Compute number of pages used for the sample-data-block-table 364 - * and ensure a minimum of CPUM_SF_MIN_SDBT (at minimum one table 365 - * to manage up to 511 sample-data-blocks). 402 + * 4. Compute the number of sample-data-block-tables (SDBT) and 403 + * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up 404 + * to 511 SDBs). 
366 405 */ 406 + sample_size = event_sample_size(hwc); 367 407 freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); 368 408 factor = 1; 369 - n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / cpuhw->qsi.bsdes)); 409 + n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); 370 410 if (n_sdb < CPUM_SF_MIN_SDB) 371 411 n_sdb = CPUM_SF_MIN_SDB; 372 412 ··· 440 366 return 0; 441 367 442 368 debug_sprintf_event(sfdbg, 3, 443 - "allocate_sdbt: rate=%lu f=%lu sdb=%lu/%lu cpuhw=%p\n", 444 - SAMPL_RATE(hwc), freq, n_sdb, CPUM_SF_MAX_SDB, cpuhw); 369 + "allocate_buffers: rate=%lu f=%lu sdb=%lu/%lu" 370 + " sample_size=%lu cpuhw=%p\n", 371 + SAMPL_RATE(hwc), freq, n_sdb, sfb_max_limit(hwc), 372 + sample_size, cpuhw); 445 373 446 374 return alloc_sampling_buffer(&cpuhw->sfb, 447 375 sfb_pending_allocs(&cpuhw->sfb, hwc)); ··· 585 509 if (err) { 586 510 pr_err("Switching off the sampling facility failed " 587 511 "with rc=%i\n", err); 588 - } else { 589 - if (cpusf->sfb.sdbt) 590 - free_sampling_buffer(&cpusf->sfb); 591 - } 512 + } else 513 + deallocate_buffers(cpusf); 592 514 debug_sprintf_event(sfdbg, 5, 593 515 "setup_pmc_cpu: released: cpuhw=%p\n", cpusf); 594 516 break; ··· 624 550 625 551 static void hw_perf_event_destroy(struct perf_event *event) 626 552 { 553 + /* Free raw sample buffer */ 554 + if (RAWSAMPLE_REG(&event->hw)) 555 + kfree((void *) RAWSAMPLE_REG(&event->hw)); 556 + 627 557 /* Release PMC if this is the last perf event */ 628 558 if (!atomic_add_unless(&num_events, -1, 1)) { 629 559 mutex_lock(&pmc_reserve_mutex); ··· 647 569 static void hw_reset_registers(struct hw_perf_event *hwc, 648 570 unsigned long *sdbt_origin) 649 571 { 572 + struct sf_raw_sample *sfr; 573 + 650 574 /* (Re)set to first sample-data-block-table */ 651 575 TEAR_REG(hwc) = (unsigned long) sdbt_origin; 576 + 577 + /* (Re)set raw sampling buffer register */ 578 + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); 579 + memset(&sfr->basic, 0, sizeof(sfr->basic)); 580 
+ memset(&sfr->diag, 0, sfr->dsdes); 652 581 } 653 582 654 583 static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, ··· 719 634 goto out; 720 635 } 721 636 637 + /* Always enable basic sampling */ 638 + SAMPL_FLAGS(hwc) = PERF_CPUM_SF_BASIC_MODE; 639 + 640 + /* Check if diagnostic sampling is requested. Deny if the required 641 + * sampling authorization is missing. 642 + */ 643 + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) { 644 + if (!si.ad) { 645 + err = -EPERM; 646 + goto out; 647 + } 648 + SAMPL_FLAGS(hwc) |= PERF_CPUM_SF_DIAG_MODE; 649 + } 650 + 722 651 /* The sampling information (si) contains information about the 723 652 * min/max sampling intervals and the CPU speed. So calculate the 724 653 * correct sampling interval and avoid the whole period adjust ··· 778 679 */ 779 680 if (cpuhw) 780 681 /* Event is pinned to a particular CPU */ 781 - err = allocate_sdbt(cpuhw, hwc); 682 + err = allocate_buffers(cpuhw, hwc); 782 683 else { 783 684 /* Event is not pinned, allocate sampling buffer on 784 685 * each online CPU 785 686 */ 786 687 for_each_online_cpu(cpu) { 787 688 cpuhw = &per_cpu(cpu_hw_sf, cpu); 788 - err = allocate_sdbt(cpuhw, hwc); 689 + err = allocate_buffers(cpuhw, hwc); 789 690 if (err) 790 691 break; 791 692 } ··· 804 705 805 706 switch (event->attr.type) { 806 707 case PERF_TYPE_RAW: 807 - if (event->attr.config != PERF_EVENT_CPUM_SF) 708 + if ((event->attr.config != PERF_EVENT_CPUM_SF) && 709 + (event->attr.config != PERF_EVENT_CPUM_SF_DIAG)) 808 710 return -ENOENT; 809 711 break; 810 712 case PERF_TYPE_HARDWARE: ··· 886 786 return; 887 787 } 888 788 889 - debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i tear=%p dear=%p\n", 890 - cpuhw->lsctl.es, cpuhw->lsctl.cs, 789 + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " 790 + "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, 791 + cpuhw->lsctl.ed, cpuhw->lsctl.cd, 891 792 (void *) cpuhw->lsctl.tear, (void *) cpuhw->lsctl.dear); 892 793 } 893 794 
··· 908 807 /* Switch off sampling activation control */ 909 808 inactive = cpuhw->lsctl; 910 809 inactive.cs = 0; 810 + inactive.cd = 0; 911 811 912 812 err = lsctl(&inactive); 913 813 if (err) { ··· 969 867 * 970 868 * Return non-zero if an event overflow occurred. 971 869 */ 972 - static int perf_push_sample(struct perf_event *event, 973 - struct hws_data_entry *sample) 870 + static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) 974 871 { 975 872 int overflow; 976 873 struct pt_regs regs; 977 874 struct perf_sf_sde_regs *sde_regs; 978 875 struct perf_sample_data data; 876 + struct perf_raw_record raw; 979 877 980 - /* Skip samples that are invalid or for which the instruction address 981 - * is not predictable. For the latter, the wait-state bit is set. 982 - */ 983 - if (sample->I || sample->W) 984 - return 0; 985 - 878 + /* Setup perf sample */ 986 879 perf_sample_data_init(&data, 0, event->hw.last_period); 880 + raw.size = sfr->size; 881 + raw.data = sfr; 882 + data.raw = &raw; 987 883 988 884 /* Setup pt_regs to look like an CPU-measurement external interrupt 989 885 * using the Program Request Alert code. The regs.int_parm_long ··· 993 893 regs.int_parm = CPU_MF_INT_SF_PRA; 994 894 sde_regs = (struct perf_sf_sde_regs *) &regs.int_parm_long; 995 895 996 - regs.psw.addr = sample->ia; 997 - if (sample->T) 896 + regs.psw.addr = sfr->basic.ia; 897 + if (sfr->basic.T) 998 898 regs.psw.mask |= PSW_MASK_DAT; 999 - if (sample->W) 899 + if (sfr->basic.W) 1000 900 regs.psw.mask |= PSW_MASK_WAIT; 1001 - if (sample->P) 901 + if (sfr->basic.P) 1002 902 regs.psw.mask |= PSW_MASK_PSTATE; 1003 - switch (sample->AS) { 903 + switch (sfr->basic.AS) { 1004 904 case 0x0: 1005 905 regs.psw.mask |= PSW_ASC_PRIMARY; 1006 906 break; ··· 1022 922 * purposes too. 1023 923 * For now, simply use a non-zero value as guest indicator. 
1024 924 */ 1025 - if (sample->hpp) 925 + if (sfr->basic.hpp) 1026 926 sde_regs->in_guest = 1; 1027 927 1028 928 overflow = 0; ··· 1042 942 local64_add(count, &event->count); 1043 943 } 1044 944 945 + static int sample_format_is_valid(struct hws_combined_entry *sample, 946 + unsigned int flags) 947 + { 948 + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) 949 + /* Only basic-sampling data entries with data-entry-format 950 + * version of 0x0001 can be processed. 951 + */ 952 + if (sample->basic.def != 0x0001) 953 + return 0; 954 + if (flags & PERF_CPUM_SF_DIAG_MODE) 955 + /* The data-entry-format number of diagnostic-sampling data 956 + * entries can vary. Because diagnostic data is just passed 957 + * through, do only a sanity check on the DEF. 958 + */ 959 + if (sample->diag.def < 0x8001) 960 + return 0; 961 + return 1; 962 + } 963 + 964 + static int sample_is_consistent(struct hws_combined_entry *sample, 965 + unsigned long flags) 966 + { 967 + /* This check applies only to basic-sampling data entries of potentially 968 + * combined-sampling data entries. Invalid entries cannot be processed 969 + * by the PMU and, thus, do not deliver an associated 970 + * diagnostic-sampling data entry. 971 + */ 972 + if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) 973 + return 0; 974 + /* 975 + * Samples are skipped, if they are invalid or for which the 976 + * instruction address is not predictable, i.e., the wait-state bit is 977 + * set. 
978 + */ 979 + if (sample->basic.I || sample->basic.W) 980 + return 0; 981 + return 1; 982 + } 983 + 984 + static void reset_sample_slot(struct hws_combined_entry *sample, 985 + unsigned long flags) 986 + { 987 + if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) 988 + sample->basic.def = 0; 989 + if (flags & PERF_CPUM_SF_DIAG_MODE) 990 + sample->diag.def = 0; 991 + } 992 + 993 + static void sfr_store_sample(struct sf_raw_sample *sfr, 994 + struct hws_combined_entry *sample) 995 + { 996 + if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) 997 + sfr->basic = sample->basic; 998 + if (sfr->format & PERF_CPUM_SF_DIAG_MODE) 999 + memcpy(&sfr->diag, &sample->diag, sfr->dsdes); 1000 + } 1001 + 1002 + static void debug_sample_entry(struct hws_combined_entry *sample, 1003 + struct hws_trailer_entry *te, 1004 + unsigned long flags) 1005 + { 1006 + debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " 1007 + "sampling data entry: te->f=%i basic.def=%04x (%p)" 1008 + " diag.def=%04x (%p)\n", te->f, 1009 + sample->basic.def, &sample->basic, 1010 + (flags & PERF_CPUM_SF_DIAG_MODE) 1011 + ? sample->diag.def : 0xFFFF, 1012 + (flags & PERF_CPUM_SF_DIAG_MODE) 1013 + ? &sample->diag : NULL); 1014 + } 1015 + 1045 1016 /* hw_collect_samples() - Walk through a sample-data-block and collect samples 1046 1017 * @event: The perf event 1047 1018 * @sdbt: Sample-data-block table 1048 1019 * @overflow: Event overflow counter 1049 1020 * 1050 - * Walks through a sample-data-block and collects hardware sample-data that is 1051 - * pushed to the perf event subsystem. The overflow reports the number of 1052 - * samples that has been discarded due to an event overflow. 1021 + * Walks through a sample-data-block and collects sampling data entries that are 1022 + * then pushed to the perf event subsystem. Depending on the sampling function, 1023 + * there can be either basic-sampling or combined-sampling data entries. 
A 1024 + * combined-sampling data entry consists of a basic- and a diagnostic-sampling 1025 + * data entry. The sampling function is determined by the flags in the perf 1026 + * event hardware structure. The function always works with a combined-sampling 1027 + * data entry but ignores the the diagnostic portion if it is not available. 1028 + * 1029 + * Note that the implementation focuses on basic-sampling data entries and, if 1030 + * such an entry is not valid, the entire combined-sampling data entry is 1031 + * ignored. 1032 + * 1033 + * The overflow variables counts the number of samples that has been discarded 1034 + * due to a perf event overflow. 1053 1035 */ 1054 1036 static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, 1055 1037 unsigned long long *overflow) 1056 1038 { 1057 - struct hws_data_entry *sample; 1058 - unsigned long *trailer; 1039 + unsigned long flags = SAMPL_FLAGS(&event->hw); 1040 + struct hws_combined_entry *sample; 1041 + struct hws_trailer_entry *te; 1042 + struct sf_raw_sample *sfr; 1043 + size_t sample_size; 1059 1044 1060 - trailer = trailer_entry_ptr(*sdbt); 1061 - sample = (struct hws_data_entry *) *sdbt; 1062 - while ((unsigned long *) sample < trailer) { 1045 + /* Prepare and initialize raw sample data */ 1046 + sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); 1047 + sfr->format = flags & PERF_CPUM_SF_MODE_MASK; 1048 + 1049 + sample_size = event_sample_size(&event->hw); 1050 + te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); 1051 + sample = (struct hws_combined_entry *) *sdbt; 1052 + while ((unsigned long *) sample < (unsigned long *) te) { 1063 1053 /* Check for an empty sample */ 1064 - if (!sample->def) 1054 + if (!sample->basic.def) 1065 1055 break; 1066 1056 1067 1057 /* Update perf event period */ 1068 1058 perf_event_count_update(event, SAMPL_RATE(&event->hw)); 1069 1059 1070 - /* Check for basic sampling mode */ 1071 - if (sample->def == 0x0001) { 1060 + /* Check sampling data 
entry */ 1061 + if (sample_format_is_valid(sample, flags)) { 1072 1062 /* If an event overflow occurred, the PMU is stopped to 1073 1063 * throttle event delivery. Remaining sample data is 1074 1064 * discarded. 1075 1065 */ 1076 - if (!*overflow) 1077 - *overflow = perf_push_sample(event, sample); 1078 - else 1066 + if (!*overflow) { 1067 + if (sample_is_consistent(sample, flags)) { 1068 + /* Deliver sample data to perf */ 1069 + sfr_store_sample(sfr, sample); 1070 + *overflow = perf_push_sample(event, sfr); 1071 + } 1072 + } else 1079 1073 /* Count discarded samples */ 1080 1074 *overflow += 1; 1081 - } else 1082 - /* Sample slot is not yet written or other record */ 1083 - debug_sprintf_event(sfdbg, 5, "hw_collect_samples: " 1084 - "Unknown sample data entry format:" 1085 - " %i\n", sample->def); 1075 + } else { 1076 + debug_sample_entry(sample, te, flags); 1077 + /* Sample slot is not yet written or other record. 1078 + * 1079 + * This condition can occur if the buffer was reused 1080 + * from a combined basic- and diagnostic-sampling. 1081 + * If only basic-sampling is then active, entries are 1082 + * written into the larger diagnostic entries. 1083 + * This is typically the case for sample-data-blocks 1084 + * that are not full. Stop processing if the first 1085 + * invalid format was detected. 
1086 + */ 1087 + if (!te->f) 1088 + break; 1089 + } 1086 1090 1087 1091 /* Reset sample slot and advance to next sample */ 1088 - sample->def = 0; 1089 - sample++; 1092 + reset_sample_slot(sample, flags); 1093 + sample += sample_size; 1090 1094 } 1091 1095 } 1092 1096 ··· 1308 1104 perf_pmu_disable(event->pmu); 1309 1105 event->hw.state = 0; 1310 1106 cpuhw->lsctl.cs = 1; 1107 + if (SAMPL_DIAG_MODE(&event->hw)) 1108 + cpuhw->lsctl.cd = 1; 1311 1109 perf_pmu_enable(event->pmu); 1312 1110 } 1313 1111 ··· 1325 1119 1326 1120 perf_pmu_disable(event->pmu); 1327 1121 cpuhw->lsctl.cs = 0; 1122 + cpuhw->lsctl.cd = 0; 1328 1123 event->hw.state |= PERF_HES_STOPPED; 1329 1124 1330 1125 if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { ··· 1365 1158 1366 1159 /* Ensure sampling functions are in the disabled state. If disabled, 1367 1160 * switch on sampling enable control. */ 1368 - if (WARN_ON_ONCE(cpuhw->lsctl.es == 1)) { 1161 + if (WARN_ON_ONCE(cpuhw->lsctl.es == 1 || cpuhw->lsctl.ed == 1)) { 1369 1162 err = -EAGAIN; 1370 1163 goto out; 1371 1164 } 1372 1165 cpuhw->lsctl.es = 1; 1166 + if (SAMPL_DIAG_MODE(&event->hw)) 1167 + cpuhw->lsctl.ed = 1; 1373 1168 1374 1169 /* Set in_use flag and store event */ 1375 1170 event->hw.idx = 0; /* only one sampling event per CPU supported */ ··· 1394 1185 cpumsf_pmu_stop(event, PERF_EF_UPDATE); 1395 1186 1396 1187 cpuhw->lsctl.es = 0; 1188 + cpuhw->lsctl.ed = 0; 1397 1189 cpuhw->flags &= ~PMU_F_IN_USE; 1398 1190 cpuhw->event = NULL; 1399 1191 ··· 1408 1198 } 1409 1199 1410 1200 CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC, PERF_EVENT_CPUM_SF); 1201 + CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); 1411 1202 1412 1203 static struct attribute *cpumsf_pmu_events_attr[] = { 1413 1204 CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), 1205 + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), 1414 1206 NULL, 1415 1207 }; 1416 1208 ··· 1563 1351 return rc; 1564 1352 1565 1353 sfb_set_limits(min, max); 1566 - pr_info("Changed 
sampling buffer settings: min=%lu max=%lu\n", 1567 - CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); 1354 + pr_info("The sampling buffer limits have changed to: " 1355 + "min=%lu max=%lu (diag=x%lu)\n", 1356 + CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB, CPUM_SF_SDB_DIAG_FACTOR); 1568 1357 return 0; 1569 1358 } 1570 1359 ··· 1575 1362 .get = param_get_sfb_size, 1576 1363 }; 1577 1364 1365 + #define RS_INIT_FAILURE_QSI 0x0001 1366 + #define RS_INIT_FAILURE_BSDES 0x0002 1367 + #define RS_INIT_FAILURE_ALRT 0x0003 1368 + #define RS_INIT_FAILURE_PERF 0x0004 1369 + static void __init pr_cpumsf_err(unsigned int reason) 1370 + { 1371 + pr_err("Sampling facility support for perf is not available: " 1372 + "reason=%04x\n", reason); 1373 + } 1374 + 1578 1375 static int __init init_cpum_sampling_pmu(void) 1579 1376 { 1377 + struct hws_qsi_info_block si; 1580 1378 int err; 1581 1379 1582 1380 if (!cpum_sf_avail()) 1583 1381 return -ENODEV; 1382 + 1383 + memset(&si, 0, sizeof(si)); 1384 + if (qsi(&si)) { 1385 + pr_cpumsf_err(RS_INIT_FAILURE_QSI); 1386 + return -ENODEV; 1387 + } 1388 + 1389 + if (si.bsdes != sizeof(struct hws_basic_entry)) { 1390 + pr_cpumsf_err(RS_INIT_FAILURE_BSDES); 1391 + return -EINVAL; 1392 + } 1393 + 1394 + if (si.ad) 1395 + sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); 1584 1396 1585 1397 sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); 1586 1398 if (!sfdbg) ··· 1614 1376 1615 1377 err = register_external_interrupt(0x1407, cpumf_measurement_alert); 1616 1378 if (err) { 1617 - pr_err("Failed to register for CPU-measurement alerts\n"); 1379 + pr_cpumsf_err(RS_INIT_FAILURE_ALRT); 1618 1380 goto out; 1619 1381 } 1620 1382 1621 1383 err = perf_pmu_register(&cpumf_sampling, "cpum_sf", PERF_TYPE_RAW); 1622 1384 if (err) { 1623 - pr_err("Failed to register cpum_sf pmu\n"); 1385 + pr_cpumsf_err(RS_INIT_FAILURE_PERF); 1624 1386 unregister_external_interrupt(0x1407, cpumf_measurement_alert); 1625 1387 goto out; 1626 1388 }
+13 -8
arch/s390/kernel/perf_event.c
··· 139 139 int cpu = smp_processor_id(); 140 140 141 141 memset(&si, 0, sizeof(si)); 142 - if (qsi(&si)) { 143 - pr_err("CPU[%i]: CPM_SF: qsi failed\n"); 142 + if (qsi(&si)) 144 143 return; 145 - } 146 144 147 - pr_info("CPU[%i]: CPM_SF: as=%i es=%i cs=%i bsdes=%i dsdes=%i" 148 - " min=%i max=%i cpu_speed=%i tear=%p dear=%p\n", 149 - cpu, si.as, si.es, si.cs, si.bsdes, si.dsdes, 150 - si.min_sampl_rate, si.max_sampl_rate, si.cpu_speed, 151 - si.tear, si.dear); 145 + pr_info("CPU[%i] CPUM_SF: basic=%i diag=%i min=%i max=%i cpu_speed=%i\n", 146 + cpu, si.as, si.ad, si.min_sampl_rate, si.max_sampl_rate, 147 + si.cpu_speed); 148 + 149 + if (si.as) 150 + pr_info("CPU[%i] CPUM_SF: Basic-sampling: a=%i e=%i c=%i" 151 + " bsdes=%i tear=%p dear=%p\n", cpu, 152 + si.as, si.es, si.cs, si.bsdes, si.tear, si.dear); 153 + if (si.ad) 154 + pr_info("CPU[%i] CPUM_SF: Diagnostic-sampling: a=%i e=%i c=%i" 155 + " dsdes=%i tear=%p dear=%p\n", cpu, 156 + si.ad, si.ed, si.cd, si.dsdes, si.tear, si.dear); 152 157 } 153 158 154 159 void perf_event_print_debug(void)
+2 -2
arch/s390/oprofile/hwsampler.c
··· 799 799 static void add_samples_to_oprofile(unsigned int cpu, unsigned long *sdbt, 800 800 unsigned long *dear) 801 801 { 802 - struct hws_data_entry *sample_data_ptr; 802 + struct hws_basic_entry *sample_data_ptr; 803 803 unsigned long *trailer; 804 804 805 805 trailer = trailer_entry_ptr(*sdbt); ··· 809 809 trailer = dear; 810 810 } 811 811 812 - sample_data_ptr = (struct hws_data_entry *)(*sdbt); 812 + sample_data_ptr = (struct hws_basic_entry *)(*sdbt); 813 813 814 814 while ((unsigned long *)sample_data_ptr < trailer) { 815 815 struct pt_regs *regs = NULL;