Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf: cs-etm: Create decoders based on the trace ID mappings

Now that each queue has a unique set of trace ID mappings, use this
list to create the decoders. In unformatted mode just add a single
mapping so only one decoder is made.

Previously each queue would have a decoder created for each traced CPU
on the system but this won't work anymore because CPUs can have
overlapping trace IDs.

This also means that the CORESIGHT_TRACE_ID_UNUSED_FLAG isn't needed
anymore. If mappings aren't added then decoders aren't created, rather
than needing a flag to suppress creation.

Reviewed-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linux.dev>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20240722101202.26915-5-james.clark@linaro.org
Signed-off-by: James Clark <james.clark@linaro.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

James Clark and committed by
Arnaldo Carvalho de Melo
19c3e4db 77c123f5

+54 -121
+3 -5
tools/perf/arch/arm/util/cs-etm.c
··· 654 654 /* Get trace configuration register */ 655 655 data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr); 656 656 /* traceID set to legacy version, in case new perf running on older system */ 657 - data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | 658 - CORESIGHT_TRACE_ID_UNUSED_FLAG; 657 + data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu); 659 658 660 659 /* Get read-only information from sysFS */ 661 660 cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0], ··· 686 687 /* Get trace configuration register */ 687 688 data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr); 688 689 /* traceID set to legacy version, in case new perf running on older system */ 689 - data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG; 690 + data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu); 690 691 691 692 /* Get read-only information from sysFS */ 692 693 cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCIDR0], &data[CS_ETE_TRCIDR0]); ··· 742 743 /* Get configuration register */ 743 744 info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr); 744 745 /* traceID set to legacy value in case new perf running on old system */ 745 - info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | 746 - CORESIGHT_TRACE_ID_UNUSED_FLAG; 746 + info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu); 747 747 /* Get read-only information from sysFS */ 748 748 cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv3_ro[CS_ETM_ETMCCER], 749 749 &info->priv[*offset + CS_ETM_ETMCCER]);
-4
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
··· 684 684 return -1; 685 685 } 686 686 687 - /* if the CPU has no trace ID associated, no decoder needed */ 688 - if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL) 689 - return 0; 690 - 691 687 if (d_params->operation == CS_ETM_OPERATION_DECODE) { 692 688 if (ocsd_dt_create_decoder(decoder->dcd_tree, 693 689 decoder->decoder_name,
+51 -102
tools/perf/util/cs-etm.c
··· 348 348 349 349 /* 350 350 * update metadata trace ID from the value found in the AUX_HW_INFO packet. 351 - * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present. 352 351 */ 353 352 static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata) 354 353 { ··· 699 700 } 700 701 701 702 static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, 702 - struct cs_etm_auxtrace *etm, int t_idx, 703 - int m_idx, u32 etmidr) 703 + u64 *metadata, u32 etmidr) 704 704 { 705 - u64 **metadata = etm->metadata; 706 - 707 - t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr); 708 - t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR]; 709 - t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR]; 705 + t_params->protocol = cs_etm__get_v7_protocol_version(etmidr); 706 + t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR]; 707 + t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR]; 710 708 } 711 709 712 710 static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, 713 - struct cs_etm_auxtrace *etm, int t_idx, 714 - int m_idx) 711 + u64 *metadata) 715 712 { 716 - u64 **metadata = etm->metadata; 717 - 718 - t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i; 719 - t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0]; 720 - t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1]; 721 - t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2]; 722 - t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8]; 723 - t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR]; 724 - t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR]; 713 + t_params->protocol = CS_ETM_PROTO_ETMV4i; 714 + t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0]; 715 + t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1]; 716 + t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2]; 717 + t_params->etmv4.reg_idr8 = 
metadata[CS_ETMV4_TRCIDR8]; 718 + t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR]; 719 + t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR]; 725 720 } 726 721 727 722 static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params, 728 - struct cs_etm_auxtrace *etm, int t_idx, 729 - int m_idx) 723 + u64 *metadata) 730 724 { 731 - u64 **metadata = etm->metadata; 732 - 733 - t_params[t_idx].protocol = CS_ETM_PROTO_ETE; 734 - t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0]; 735 - t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1]; 736 - t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2]; 737 - t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8]; 738 - t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR]; 739 - t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR]; 740 - t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH]; 725 + t_params->protocol = CS_ETM_PROTO_ETE; 726 + t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0]; 727 + t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1]; 728 + t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2]; 729 + t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8]; 730 + t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR]; 731 + t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR]; 732 + t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH]; 741 733 } 742 734 743 735 static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, 744 - struct cs_etm_auxtrace *etm, 745 - enum cs_etm_format format, 746 - int sample_cpu, 747 - int decoders) 736 + struct cs_etm_queue *etmq) 748 737 { 749 - int t_idx, m_idx; 750 - u32 etmidr; 751 - u64 architecture; 738 + struct int_node *inode; 752 739 753 - for (t_idx = 0; t_idx < decoders; t_idx++) { 754 - if (format == FORMATTED) 755 - m_idx = t_idx; 756 - else { 757 - m_idx = get_cpu_data_idx(etm, sample_cpu); 758 - if (m_idx == -1) { 759 - 
pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n"); 760 - m_idx = 0; 761 - } 762 - } 763 - 764 - architecture = etm->metadata[m_idx][CS_ETM_MAGIC]; 740 + intlist__for_each_entry(inode, etmq->traceid_list) { 741 + u64 *metadata = inode->priv; 742 + u64 architecture = metadata[CS_ETM_MAGIC]; 743 + u32 etmidr; 765 744 766 745 switch (architecture) { 767 746 case __perf_cs_etmv3_magic: 768 - etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR]; 769 - cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr); 747 + etmidr = metadata[CS_ETM_ETMIDR]; 748 + cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr); 770 749 break; 771 750 case __perf_cs_etmv4_magic: 772 - cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx); 751 + cs_etm__set_trace_param_etmv4(t_params++, metadata); 773 752 break; 774 753 case __perf_cs_ete_magic: 775 - cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx); 754 + cs_etm__set_trace_param_ete(t_params++, metadata); 776 755 break; 777 756 default: 778 757 return -EINVAL; ··· 3164 3187 } 3165 3188 3166 3189 /* 3167 - * If we found AUX_HW_ID packets, then set any metadata marked as unused to the 3168 - * unused value to reduce the number of unneeded decoders created. 
3169 - */ 3170 - static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata) 3171 - { 3172 - u64 cs_etm_magic; 3173 - int i; 3174 - 3175 - for (i = 0; i < num_cpu; i++) { 3176 - cs_etm_magic = metadata[i][CS_ETM_MAGIC]; 3177 - switch (cs_etm_magic) { 3178 - case __perf_cs_etmv3_magic: 3179 - if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3180 - metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3181 - break; 3182 - case __perf_cs_etmv4_magic: 3183 - case __perf_cs_ete_magic: 3184 - if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG) 3185 - metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL; 3186 - break; 3187 - default: 3188 - /* unknown magic number */ 3189 - return -EINVAL; 3190 - } 3191 - } 3192 - return 0; 3193 - } 3194 - 3195 - /* 3196 3190 * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX 3197 3191 * (formatted or not) packets to create the decoders. 3198 3192 */ 3199 3193 static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) 3200 3194 { 3201 3195 struct cs_etm_decoder_params d_params; 3196 + struct cs_etm_trace_params *t_params; 3197 + int decoders = intlist__nr_entries(etmq->traceid_list); 3198 + 3199 + if (decoders == 0) 3200 + return 0; 3202 3201 3203 3202 /* 3204 3203 * Each queue can only contain data from one CPU when unformatted, so only one decoder is 3205 3204 * needed. 3206 3205 */ 3207 - int decoders = etmq->format == FORMATTED ? 
etmq->etm->num_cpu : 1; 3206 + if (etmq->format == UNFORMATTED) 3207 + assert(decoders == 1); 3208 3208 3209 3209 /* Use metadata to fill in trace parameters for trace decoder */ 3210 - struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders); 3210 + t_params = zalloc(sizeof(*t_params) * decoders); 3211 3211 3212 3212 if (!t_params) 3213 3213 goto out_free; 3214 3214 3215 - if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format, 3216 - etmq->queue_nr, decoders)) 3215 + if (cs_etm__init_trace_params(t_params, etmq)) 3217 3216 goto out_free; 3218 3217 3219 3218 /* Set decoder parameters to decode trace packets */ ··· 3415 3462 /* 3416 3463 * Map Trace ID values to CPU metadata. 3417 3464 * 3418 - * Trace metadata will always contain Trace ID values from the legacy algorithm. If the 3419 - * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata 3420 - * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set. 3465 + * Trace metadata will always contain Trace ID values from the legacy algorithm 3466 + * in case it's read by a version of Perf that doesn't know about HW_ID packets 3467 + * or the kernel doesn't emit them. 3421 3468 * 3422 3469 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use 3423 3470 * the same IDs as the old algorithm as far as is possible, unless there are clashes ··· 3426 3473 * 3427 3474 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of 3428 3475 * those packets. If they are there then the values will be mapped and plugged into 3429 - * the metadata. We then set any remaining metadata values with the used flag to a 3430 - * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required. 3476 + * the metadata and decoders are only created for each mapping received. 
3431 3477 * 3432 3478 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel 3433 - * then we map Trace ID values to CPU directly from the metadata - clearing any unused 3434 - * flags if present. 3479 + * then we map Trace ID values to CPU directly from the metadata and create decoders 3480 + * for all mappings. 3435 3481 */ 3436 3482 3437 3483 /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */ ··· 3441 3489 if (err) 3442 3490 goto err_free_queues; 3443 3491 3444 - /* if HW ID found then clear any unused metadata ID values */ 3445 - if (aux_hw_id_found) 3446 - err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata); 3447 - /* otherwise, this is a file with metadata values only, map from metadata */ 3448 - else 3492 + /* if no HW ID found this is a file with metadata values only, map from metadata */ 3493 + if (!aux_hw_id_found) { 3449 3494 err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata); 3450 - 3451 - if (err) 3452 - goto err_free_queues; 3495 + if (err) 3496 + goto err_free_queues; 3497 + } 3453 3498 3454 3499 err = cs_etm__create_decoders(etm); 3455 3500 if (err)
-10
tools/perf/util/cs-etm.h
··· 230 230 /* CoreSight trace ID is currently the bottom 7 bits of the value */ 231 231 #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0) 232 232 233 - /* 234 - * perf record will set the legacy meta data values as unused initially. 235 - * This allows perf report to manage the decoders created when dynamic 236 - * allocation in operation. 237 - */ 238 - #define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31) 239 - 240 - /* Value to set for unused trace ID values */ 241 - #define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F 242 - 243 233 int cs_etm__process_auxtrace_info(union perf_event *event, 244 234 struct perf_session *session); 245 235 void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);