Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf/marvell: Odyssey LLC-TAD performance monitor support

Each TAD provides eight 64-bit counters for monitoring
cache behavior. The driver always configures the same counter for
all the TADs. The user would end up effectively reserving one of
eight counters in every TAD to look across all TADs.
The occurrences of events are aggregated and presented to the user
at the end of running the workload. The driver does not provide a
way for the user to partition TADs so that different TADs are used for
different applications.

The performance events reflect various internal or interface activities.
By combining the values from multiple performance counters, cache
performance can be measured in terms such as: cache miss rate, cache
allocations, interface retry rate, internal resource occupancy, etc.

Each supported counter's event and formatting information is exposed
to sysfs at /sys/devices/tad/. Use the perf tool's stat command to measure
the PMU events. For instance:

perf stat -e tad_hit_ltg,tad_hit_dtg <workload>

Signed-off-by: Gowthami Thiagarajan <gthiagarajan@marvell.com>
Link: https://lore.kernel.org/r/20241108040619.753343-6-gthiagarajan@marvell.com
Signed-off-by: Will Deacon <will@kernel.org>

authored by

Gowthami Thiagarajan and committed by
Will Deacon
5fcccba1 59731e23

+73
+1
Documentation/admin-guide/perf/index.rst
··· 15 15 qcom_l3_pmu 16 16 starfive_starlink_pmu 17 17 mrvl-odyssey-ddr-pmu 18 + mrvl-odyssey-tad-pmu 18 19 arm-ccn 19 20 arm-cmn 20 21 arm-ni
+37
Documentation/admin-guide/perf/mrvl-odyssey-tad-pmu.rst
··· 1 + ==================================================================== 2 + Marvell Odyssey LLC-TAD Performance Monitoring Unit (PMU UNCORE) 3 + ==================================================================== 4 + 5 + Each TAD provides eight 64-bit counters for monitoring 6 + cache behavior. The driver always configures the same counter for 7 + all the TADs. The user would end up effectively reserving one of 8 + eight counters in every TAD to look across all TADs. 9 + The occurrences of events are aggregated and presented to the user 10 + at the end of running the workload. The driver does not provide a 11 + way for the user to partition TADs so that different TADs are used for 12 + different applications. 13 + 14 + The performance events reflect various internal or interface activities. 15 + By combining the values from multiple performance counters, cache 16 + performance can be measured in terms such as: cache miss rate, cache 17 + allocations, interface retry rate, internal resource occupancy, etc. 18 + 19 + The PMU driver exposes the available events and format options under sysfs:: 20 + 21 + /sys/bus/event_source/devices/tad/events/ 22 + /sys/bus/event_source/devices/tad/format/ 23 + 24 + Examples:: 25 + 26 + $ perf list | grep tad 27 + tad/tad_alloc_any/ [Kernel PMU event] 28 + tad/tad_alloc_dtg/ [Kernel PMU event] 29 + tad/tad_alloc_ltg/ [Kernel PMU event] 30 + tad/tad_hit_any/ [Kernel PMU event] 31 + tad/tad_hit_dtg/ [Kernel PMU event] 32 + tad/tad_hit_ltg/ [Kernel PMU event] 33 + tad/tad_req_msh_in_exlmn/ [Kernel PMU event] 34 + tad/tad_tag_rd/ [Kernel PMU event] 35 + tad/tad_tot_cycle/ [Kernel PMU event] 36 + 37 + $ perf stat -e tad_alloc_dtg,tad_alloc_ltg,tad_alloc_any,tad_hit_dtg,tad_hit_ltg,tad_hit_any,tad_tag_rd <workload>
+35
drivers/perf/marvell_cn10k_tad_pmu.c
··· 39 39 40 40 enum mrvl_tad_pmu_version { 41 41 TAD_PMU_V1 = 1, 42 + TAD_PMU_V2, 42 43 }; 43 44 44 45 struct tad_pmu_data { ··· 223 222 .attrs = tad_pmu_event_attrs, 224 223 }; 225 224 225 + static struct attribute *ody_tad_pmu_event_attrs[] = { 226 + TAD_PMU_EVENT_ATTR(tad_req_msh_in_exlmn, 0x3), 227 + TAD_PMU_EVENT_ATTR(tad_alloc_dtg, 0x1a), 228 + TAD_PMU_EVENT_ATTR(tad_alloc_ltg, 0x1b), 229 + TAD_PMU_EVENT_ATTR(tad_alloc_any, 0x1c), 230 + TAD_PMU_EVENT_ATTR(tad_hit_dtg, 0x1d), 231 + TAD_PMU_EVENT_ATTR(tad_hit_ltg, 0x1e), 232 + TAD_PMU_EVENT_ATTR(tad_hit_any, 0x1f), 233 + TAD_PMU_EVENT_ATTR(tad_tag_rd, 0x20), 234 + TAD_PMU_EVENT_ATTR(tad_tot_cycle, 0xFF), 235 + NULL 236 + }; 237 + 238 + static const struct attribute_group ody_tad_pmu_events_attr_group = { 239 + .name = "events", 240 + .attrs = ody_tad_pmu_event_attrs, 241 + }; 242 + 226 243 PMU_FORMAT_ATTR(event, "config:0-7"); 227 244 228 245 static struct attribute *tad_pmu_format_attrs[] = { ··· 274 255 275 256 static const struct attribute_group *tad_pmu_attr_groups[] = { 276 257 &tad_pmu_events_attr_group, 258 + &tad_pmu_format_attr_group, 259 + &tad_pmu_cpumask_attr_group, 260 + NULL 261 + }; 262 + 263 + static const struct attribute_group *ody_tad_pmu_attr_groups[] = { 264 + &ody_tad_pmu_events_attr_group, 277 265 &tad_pmu_format_attr_group, 278 266 &tad_pmu_cpumask_attr_group, 279 267 NULL ··· 376 350 377 351 if (version == TAD_PMU_V1) 378 352 tad_pmu->pmu.attr_groups = tad_pmu_attr_groups; 353 + else 354 + tad_pmu->pmu.attr_groups = ody_tad_pmu_attr_groups; 379 355 380 356 tad_pmu->cpu = raw_smp_processor_id(); 381 357 ··· 413 385 }; 414 386 #endif 415 387 388 + #ifdef CONFIG_ACPI 389 + static const struct tad_pmu_data tad_pmu_v2_data = { 390 + .id = TAD_PMU_V2, 391 + }; 392 + #endif 393 + 416 394 #ifdef CONFIG_OF 417 395 static const struct of_device_id tad_pmu_of_match[] = { 418 396 { .compatible = "marvell,cn10k-tad-pmu", .data = &tad_pmu_data }, ··· 429 395 #ifdef CONFIG_ACPI 430 396 static const 
struct acpi_device_id tad_pmu_acpi_match[] = { 431 397 {"MRVL000B", (kernel_ulong_t)&tad_pmu_data}, 398 + {"MRVL000D", (kernel_ulong_t)&tad_pmu_v2_data}, 432 399 {}, 433 400 }; 434 401 MODULE_DEVICE_TABLE(acpi, tad_pmu_acpi_match);