Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf vendor events intel: Alderlake/sapphirerapids metric fixes

As events are deduplicated by name, ensure PMU prefixes are always
used in metrics. Previously the prefix could be omitted from the first
event in a formula.

Update metric constraints for architectures with topdown l2 events.

Conversion script updated in:
https://github.com/intel/perfmon/pull/128

Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Edward Baker <edward.baker@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Closes: https://lore.kernel.org/lkml/ZZam-EG-UepcXtWw@kernel.org/
Link: https://lore.kernel.org/r/20240104231903.775717-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Ian Rogers and committed by
Arnaldo Carvalho de Melo
becc24e9 e30dca91

+123 -160
+118 -136
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
··· 114 114 }, 115 115 { 116 116 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to certain allocation restrictions.", 117 - "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / tma_info_core_slots", 117 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS@ / tma_info_core_slots", 118 118 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 119 119 "MetricName": "tma_alloc_restriction", 120 120 "MetricThreshold": "tma_alloc_restriction > 0.1", ··· 124 124 { 125 125 "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend due to backend stalls", 126 126 "DefaultMetricgroupName": "TopdownL1", 127 - "MetricExpr": "TOPDOWN_BE_BOUND.ALL / tma_info_core_slots", 127 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots", 128 128 "MetricGroup": "Default;TopdownL1;tma_L1_group", 129 129 "MetricName": "tma_backend_bound", 130 130 "MetricThreshold": "tma_backend_bound > 0.1", ··· 169 169 }, 170 170 { 171 171 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend", 172 - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / tma_info_core_slots", 172 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_DETECT@ / tma_info_core_slots", 173 173 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 174 174 "MetricName": "tma_branch_detect", 175 175 "MetricThreshold": "tma_branch_detect > 0.05", ··· 179 179 }, 180 180 { 181 181 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to branch mispredicts.", 182 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / tma_info_core_slots", 182 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MISPREDICT@ / tma_info_core_slots", 183 183 "MetricGroup": 
"TopdownL2;tma_L2_group;tma_bad_speculation_group", 184 184 "MetricName": "tma_branch_mispredicts", 185 185 "MetricThreshold": "tma_branch_mispredicts > 0.05", ··· 189 189 }, 190 190 { 191 191 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.", 192 - "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / tma_info_core_slots", 192 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.BRANCH_RESTEER@ / tma_info_core_slots", 193 193 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 194 194 "MetricName": "tma_branch_resteer", 195 195 "MetricThreshold": "tma_branch_resteer > 0.05", ··· 198 198 }, 199 199 { 200 200 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to the microcode sequencer (MS).", 201 - "MetricExpr": "TOPDOWN_FE_BOUND.CISC / tma_info_core_slots", 201 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.CISC@ / tma_info_core_slots", 202 202 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 203 203 "MetricName": "tma_cisc", 204 204 "MetricThreshold": "tma_cisc > 0.05", ··· 217 217 }, 218 218 { 219 219 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to decode stalls.", 220 - "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / tma_info_core_slots", 220 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.DECODE@ / tma_info_core_slots", 221 221 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 222 222 "MetricName": "tma_decode", 223 223 "MetricThreshold": "tma_decode > 0.05", ··· 235 235 }, 236 236 { 237 237 "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", 238 - "MetricConstraint": "NO_GROUP_EVENTS", 239 238 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - 
cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 240 239 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 241 240 "MetricName": "tma_dram_bound", ··· 244 245 }, 245 246 { 246 247 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.", 247 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / tma_info_core_slots", 248 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.FASTNUKE@ / tma_info_core_slots", 248 249 "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", 249 250 "MetricName": "tma_fast_nuke", 250 251 "MetricThreshold": "tma_fast_nuke > 0.05", ··· 253 254 }, 254 255 { 255 256 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", 256 - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / tma_info_core_slots", 257 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH@ / tma_info_core_slots", 257 258 "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", 258 259 "MetricName": "tma_fetch_bandwidth", 259 260 "MetricThreshold": "tma_fetch_bandwidth > 0.1", ··· 263 264 }, 264 265 { 265 266 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.", 266 - "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / tma_info_core_slots", 267 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.FRONTEND_LATENCY@ / tma_info_core_slots", 267 268 "MetricGroup": "TopdownL2;tma_L2_group;tma_frontend_bound_group", 268 269 "MetricName": "tma_fetch_latency", 269 270 "MetricThreshold": "tma_fetch_latency > 0.15", ··· 282 283 }, 283 284 { 284 285 
"BriefDescription": "Counts the number of floating point divide operations per uop.", 285 - "MetricExpr": "UOPS_RETIRED.FPDIV / tma_info_core_slots", 286 + "MetricExpr": "cpu_atom@UOPS_RETIRED.FPDIV@ / tma_info_core_slots", 286 287 "MetricGroup": "TopdownL3;tma_L3_group;tma_base_group", 287 288 "MetricName": "tma_fpdiv_uops", 288 289 "MetricThreshold": "tma_fpdiv_uops > 0.2", ··· 292 293 { 293 294 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to frontend stalls.", 294 295 "DefaultMetricgroupName": "TopdownL1", 295 - "MetricExpr": "TOPDOWN_FE_BOUND.ALL / tma_info_core_slots", 296 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ALL@ / tma_info_core_slots", 296 297 "MetricGroup": "Default;TopdownL1;tma_L1_group", 297 298 "MetricName": "tma_frontend_bound", 298 299 "MetricThreshold": "tma_frontend_bound > 0.2", ··· 302 303 }, 303 304 { 304 305 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to instruction cache misses.", 305 - "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / tma_info_core_slots", 306 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ICACHE@ / tma_info_core_slots", 306 307 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 307 308 "MetricName": "tma_icache_misses", 308 309 "MetricThreshold": "tma_icache_misses > 0.05", ··· 329 330 }, 330 331 { 331 332 "BriefDescription": "Instructions Per Cycle", 332 - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_clks", 333 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / tma_info_core_clks", 333 334 "MetricName": "tma_info_core_ipc", 334 335 "Unit": "cpu_atom" 335 336 }, ··· 341 342 }, 342 343 { 343 344 "BriefDescription": "Uops Per Instruction", 344 - "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY", 345 + "MetricExpr": "cpu_atom@UOPS_RETIRED.ALL@ / INST_RETIRED.ANY", 345 346 "MetricName": "tma_info_core_upi", 346 347 "Unit": "cpu_atom" 347 348 }, ··· 365 366 }, 366 367 { 367 368 "BriefDescription": "Ratio of all branches 
which mispredict", 368 - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES", 369 + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.ALL_BRANCHES", 369 370 "MetricName": "tma_info_inst_mix_branch_mispredict_ratio", 370 371 "Unit": "cpu_atom" 371 372 }, 372 373 { 373 374 "BriefDescription": "Ratio between Mispredicted branches and unknown branches", 374 - "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY", 375 + "MetricExpr": "cpu_atom@BR_MISP_RETIRED.ALL_BRANCHES@ / BACLEARS.ANY", 375 376 "MetricName": "tma_info_inst_mix_branch_mispredict_to_unknown_branch_ratio", 376 377 "Unit": "cpu_atom" 377 378 }, ··· 389 390 }, 390 391 { 391 392 "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", 392 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", 393 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", 393 394 "MetricName": "tma_info_inst_mix_ipbranch", 394 395 "Unit": "cpu_atom" 395 396 }, 396 397 { 397 398 "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)", 398 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL", 399 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_INST_RETIRED.CALL", 399 400 "MetricName": "tma_info_inst_mix_ipcall", 400 401 "Unit": "cpu_atom" 401 402 }, 402 403 { 403 404 "BriefDescription": "Instructions per Far Branch", 404 - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", 405 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_INST_RETIRED.FAR_BRANCH@ / 2)", 405 406 "MetricName": "tma_info_inst_mix_ipfarbranch", 406 407 "Unit": "cpu_atom" 407 408 }, 408 409 { 409 410 "BriefDescription": "Instructions per Load", 410 - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS", 411 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_LOADS", 411 412 "MetricName": "tma_info_inst_mix_ipload", 412 413 "Unit": "cpu_atom" 
413 414 }, 414 415 { 415 416 "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was not taken", 416 - "MetricExpr": "INST_RETIRED.ANY / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", 417 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / (cpu_atom@BR_MISP_RETIRED.COND@ - cpu_atom@BR_MISP_RETIRED.COND_TAKEN@)", 417 418 "MetricName": "tma_info_inst_mix_ipmisp_cond_ntaken", 418 419 "Unit": "cpu_atom" 419 420 }, 420 421 { 421 422 "BriefDescription": "Instructions per retired conditional Branch Misprediction where the branch was taken", 422 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", 423 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", 423 424 "MetricName": "tma_info_inst_mix_ipmisp_cond_taken", 424 425 "Unit": "cpu_atom" 425 426 }, 426 427 { 427 428 "BriefDescription": "Instructions per retired indirect call or jump Branch Misprediction", 428 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.INDIRECT", 429 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.INDIRECT", 429 430 "MetricName": "tma_info_inst_mix_ipmisp_indirect", 430 431 "Unit": "cpu_atom" 431 432 }, 432 433 { 433 434 "BriefDescription": "Instructions per retired return Branch Misprediction", 434 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RETURN", 435 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RETURN", 435 436 "MetricName": "tma_info_inst_mix_ipmisp_ret", 436 437 "Unit": "cpu_atom" 437 438 }, 438 439 { 439 440 "BriefDescription": "Instructions per retired Branch Misprediction", 440 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", 441 + "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", 441 442 "MetricName": "tma_info_inst_mix_ipmispredict", 442 443 "Unit": "cpu_atom" 443 444 }, 444 445 { 445 446 "BriefDescription": "Instructions per Store", 446 - "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES", 447 
+ "MetricExpr": "cpu_atom@INST_RETIRED.ANY@ / MEM_UOPS_RETIRED.ALL_STORES", 447 448 "MetricName": "tma_info_inst_mix_ipstore", 448 449 "Unit": "cpu_atom" 449 450 }, ··· 479 480 }, 480 481 { 481 482 "BriefDescription": "Cycle cost per DRAM hit", 482 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", 483 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_DRAM_HIT@ / MEM_LOAD_UOPS_RETIRED.DRAM_HIT", 483 484 "MetricName": "tma_info_memory_cycles_per_demand_load_dram_hit", 484 485 "Unit": "cpu_atom" 485 486 }, 486 487 { 487 488 "BriefDescription": "Cycle cost per L2 hit", 488 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT", 489 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / MEM_LOAD_UOPS_RETIRED.L2_HIT", 489 490 "MetricName": "tma_info_memory_cycles_per_demand_load_l2_hit", 490 491 "Unit": "cpu_atom" 491 492 }, 492 493 { 493 494 "BriefDescription": "Cycle cost per LLC hit", 494 - "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT", 495 + "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / MEM_LOAD_UOPS_RETIRED.L3_HIT", 495 496 "MetricName": "tma_info_memory_cycles_per_demand_load_l3_hit", 496 497 "Unit": "cpu_atom" 497 498 }, ··· 503 504 }, 504 505 { 505 506 "BriefDescription": "Average CPU Utilization", 506 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", 507 + "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC", 507 508 "MetricName": "tma_info_system_cpu_utilization", 508 509 "Unit": "cpu_atom" 509 510 }, ··· 523 524 }, 524 525 { 525 526 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.", 526 - "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / tma_info_core_slots", 527 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.ITLB@ / tma_info_core_slots", 527 528 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_latency_group", 528 529 "MetricName": "tma_itlb_misses", 529 530 
"MetricThreshold": "tma_itlb_misses > 0.05", ··· 532 533 }, 533 534 { 534 535 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.", 535 - "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / tma_info_core_clks", 536 + "MetricExpr": "cpu_atom@LD_HEAD.L1_BOUND_AT_RET@ / tma_info_core_clks", 536 537 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 537 538 "MetricName": "tma_l1_bound", 538 539 "MetricThreshold": "tma_l1_bound > 0.1", ··· 541 542 }, 542 543 { 543 544 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", 544 - "MetricConstraint": "NO_GROUP_EVENTS", 545 545 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_L2_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 546 546 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 547 547 "MetricName": "tma_l2_bound", ··· 550 552 }, 551 553 { 552 554 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", 553 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 554 555 "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@", 555 556 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 556 557 "MetricName": "tma_l3_bound", ··· 568 571 }, 569 572 { 570 573 "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.", 571 - "MetricExpr": 
"TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / tma_info_core_slots", 574 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS@ / tma_info_core_slots", 572 575 "MetricGroup": "TopdownL2;tma_L2_group;tma_bad_speculation_group", 573 576 "MetricName": "tma_machine_clears", 574 577 "MetricThreshold": "tma_machine_clears > 0.05", ··· 578 581 }, 579 582 { 580 583 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.", 581 - "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / tma_info_core_slots", 584 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.MEM_SCHEDULER@ / tma_info_core_slots", 582 585 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 583 586 "MetricName": "tma_mem_scheduler", 584 587 "MetricThreshold": "tma_mem_scheduler > 0.1", ··· 587 590 }, 588 591 { 589 592 "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads.", 590 - "MetricExpr": "min(cpu_atom@TOPDOWN_BE_BOUND.ALL@ / tma_info_core_slots, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", 593 + "MetricExpr": "min(tma_backend_bound, cpu_atom@LD_HEAD.ANY_AT_RET@ / tma_info_core_clks + tma_store_bound)", 591 594 "MetricGroup": "TopdownL2;tma_L2_group;tma_backend_bound_group", 592 595 "MetricName": "tma_memory_bound", 593 596 "MetricThreshold": "tma_memory_bound > 0.2", ··· 606 609 }, 607 610 { 608 611 "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)", 609 - "MetricExpr": "UOPS_RETIRED.MS / tma_info_core_slots", 612 + "MetricExpr": "cpu_atom@UOPS_RETIRED.MS@ / tma_info_core_slots", 610 613 "MetricGroup": "TopdownL2;tma_L2_group;tma_retiring_group", 611 614 "MetricName": "tma_ms_uops", 612 615 "MetricThreshold": "tma_ms_uops > 0.05", ··· 617 620 }, 618 621 { 619 622 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due 
to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.", 620 - "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / tma_info_core_slots", 623 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER@ / tma_info_core_slots", 621 624 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 622 625 "MetricName": "tma_non_mem_scheduler", 623 626 "MetricThreshold": "tma_non_mem_scheduler > 0.1", ··· 626 629 }, 627 630 { 628 631 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to a machine clear (slow nuke).", 629 - "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / tma_info_core_slots", 632 + "MetricExpr": "cpu_atom@TOPDOWN_BAD_SPECULATION.NUKE@ / tma_info_core_slots", 630 633 "MetricGroup": "TopdownL3;tma_L3_group;tma_machine_clears_group", 631 634 "MetricName": "tma_nuke", 632 635 "MetricThreshold": "tma_nuke > 0.05", ··· 635 638 }, 636 639 { 637 640 "BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to other common frontend stalls not categorized.", 638 - "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / tma_info_core_slots", 641 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.OTHER@ / tma_info_core_slots", 639 642 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 640 643 "MetricName": "tma_other_fb", 641 644 "MetricThreshold": "tma_other_fb > 0.05", ··· 644 647 }, 645 648 { 646 649 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.", 647 - "MetricExpr": "LD_HEAD.OTHER_AT_RET / tma_info_core_clks", 650 + "MetricExpr": "cpu_atom@LD_HEAD.OTHER_AT_RET@ / tma_info_core_clks", 648 651 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 649 652 "MetricName": "tma_other_l1", 650 653 "MetricThreshold": "tma_other_l1 > 0.05", ··· 680 683 }, 681 684 { 682 685 
"BriefDescription": "Counts the number of issue slots that were not delivered by the frontend due to wrong predecodes.", 683 - "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / tma_info_core_slots", 686 + "MetricExpr": "cpu_atom@TOPDOWN_FE_BOUND.PREDECODE@ / tma_info_core_slots", 684 687 "MetricGroup": "TopdownL3;tma_L3_group;tma_fetch_bandwidth_group", 685 688 "MetricName": "tma_predecode", 686 689 "MetricThreshold": "tma_predecode > 0.05", ··· 689 692 }, 690 693 { 691 694 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).", 692 - "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / tma_info_core_slots", 695 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REGISTER@ / tma_info_core_slots", 693 696 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 694 697 "MetricName": "tma_register", 695 698 "MetricThreshold": "tma_register > 0.1", ··· 698 701 }, 699 702 { 700 703 "BriefDescription": "Counts the number of issue slots that were not consumed by the backend due to the reorder buffer being full (ROB stalls).", 701 - "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / tma_info_core_slots", 704 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.REORDER_BUFFER@ / tma_info_core_slots", 702 705 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 703 706 "MetricName": "tma_reorder_buffer", 704 707 "MetricThreshold": "tma_reorder_buffer > 0.1", ··· 719 722 { 720 723 "BriefDescription": "Counts the number of issue slots that result in retirement slots.", 721 724 "DefaultMetricgroupName": "TopdownL1", 722 - "MetricExpr": "TOPDOWN_RETIRING.ALL / tma_info_core_slots", 725 + "MetricExpr": "cpu_atom@TOPDOWN_RETIRING.ALL@ / tma_info_core_slots", 723 726 "MetricGroup": "Default;TopdownL1;tma_L1_group", 724 727 "MetricName": "tma_retiring", 725 728 "MetricThreshold": "tma_retiring > 0.75", ··· 738 741 }, 739 742 { 740 743 "BriefDescription": "Counts the number of issue 
slots that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).", 741 - "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / tma_info_core_slots", 744 + "MetricExpr": "cpu_atom@TOPDOWN_BE_BOUND.SERIALIZATION@ / tma_info_core_slots", 742 745 "MetricGroup": "TopdownL3;tma_L3_group;tma_resource_bound_group", 743 746 "MetricName": "tma_serialization", 744 747 "MetricThreshold": "tma_serialization > 0.1", ··· 765 768 }, 766 769 { 767 770 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.", 768 - "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / tma_info_core_clks", 771 + "MetricExpr": "cpu_atom@LD_HEAD.DTLB_MISS_AT_RET@ / tma_info_core_clks", 769 772 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 770 773 "MetricName": "tma_stlb_hit", 771 774 "MetricThreshold": "tma_stlb_hit > 0.05", ··· 774 777 }, 775 778 { 776 779 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.", 777 - "MetricExpr": "LD_HEAD.PGWALK_AT_RET / tma_info_core_clks", 780 + "MetricExpr": "cpu_atom@LD_HEAD.PGWALK_AT_RET@ / tma_info_core_clks", 778 781 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 779 782 "MetricName": "tma_stlb_miss", 780 783 "MetricThreshold": "tma_stlb_miss > 0.05", ··· 792 795 }, 793 796 { 794 797 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", 795 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 796 - "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", 798 + "MetricExpr": "cpu_atom@LD_HEAD.ST_ADDR_AT_RET@ / tma_info_core_clks", 797 799 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 798 800 "MetricName": "tma_store_fwd_blk", 799 801 "MetricThreshold": 
"tma_store_fwd_blk > 0.05", ··· 871 875 }, 872 876 { 873 877 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers", 874 - "MetricExpr": "INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks + tma_unknown_branches", 878 + "MetricExpr": "cpu_core@INT_MISC.CLEAR_RESTEER_CYCLES@ / tma_info_thread_clks + tma_unknown_branches", 875 879 "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group", 876 880 "MetricName": "tma_branch_resteers", 877 881 "MetricThreshold": "tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 901 905 }, 902 906 { 903 907 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", 904 - "MetricConstraint": "NO_GROUP_EVENTS", 905 908 "MetricExpr": "(25 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_average_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", 906 909 "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 907 910 "MetricName": "tma_contested_accesses", ··· 922 927 }, 923 928 { 924 929 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", 925 - "MetricConstraint": "NO_GROUP_EVENTS", 926 930 "MetricExpr": "24 * tma_info_system_average_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD@ + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (1 - cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + 
cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks", 927 931 "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 928 932 "MetricName": "tma_data_sharing", ··· 942 948 }, 943 949 { 944 950 "BriefDescription": "This metric represents fraction of cycles where the Divider unit was active", 945 - "MetricExpr": "ARITH.DIV_ACTIVE / tma_info_thread_clks", 951 + "MetricExpr": "cpu_core@ARITH.DIV_ACTIVE@ / tma_info_thread_clks", 946 952 "MetricGroup": "TopdownL3;tma_L3_group;tma_core_bound_group", 947 953 "MetricName": "tma_divider", 948 954 "MetricThreshold": "tma_divider > 0.2 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)", ··· 952 958 }, 953 959 { 954 960 "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", 955 - "MetricConstraint": "NO_GROUP_EVENTS", 956 961 "MetricExpr": "cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@ / tma_info_thread_clks", 957 962 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 958 963 "MetricName": "tma_dram_bound", ··· 972 979 }, 973 980 { 974 981 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to switches from DSB to MITE pipelines", 975 - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / tma_info_thread_clks", 982 + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / tma_info_thread_clks", 976 983 "MetricGroup": "DSBmiss;FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", 977 984 "MetricName": "tma_dsb_switches", 978 985 "MetricThreshold": "tma_dsb_switches > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1012 1019 }, 1013 1020 { 1014 1021 "BriefDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed", 1015 - 
"MetricExpr": "L1D_PEND_MISS.FB_FULL / tma_info_thread_clks", 1022 + "MetricExpr": "cpu_core@L1D_PEND_MISS.FB_FULL@ / tma_info_thread_clks", 1016 1023 "MetricGroup": "MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group", 1017 1024 "MetricName": "tma_fb_full", 1018 1025 "MetricThreshold": "tma_fb_full > 0.3", ··· 1147 1154 }, 1148 1155 { 1149 1156 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to instruction cache misses", 1150 - "MetricExpr": "ICACHE_DATA.STALLS / tma_info_thread_clks", 1157 + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / tma_info_thread_clks", 1151 1158 "MetricGroup": "BigFoot;FetchLat;IcMiss;TopdownL3;tma_L3_group;tma_fetch_latency_group", 1152 1159 "MetricName": "tma_icache_misses", 1153 1160 "MetricThreshold": "tma_icache_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1157 1164 }, 1158 1165 { 1159 1166 "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", 1160 - "MetricConstraint": "NO_GROUP_EVENTS", 1161 1167 "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", 1162 1168 "MetricGroup": "Bad;BrMispredicts;tma_issueBM", 1163 1169 "MetricName": "tma_info_bad_spec_branch_misprediction_cost", ··· 1165 1173 }, 1166 1174 { 1167 1175 "BriefDescription": "Instructions per retired mispredicts for conditional non-taken branches (lower number means higher occurrence rate).", 1168 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_NTAKEN", 1176 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_NTAKEN", 1169 1177 "MetricGroup": "Bad;BrMispredicts", 1170 1178 "MetricName": "tma_info_bad_spec_ipmisp_cond_ntaken", 1171 1179 "MetricThreshold": 
"tma_info_bad_spec_ipmisp_cond_ntaken < 200", ··· 1173 1181 }, 1174 1182 { 1175 1183 "BriefDescription": "Instructions per retired mispredicts for conditional taken branches (lower number means higher occurrence rate).", 1176 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.COND_TAKEN", 1184 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.COND_TAKEN", 1177 1185 "MetricGroup": "Bad;BrMispredicts", 1178 1186 "MetricName": "tma_info_bad_spec_ipmisp_cond_taken", 1179 1187 "MetricThreshold": "tma_info_bad_spec_ipmisp_cond_taken < 200", ··· 1189 1197 }, 1190 1198 { 1191 1199 "BriefDescription": "Instructions per retired mispredicts for return branches (lower number means higher occurrence rate).", 1192 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.RET", 1200 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.RET", 1193 1201 "MetricGroup": "Bad;BrMispredicts", 1194 1202 "MetricName": "tma_info_bad_spec_ipmisp_ret", 1195 1203 "MetricThreshold": "tma_info_bad_spec_ipmisp_ret < 500", ··· 1197 1205 }, 1198 1206 { 1199 1207 "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear) (lower number means higher occurrence rate)", 1200 - "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES", 1208 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_MISP_RETIRED.ALL_BRANCHES", 1201 1209 "MetricGroup": "Bad;BadSpec;BrMispredicts", 1202 1210 "MetricName": "tma_info_bad_spec_ipmispredict", 1203 1211 "MetricThreshold": "tma_info_bad_spec_ipmispredict < 200", ··· 1205 1213 }, 1206 1214 { 1207 1215 "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", 1208 - "MetricConstraint": "NO_GROUP_EVENTS", 1209 1216 "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", 1210 1217 "MetricGroup": "Cor;SMT", 1211 1218 "MetricName": "tma_info_botlnk_l0_core_bound_likely", 
··· 1213 1222 }, 1214 1223 { 1215 1224 "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", 1216 - "MetricConstraint": "NO_GROUP_EVENTS", 1217 1225 "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_lsd + tma_mite))", 1218 1226 "MetricGroup": "DSBmiss;Fed;tma_issueFB", 1219 1227 "MetricName": "tma_info_botlnk_l2_dsb_misses", ··· 1222 1232 }, 1223 1233 { 1224 1234 "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", 1225 - "MetricConstraint": "NO_GROUP_EVENTS", 1226 1235 "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 1227 1236 "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL", 1228 1237 "MetricName": "tma_info_botlnk_l2_ic_misses", ··· 1231 1242 }, 1232 1243 { 1233 1244 "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", 1234 - "MetricConstraint": "NO_GROUP_EVENTS", 1235 1245 "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", 1236 1246 "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", 1237 1247 "MetricName": "tma_info_bottleneck_big_code", ··· 1249 1261 }, 1250 1262 { 1251 1263 "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", 1252 - "MetricConstraint": "NO_GROUP_EVENTS", 1253 1264 "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses 
+ tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", 1254 1265 "MetricGroup": "Fed;FetchBW;Frontend", 1255 1266 "MetricName": "tma_info_bottleneck_instruction_fetch_bw", ··· 1257 1270 }, 1258 1271 { 1259 1272 "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", 1260 - "MetricConstraint": "NO_GROUP_EVENTS", 1261 1273 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))", 1262 1274 "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", 1263 1275 "MetricName": "tma_info_bottleneck_memory_bandwidth", ··· 1266 1280 }, 1267 1281 { 1268 1282 "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", 1269 - "MetricConstraint": "NO_GROUP_EVENTS", 1270 1283 "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", 1271 1284 "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", 1272 1285 "MetricName": "tma_info_bottleneck_memory_data_tlbs", ··· 1275 1290 }, 1276 1291 { 1277 1292 "BriefDescription": "Total 
pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", 1278 - "MetricConstraint": "NO_GROUP_EVENTS", 1279 1293 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound))", 1280 1294 "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", 1281 1295 "MetricName": "tma_info_bottleneck_memory_latency", ··· 1284 1300 }, 1285 1301 { 1286 1302 "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", 1287 - "MetricConstraint": "NO_GROUP_EVENTS", 1288 1303 "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 1289 1304 "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", 1290 1305 "MetricName": "tma_info_bottleneck_mispredictions", ··· 1300 1317 }, 1301 1318 { 1302 1319 "BriefDescription": "Fraction of branches that are non-taken conditionals", 1303 - "MetricExpr": "BR_INST_RETIRED.COND_NTAKEN / BR_INST_RETIRED.ALL_BRANCHES", 1320 + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_NTAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", 1304 1321 "MetricGroup": "Bad;Branches;CodeGen;PGO", 1305 1322 "MetricName": "tma_info_branches_cond_nt", 1306 1323 "Unit": "cpu_core" 1307 1324 }, 1308 1325 { 1309 1326 "BriefDescription": "Fraction of branches that are taken conditionals", 1310 - "MetricExpr": "BR_INST_RETIRED.COND_TAKEN / BR_INST_RETIRED.ALL_BRANCHES", 1327 + "MetricExpr": "cpu_core@BR_INST_RETIRED.COND_TAKEN@ / BR_INST_RETIRED.ALL_BRANCHES", 1311 
1328 "MetricGroup": "Bad;Branches;CodeGen;PGO", 1312 1329 "MetricName": "tma_info_branches_cond_tk", 1313 1330 "Unit": "cpu_core" ··· 1335 1352 }, 1336 1353 { 1337 1354 "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)", 1338 - "MetricExpr": "INST_RETIRED.ANY / tma_info_core_core_clks", 1355 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_core_core_clks", 1339 1356 "MetricGroup": "Ret;SMT;TmaL1;tma_L1_group", 1340 1357 "MetricName": "tma_info_core_coreipc", 1341 1358 "Unit": "cpu_core" ··· 1357 1374 }, 1358 1375 { 1359 1376 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-core", 1360 - "MetricExpr": "UOPS_EXECUTED.THREAD / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", 1377 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@)", 1361 1378 "MetricGroup": "Backend;Cor;Pipeline;PortsUtil", 1362 1379 "MetricName": "tma_info_core_ilp", 1363 1380 "Unit": "cpu_core" 1364 1381 }, 1365 1382 { 1366 1383 "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)", 1367 - "MetricExpr": "IDQ.DSB_UOPS / cpu_core@UOPS_ISSUED.ANY@", 1384 + "MetricExpr": "cpu_core@IDQ.DSB_UOPS@ / cpu_core@UOPS_ISSUED.ANY@", 1368 1385 "MetricGroup": "DSB;Fed;FetchBW;tma_issueFB", 1369 1386 "MetricName": "tma_info_frontend_dsb_coverage", 1370 1387 "MetricThreshold": "tma_info_frontend_dsb_coverage < 0.7 & tma_info_thread_ipc / 6 > 0.35", ··· 1373 1390 }, 1374 1391 { 1375 1392 "BriefDescription": "Average number of cycles of a switch from the DSB fetch-unit to MITE fetch unit - see DSB_Switches tree node for details.", 1376 - "MetricExpr": "DSB2MITE_SWITCHES.PENALTY_CYCLES / cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", 1393 + "MetricExpr": "cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES@ / 
cpu_core@DSB2MITE_SWITCHES.PENALTY_CYCLES\\,cmask\\=1\\,edge@", 1377 1394 "MetricGroup": "DSBmiss", 1378 1395 "MetricName": "tma_info_frontend_dsb_switch_cost", 1379 1396 "Unit": "cpu_core" 1380 1397 }, 1381 1398 { 1382 1399 "BriefDescription": "Average number of Uops issued by front-end when it issued something", 1383 - "MetricExpr": "UOPS_ISSUED.ANY / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", 1400 + "MetricExpr": "cpu_core@UOPS_ISSUED.ANY@ / cpu_core@UOPS_ISSUED.ANY\\,cmask\\=1@", 1384 1401 "MetricGroup": "Fed;FetchBW", 1385 1402 "MetricName": "tma_info_frontend_fetch_upc", 1386 1403 "Unit": "cpu_core" 1387 1404 }, 1388 1405 { 1389 1406 "BriefDescription": "Average Latency for L1 instruction cache misses", 1390 - "MetricExpr": "ICACHE_DATA.STALLS / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", 1407 + "MetricExpr": "cpu_core@ICACHE_DATA.STALLS@ / cpu_core@ICACHE_DATA.STALLS\\,cmask\\=1\\,edge@", 1391 1408 "MetricGroup": "Fed;FetchLat;IcMiss", 1392 1409 "MetricName": "tma_info_frontend_icache_miss_latency", 1393 1410 "Unit": "cpu_core" 1394 1411 }, 1395 1412 { 1396 1413 "BriefDescription": "Instructions per non-speculative DSB miss (lower number means higher occurrence rate)", 1397 - "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS", 1414 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FRONTEND_RETIRED.ANY_DSB_MISS", 1398 1415 "MetricGroup": "DSBmiss;Fed", 1399 1416 "MetricName": "tma_info_frontend_ipdsb_miss_ret", 1400 1417 "MetricThreshold": "tma_info_frontend_ipdsb_miss_ret < 50", ··· 1423 1440 }, 1424 1441 { 1425 1442 "BriefDescription": "Fraction of Uops delivered by the LSD (Loop Stream Detector; aka Loop Cache)", 1426 - "MetricExpr": "LSD.UOPS / cpu_core@UOPS_ISSUED.ANY@", 1443 + "MetricExpr": "cpu_core@LSD.UOPS@ / cpu_core@UOPS_ISSUED.ANY@", 1427 1444 "MetricGroup": "Fed;LSD", 1428 1445 "MetricName": "tma_info_frontend_lsd_coverage", 1429 1446 "Unit": "cpu_core" 1430 1447 }, 1431 1448 { 1432 1449 "BriefDescription": "Branch instructions per 
taken branch.", 1433 - "MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN", 1450 + "MetricExpr": "cpu_core@BR_INST_RETIRED.ALL_BRANCHES@ / BR_INST_RETIRED.NEAR_TAKEN", 1434 1451 "MetricGroup": "Branches;Fed;PGO", 1435 1452 "MetricName": "tma_info_inst_mix_bptkbranch", 1436 1453 "Unit": "cpu_core" ··· 1445 1462 }, 1446 1463 { 1447 1464 "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)", 1448 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", 1465 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0x3c@)", 1449 1466 "MetricGroup": "Flops;InsType", 1450 1467 "MetricName": "tma_info_inst_mix_iparith", 1451 1468 "MetricThreshold": "tma_info_inst_mix_iparith < 10", ··· 1454 1471 }, 1455 1472 { 1456 1473 "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)", 1457 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", 1474 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)", 1458 1475 "MetricGroup": "Flops;FpVector;InsType", 1459 1476 "MetricName": "tma_info_inst_mix_iparith_avx128", 1460 1477 "MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10", ··· 1463 1480 }, 1464 1481 { 1465 1482 "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)", 1466 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1483 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ 
/ (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1467 1484 "MetricGroup": "Flops;FpVector;InsType", 1468 1485 "MetricName": "tma_info_inst_mix_iparith_avx256", 1469 1486 "MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10", ··· 1472 1489 }, 1473 1490 { 1474 1491 "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)", 1475 - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 1492 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", 1476 1493 "MetricGroup": "Flops;FpScalar;InsType", 1477 1494 "MetricName": "tma_info_inst_mix_iparith_scalar_dp", 1478 1495 "MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10", ··· 1481 1498 }, 1482 1499 { 1483 1500 "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)", 1484 - "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 1501 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / FP_ARITH_INST_RETIRED.SCALAR_SINGLE", 1485 1502 "MetricGroup": "Flops;FpScalar;InsType", 1486 1503 "MetricName": "tma_info_inst_mix_iparith_scalar_sp", 1487 1504 "MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10", ··· 1490 1507 }, 1491 1508 { 1492 1509 "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)", 1493 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES", 1510 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.ALL_BRANCHES", 1494 1511 "MetricGroup": "Branches;Fed;InsType", 1495 1512 "MetricName": "tma_info_inst_mix_ipbranch", 1496 1513 "MetricThreshold": "tma_info_inst_mix_ipbranch < 8", ··· 1498 1515 }, 1499 1516 { 1500 1517 "BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)", 1501 - "MetricExpr": "INST_RETIRED.ANY / 
BR_INST_RETIRED.NEAR_CALL", 1518 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_CALL", 1502 1519 "MetricGroup": "Branches;Fed;PGO", 1503 1520 "MetricName": "tma_info_inst_mix_ipcall", 1504 1521 "MetricThreshold": "tma_info_inst_mix_ipcall < 200", ··· 1506 1523 }, 1507 1524 { 1508 1525 "BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)", 1509 - "MetricExpr": "INST_RETIRED.ANY / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1526 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)", 1510 1527 "MetricGroup": "Flops;InsType", 1511 1528 "MetricName": "tma_info_inst_mix_ipflop", 1512 1529 "MetricThreshold": "tma_info_inst_mix_ipflop < 10", ··· 1514 1531 }, 1515 1532 { 1516 1533 "BriefDescription": "Instructions per Load (lower number means higher occurrence rate)", 1517 - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_LOADS", 1534 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / MEM_INST_RETIRED.ALL_LOADS", 1518 1535 "MetricGroup": "InsType", 1519 1536 "MetricName": "tma_info_inst_mix_ipload", 1520 1537 "MetricThreshold": "tma_info_inst_mix_ipload < 3", ··· 1522 1539 }, 1523 1540 { 1524 1541 "BriefDescription": "Instructions per Store (lower number means higher occurrence rate)", 1525 - "MetricExpr": "INST_RETIRED.ANY / MEM_INST_RETIRED.ALL_STORES", 1542 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / 
MEM_INST_RETIRED.ALL_STORES", 1526 1543 "MetricGroup": "InsType", 1527 1544 "MetricName": "tma_info_inst_mix_ipstore", 1528 1545 "MetricThreshold": "tma_info_inst_mix_ipstore < 8", ··· 1530 1547 }, 1531 1548 { 1532 1549 "BriefDescription": "Instructions per Software prefetch instruction (of any type: NTA/T0/T1/T2/Prefetch) (lower number means higher occurrence rate)", 1533 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", 1550 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@SW_PREFETCH_ACCESS.T0\\,umask\\=0xF@", 1534 1551 "MetricGroup": "Prefetches", 1535 1552 "MetricName": "tma_info_inst_mix_ipswpf", 1536 1553 "MetricThreshold": "tma_info_inst_mix_ipswpf < 100", ··· 1538 1555 }, 1539 1556 { 1540 1557 "BriefDescription": "Instruction per taken branch", 1541 - "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN", 1558 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / BR_INST_RETIRED.NEAR_TAKEN", 1542 1559 "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO;tma_issueFB", 1543 1560 "MetricName": "tma_info_inst_mix_iptb", 1544 1561 "MetricThreshold": "tma_info_inst_mix_iptb < 13", ··· 1638 1655 }, 1639 1656 { 1640 1657 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load operations (in core cycles)", 1641 - "MetricExpr": "L1D_PEND_MISS.PENDING / MEM_LOAD_COMPLETED.L1_MISS_ANY", 1658 + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / MEM_LOAD_COMPLETED.L1_MISS_ANY", 1642 1659 "MetricGroup": "Mem;MemoryBound;MemoryLat", 1643 1660 "MetricName": "tma_info_memory_load_miss_real_latency", 1644 1661 "Unit": "cpu_core" 1645 1662 }, 1646 1663 { 1647 1664 "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss", 1648 - "MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES", 1665 + "MetricExpr": "cpu_core@L1D_PEND_MISS.PENDING@ / L1D_PEND_MISS.PENDING_CYCLES", 1649 1666 "MetricGroup": "Mem;MemoryBW;MemoryBound", 1650 1667 
"MetricName": "tma_info_memory_mlp", 1651 1668 "PublicDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)", ··· 1653 1670 }, 1654 1671 { 1655 1672 "BriefDescription": "Average Parallel L2 cache miss data reads", 1656 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", 1673 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD@ / OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", 1657 1674 "MetricGroup": "Memory_BW;Offcore", 1658 1675 "MetricName": "tma_info_memory_oro_data_l2_mlp", 1659 1676 "Unit": "cpu_core" 1660 1677 }, 1661 1678 { 1662 1679 "BriefDescription": "Average Latency for L2 cache miss demand Loads", 1663 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / OFFCORE_REQUESTS.DEMAND_DATA_RD", 1680 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / OFFCORE_REQUESTS.DEMAND_DATA_RD", 1664 1681 "MetricGroup": "Memory_Lat;Offcore", 1665 1682 "MetricName": "tma_info_memory_oro_load_l2_miss_latency", 1666 1683 "Unit": "cpu_core" 1667 1684 }, 1668 1685 { 1669 1686 "BriefDescription": "Average Parallel L2 cache miss demand Loads", 1670 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", 1687 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD@ / cpu_core@OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD\\,cmask\\=1@", 1671 1688 "MetricGroup": "Memory_BW;Offcore", 1672 1689 "MetricName": "tma_info_memory_oro_load_l2_mlp", 1673 1690 "Unit": "cpu_core" 1674 1691 }, 1675 1692 { 1676 1693 "BriefDescription": "Average Latency for L3 cache miss demand Loads", 1677 - "MetricExpr": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD / OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", 1694 + "MetricExpr": "cpu_core@OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD@ / 
OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", 1678 1695 "MetricGroup": "Memory_Lat;Offcore", 1679 1696 "MetricName": "tma_info_memory_oro_load_l3_miss_latency", 1680 1697 "Unit": "cpu_core" ··· 1738 1755 }, 1739 1756 { 1740 1757 "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per-thread", 1741 - "MetricExpr": "UOPS_EXECUTED.THREAD / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", 1758 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@", 1742 1759 "MetricGroup": "Cor;Pipeline;PortsUtil;SMT", 1743 1760 "MetricName": "tma_info_pipeline_execute", 1744 1761 "Unit": "cpu_core" 1745 1762 }, 1746 1763 { 1747 1764 "BriefDescription": "Instructions per a microcode Assist invocation", 1748 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", 1765 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY\\,umask\\=0x1B@", 1749 1766 "MetricGroup": "Pipeline;Ret;Retire", 1750 1767 "MetricName": "tma_info_pipeline_ipassist", 1751 1768 "MetricThreshold": "tma_info_pipeline_ipassist < 100e3", ··· 1761 1778 }, 1762 1779 { 1763 1780 "BriefDescription": "Estimated fraction of retirement-cycles dealing with repeat instructions", 1764 - "MetricExpr": "INST_RETIRED.REP_ITERATION / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", 1781 + "MetricExpr": "cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@", 1765 1782 "MetricGroup": "Pipeline;Ret", 1766 1783 "MetricName": "tma_info_pipeline_strings_cycles", 1767 1784 "MetricThreshold": "tma_info_pipeline_strings_cycles > 0.1", ··· 1776 1793 }, 1777 1794 { 1778 1795 "BriefDescription": "Average CPU Utilization", 1779 - "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC", 1796 + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC", 1780 1797 "MetricGroup": "HPC;Summary", 1781 1798 "MetricName": "tma_info_system_cpu_utilization", 1782 1799 "Unit": "cpu_core" ··· 1799 1816 }, 1800 1817 { 1801 1818 
"BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]", 1802 - "MetricExpr": "INST_RETIRED.ANY / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", 1819 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@BR_INST_RETIRED.FAR_BRANCH@u", 1803 1820 "MetricGroup": "Branches;OS", 1804 1821 "MetricName": "tma_info_system_ipfarbranch", 1805 1822 "MetricThreshold": "tma_info_system_ipfarbranch < 1e6", ··· 1830 1847 }, 1831 1848 { 1832 1849 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", 1850 + "MetricConstraint": "NO_GROUP_EVENTS", 1833 1851 "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD", 1834 1852 "MetricGroup": "Mem;MemoryLat;SoC", 1835 1853 "MetricName": "tma_info_system_mem_read_latency", ··· 1839 1855 }, 1840 1856 { 1841 1857 "BriefDescription": "Average latency of all requests to external memory (in Uncore cycles)", 1858 + "MetricConstraint": "NO_GROUP_EVENTS", 1842 1859 "MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.ALL + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.ALL", 1843 1860 "MetricGroup": "Mem;SoC", 1844 1861 "MetricName": "tma_info_system_mem_request_latency", ··· 1882 1897 }, 1883 1898 { 1884 1899 "BriefDescription": "The ratio of Executed- by Issued-Uops", 1885 - "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY", 1900 + "MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / UOPS_ISSUED.ANY", 1886 1901 "MetricGroup": "Cor;Pipeline", 1887 1902 "MetricName": "tma_info_thread_execute_per_issue", 1888 1903 "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. 
Ratio < 1 suggest high rate of \"execute\" at rename stage.", ··· 1890 1905 }, 1891 1906 { 1892 1907 "BriefDescription": "Instructions Per Cycle (per Logical Processor)", 1893 - "MetricExpr": "INST_RETIRED.ANY / tma_info_thread_clks", 1908 + "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / tma_info_thread_clks", 1894 1909 "MetricGroup": "Ret;Summary", 1895 1910 "MetricName": "tma_info_thread_ipc", 1896 1911 "Unit": "cpu_core" ··· 1957 1972 }, 1958 1973 { 1959 1974 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Instruction TLB (ITLB) misses", 1960 - "MetricExpr": "ICACHE_TAG.STALLS / tma_info_thread_clks", 1975 + "MetricExpr": "cpu_core@ICACHE_TAG.STALLS@ / tma_info_thread_clks", 1961 1976 "MetricGroup": "BigFoot;FetchLat;MemoryTLB;TopdownL3;tma_L3_group;tma_fetch_latency_group", 1962 1977 "MetricName": "tma_itlb_misses", 1963 1978 "MetricThreshold": "tma_itlb_misses > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 1977 1992 }, 1978 1993 { 1979 1994 "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", 1980 - "MetricConstraint": "NO_GROUP_EVENTS", 1981 1995 "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L1D_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@) / tma_info_thread_clks", 1982 1996 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1983 1997 "MetricName": "tma_l2_bound", ··· 1987 2003 }, 1988 2004 { 1989 2005 "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", 1990 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1991 2006 "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks", 1992 2007 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1993 2008 "MetricName": "tma_l3_bound", ··· 2007 2024 }, 2008 
2025 { 2009 2026 "BriefDescription": "This metric represents fraction of cycles CPU was stalled due to Length Changing Prefixes (LCPs)", 2010 - "MetricExpr": "DECODE.LCP / tma_info_thread_clks", 2027 + "MetricExpr": "cpu_core@DECODE.LCP@ / tma_info_thread_clks", 2011 2028 "MetricGroup": "FetchLat;TopdownL3;tma_L3_group;tma_fetch_latency_group;tma_issueFB", 2012 2029 "MetricName": "tma_lcp", 2013 2030 "MetricThreshold": "tma_lcp > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15)", ··· 2028 2045 }, 2029 2046 { 2030 2047 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations", 2031 - "MetricExpr": "UOPS_DISPATCHED.PORT_2_3_10 / (3 * tma_info_core_core_clks)", 2048 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_2_3_10@ / (3 * tma_info_core_core_clks)", 2032 2049 "MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group", 2033 2050 "MetricName": "tma_load_op_utilization", 2034 2051 "MetricThreshold": "tma_load_op_utilization > 0.6", ··· 2047 2064 }, 2048 2065 { 2049 2066 "BriefDescription": "This metric estimates the fraction of cycles where the Second-level TLB (STLB) was missed by load accesses, performing a hardware page walk", 2050 - "MetricExpr": "DTLB_LOAD_MISSES.WALK_ACTIVE / tma_info_thread_clks", 2067 + "MetricExpr": "cpu_core@DTLB_LOAD_MISSES.WALK_ACTIVE@ / tma_info_thread_clks", 2051 2068 "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_load_group", 2052 2069 "MetricName": "tma_load_stlb_miss", 2053 2070 "MetricThreshold": "tma_load_stlb_miss > 0.05 & (tma_dtlb_load > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", ··· 2056 2073 }, 2057 2074 { 2058 2075 "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", 2059 - "MetricConstraint": "NO_GROUP_EVENTS", 2060 2076 "MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + 
cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks", 2061 2077 "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", 2062 2078 "MetricName": "tma_lock_latency", ··· 2118 2136 }, 2119 2137 { 2120 2138 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", 2139 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2121 2140 "MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks", 2122 2141 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 2123 2142 "MetricName": "tma_memory_fence", ··· 2128 2145 }, 2129 2146 { 2130 2147 "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", 2131 - "MetricConstraint": "NO_GROUP_EVENTS", 2132 2148 "MetricExpr": "tma_light_operations * cpu_core@MEM_UOP_RETIRED.ANY@ / (tma_retiring * tma_info_thread_slots)", 2133 2149 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 2134 2150 "MetricName": "tma_memory_operations", ··· 2137 2155 }, 2138 2156 { 2139 2157 "BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit", 2140 - "MetricExpr": "UOPS_RETIRED.MS / tma_info_thread_slots", 2158 + "MetricExpr": "cpu_core@UOPS_RETIRED.MS@ / tma_info_thread_slots", 2141 2159 "MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS", 2142 2160 "MetricName": "tma_microcode_sequencer", 2143 2161 "MetricThreshold": "tma_microcode_sequencer > 0.05 & tma_heavy_operations > 0.1", ··· 2207 2225 }, 2208 2226 { 2209 2227 "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - 
remaining means not covered by other sibling nodes", 2210 - "MetricConstraint": "NO_GROUP_EVENTS", 2211 2228 "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", 2212 2229 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 2213 2230 "MetricName": "tma_other_light_ops", ··· 2227 2246 }, 2228 2247 { 2229 2248 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 0 ([SNB+] ALU; [HSW+] ALU and 2nd branch)", 2230 - "MetricExpr": "UOPS_DISPATCHED.PORT_0 / tma_info_core_core_clks", 2249 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_0@ / tma_info_core_core_clks", 2231 2250 "MetricGroup": "Compute;TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2232 2251 "MetricName": "tma_port_0", 2233 2252 "MetricThreshold": "tma_port_0 > 0.6", ··· 2237 2256 }, 2238 2257 { 2239 2258 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 1 (ALU)", 2240 - "MetricExpr": "UOPS_DISPATCHED.PORT_1 / tma_info_core_core_clks", 2259 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_1@ / tma_info_core_core_clks", 2241 2260 "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2242 2261 "MetricName": "tma_port_1", 2243 2262 "MetricThreshold": "tma_port_1 > 0.6", ··· 2247 2266 }, 2248 2267 { 2249 2268 "BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port 6 ([HSW+]Primary Branch and simple ALU)", 2250 - "MetricExpr": "UOPS_DISPATCHED.PORT_6 / tma_info_core_core_clks", 2269 + "MetricExpr": "cpu_core@UOPS_DISPATCHED.PORT_6@ / tma_info_core_core_clks", 2251 2270 "MetricGroup": "TopdownL6;tma_L6_group;tma_alu_op_utilization_group;tma_issue2P", 2252 2271 "MetricName": "tma_port_6", 2253 2272 "MetricThreshold": "tma_port_6 > 0.6", ··· 2277 2296 }, 2278 2297 { 
2279 2298 "BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2280 - "MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks", 2299 + "MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks", 2281 2300 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group", 2282 2301 "MetricName": "tma_ports_utilized_1", 2283 2302 "MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2287 2306 }, 2288 2307 { 2289 2308 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2290 - "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", 2309 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2310 + "MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks", 2291 2311 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", 2292 2312 "MetricName": "tma_ports_utilized_2", 2293 2313 "MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2298 2316 }, 2299 2317 { 2300 2318 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 2301 - "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", 2319 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2320 + "MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks", 2302 2321 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", 2303 2322 "MetricName": "tma_ports_utilized_3m", 2304 2323 
"MetricThreshold": "tma_ports_utilized_3m > 0.7 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))", ··· 2321 2338 }, 2322 2339 { 2323 2340 "BriefDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations", 2324 - "MetricExpr": "RESOURCE_STALLS.SCOREBOARD / tma_info_thread_clks", 2341 + "MetricExpr": "cpu_core@RESOURCE_STALLS.SCOREBOARD@ / tma_info_thread_clks", 2325 2342 "MetricGroup": "PortsUtil;TopdownL5;tma_L5_group;tma_issueSO;tma_ports_utilized_0_group", 2326 2343 "MetricName": "tma_serializing_operation", 2327 2344 "MetricThreshold": "tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)))", ··· 2331 2348 }, 2332 2349 { 2333 2350 "BriefDescription": "This metric represents Shuffle (cross \"vector lane\" data transfers) uops fraction the CPU has retired.", 2334 - "MetricExpr": "INT_VEC_RETIRED.SHUFFLES / (tma_retiring * tma_info_thread_slots)", 2351 + "MetricExpr": "cpu_core@INT_VEC_RETIRED.SHUFFLES@ / (tma_retiring * tma_info_thread_slots)", 2335 2352 "MetricGroup": "HPC;Pipeline;TopdownL4;tma_L4_group;tma_int_operations_group", 2336 2353 "MetricName": "tma_shuffles", 2337 2354 "MetricThreshold": "tma_shuffles > 0.1 & (tma_int_operations > 0.1 & tma_light_operations > 0.6)", ··· 2340 2357 }, 2341 2358 { 2342 2359 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", 2343 - "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", 2360 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2361 + "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks", 2344 2362 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 2345 2363 "MetricName": "tma_slow_pause", 2346 2364 "MetricThreshold": "tma_slow_pause > 0.05 & (tma_serializing_operation > 0.1 & (tma_ports_utilized_0 > 0.2 & 
(tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))))", ··· 2361 2377 }, 2362 2378 { 2363 2379 "BriefDescription": "This metric represents rate of split store accesses", 2364 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2365 - "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", 2380 + "MetricExpr": "cpu_core@MEM_INST_RETIRED.SPLIT_STORES@ / tma_info_core_core_clks", 2366 2381 "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", 2367 2382 "MetricName": "tma_split_stores", 2368 2383 "MetricThreshold": "tma_split_stores > 0.2 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))", ··· 2381 2398 }, 2382 2399 { 2383 2400 "BriefDescription": "This metric estimates how often CPU was stalled due to RFO store memory accesses; RFO store issue a read-for-ownership request before the write", 2384 - "MetricExpr": "EXE_ACTIVITY.BOUND_ON_STORES / tma_info_thread_clks", 2401 + "MetricExpr": "cpu_core@EXE_ACTIVITY.BOUND_ON_STORES@ / tma_info_thread_clks", 2385 2402 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 2386 2403 "MetricName": "tma_store_bound", 2387 2404 "MetricThreshold": "tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)", ··· 2391 2408 }, 2392 2409 { 2393 2410 "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", 2394 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 2395 2411 "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks", 2396 2412 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 2397 2413 "MetricName": "tma_store_fwd_blk", ··· 2430 2448 }, 2431 2449 { 2432 2450 "BriefDescription": "This metric estimates the fraction of cycles where the STLB was missed by store accesses, performing a hardware page walk", 2433 - "MetricExpr": 
"DTLB_STORE_MISSES.WALK_ACTIVE / tma_info_core_core_clks", 2451 + "MetricExpr": "cpu_core@DTLB_STORE_MISSES.WALK_ACTIVE@ / tma_info_core_core_clks", 2434 2452 "MetricGroup": "MemoryTLB;TopdownL5;tma_L5_group;tma_dtlb_store_group", 2435 2453 "MetricName": "tma_store_stlb_miss", 2436 2454 "MetricThreshold": "tma_store_stlb_miss > 0.05 & (tma_dtlb_store > 0.05 & (tma_store_bound > 0.2 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))", ··· 2449 2467 }, 2450 2468 { 2451 2469 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to new branch address clears", 2452 - "MetricExpr": "INT_MISC.UNKNOWN_BRANCH_CYCLES / tma_info_thread_clks", 2470 + "MetricExpr": "cpu_core@INT_MISC.UNKNOWN_BRANCH_CYCLES@ / tma_info_thread_clks", 2453 2471 "MetricGroup": "BigFoot;FetchLat;TopdownL4;tma_L4_group;tma_branch_resteers_group", 2454 2472 "MetricName": "tma_unknown_branches", 2455 2473 "MetricThreshold": "tma_unknown_branches > 0.05 & (tma_branch_resteers > 0.05 & (tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15))",
-4
tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
··· 195 195 }, 196 196 { 197 197 "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).", 198 - "MetricConstraint": "NO_GROUP_EVENTS", 199 198 "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD", 200 199 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 201 200 "MetricName": "tma_dram_bound", ··· 456 457 }, 457 458 { 458 459 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.", 459 - "MetricConstraint": "NO_GROUP_EVENTS", 460 460 "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD", 461 461 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 462 462 "MetricName": "tma_l2_bound", ··· 464 466 }, 465 467 { 466 468 "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.", 467 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 468 469 "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD", 469 470 "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group", 470 471 "MetricName": "tma_l3_bound", ··· 680 683 }, 681 684 { 682 685 "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.", 683 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 684 686 "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks", 685 687 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 686 688 
"MetricName": "tma_store_fwd_blk",
+5 -20
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
··· 400 400 }, 401 401 { 402 402 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses", 403 - "MetricConstraint": "NO_GROUP_EVENTS", 404 403 "MetricExpr": "(76 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 75.5 * tma_info_system_average_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", 405 404 "MetricGroup": "DataSharing;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 406 405 "MetricName": "tma_contested_accesses", ··· 420 421 }, 421 422 { 422 423 "BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses", 423 - "MetricConstraint": "NO_GROUP_EVENTS", 424 424 "MetricExpr": "75.5 * tma_info_system_average_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks", 425 425 "MetricGroup": "Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group", 426 426 "MetricName": "tma_data_sharing", ··· 447 449 }, 448 450 { 449 451 "BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads", 450 - "MetricConstraint": "NO_GROUP_EVENTS", 451 452 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_pmm_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)", 452 453 "MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 453 454 "MetricName": 
"tma_dram_bound", ··· 653 656 }, 654 657 { 655 658 "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)", 656 - "MetricConstraint": "NO_GROUP_EVENTS", 657 659 "MetricExpr": "(tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) * tma_info_thread_slots / BR_MISP_RETIRED.ALL_BRANCHES", 658 660 "MetricGroup": "Bad;BrMispredicts;tma_issueBM", 659 661 "MetricName": "tma_info_bad_spec_branch_misprediction_cost", ··· 695 699 }, 696 700 { 697 701 "BriefDescription": "Probability of Core Bound bottleneck hidden by SMT-profiling artifacts", 698 - "MetricConstraint": "NO_GROUP_EVENTS", 699 702 "MetricExpr": "(100 * (1 - tma_core_bound / tma_ports_utilization if tma_core_bound < tma_ports_utilization else 1) if tma_info_system_smt_2t_utilization > 0.5 else 0)", 700 703 "MetricGroup": "Cor;SMT", 701 704 "MetricName": "tma_info_botlnk_l0_core_bound_likely", ··· 702 707 }, 703 708 { 704 709 "BriefDescription": "Total pipeline cost of DSB (uop cache) misses - subset of the Instruction_Fetch_BW Bottleneck", 705 - "MetricConstraint": "NO_GROUP_EVENTS", 706 710 "MetricExpr": "100 * (tma_fetch_latency * tma_dsb_switches / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_mite / (tma_dsb + tma_mite))", 707 711 "MetricGroup": "DSBmiss;Fed;tma_issueFB", 708 712 "MetricName": "tma_info_botlnk_l2_dsb_misses", ··· 710 716 }, 711 717 { 712 718 "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck", 713 - "MetricConstraint": "NO_GROUP_EVENTS", 714 719 "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 715 720 "MetricGroup": 
"Fed;FetchLat;IcMiss;tma_issueFL", 716 721 "MetricName": "tma_info_botlnk_l2_ic_misses", ··· 718 725 }, 719 726 { 720 727 "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)", 721 - "MetricConstraint": "NO_GROUP_EVENTS", 722 728 "MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)", 723 729 "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB;tma_issueBC", 724 730 "MetricName": "tma_info_bottleneck_big_code", ··· 734 742 }, 735 743 { 736 744 "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks", 737 - "MetricConstraint": "NO_GROUP_EVENTS", 738 745 "MetricExpr": "100 * (tma_frontend_bound - tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_info_bottleneck_big_code", 739 746 "MetricGroup": "Fed;FetchBW;Frontend", 740 747 "MetricName": "tma_info_bottleneck_instruction_fetch_bw", ··· 741 750 }, 742 751 { 743 752 "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks", 744 - "MetricConstraint": "NO_GROUP_EVENTS", 745 753 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + 
tma_store_fwd_blk))", 746 754 "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW", 747 755 "MetricName": "tma_info_bottleneck_memory_bandwidth", ··· 749 759 }, 750 760 { 751 761 "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)", 752 - "MetricConstraint": "NO_GROUP_EVENTS", 753 762 "MetricExpr": "100 * tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_load / max(tma_l1_bound, tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))", 754 763 "MetricGroup": "Mem;MemoryTLB;Offcore;tma_issueTLB", 755 764 "MetricName": "tma_info_bottleneck_memory_data_tlbs", ··· 757 768 }, 758 769 { 759 770 "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)", 760 - "MetricConstraint": "NO_GROUP_EVENTS", 761 771 "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound))", 762 772 "MetricGroup": "Mem;MemoryLat;Offcore;tma_issueLat", 763 773 "MetricName": "tma_info_bottleneck_memory_latency", ··· 765 777 }, 766 778 { 767 779 "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks", 768 - 
"MetricConstraint": "NO_GROUP_EVENTS", 769 780 "MetricExpr": "100 * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))", 770 781 "MetricGroup": "Bad;BadSpec;BrMispredicts;tma_issueBM", 771 782 "MetricName": "tma_info_bottleneck_mispredictions", ··· 1288 1301 }, 1289 1302 { 1290 1303 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds)", 1304 + "MetricConstraint": "NO_GROUP_EVENTS", 1291 1305 "MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / duration_time)", 1292 1306 "MetricGroup": "Mem;MemoryLat;SoC", 1293 1307 "MetricName": "tma_info_system_mem_read_latency", ··· 1443 1455 }, 1444 1456 { 1445 1457 "BriefDescription": "This metric estimates how often the CPU was stalled due to L2 cache accesses by loads", 1446 - "MetricConstraint": "NO_GROUP_EVENTS", 1447 1458 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L1D_MISS - MEMORY_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks", 1448 1459 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1449 1460 "MetricName": "tma_l2_bound", ··· 1452 1465 }, 1453 1466 { 1454 1467 "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core", 1455 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1456 1468 "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks", 1457 1469 "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group", 1458 1470 "MetricName": "tma_l3_bound", ··· 1524 1538 }, 1525 1539 { 1526 1540 "BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations", 1527 - "MetricConstraint": "NO_GROUP_EVENTS", 1528 1541 "MetricExpr": 
"(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks", 1529 1542 "MetricGroup": "Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group", 1530 1543 "MetricName": "tma_lock_latency", ··· 1581 1596 }, 1582 1597 { 1583 1598 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.", 1599 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1584 1600 "MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks", 1585 1601 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 1586 1602 "MetricName": "tma_memory_fence", ··· 1590 1604 }, 1591 1605 { 1592 1606 "BriefDescription": "This metric represents fraction of slots where the CPU was retiring memory operations -- uops for memory load or store accesses.", 1593 - "MetricConstraint": "NO_GROUP_EVENTS", 1594 1607 "MetricExpr": "tma_light_operations * MEM_UOP_RETIRED.ANY / (tma_retiring * tma_info_thread_slots)", 1595 1608 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 1596 1609 "MetricName": "tma_memory_operations", ··· 1661 1676 }, 1662 1677 { 1663 1678 "BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes", 1664 - "MetricConstraint": "NO_GROUP_EVENTS", 1665 1679 "MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches + tma_nop_instructions))", 1666 1680 "MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group", 1667 1681 "MetricName": "tma_other_light_ops", ··· 1742 1758 }, 1743 1759 { 1744 1760 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all 
execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 1761 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1745 1762 "MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks", 1746 1763 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group", 1747 1764 "MetricName": "tma_ports_utilized_2", ··· 1752 1767 }, 1753 1768 { 1754 1769 "BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)", 1770 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1755 1771 "MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks", 1756 1772 "MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group", 1757 1773 "MetricName": "tma_ports_utilized_3m", ··· 1808 1822 }, 1809 1823 { 1810 1824 "BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions", 1825 + "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1811 1826 "MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks", 1812 1827 "MetricGroup": "TopdownL6;tma_L6_group;tma_serializing_operation_group", 1813 1828 "MetricName": "tma_slow_pause", ··· 1827 1840 }, 1828 1841 { 1829 1842 "BriefDescription": "This metric represents rate of split store accesses", 1830 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1831 1843 "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks", 1832 1844 "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group", 1833 1845 "MetricName": "tma_split_stores", ··· 1854 1868 }, 1855 1869 { 1856 1870 "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores", 1857 - "MetricConstraint": "NO_GROUP_EVENTS_NMI", 1858 1871 "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / 
tma_info_thread_clks", 1859 1872 "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group", 1860 1873 "MetricName": "tma_store_fwd_blk",