Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf vendor events amd: Fix Zen 4 cache latency events

L3PMCx0AC and L3PMCx0AD, used in l3_xi_sampled_latency* events, have a
quirk that requires them to be programmed with SliceId set to 0x3.
Without this, the events do not count at all, which affects dependent
metrics such as l3_read_miss_latency.

If ThreadMask is not specified, the amd-uncore driver internally sets
ThreadMask to 0x3, EnAllCores to 0x1 and EnAllSlices to 0x1 but does
not set SliceId. Since SliceId must also be set to 0x3 in this case,
specify SliceId and all the other fields explicitly in the event definitions.

E.g.

$ sudo perf stat -e l3_xi_sampled_latency.all,l3_xi_sampled_latency_requests.all -a sleep 1

Before:

Performance counter stats for 'system wide':

0 l3_xi_sampled_latency.all
0 l3_xi_sampled_latency_requests.all

1.005155399 seconds time elapsed

After:

Performance counter stats for 'system wide':

921,446 l3_xi_sampled_latency.all
54,210 l3_xi_sampled_latency_requests.all

1.005664472 seconds time elapsed

Fixes: 5b2ca349c313 ("perf vendor events amd: Add Zen 4 uncore events")
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: ananth.narayan@amd.com
Cc: ravi.bangoria@amd.com
Cc: eranian@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301084431.646221-1-sandipan.das@amd.com

authored by

Sandipan Das and committed by
Namhyung Kim
498d3486 507ad2bd

+60
+56
tools/perf/pmu-events/arch/x86/amdzen4/cache.json
··· 676 676 "EventCode": "0xac", 677 677 "BriefDescription": "Average sampled latency when data is sourced from DRAM in the same NUMA node.", 678 678 "UMask": "0x01", 679 + "EnAllCores": "0x1", 680 + "EnAllSlices": "0x1", 681 + "SliceId": "0x3", 682 + "ThreadMask": "0x3", 679 683 "Unit": "L3PMC" 680 684 }, 681 685 { ··· 687 683 "EventCode": "0xac", 688 684 "BriefDescription": "Average sampled latency when data is sourced from DRAM in a different NUMA node.", 689 685 "UMask": "0x02", 686 + "EnAllCores": "0x1", 687 + "EnAllSlices": "0x1", 688 + "SliceId": "0x3", 689 + "ThreadMask": "0x3", 690 690 "Unit": "L3PMC" 691 691 }, 692 692 { ··· 698 690 "EventCode": "0xac", 699 691 "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in the same NUMA node.", 700 692 "UMask": "0x04", 693 + "EnAllCores": "0x1", 694 + "EnAllSlices": "0x1", 695 + "SliceId": "0x3", 696 + "ThreadMask": "0x3", 701 697 "Unit": "L3PMC" 702 698 }, 703 699 { ··· 709 697 "EventCode": "0xac", 710 698 "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in a different NUMA node.", 711 699 "UMask": "0x08", 700 + "EnAllCores": "0x1", 701 + "EnAllSlices": "0x1", 702 + "SliceId": "0x3", 703 + "ThreadMask": "0x3", 712 704 "Unit": "L3PMC" 713 705 }, 714 706 { ··· 720 704 "EventCode": "0xac", 721 705 "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in the same NUMA node.", 722 706 "UMask": "0x10", 707 + "EnAllCores": "0x1", 708 + "EnAllSlices": "0x1", 709 + "SliceId": "0x3", 710 + "ThreadMask": "0x3", 723 711 "Unit": "L3PMC" 724 712 }, 725 713 { ··· 731 711 "EventCode": "0xac", 732 712 "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in a different NUMA node.", 733 713 "UMask": "0x20", 714 + "EnAllCores": "0x1", 715 + "EnAllSlices": "0x1", 716 + "SliceId": "0x3", 717 + "ThreadMask": "0x3", 734 718 "Unit": 
"L3PMC" 735 719 }, 736 720 { ··· 742 718 "EventCode": "0xac", 743 719 "BriefDescription": "Average sampled latency from all data sources.", 744 720 "UMask": "0x3f", 721 + "EnAllCores": "0x1", 722 + "EnAllSlices": "0x1", 723 + "SliceId": "0x3", 724 + "ThreadMask": "0x3", 745 725 "Unit": "L3PMC" 746 726 }, 747 727 { ··· 753 725 "EventCode": "0xad", 754 726 "BriefDescription": "L3 cache fill requests sourced from DRAM in the same NUMA node.", 755 727 "UMask": "0x01", 728 + "EnAllCores": "0x1", 729 + "EnAllSlices": "0x1", 730 + "SliceId": "0x3", 731 + "ThreadMask": "0x3", 756 732 "Unit": "L3PMC" 757 733 }, 758 734 { ··· 764 732 "EventCode": "0xad", 765 733 "BriefDescription": "L3 cache fill requests sourced from DRAM in a different NUMA node.", 766 734 "UMask": "0x02", 735 + "EnAllCores": "0x1", 736 + "EnAllSlices": "0x1", 737 + "SliceId": "0x3", 738 + "ThreadMask": "0x3", 767 739 "Unit": "L3PMC" 768 740 }, 769 741 { ··· 775 739 "EventCode": "0xad", 776 740 "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in the same NUMA node.", 777 741 "UMask": "0x04", 742 + "EnAllCores": "0x1", 743 + "EnAllSlices": "0x1", 744 + "SliceId": "0x3", 745 + "ThreadMask": "0x3", 778 746 "Unit": "L3PMC" 779 747 }, 780 748 { ··· 786 746 "EventCode": "0xad", 787 747 "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in a different NUMA node.", 788 748 "UMask": "0x08", 749 + "EnAllCores": "0x1", 750 + "EnAllSlices": "0x1", 751 + "SliceId": "0x3", 752 + "ThreadMask": "0x3", 789 753 "Unit": "L3PMC" 790 754 }, 791 755 { ··· 797 753 "EventCode": "0xad", 798 754 "BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in the same NUMA node.", 799 755 "UMask": "0x10", 756 + "EnAllCores": "0x1", 757 + "EnAllSlices": "0x1", 758 + "SliceId": "0x3", 759 + "ThreadMask": "0x3", 800 760 "Unit": "L3PMC" 801 761 }, 802 762 { ··· 808 760 "EventCode": "0xad", 809 761 "BriefDescription": "L3 
cache fill requests sourced from extension memory (CXL) in a different NUMA node.", 810 762 "UMask": "0x20", 763 + "EnAllCores": "0x1", 764 + "EnAllSlices": "0x1", 765 + "SliceId": "0x3", 766 + "ThreadMask": "0x3", 811 767 "Unit": "L3PMC" 812 768 }, 813 769 { ··· 819 767 "EventCode": "0xad", 820 768 "BriefDescription": "L3 cache fill requests sourced from all data sources.", 821 769 "UMask": "0x3f", 770 + "EnAllCores": "0x1", 771 + "EnAllSlices": "0x1", 772 + "SliceId": "0x3", 773 + "ThreadMask": "0x3", 822 774 "Unit": "L3PMC" 823 775 } 824 776 ]
+4
tools/perf/pmu-events/jevents.py
··· 373 373 ('UMask', 'umask='), 374 374 ('NodeType', 'type='), 375 375 ('RdWrMask', 'rdwrmask='), 376 + ('EnAllCores', 'enallcores='), 377 + ('EnAllSlices', 'enallslices='), 378 + ('SliceId', 'sliceid='), 379 + ('ThreadMask', 'threadmask='), 376 380 ] 377 381 for key, value in event_fields: 378 382 if key in jd and not is_zero(jd[key]):