Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge branch 'perf-split' of git://linux-arm.org/linux-2.6-wd into devel-stable

+2419 -2414
+34 -2414
arch/arm/kernel/perf_event.c
··· 4 4 * ARM performance counter support. 5 5 * 6 6 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles 7 - * 8 - * ARMv7 support: Jean Pihet <jpihet@mvista.com> 9 - * 2010 (c) MontaVista Software, LLC. 7 + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> 10 8 * 11 9 * This code is based on the sparc64 perf event code, which is in turn based 12 10 * on the x86 code. Callchain code is based on the ARM OProfile backtrace ··· 67 69 }; 68 70 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); 69 71 70 - /* PMU names. */ 71 - static const char *arm_pmu_names[] = { 72 - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", 73 - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", 74 - [ARM_PERF_PMU_ID_V6] = "v6", 75 - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", 76 - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", 77 - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", 78 - }; 79 - 80 72 struct arm_pmu { 81 73 enum arm_perf_pmu_ids id; 74 + const char *name; 82 75 irqreturn_t (*handle_irq)(int irq_num, void *dev); 83 76 void (*enable)(struct hw_perf_event *evt, int idx); 84 77 void (*disable)(struct hw_perf_event *evt, int idx); 85 - int (*event_map)(int evt); 86 - u64 (*raw_event)(u64); 87 78 int (*get_event_idx)(struct cpu_hw_events *cpuc, 88 79 struct hw_perf_event *hwc); 89 80 u32 (*read_counter)(int idx); 90 81 void (*write_counter)(int idx, u32 val); 91 82 void (*start)(void); 92 83 void (*stop)(void); 84 + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] 85 + [PERF_COUNT_HW_CACHE_OP_MAX] 86 + [PERF_COUNT_HW_CACHE_RESULT_MAX]; 87 + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; 88 + u32 raw_event_mask; 93 89 int num_events; 94 90 u64 max_period; 95 91 }; ··· 128 136 129 137 #define CACHE_OP_UNSUPPORTED 0xFFFF 130 138 131 - static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 132 - [PERF_COUNT_HW_CACHE_OP_MAX] 133 - [PERF_COUNT_HW_CACHE_RESULT_MAX]; 134 - 135 139 static int 136 140 armpmu_map_cache_event(u64 config) 137 141 { ··· 145 157 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) 146 158 return -EINVAL; 147 159 148 - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; 160 + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; 149 161 150 162 if (ret == CACHE_OP_UNSUPPORTED) 151 163 return -ENOENT; 152 164 153 165 return ret; 166 + } 167 + 168 + static int 169 + armpmu_map_event(u64 config) 170 + { 171 + int mapping = (*armpmu->event_map)[config]; 172 + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; 173 + } 174 + 175 + static int 176 + armpmu_map_raw_event(u64 config) 177 + { 178 + return (int)(config & armpmu->raw_event_mask); 154 179 } 155 180 156 181 static int ··· 459 458 460 459 /* Decode the generic type into an ARM event identifier. */ 461 460 if (PERF_TYPE_HARDWARE == event->attr.type) { 462 - mapping = armpmu->event_map(event->attr.config); 461 + mapping = armpmu_map_event(event->attr.config); 463 462 } else if (PERF_TYPE_HW_CACHE == event->attr.type) { 464 463 mapping = armpmu_map_cache_event(event->attr.config); 465 464 } else if (PERF_TYPE_RAW == event->attr.type) { 466 - mapping = armpmu->raw_event(event->attr.config); 465 + mapping = armpmu_map_raw_event(event->attr.config); 467 466 } else { 468 467 pr_debug("event type %x not supported\n", event->attr.type); 469 468 return -EOPNOTSUPP; ··· 604 603 .read = armpmu_read, 605 604 }; 606 605 607 - /* 608 - * ARMv6 Performance counter handling code. 609 - * 610 - * ARMv6 has 2 configurable performance counters and a single cycle counter. 
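The hunk above is the heart of the refactoring: the per-PMU event_map()/raw_event() callbacks become const lookup tables plus a mask hung off struct arm_pmu, so the generic layer can translate events without an indirect call into each backend. A minimal user-space model of the table-driven lookup (array size and values here are stand-ins, not the kernel's real constants):

    #include <stdio.h>

    #define HW_OP_UNSUPPORTED  0xFFFF
    #define NUM_HW_EVENTS      7            /* stands in for PERF_COUNT_HW_MAX */

    struct example_pmu {
        const unsigned (*event_map)[NUM_HW_EVENTS];
        unsigned raw_event_mask;
    };

    static const unsigned example_map[NUM_HW_EVENTS] = {
        [0] = 0xFF,                         /* cycles -> CPU_CYCLES          */
        [1] = 0x07,                         /* instructions -> INSTR_EXEC    */
        [2] = HW_OP_UNSUPPORTED,            /* cache references: no mapping  */
        /* the real tables spell out every slot; unlisted entries here are 0 */
    };

    /* Mirrors armpmu_map_event(): table hit, or failure on the sentinel. */
    static int map_event(const struct example_pmu *pmu, unsigned config)
    {
        unsigned mapping = (*pmu->event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -1 : (int)mapping;
    }

    int main(void)
    {
        struct example_pmu pmu = { &example_map, 0xFF };
        printf("cycles -> %#x\n", (unsigned)map_event(&pmu, 0));
        printf("raw 0x1234 -> %#x\n", 0x1234 & pmu.raw_event_mask);
        return 0;
    }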
611 - * They all share a single reset bit but can be written to zero so we can use 612 - * that for a reset. 613 - * 614 - * The counters can't be individually enabled or disabled so when we remove 615 - * one event and replace it with another we could get spurious counts from the 616 - * wrong event. However, we can take advantage of the fact that the 617 - * performance counters can export events to the event bus, and the event bus 618 - * itself can be monitored. This requires that we *don't* export the events to 619 - * the event bus. The procedure for disabling a configurable counter is: 620 - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This 621 - * effectively stops the counter from counting. 622 - * - disable the counter's interrupt generation (each counter has it's 623 - * own interrupt enable bit). 624 - * Once stopped, the counter value can be written as 0 to reset. 625 - * 626 - * To enable a counter: 627 - * - enable the counter's interrupt generation. 628 - * - set the new event type. 629 - * 630 - * Note: the dedicated cycle counter only counts cycles and can't be 631 - * enabled/disabled independently of the others. When we want to disable the 632 - * cycle counter, we have to just disable the interrupt reporting and start 633 - * ignoring that counter. When re-enabling, we have to reset the value and 634 - * enable the interrupt. 635 - */ 636 - 637 - enum armv6_perf_types { 638 - ARMV6_PERFCTR_ICACHE_MISS = 0x0, 639 - ARMV6_PERFCTR_IBUF_STALL = 0x1, 640 - ARMV6_PERFCTR_DDEP_STALL = 0x2, 641 - ARMV6_PERFCTR_ITLB_MISS = 0x3, 642 - ARMV6_PERFCTR_DTLB_MISS = 0x4, 643 - ARMV6_PERFCTR_BR_EXEC = 0x5, 644 - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, 645 - ARMV6_PERFCTR_INSTR_EXEC = 0x7, 646 - ARMV6_PERFCTR_DCACHE_HIT = 0x9, 647 - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, 648 - ARMV6_PERFCTR_DCACHE_MISS = 0xB, 649 - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, 650 - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, 651 - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, 652 - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, 653 - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, 654 - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, 655 - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, 656 - ARMV6_PERFCTR_NOP = 0x20, 657 - }; 658 - 659 - enum armv6_counters { 660 - ARMV6_CYCLE_COUNTER = 1, 661 - ARMV6_COUNTER0, 662 - ARMV6_COUNTER1, 663 - }; 664 - 665 - /* 666 - * The hardware events that we support. We do support cache operations but 667 - * we have harvard caches and no way to combine instruction and data 668 - * accesses/misses in hardware. 669 - */ 670 - static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { 671 - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, 672 - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, 673 - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 674 - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 675 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, 676 - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, 677 - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 678 - }; 679 - 680 - static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 681 - [PERF_COUNT_HW_CACHE_OP_MAX] 682 - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 683 - [C(L1D)] = { 684 - /* 685 - * The performance counters don't differentiate between read 686 - * and write accesses/misses so this isn't strictly correct, 687 - * but it's the best we can do. Writes and reads get 688 - * combined. 
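armpmu_map_cache_event() above indexes a three-dimensional table, and the indices come straight out of attr.config per the perf ABI: byte 0 selects the cache, byte 1 the operation, byte 2 the result. A sketch of that decode (the kernel open-codes the same shifts):

    /* config = id | (op << 8) | (result << 16), per the perf_event ABI. */
    static void decode_cache_config(unsigned long long config,
                                    unsigned *type, unsigned *op,
                                    unsigned *result)
    {
        *type   =  config        & 0xFF;
        *op     = (config >>  8) & 0xFF;
        *result = (config >> 16) & 0xFF;
    }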
689 - */ 690 - [C(OP_READ)] = { 691 - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 692 - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 693 - }, 694 - [C(OP_WRITE)] = { 695 - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 696 - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 697 - }, 698 - [C(OP_PREFETCH)] = { 699 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 700 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 701 - }, 702 - }, 703 - [C(L1I)] = { 704 - [C(OP_READ)] = { 705 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 706 - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 707 - }, 708 - [C(OP_WRITE)] = { 709 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 710 - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 711 - }, 712 - [C(OP_PREFETCH)] = { 713 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 714 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 715 - }, 716 - }, 717 - [C(LL)] = { 718 - [C(OP_READ)] = { 719 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 720 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 721 - }, 722 - [C(OP_WRITE)] = { 723 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 724 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 725 - }, 726 - [C(OP_PREFETCH)] = { 727 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 728 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 729 - }, 730 - }, 731 - [C(DTLB)] = { 732 - /* 733 - * The ARM performance counters can count micro DTLB misses, 734 - * micro ITLB misses and main TLB misses. There isn't an event 735 - * for TLB misses, so use the micro misses here and if users 736 - * want the main TLB misses they can use a raw counter. 737 - */ 738 - [C(OP_READ)] = { 739 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 740 - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 741 - }, 742 - [C(OP_WRITE)] = { 743 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 744 - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 745 - }, 746 - [C(OP_PREFETCH)] = { 747 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 748 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 749 - }, 750 - }, 751 - [C(ITLB)] = { 752 - [C(OP_READ)] = { 753 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 754 - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 755 - }, 756 - [C(OP_WRITE)] = { 757 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 758 - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 759 - }, 760 - [C(OP_PREFETCH)] = { 761 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 762 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 763 - }, 764 - }, 765 - [C(BPU)] = { 766 - [C(OP_READ)] = { 767 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 768 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 769 - }, 770 - [C(OP_WRITE)] = { 771 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 772 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 773 - }, 774 - [C(OP_PREFETCH)] = { 775 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 776 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 777 - }, 778 - }, 779 - }; 780 - 781 - enum armv6mpcore_perf_types { 782 - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, 783 - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, 784 - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, 785 - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, 786 - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, 787 - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, 788 - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, 789 - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, 790 - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, 791 - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, 792 - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, 793 - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, 794 - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, 795 - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, 796 - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, 
797 - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, 798 - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, 799 - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, 800 - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, 801 - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, 802 - }; 803 - 804 - /* 805 - * The hardware events that we support. We do support cache operations but 806 - * we have harvard caches and no way to combine instruction and data 807 - * accesses/misses in hardware. 808 - */ 809 - static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { 810 - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, 811 - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, 812 - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 813 - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 814 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, 815 - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, 816 - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 817 - }; 818 - 819 - static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 820 - [PERF_COUNT_HW_CACHE_OP_MAX] 821 - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 822 - [C(L1D)] = { 823 - [C(OP_READ)] = { 824 - [C(RESULT_ACCESS)] = 825 - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, 826 - [C(RESULT_MISS)] = 827 - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, 828 - }, 829 - [C(OP_WRITE)] = { 830 - [C(RESULT_ACCESS)] = 831 - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, 832 - [C(RESULT_MISS)] = 833 - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, 834 - }, 835 - [C(OP_PREFETCH)] = { 836 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 837 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 838 - }, 839 - }, 840 - [C(L1I)] = { 841 - [C(OP_READ)] = { 842 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 843 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 844 - }, 845 - [C(OP_WRITE)] = { 846 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 847 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 848 - }, 849 - [C(OP_PREFETCH)] = { 850 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 851 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 852 - }, 853 - }, 854 - [C(LL)] = { 855 - [C(OP_READ)] = { 856 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 857 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 858 - }, 859 - [C(OP_WRITE)] = { 860 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 861 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 862 - }, 863 - [C(OP_PREFETCH)] = { 864 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 865 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 866 - }, 867 - }, 868 - [C(DTLB)] = { 869 - /* 870 - * The ARM performance counters can count micro DTLB misses, 871 - * micro ITLB misses and main TLB misses. There isn't an event 872 - * for TLB misses, so use the micro misses here and if users 873 - * want the main TLB misses they can use a raw counter. 
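Note that unlike plain ARMv6, the 11MPCore counters distinguish read from write D-cache traffic, which is why OP_READ and OP_WRITE above map to different raw events. From user space the lookup is driven by a packed config; an illustrative request for L1D write misses (error handling omitted):

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* On 11MPCore this resolves, via the cache map above, to
     * ARMV6MPCORE_PERFCTR_DCACHE_WRMISS (0xD). */
    static int open_l1d_write_miss(pid_t pid)
    {
        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_HW_CACHE;
        attr.config = PERF_COUNT_HW_CACHE_L1D |
                      (PERF_COUNT_HW_CACHE_OP_WRITE << 8) |
                      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
        return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
    }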
874 - */ 875 - [C(OP_READ)] = { 876 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 877 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 878 - }, 879 - [C(OP_WRITE)] = { 880 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 881 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 882 - }, 883 - [C(OP_PREFETCH)] = { 884 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 885 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 886 - }, 887 - }, 888 - [C(ITLB)] = { 889 - [C(OP_READ)] = { 890 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 891 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 892 - }, 893 - [C(OP_WRITE)] = { 894 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 895 - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 896 - }, 897 - [C(OP_PREFETCH)] = { 898 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 899 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 900 - }, 901 - }, 902 - [C(BPU)] = { 903 - [C(OP_READ)] = { 904 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 905 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 906 - }, 907 - [C(OP_WRITE)] = { 908 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 909 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 910 - }, 911 - [C(OP_PREFETCH)] = { 912 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 913 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 914 - }, 915 - }, 916 - }; 917 - 918 - static inline unsigned long 919 - armv6_pmcr_read(void) 920 - { 921 - u32 val; 922 - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); 923 - return val; 924 - } 925 - 926 - static inline void 927 - armv6_pmcr_write(unsigned long val) 928 - { 929 - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); 930 - } 931 - 932 - #define ARMV6_PMCR_ENABLE (1 << 0) 933 - #define ARMV6_PMCR_CTR01_RESET (1 << 1) 934 - #define ARMV6_PMCR_CCOUNT_RESET (1 << 2) 935 - #define ARMV6_PMCR_CCOUNT_DIV (1 << 3) 936 - #define ARMV6_PMCR_COUNT0_IEN (1 << 4) 937 - #define ARMV6_PMCR_COUNT1_IEN (1 << 5) 938 - #define ARMV6_PMCR_CCOUNT_IEN (1 << 6) 939 - #define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) 940 - #define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) 941 - #define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) 942 - #define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 943 - #define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) 944 - #define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 945 - #define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) 946 - 947 - #define ARMV6_PMCR_OVERFLOWED_MASK \ 948 - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ 949 - ARMV6_PMCR_CCOUNT_OVERFLOW) 950 - 951 - static inline int 952 - armv6_pmcr_has_overflowed(unsigned long pmcr) 953 - { 954 - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); 955 - } 956 - 957 - static inline int 958 - armv6_pmcr_counter_has_overflowed(unsigned long pmcr, 959 - enum armv6_counters counter) 960 - { 961 - int ret = 0; 962 - 963 - if (ARMV6_CYCLE_COUNTER == counter) 964 - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; 965 - else if (ARMV6_COUNTER0 == counter) 966 - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; 967 - else if (ARMV6_COUNTER1 == counter) 968 - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; 969 - else 970 - WARN_ONCE(1, "invalid counter number (%d)\n", counter); 971 - 972 - return ret; 973 - } 974 - 975 - static inline u32 976 - armv6pmu_read_counter(int counter) 977 - { 978 - unsigned long value = 0; 979 - 980 - if (ARMV6_CYCLE_COUNTER == counter) 981 - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); 982 - else if (ARMV6_COUNTER0 == counter) 983 - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); 984 - else if (ARMV6_COUNTER1 == counter) 985 - asm 
volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); 986 - else 987 - WARN_ONCE(1, "invalid counter number (%d)\n", counter); 988 - 989 - return value; 990 - } 991 - 992 - static inline void 993 - armv6pmu_write_counter(int counter, 994 - u32 value) 995 - { 996 - if (ARMV6_CYCLE_COUNTER == counter) 997 - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); 998 - else if (ARMV6_COUNTER0 == counter) 999 - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); 1000 - else if (ARMV6_COUNTER1 == counter) 1001 - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); 1002 - else 1003 - WARN_ONCE(1, "invalid counter number (%d)\n", counter); 1004 - } 1005 - 1006 - void 1007 - armv6pmu_enable_event(struct hw_perf_event *hwc, 1008 - int idx) 1009 - { 1010 - unsigned long val, mask, evt, flags; 1011 - 1012 - if (ARMV6_CYCLE_COUNTER == idx) { 1013 - mask = 0; 1014 - evt = ARMV6_PMCR_CCOUNT_IEN; 1015 - } else if (ARMV6_COUNTER0 == idx) { 1016 - mask = ARMV6_PMCR_EVT_COUNT0_MASK; 1017 - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | 1018 - ARMV6_PMCR_COUNT0_IEN; 1019 - } else if (ARMV6_COUNTER1 == idx) { 1020 - mask = ARMV6_PMCR_EVT_COUNT1_MASK; 1021 - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | 1022 - ARMV6_PMCR_COUNT1_IEN; 1023 - } else { 1024 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 1025 - return; 1026 - } 1027 - 1028 - /* 1029 - * Mask out the current event and set the counter to count the event 1030 - * that we're interested in. 1031 - */ 1032 - spin_lock_irqsave(&pmu_lock, flags); 1033 - val = armv6_pmcr_read(); 1034 - val &= ~mask; 1035 - val |= evt; 1036 - armv6_pmcr_write(val); 1037 - spin_unlock_irqrestore(&pmu_lock, flags); 1038 - } 1039 - 1040 - static irqreturn_t 1041 - armv6pmu_handle_irq(int irq_num, 1042 - void *dev) 1043 - { 1044 - unsigned long pmcr = armv6_pmcr_read(); 1045 - struct perf_sample_data data; 1046 - struct cpu_hw_events *cpuc; 1047 - struct pt_regs *regs; 1048 - int idx; 1049 - 1050 - if (!armv6_pmcr_has_overflowed(pmcr)) 1051 - return IRQ_NONE; 1052 - 1053 - regs = get_irq_regs(); 1054 - 1055 - /* 1056 - * The interrupts are cleared by writing the overflow flags back to 1057 - * the control register. All of the other bits don't have any effect 1058 - * if they are rewritten, so write the whole value back. 1059 - */ 1060 - armv6_pmcr_write(pmcr); 1061 - 1062 - perf_sample_data_init(&data, 0); 1063 - 1064 - cpuc = &__get_cpu_var(cpu_hw_events); 1065 - for (idx = 0; idx <= armpmu->num_events; ++idx) { 1066 - struct perf_event *event = cpuc->events[idx]; 1067 - struct hw_perf_event *hwc; 1068 - 1069 - if (!test_bit(idx, cpuc->active_mask)) 1070 - continue; 1071 - 1072 - /* 1073 - * We have a single interrupt for all counters. Check that 1074 - * each counter has overflowed before we process it. 1075 - */ 1076 - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) 1077 - continue; 1078 - 1079 - hwc = &event->hw; 1080 - armpmu_event_update(event, hwc, idx); 1081 - data.period = event->hw.last_period; 1082 - if (!armpmu_event_set_period(event, hwc, idx)) 1083 - continue; 1084 - 1085 - if (perf_event_overflow(event, 0, &data, regs)) 1086 - armpmu->disable(hwc, idx); 1087 - } 1088 - 1089 - /* 1090 - * Handle the pending perf events. 1091 - * 1092 - * Note: this call *must* be run with interrupts disabled. For 1093 - * platforms that can have the PMU interrupts raised as an NMI, this 1094 - * will not work. 
1095 - */ 1096 - irq_work_run(); 1097 - 1098 - return IRQ_HANDLED; 1099 - } 1100 - 1101 - static void 1102 - armv6pmu_start(void) 1103 - { 1104 - unsigned long flags, val; 1105 - 1106 - spin_lock_irqsave(&pmu_lock, flags); 1107 - val = armv6_pmcr_read(); 1108 - val |= ARMV6_PMCR_ENABLE; 1109 - armv6_pmcr_write(val); 1110 - spin_unlock_irqrestore(&pmu_lock, flags); 1111 - } 1112 - 1113 - void 1114 - armv6pmu_stop(void) 1115 - { 1116 - unsigned long flags, val; 1117 - 1118 - spin_lock_irqsave(&pmu_lock, flags); 1119 - val = armv6_pmcr_read(); 1120 - val &= ~ARMV6_PMCR_ENABLE; 1121 - armv6_pmcr_write(val); 1122 - spin_unlock_irqrestore(&pmu_lock, flags); 1123 - } 1124 - 1125 - static inline int 1126 - armv6pmu_event_map(int config) 1127 - { 1128 - int mapping = armv6_perf_map[config]; 1129 - if (HW_OP_UNSUPPORTED == mapping) 1130 - mapping = -EOPNOTSUPP; 1131 - return mapping; 1132 - } 1133 - 1134 - static inline int 1135 - armv6mpcore_pmu_event_map(int config) 1136 - { 1137 - int mapping = armv6mpcore_perf_map[config]; 1138 - if (HW_OP_UNSUPPORTED == mapping) 1139 - mapping = -EOPNOTSUPP; 1140 - return mapping; 1141 - } 1142 - 1143 - static u64 1144 - armv6pmu_raw_event(u64 config) 1145 - { 1146 - return config & 0xff; 1147 - } 1148 - 1149 - static int 1150 - armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, 1151 - struct hw_perf_event *event) 1152 - { 1153 - /* Always place a cycle counter into the cycle counter. */ 1154 - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { 1155 - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) 1156 - return -EAGAIN; 1157 - 1158 - return ARMV6_CYCLE_COUNTER; 1159 - } else { 1160 - /* 1161 - * For anything other than a cycle counter, try and use 1162 - * counter0 and counter1. 1163 - */ 1164 - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { 1165 - return ARMV6_COUNTER1; 1166 - } 1167 - 1168 - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { 1169 - return ARMV6_COUNTER0; 1170 - } 1171 - 1172 - /* The counters are all in use. */ 1173 - return -EAGAIN; 1174 - } 1175 - } 1176 - 1177 - static void 1178 - armv6pmu_disable_event(struct hw_perf_event *hwc, 1179 - int idx) 1180 - { 1181 - unsigned long val, mask, evt, flags; 1182 - 1183 - if (ARMV6_CYCLE_COUNTER == idx) { 1184 - mask = ARMV6_PMCR_CCOUNT_IEN; 1185 - evt = 0; 1186 - } else if (ARMV6_COUNTER0 == idx) { 1187 - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; 1188 - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; 1189 - } else if (ARMV6_COUNTER1 == idx) { 1190 - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; 1191 - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; 1192 - } else { 1193 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 1194 - return; 1195 - } 1196 - 1197 - /* 1198 - * Mask out the current event and set the counter to count the number 1199 - * of ETM bus signal assertion cycles. The external reporting should 1200 - * be disabled and so this should never increment. 
1201 - */ 1202 - spin_lock_irqsave(&pmu_lock, flags); 1203 - val = armv6_pmcr_read(); 1204 - val &= ~mask; 1205 - val |= evt; 1206 - armv6_pmcr_write(val); 1207 - spin_unlock_irqrestore(&pmu_lock, flags); 1208 - } 1209 - 1210 - static void 1211 - armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, 1212 - int idx) 1213 - { 1214 - unsigned long val, mask, flags, evt = 0; 1215 - 1216 - if (ARMV6_CYCLE_COUNTER == idx) { 1217 - mask = ARMV6_PMCR_CCOUNT_IEN; 1218 - } else if (ARMV6_COUNTER0 == idx) { 1219 - mask = ARMV6_PMCR_COUNT0_IEN; 1220 - } else if (ARMV6_COUNTER1 == idx) { 1221 - mask = ARMV6_PMCR_COUNT1_IEN; 1222 - } else { 1223 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 1224 - return; 1225 - } 1226 - 1227 - /* 1228 - * Unlike UP ARMv6, we don't have a way of stopping the counters. We 1229 - * simply disable the interrupt reporting. 1230 - */ 1231 - spin_lock_irqsave(&pmu_lock, flags); 1232 - val = armv6_pmcr_read(); 1233 - val &= ~mask; 1234 - val |= evt; 1235 - armv6_pmcr_write(val); 1236 - spin_unlock_irqrestore(&pmu_lock, flags); 1237 - } 1238 - 1239 - static const struct arm_pmu armv6pmu = { 1240 - .id = ARM_PERF_PMU_ID_V6, 1241 - .handle_irq = armv6pmu_handle_irq, 1242 - .enable = armv6pmu_enable_event, 1243 - .disable = armv6pmu_disable_event, 1244 - .event_map = armv6pmu_event_map, 1245 - .raw_event = armv6pmu_raw_event, 1246 - .read_counter = armv6pmu_read_counter, 1247 - .write_counter = armv6pmu_write_counter, 1248 - .get_event_idx = armv6pmu_get_event_idx, 1249 - .start = armv6pmu_start, 1250 - .stop = armv6pmu_stop, 1251 - .num_events = 3, 1252 - .max_period = (1LLU << 32) - 1, 1253 - }; 1254 - 1255 - /* 1256 - * ARMv6mpcore is almost identical to single core ARMv6 with the exception 1257 - * that some of the events have different enumerations and that there is no 1258 - * *hack* to stop the programmable counters. To stop the counters we simply 1259 - * disable the interrupt reporting and update the event. When unthrottling we 1260 - * reset the period and enable the interrupt reporting. 1261 - */ 1262 - static const struct arm_pmu armv6mpcore_pmu = { 1263 - .id = ARM_PERF_PMU_ID_V6MP, 1264 - .handle_irq = armv6pmu_handle_irq, 1265 - .enable = armv6pmu_enable_event, 1266 - .disable = armv6mpcore_pmu_disable_event, 1267 - .event_map = armv6mpcore_pmu_event_map, 1268 - .raw_event = armv6pmu_raw_event, 1269 - .read_counter = armv6pmu_read_counter, 1270 - .write_counter = armv6pmu_write_counter, 1271 - .get_event_idx = armv6pmu_get_event_idx, 1272 - .start = armv6pmu_start, 1273 - .stop = armv6pmu_stop, 1274 - .num_events = 3, 1275 - .max_period = (1LLU << 32) - 1, 1276 - }; 1277 - 1278 - /* 1279 - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. 1280 - * 1281 - * Copied from ARMv6 code, with the low level code inspired 1282 - * by the ARMv7 Oprofile code. 1283 - * 1284 - * Cortex-A8 has up to 4 configurable performance counters and 1285 - * a single cycle counter. 1286 - * Cortex-A9 has up to 31 configurable performance counters and 1287 - * a single cycle counter. 1288 - * 1289 - * All counters can be enabled/disabled and IRQ masked separately. The cycle 1290 - * counter and all 4 performance counters together can be reset separately. 
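Both ARMv6 descriptors advertise max_period = 2^32 - 1 because the counters are 32-bit up-counters: to take an interrupt after `period` events, the set-period path preloads a counter with the negated period. The arithmetic, assuming those 32-bit counters:

    #include <stdint.h>

    static uint32_t counter_preload(uint64_t period, uint64_t max_period)
    {
        if (period > max_period)
            period = max_period;
        return (uint32_t)-(uint32_t)period; /* overflows after `period` counts */
    }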
1291 - */ 1292 - 1293 - /* Common ARMv7 event types */ 1294 - enum armv7_perf_types { 1295 - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, 1296 - ARMV7_PERFCTR_IFETCH_MISS = 0x01, 1297 - ARMV7_PERFCTR_ITLB_MISS = 0x02, 1298 - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, 1299 - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, 1300 - ARMV7_PERFCTR_DTLB_REFILL = 0x05, 1301 - ARMV7_PERFCTR_DREAD = 0x06, 1302 - ARMV7_PERFCTR_DWRITE = 0x07, 1303 - 1304 - ARMV7_PERFCTR_EXC_TAKEN = 0x09, 1305 - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, 1306 - ARMV7_PERFCTR_CID_WRITE = 0x0B, 1307 - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. 1308 - * It counts: 1309 - * - all branch instructions, 1310 - * - instructions that explicitly write the PC, 1311 - * - exception generating instructions. 1312 - */ 1313 - ARMV7_PERFCTR_PC_WRITE = 0x0C, 1314 - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, 1315 - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, 1316 - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, 1317 - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, 1318 - 1319 - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, 1320 - 1321 - ARMV7_PERFCTR_CPU_CYCLES = 0xFF 1322 - }; 1323 - 1324 - /* ARMv7 Cortex-A8 specific event types */ 1325 - enum armv7_a8_perf_types { 1326 - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, 1327 - 1328 - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, 1329 - 1330 - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, 1331 - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, 1332 - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, 1333 - ARMV7_PERFCTR_L2_ACCESS = 0x43, 1334 - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, 1335 - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, 1336 - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, 1337 - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, 1338 - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, 1339 - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, 1340 - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, 1341 - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, 1342 - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, 1343 - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, 1344 - ARMV7_PERFCTR_L2_NEON = 0x4E, 1345 - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, 1346 - ARMV7_PERFCTR_L1_INST = 0x50, 1347 - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, 1348 - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, 1349 - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, 1350 - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, 1351 - ARMV7_PERFCTR_OP_EXECUTED = 0x55, 1352 - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, 1353 - ARMV7_PERFCTR_CYCLES_INST = 0x57, 1354 - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, 1355 - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, 1356 - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, 1357 - 1358 - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, 1359 - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, 1360 - ARMV7_PERFCTR_PMU_EVENTS = 0x72, 1361 - }; 1362 - 1363 - /* ARMv7 Cortex-A9 specific event types */ 1364 - enum armv7_a9_perf_types { 1365 - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, 1366 - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, 1367 - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, 1368 - 1369 - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, 1370 - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, 1371 - 1372 - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, 1373 - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, 1374 - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, 1375 - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, 1376 - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, 1377 - ARMV7_PERFCTR_DATA_EVICTION = 0x65, 1378 - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, 1379 - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, 1380 - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, 1381 - 1382 - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, 1383 - 1384 - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 
0x70, 1385 - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, 1386 - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, 1387 - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, 1388 - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, 1389 - 1390 - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, 1391 - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, 1392 - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, 1393 - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, 1394 - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, 1395 - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, 1396 - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, 1397 - 1398 - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, 1399 - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, 1400 - 1401 - ARMV7_PERFCTR_ISB_INST = 0x90, 1402 - ARMV7_PERFCTR_DSB_INST = 0x91, 1403 - ARMV7_PERFCTR_DMB_INST = 0x92, 1404 - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, 1405 - 1406 - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, 1407 - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, 1408 - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, 1409 - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, 1410 - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, 1411 - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 1412 - }; 1413 - 1414 - /* 1415 - * Cortex-A8 HW events mapping 1416 - * 1417 - * The hardware events that we support. We do support cache operations but 1418 - * we have harvard caches and no way to combine instruction and data 1419 - * accesses/misses in hardware. 1420 - */ 1421 - static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { 1422 - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, 1423 - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, 1424 - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 1425 - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 1426 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 1427 - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1428 - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, 1429 - }; 1430 - 1431 - static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 1432 - [PERF_COUNT_HW_CACHE_OP_MAX] 1433 - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 1434 - [C(L1D)] = { 1435 - /* 1436 - * The performance counters don't differentiate between read 1437 - * and write accesses/misses so this isn't strictly correct, 1438 - * but it's the best we can do. Writes and reads get 1439 - * combined. 
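Implementation-specific events such as the Cortex-A8 write-buffer-full counter (ARMV7_PERFCTR_WRITE_BUFFER_FULL, 0x40) have no slot in the generic tables but stay reachable as raw events; the driver only applies raw_event_mask (0xFF) to the config. An illustrative user-space request:

    #include <linux/perf_event.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_a8_write_buffer_full(void)
    {
        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size   = sizeof(attr);
        attr.type   = PERF_TYPE_RAW;
        attr.config = 0x40;  /* masked with raw_event_mask (0xFF) by the driver */
        return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }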
1440 - */ 1441 - [C(OP_READ)] = { 1442 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1443 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1444 - }, 1445 - [C(OP_WRITE)] = { 1446 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1447 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1448 - }, 1449 - [C(OP_PREFETCH)] = { 1450 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1451 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1452 - }, 1453 - }, 1454 - [C(L1I)] = { 1455 - [C(OP_READ)] = { 1456 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, 1457 - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, 1458 - }, 1459 - [C(OP_WRITE)] = { 1460 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, 1461 - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, 1462 - }, 1463 - [C(OP_PREFETCH)] = { 1464 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1465 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1466 - }, 1467 - }, 1468 - [C(LL)] = { 1469 - [C(OP_READ)] = { 1470 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, 1471 - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, 1472 - }, 1473 - [C(OP_WRITE)] = { 1474 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, 1475 - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, 1476 - }, 1477 - [C(OP_PREFETCH)] = { 1478 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1479 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1480 - }, 1481 - }, 1482 - [C(DTLB)] = { 1483 - /* 1484 - * Only ITLB misses and DTLB refills are supported. 1485 - * If users want the DTLB refills misses a raw counter 1486 - * must be used. 1487 - */ 1488 - [C(OP_READ)] = { 1489 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1490 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1491 - }, 1492 - [C(OP_WRITE)] = { 1493 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1494 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1495 - }, 1496 - [C(OP_PREFETCH)] = { 1497 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1498 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1499 - }, 1500 - }, 1501 - [C(ITLB)] = { 1502 - [C(OP_READ)] = { 1503 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1504 - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1505 - }, 1506 - [C(OP_WRITE)] = { 1507 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1508 - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1509 - }, 1510 - [C(OP_PREFETCH)] = { 1511 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1512 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1513 - }, 1514 - }, 1515 - [C(BPU)] = { 1516 - [C(OP_READ)] = { 1517 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1518 - [C(RESULT_MISS)] 1519 - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1520 - }, 1521 - [C(OP_WRITE)] = { 1522 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1523 - [C(RESULT_MISS)] 1524 - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1525 - }, 1526 - [C(OP_PREFETCH)] = { 1527 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1528 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1529 - }, 1530 - }, 1531 - }; 1532 - 1533 - /* 1534 - * Cortex-A9 HW events mapping 1535 - */ 1536 - static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { 1537 - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, 1538 - [PERF_COUNT_HW_INSTRUCTIONS] = 1539 - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, 1540 - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, 1541 - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, 1542 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 1543 - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1544 - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, 1545 - }; 1546 - 1547 - static const unsigned 
armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 1548 - [PERF_COUNT_HW_CACHE_OP_MAX] 1549 - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 1550 - [C(L1D)] = { 1551 - /* 1552 - * The performance counters don't differentiate between read 1553 - * and write accesses/misses so this isn't strictly correct, 1554 - * but it's the best we can do. Writes and reads get 1555 - * combined. 1556 - */ 1557 - [C(OP_READ)] = { 1558 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1559 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1560 - }, 1561 - [C(OP_WRITE)] = { 1562 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 1563 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 1564 - }, 1565 - [C(OP_PREFETCH)] = { 1566 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1567 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1568 - }, 1569 - }, 1570 - [C(L1I)] = { 1571 - [C(OP_READ)] = { 1572 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1573 - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 1574 - }, 1575 - [C(OP_WRITE)] = { 1576 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1577 - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 1578 - }, 1579 - [C(OP_PREFETCH)] = { 1580 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1581 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1582 - }, 1583 - }, 1584 - [C(LL)] = { 1585 - [C(OP_READ)] = { 1586 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1587 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1588 - }, 1589 - [C(OP_WRITE)] = { 1590 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1591 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1592 - }, 1593 - [C(OP_PREFETCH)] = { 1594 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1595 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1596 - }, 1597 - }, 1598 - [C(DTLB)] = { 1599 - /* 1600 - * Only ITLB misses and DTLB refills are supported. 1601 - * If users want the DTLB refills misses a raw counter 1602 - * must be used. 1603 - */ 1604 - [C(OP_READ)] = { 1605 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1606 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1607 - }, 1608 - [C(OP_WRITE)] = { 1609 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1610 - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 1611 - }, 1612 - [C(OP_PREFETCH)] = { 1613 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1614 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1615 - }, 1616 - }, 1617 - [C(ITLB)] = { 1618 - [C(OP_READ)] = { 1619 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1620 - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1621 - }, 1622 - [C(OP_WRITE)] = { 1623 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1624 - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 1625 - }, 1626 - [C(OP_PREFETCH)] = { 1627 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1628 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1629 - }, 1630 - }, 1631 - [C(BPU)] = { 1632 - [C(OP_READ)] = { 1633 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1634 - [C(RESULT_MISS)] 1635 - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1636 - }, 1637 - [C(OP_WRITE)] = { 1638 - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 1639 - [C(RESULT_MISS)] 1640 - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 1641 - }, 1642 - [C(OP_PREFETCH)] = { 1643 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 1644 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 1645 - }, 1646 - }, 1647 - }; 1648 - 1649 - /* 1650 - * Perf Events counters 1651 - */ 1652 - enum armv7_counters { 1653 - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ 1654 - ARMV7_COUNTER0 = 2, /* First event counter */ 1655 - }; 1656 - 1657 - /* 1658 - * The cycle counter is ARMV7_CYCLE_COUNTER. 1659 - * The first event counter is ARMV7_COUNTER0. 
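Putting the two enums together: perf-level index 1 is the cycle counter, which lives at bit 31 (ARMV7_CCNT) of the enable/flag registers, while index 2 onwards are hardware counters 0, 1, ... at bits 0, 1, ...; the ARMV7_EVENT_CNT_TO_CNTx offset defined just below is exactly that gap of 2. As a worked sketch:

    /* idx 1 -> bit 31 (CCNT); idx 2,3,... -> bits 0,1,... (CNT0, CNT1, ...) */
    static unsigned armv7_idx_to_flag_bit(int idx)
    {
        return idx == 1 ? 31u : (unsigned)(idx - 2);
    }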
1660 - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). 1661 - */ 1662 - #define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) 1663 - 1664 - /* 1665 - * ARMv7 low level PMNC access 1666 - */ 1667 - 1668 - /* 1669 - * Per-CPU PMNC: config reg 1670 - */ 1671 - #define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ 1672 - #define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ 1673 - #define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ 1674 - #define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ 1675 - #define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ 1676 - #define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ 1677 - #define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ 1678 - #define ARMV7_PMNC_N_MASK 0x1f 1679 - #define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ 1680 - 1681 - /* 1682 - * Available counters 1683 - */ 1684 - #define ARMV7_CNT0 0 /* First event counter */ 1685 - #define ARMV7_CCNT 31 /* Cycle counter */ 1686 - 1687 - /* Perf Event to low level counters mapping */ 1688 - #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) 1689 - 1690 - /* 1691 - * CNTENS: counters enable reg 1692 - */ 1693 - #define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1694 - #define ARMV7_CNTENS_C (1 << ARMV7_CCNT) 1695 - 1696 - /* 1697 - * CNTENC: counters disable reg 1698 - */ 1699 - #define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1700 - #define ARMV7_CNTENC_C (1 << ARMV7_CCNT) 1701 - 1702 - /* 1703 - * INTENS: counters overflow interrupt enable reg 1704 - */ 1705 - #define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1706 - #define ARMV7_INTENS_C (1 << ARMV7_CCNT) 1707 - 1708 - /* 1709 - * INTENC: counters overflow interrupt disable reg 1710 - */ 1711 - #define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1712 - #define ARMV7_INTENC_C (1 << ARMV7_CCNT) 1713 - 1714 - /* 1715 - * EVTSEL: Event selection reg 1716 - */ 1717 - #define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ 1718 - 1719 - /* 1720 - * SELECT: Counter selection reg 1721 - */ 1722 - #define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ 1723 - 1724 - /* 1725 - * FLAG: counters overflow flag status reg 1726 - */ 1727 - #define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 1728 - #define ARMV7_FLAG_C (1 << ARMV7_CCNT) 1729 - #define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ 1730 - #define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK 1731 - 1732 - static inline unsigned long armv7_pmnc_read(void) 1733 - { 1734 - u32 val; 1735 - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); 1736 - return val; 1737 - } 1738 - 1739 - static inline void armv7_pmnc_write(unsigned long val) 1740 - { 1741 - val &= ARMV7_PMNC_MASK; 1742 - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); 1743 - } 1744 - 1745 - static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) 1746 - { 1747 - return pmnc & ARMV7_OVERFLOWED_MASK; 1748 - } 1749 - 1750 - static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, 1751 - enum armv7_counters counter) 1752 - { 1753 - int ret = 0; 1754 - 1755 - if (counter == ARMV7_CYCLE_COUNTER) 1756 - ret = pmnc & ARMV7_FLAG_C; 1757 - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) 1758 - ret = pmnc & ARMV7_FLAG_P(counter); 1759 - else 1760 - pr_err("CPU%u checking wrong counter %d overflow status\n", 1761 - smp_processor_id(), counter); 1762 - 1763 - return ret; 1764 - } 1765 - 1766 - static 
inline int armv7_pmnc_select_counter(unsigned int idx) 1767 - { 1768 - u32 val; 1769 - 1770 - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { 1771 - pr_err("CPU%u selecting wrong PMNC counter" 1772 - " %d\n", smp_processor_id(), idx); 1773 - return -1; 1774 - } 1775 - 1776 - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; 1777 - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); 1778 - 1779 - return idx; 1780 - } 1781 - 1782 - static inline u32 armv7pmu_read_counter(int idx) 1783 - { 1784 - unsigned long value = 0; 1785 - 1786 - if (idx == ARMV7_CYCLE_COUNTER) 1787 - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); 1788 - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { 1789 - if (armv7_pmnc_select_counter(idx) == idx) 1790 - asm volatile("mrc p15, 0, %0, c9, c13, 2" 1791 - : "=r" (value)); 1792 - } else 1793 - pr_err("CPU%u reading wrong counter %d\n", 1794 - smp_processor_id(), idx); 1795 - 1796 - return value; 1797 - } 1798 - 1799 - static inline void armv7pmu_write_counter(int idx, u32 value) 1800 - { 1801 - if (idx == ARMV7_CYCLE_COUNTER) 1802 - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); 1803 - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { 1804 - if (armv7_pmnc_select_counter(idx) == idx) 1805 - asm volatile("mcr p15, 0, %0, c9, c13, 2" 1806 - : : "r" (value)); 1807 - } else 1808 - pr_err("CPU%u writing wrong counter %d\n", 1809 - smp_processor_id(), idx); 1810 - } 1811 - 1812 - static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) 1813 - { 1814 - if (armv7_pmnc_select_counter(idx) == idx) { 1815 - val &= ARMV7_EVTSEL_MASK; 1816 - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); 1817 - } 1818 - } 1819 - 1820 - static inline u32 armv7_pmnc_enable_counter(unsigned int idx) 1821 - { 1822 - u32 val; 1823 - 1824 - if ((idx != ARMV7_CYCLE_COUNTER) && 1825 - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 1826 - pr_err("CPU%u enabling wrong PMNC counter" 1827 - " %d\n", smp_processor_id(), idx); 1828 - return -1; 1829 - } 1830 - 1831 - if (idx == ARMV7_CYCLE_COUNTER) 1832 - val = ARMV7_CNTENS_C; 1833 - else 1834 - val = ARMV7_CNTENS_P(idx); 1835 - 1836 - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); 1837 - 1838 - return idx; 1839 - } 1840 - 1841 - static inline u32 armv7_pmnc_disable_counter(unsigned int idx) 1842 - { 1843 - u32 val; 1844 - 1845 - 1846 - if ((idx != ARMV7_CYCLE_COUNTER) && 1847 - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 1848 - pr_err("CPU%u disabling wrong PMNC counter" 1849 - " %d\n", smp_processor_id(), idx); 1850 - return -1; 1851 - } 1852 - 1853 - if (idx == ARMV7_CYCLE_COUNTER) 1854 - val = ARMV7_CNTENC_C; 1855 - else 1856 - val = ARMV7_CNTENC_P(idx); 1857 - 1858 - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); 1859 - 1860 - return idx; 1861 - } 1862 - 1863 - static inline u32 armv7_pmnc_enable_intens(unsigned int idx) 1864 - { 1865 - u32 val; 1866 - 1867 - if ((idx != ARMV7_CYCLE_COUNTER) && 1868 - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 1869 - pr_err("CPU%u enabling wrong PMNC counter" 1870 - " interrupt enable %d\n", smp_processor_id(), idx); 1871 - return -1; 1872 - } 1873 - 1874 - if (idx == ARMV7_CYCLE_COUNTER) 1875 - val = ARMV7_INTENS_C; 1876 - else 1877 - val = ARMV7_INTENS_P(idx); 1878 - 1879 - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); 1880 - 1881 - return idx; 1882 - } 1883 - 1884 - static inline u32 armv7_pmnc_disable_intens(unsigned int idx) 1885 - { 1886 
- u32 val; 1887 - 1888 - if ((idx != ARMV7_CYCLE_COUNTER) && 1889 - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 1890 - pr_err("CPU%u disabling wrong PMNC counter" 1891 - " interrupt enable %d\n", smp_processor_id(), idx); 1892 - return -1; 1893 - } 1894 - 1895 - if (idx == ARMV7_CYCLE_COUNTER) 1896 - val = ARMV7_INTENC_C; 1897 - else 1898 - val = ARMV7_INTENC_P(idx); 1899 - 1900 - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); 1901 - 1902 - return idx; 1903 - } 1904 - 1905 - static inline u32 armv7_pmnc_getreset_flags(void) 1906 - { 1907 - u32 val; 1908 - 1909 - /* Read */ 1910 - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); 1911 - 1912 - /* Write to clear flags */ 1913 - val &= ARMV7_FLAG_MASK; 1914 - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); 1915 - 1916 - return val; 1917 - } 1918 - 1919 - #ifdef DEBUG 1920 - static void armv7_pmnc_dump_regs(void) 1921 - { 1922 - u32 val; 1923 - unsigned int cnt; 1924 - 1925 - printk(KERN_INFO "PMNC registers dump:\n"); 1926 - 1927 - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); 1928 - printk(KERN_INFO "PMNC =0x%08x\n", val); 1929 - 1930 - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); 1931 - printk(KERN_INFO "CNTENS=0x%08x\n", val); 1932 - 1933 - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); 1934 - printk(KERN_INFO "INTENS=0x%08x\n", val); 1935 - 1936 - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); 1937 - printk(KERN_INFO "FLAGS =0x%08x\n", val); 1938 - 1939 - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); 1940 - printk(KERN_INFO "SELECT=0x%08x\n", val); 1941 - 1942 - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); 1943 - printk(KERN_INFO "CCNT =0x%08x\n", val); 1944 - 1945 - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { 1946 - armv7_pmnc_select_counter(cnt); 1947 - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); 1948 - printk(KERN_INFO "CNT[%d] count =0x%08x\n", 1949 - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); 1950 - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); 1951 - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", 1952 - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); 1953 - } 1954 - } 1955 - #endif 1956 - 1957 - void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) 1958 - { 1959 - unsigned long flags; 1960 - 1961 - /* 1962 - * Enable counter and interrupt, and set the counter to count 1963 - * the event that we're interested in. 
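The ARMv7 event counters are banked behind a select register: armv7_pmnc_select_counter() above writes the index to the selection register (c9, c12, 5) and subsequent c9, c13 accesses then hit the selected counter, which is why every read/write/evtsel helper selects first. A rough model with the registers faked as variables:

    #include <stdint.h>

    #define NCOUNTERS 4
    static uint32_t pmselr;                 /* models c9, c12, 5 */
    static uint32_t pmxevcntr[NCOUNTERS];   /* models c9, c13, 2 */

    static void select_counter(unsigned idx) { pmselr = idx & 0x1F; }

    static uint32_t read_counter(unsigned idx)
    {
        select_counter(idx);                /* select before every access */
        return pmxevcntr[pmselr % NCOUNTERS];
    }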
1964 - */ 1965 - spin_lock_irqsave(&pmu_lock, flags); 1966 - 1967 - /* 1968 - * Disable counter 1969 - */ 1970 - armv7_pmnc_disable_counter(idx); 1971 - 1972 - /* 1973 - * Set event (if destined for PMNx counters) 1974 - * We don't need to set the event if it's a cycle count 1975 - */ 1976 - if (idx != ARMV7_CYCLE_COUNTER) 1977 - armv7_pmnc_write_evtsel(idx, hwc->config_base); 1978 - 1979 - /* 1980 - * Enable interrupt for this counter 1981 - */ 1982 - armv7_pmnc_enable_intens(idx); 1983 - 1984 - /* 1985 - * Enable counter 1986 - */ 1987 - armv7_pmnc_enable_counter(idx); 1988 - 1989 - spin_unlock_irqrestore(&pmu_lock, flags); 1990 - } 1991 - 1992 - static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) 1993 - { 1994 - unsigned long flags; 1995 - 1996 - /* 1997 - * Disable counter and interrupt 1998 - */ 1999 - spin_lock_irqsave(&pmu_lock, flags); 2000 - 2001 - /* 2002 - * Disable counter 2003 - */ 2004 - armv7_pmnc_disable_counter(idx); 2005 - 2006 - /* 2007 - * Disable interrupt for this counter 2008 - */ 2009 - armv7_pmnc_disable_intens(idx); 2010 - 2011 - spin_unlock_irqrestore(&pmu_lock, flags); 2012 - } 2013 - 2014 - static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) 2015 - { 2016 - unsigned long pmnc; 2017 - struct perf_sample_data data; 2018 - struct cpu_hw_events *cpuc; 2019 - struct pt_regs *regs; 2020 - int idx; 2021 - 2022 - /* 2023 - * Get and reset the IRQ flags 2024 - */ 2025 - pmnc = armv7_pmnc_getreset_flags(); 2026 - 2027 - /* 2028 - * Did an overflow occur? 2029 - */ 2030 - if (!armv7_pmnc_has_overflowed(pmnc)) 2031 - return IRQ_NONE; 2032 - 2033 - /* 2034 - * Handle the counter(s) overflow(s) 2035 - */ 2036 - regs = get_irq_regs(); 2037 - 2038 - perf_sample_data_init(&data, 0); 2039 - 2040 - cpuc = &__get_cpu_var(cpu_hw_events); 2041 - for (idx = 0; idx <= armpmu->num_events; ++idx) { 2042 - struct perf_event *event = cpuc->events[idx]; 2043 - struct hw_perf_event *hwc; 2044 - 2045 - if (!test_bit(idx, cpuc->active_mask)) 2046 - continue; 2047 - 2048 - /* 2049 - * We have a single interrupt for all counters. Check that 2050 - * each counter has overflowed before we process it. 2051 - */ 2052 - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) 2053 - continue; 2054 - 2055 - hwc = &event->hw; 2056 - armpmu_event_update(event, hwc, idx); 2057 - data.period = event->hw.last_period; 2058 - if (!armpmu_event_set_period(event, hwc, idx)) 2059 - continue; 2060 - 2061 - if (perf_event_overflow(event, 0, &data, regs)) 2062 - armpmu->disable(hwc, idx); 2063 - } 2064 - 2065 - /* 2066 - * Handle the pending perf events. 2067 - * 2068 - * Note: this call *must* be run with interrupts disabled. For 2069 - * platforms that can have the PMU interrupts raised as an NMI, this 2070 - * will not work. 
2071 - */ 2072 - irq_work_run(); 2073 - 2074 - return IRQ_HANDLED; 2075 - } 2076 - 2077 - static void armv7pmu_start(void) 2078 - { 2079 - unsigned long flags; 2080 - 2081 - spin_lock_irqsave(&pmu_lock, flags); 2082 - /* Enable all counters */ 2083 - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); 2084 - spin_unlock_irqrestore(&pmu_lock, flags); 2085 - } 2086 - 2087 - static void armv7pmu_stop(void) 2088 - { 2089 - unsigned long flags; 2090 - 2091 - spin_lock_irqsave(&pmu_lock, flags); 2092 - /* Disable all counters */ 2093 - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); 2094 - spin_unlock_irqrestore(&pmu_lock, flags); 2095 - } 2096 - 2097 - static inline int armv7_a8_pmu_event_map(int config) 2098 - { 2099 - int mapping = armv7_a8_perf_map[config]; 2100 - if (HW_OP_UNSUPPORTED == mapping) 2101 - mapping = -EOPNOTSUPP; 2102 - return mapping; 2103 - } 2104 - 2105 - static inline int armv7_a9_pmu_event_map(int config) 2106 - { 2107 - int mapping = armv7_a9_perf_map[config]; 2108 - if (HW_OP_UNSUPPORTED == mapping) 2109 - mapping = -EOPNOTSUPP; 2110 - return mapping; 2111 - } 2112 - 2113 - static u64 armv7pmu_raw_event(u64 config) 2114 - { 2115 - return config & 0xff; 2116 - } 2117 - 2118 - static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, 2119 - struct hw_perf_event *event) 2120 - { 2121 - int idx; 2122 - 2123 - /* Always place a cycle counter into the cycle counter. */ 2124 - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { 2125 - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) 2126 - return -EAGAIN; 2127 - 2128 - return ARMV7_CYCLE_COUNTER; 2129 - } else { 2130 - /* 2131 - * For anything other than a cycle counter, try and use 2132 - * the events counters 2133 - */ 2134 - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { 2135 - if (!test_and_set_bit(idx, cpuc->used_mask)) 2136 - return idx; 2137 - } 2138 - 2139 - /* The counters are all in use. */ 2140 - return -EAGAIN; 2141 - } 2142 - } 2143 - 2144 - static struct arm_pmu armv7pmu = { 2145 - .handle_irq = armv7pmu_handle_irq, 2146 - .enable = armv7pmu_enable_event, 2147 - .disable = armv7pmu_disable_event, 2148 - .raw_event = armv7pmu_raw_event, 2149 - .read_counter = armv7pmu_read_counter, 2150 - .write_counter = armv7pmu_write_counter, 2151 - .get_event_idx = armv7pmu_get_event_idx, 2152 - .start = armv7pmu_start, 2153 - .stop = armv7pmu_stop, 2154 - .max_period = (1LLU << 32) - 1, 2155 - }; 2156 - 2157 - static u32 __init armv7_reset_read_pmnc(void) 2158 - { 2159 - u32 nb_cnt; 2160 - 2161 - /* Initialize & Reset PMNC: C and P bits */ 2162 - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); 2163 - 2164 - /* Read the nb of CNTx counters supported from PMNC */ 2165 - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; 2166 - 2167 - /* Add the CPU cycles counter and return */ 2168 - return nb_cnt + 1; 2169 - } 2170 - 2171 - /* 2172 - * ARMv5 [xscale] Performance counter handling code. 2173 - * 2174 - * Based on xscale OProfile code. 2175 - * 2176 - * There are two variants of the xscale PMU that we support: 2177 - * - xscale1pmu: 2 event counters and a cycle counter 2178 - * - xscale2pmu: 4 event counters and a cycle counter 2179 - * The two variants share event definitions, but have different 2180 - * PMU structures. 
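armv7_reset_read_pmnc() is also where the ARMv7 descriptor gets its size: unlike the ARMv6 structs, armv7pmu leaves .num_events (and .id, .name, the map tables) unset, and they are presumably filled in per CPU by init code later in the file, with N read from PMNC bits [15:11]. The extraction it performs:

    #include <stdint.h>

    /* PMNC[15:11] = number of event counters; +1 adds the cycle counter. */
    static unsigned armv7_num_counters(uint32_t pmnc)
    {
        return ((pmnc >> 11) & 0x1F) + 1;
    }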
2181 - */ 2182 - 2183 - enum xscale_perf_types { 2184 - XSCALE_PERFCTR_ICACHE_MISS = 0x00, 2185 - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, 2186 - XSCALE_PERFCTR_DATA_STALL = 0x02, 2187 - XSCALE_PERFCTR_ITLB_MISS = 0x03, 2188 - XSCALE_PERFCTR_DTLB_MISS = 0x04, 2189 - XSCALE_PERFCTR_BRANCH = 0x05, 2190 - XSCALE_PERFCTR_BRANCH_MISS = 0x06, 2191 - XSCALE_PERFCTR_INSTRUCTION = 0x07, 2192 - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, 2193 - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, 2194 - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, 2195 - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, 2196 - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, 2197 - XSCALE_PERFCTR_PC_CHANGED = 0x0D, 2198 - XSCALE_PERFCTR_BCU_REQUEST = 0x10, 2199 - XSCALE_PERFCTR_BCU_FULL = 0x11, 2200 - XSCALE_PERFCTR_BCU_DRAIN = 0x12, 2201 - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, 2202 - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, 2203 - XSCALE_PERFCTR_RMW = 0x16, 2204 - /* XSCALE_PERFCTR_CCNT is not hardware defined */ 2205 - XSCALE_PERFCTR_CCNT = 0xFE, 2206 - XSCALE_PERFCTR_UNUSED = 0xFF, 2207 - }; 2208 - 2209 - enum xscale_counters { 2210 - XSCALE_CYCLE_COUNTER = 1, 2211 - XSCALE_COUNTER0, 2212 - XSCALE_COUNTER1, 2213 - XSCALE_COUNTER2, 2214 - XSCALE_COUNTER3, 2215 - }; 2216 - 2217 - static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { 2218 - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, 2219 - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, 2220 - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 2221 - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 2222 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, 2223 - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, 2224 - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 2225 - }; 2226 - 2227 - static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 2228 - [PERF_COUNT_HW_CACHE_OP_MAX] 2229 - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 2230 - [C(L1D)] = { 2231 - [C(OP_READ)] = { 2232 - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, 2233 - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, 2234 - }, 2235 - [C(OP_WRITE)] = { 2236 - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, 2237 - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, 2238 - }, 2239 - [C(OP_PREFETCH)] = { 2240 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2241 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2242 - }, 2243 - }, 2244 - [C(L1I)] = { 2245 - [C(OP_READ)] = { 2246 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2247 - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, 2248 - }, 2249 - [C(OP_WRITE)] = { 2250 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2251 - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, 2252 - }, 2253 - [C(OP_PREFETCH)] = { 2254 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2255 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2256 - }, 2257 - }, 2258 - [C(LL)] = { 2259 - [C(OP_READ)] = { 2260 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2261 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2262 - }, 2263 - [C(OP_WRITE)] = { 2264 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2265 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2266 - }, 2267 - [C(OP_PREFETCH)] = { 2268 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2269 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2270 - }, 2271 - }, 2272 - [C(DTLB)] = { 2273 - [C(OP_READ)] = { 2274 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2275 - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, 2276 - }, 2277 - [C(OP_WRITE)] = { 2278 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2279 - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, 2280 - }, 2281 - [C(OP_PREFETCH)] = { 
2282 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2283 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2284 - }, 2285 - }, 2286 - [C(ITLB)] = { 2287 - [C(OP_READ)] = { 2288 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2289 - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, 2290 - }, 2291 - [C(OP_WRITE)] = { 2292 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2293 - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, 2294 - }, 2295 - [C(OP_PREFETCH)] = { 2296 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2297 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2298 - }, 2299 - }, 2300 - [C(BPU)] = { 2301 - [C(OP_READ)] = { 2302 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2303 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2304 - }, 2305 - [C(OP_WRITE)] = { 2306 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2307 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2308 - }, 2309 - [C(OP_PREFETCH)] = { 2310 - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 2311 - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 2312 - }, 2313 - }, 2314 - }; 2315 - 2316 - #define XSCALE_PMU_ENABLE 0x001 2317 - #define XSCALE_PMN_RESET 0x002 2318 - #define XSCALE_CCNT_RESET 0x004 2319 - #define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) 2320 - #define XSCALE_PMU_CNT64 0x008 2321 - 2322 - static inline int 2323 - xscalepmu_event_map(int config) 2324 - { 2325 - int mapping = xscale_perf_map[config]; 2326 - if (HW_OP_UNSUPPORTED == mapping) 2327 - mapping = -EOPNOTSUPP; 2328 - return mapping; 2329 - } 2330 - 2331 - static u64 2332 - xscalepmu_raw_event(u64 config) 2333 - { 2334 - return config & 0xff; 2335 - } 2336 - 2337 - #define XSCALE1_OVERFLOWED_MASK 0x700 2338 - #define XSCALE1_CCOUNT_OVERFLOW 0x400 2339 - #define XSCALE1_COUNT0_OVERFLOW 0x100 2340 - #define XSCALE1_COUNT1_OVERFLOW 0x200 2341 - #define XSCALE1_CCOUNT_INT_EN 0x040 2342 - #define XSCALE1_COUNT0_INT_EN 0x010 2343 - #define XSCALE1_COUNT1_INT_EN 0x020 2344 - #define XSCALE1_COUNT0_EVT_SHFT 12 2345 - #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) 2346 - #define XSCALE1_COUNT1_EVT_SHFT 20 2347 - #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) 2348 - 2349 - static inline u32 2350 - xscale1pmu_read_pmnc(void) 2351 - { 2352 - u32 val; 2353 - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); 2354 - return val; 2355 - } 2356 - 2357 - static inline void 2358 - xscale1pmu_write_pmnc(u32 val) 2359 - { 2360 - /* upper 4bits and 7, 11 are write-as-0 */ 2361 - val &= 0xffff77f; 2362 - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); 2363 - } 2364 - 2365 - static inline int 2366 - xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, 2367 - enum xscale_counters counter) 2368 - { 2369 - int ret = 0; 2370 - 2371 - switch (counter) { 2372 - case XSCALE_CYCLE_COUNTER: 2373 - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; 2374 - break; 2375 - case XSCALE_COUNTER0: 2376 - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; 2377 - break; 2378 - case XSCALE_COUNTER1: 2379 - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; 2380 - break; 2381 - default: 2382 - WARN_ONCE(1, "invalid counter number (%d)\n", counter); 2383 - } 2384 - 2385 - return ret; 2386 - } 2387 - 2388 - static irqreturn_t 2389 - xscale1pmu_handle_irq(int irq_num, void *dev) 2390 - { 2391 - unsigned long pmnc; 2392 - struct perf_sample_data data; 2393 - struct cpu_hw_events *cpuc; 2394 - struct pt_regs *regs; 2395 - int idx; 2396 - 2397 - /* 2398 - * NOTE: there's an A stepping erratum that states if an overflow 2399 - * bit already exists and another occurs, the previous 2400 - * Overflow bit gets cleared. There's no workaround. 
2401 - * Fixed in B stepping or later. 2402 - */ 2403 - pmnc = xscale1pmu_read_pmnc(); 2404 - 2405 - /* 2406 - * Write the value back to clear the overflow flags. Overflow 2407 - * flags remain in pmnc for use below. We also disable the PMU 2408 - * while we process the interrupt. 2409 - */ 2410 - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); 2411 - 2412 - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) 2413 - return IRQ_NONE; 2414 - 2415 - regs = get_irq_regs(); 2416 - 2417 - perf_sample_data_init(&data, 0); 2418 - 2419 - cpuc = &__get_cpu_var(cpu_hw_events); 2420 - for (idx = 0; idx <= armpmu->num_events; ++idx) { 2421 - struct perf_event *event = cpuc->events[idx]; 2422 - struct hw_perf_event *hwc; 2423 - 2424 - if (!test_bit(idx, cpuc->active_mask)) 2425 - continue; 2426 - 2427 - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) 2428 - continue; 2429 - 2430 - hwc = &event->hw; 2431 - armpmu_event_update(event, hwc, idx); 2432 - data.period = event->hw.last_period; 2433 - if (!armpmu_event_set_period(event, hwc, idx)) 2434 - continue; 2435 - 2436 - if (perf_event_overflow(event, 0, &data, regs)) 2437 - armpmu->disable(hwc, idx); 2438 - } 2439 - 2440 - irq_work_run(); 2441 - 2442 - /* 2443 - * Re-enable the PMU. 2444 - */ 2445 - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; 2446 - xscale1pmu_write_pmnc(pmnc); 2447 - 2448 - return IRQ_HANDLED; 2449 - } 2450 - 2451 - static void 2452 - xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) 2453 - { 2454 - unsigned long val, mask, evt, flags; 2455 - 2456 - switch (idx) { 2457 - case XSCALE_CYCLE_COUNTER: 2458 - mask = 0; 2459 - evt = XSCALE1_CCOUNT_INT_EN; 2460 - break; 2461 - case XSCALE_COUNTER0: 2462 - mask = XSCALE1_COUNT0_EVT_MASK; 2463 - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | 2464 - XSCALE1_COUNT0_INT_EN; 2465 - break; 2466 - case XSCALE_COUNTER1: 2467 - mask = XSCALE1_COUNT1_EVT_MASK; 2468 - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | 2469 - XSCALE1_COUNT1_INT_EN; 2470 - break; 2471 - default: 2472 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 2473 - return; 2474 - } 2475 - 2476 - spin_lock_irqsave(&pmu_lock, flags); 2477 - val = xscale1pmu_read_pmnc(); 2478 - val &= ~mask; 2479 - val |= evt; 2480 - xscale1pmu_write_pmnc(val); 2481 - spin_unlock_irqrestore(&pmu_lock, flags); 2482 - } 2483 - 2484 - static void 2485 - xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) 2486 - { 2487 - unsigned long val, mask, evt, flags; 2488 - 2489 - switch (idx) { 2490 - case XSCALE_CYCLE_COUNTER: 2491 - mask = XSCALE1_CCOUNT_INT_EN; 2492 - evt = 0; 2493 - break; 2494 - case XSCALE_COUNTER0: 2495 - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; 2496 - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; 2497 - break; 2498 - case XSCALE_COUNTER1: 2499 - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; 2500 - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; 2501 - break; 2502 - default: 2503 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 2504 - return; 2505 - } 2506 - 2507 - spin_lock_irqsave(&pmu_lock, flags); 2508 - val = xscale1pmu_read_pmnc(); 2509 - val &= ~mask; 2510 - val |= evt; 2511 - xscale1pmu_write_pmnc(val); 2512 - spin_unlock_irqrestore(&pmu_lock, flags); 2513 - } 2514 - 2515 - static int 2516 - xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, 2517 - struct hw_perf_event *event) 2518 - { 2519 - if (XSCALE_PERFCTR_CCNT == event->config_base) { 2520 - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) 2521 - return -EAGAIN; 2522 - 2523 - return 
XSCALE_CYCLE_COUNTER; 2524 - } else { 2525 - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { 2526 - return XSCALE_COUNTER1; 2527 - } 2528 - 2529 - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { 2530 - return XSCALE_COUNTER0; 2531 - } 2532 - 2533 - return -EAGAIN; 2534 - } 2535 - } 2536 - 2537 - static void 2538 - xscale1pmu_start(void) 2539 - { 2540 - unsigned long flags, val; 2541 - 2542 - spin_lock_irqsave(&pmu_lock, flags); 2543 - val = xscale1pmu_read_pmnc(); 2544 - val |= XSCALE_PMU_ENABLE; 2545 - xscale1pmu_write_pmnc(val); 2546 - spin_unlock_irqrestore(&pmu_lock, flags); 2547 - } 2548 - 2549 - static void 2550 - xscale1pmu_stop(void) 2551 - { 2552 - unsigned long flags, val; 2553 - 2554 - spin_lock_irqsave(&pmu_lock, flags); 2555 - val = xscale1pmu_read_pmnc(); 2556 - val &= ~XSCALE_PMU_ENABLE; 2557 - xscale1pmu_write_pmnc(val); 2558 - spin_unlock_irqrestore(&pmu_lock, flags); 2559 - } 2560 - 2561 - static inline u32 2562 - xscale1pmu_read_counter(int counter) 2563 - { 2564 - u32 val = 0; 2565 - 2566 - switch (counter) { 2567 - case XSCALE_CYCLE_COUNTER: 2568 - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); 2569 - break; 2570 - case XSCALE_COUNTER0: 2571 - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); 2572 - break; 2573 - case XSCALE_COUNTER1: 2574 - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); 2575 - break; 2576 - } 2577 - 2578 - return val; 2579 - } 2580 - 2581 - static inline void 2582 - xscale1pmu_write_counter(int counter, u32 val) 2583 - { 2584 - switch (counter) { 2585 - case XSCALE_CYCLE_COUNTER: 2586 - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); 2587 - break; 2588 - case XSCALE_COUNTER0: 2589 - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); 2590 - break; 2591 - case XSCALE_COUNTER1: 2592 - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); 2593 - break; 2594 - } 2595 - } 2596 - 2597 - static const struct arm_pmu xscale1pmu = { 2598 - .id = ARM_PERF_PMU_ID_XSCALE1, 2599 - .handle_irq = xscale1pmu_handle_irq, 2600 - .enable = xscale1pmu_enable_event, 2601 - .disable = xscale1pmu_disable_event, 2602 - .event_map = xscalepmu_event_map, 2603 - .raw_event = xscalepmu_raw_event, 2604 - .read_counter = xscale1pmu_read_counter, 2605 - .write_counter = xscale1pmu_write_counter, 2606 - .get_event_idx = xscale1pmu_get_event_idx, 2607 - .start = xscale1pmu_start, 2608 - .stop = xscale1pmu_stop, 2609 - .num_events = 3, 2610 - .max_period = (1LLU << 32) - 1, 2611 - }; 2612 - 2613 - #define XSCALE2_OVERFLOWED_MASK 0x01f 2614 - #define XSCALE2_CCOUNT_OVERFLOW 0x001 2615 - #define XSCALE2_COUNT0_OVERFLOW 0x002 2616 - #define XSCALE2_COUNT1_OVERFLOW 0x004 2617 - #define XSCALE2_COUNT2_OVERFLOW 0x008 2618 - #define XSCALE2_COUNT3_OVERFLOW 0x010 2619 - #define XSCALE2_CCOUNT_INT_EN 0x001 2620 - #define XSCALE2_COUNT0_INT_EN 0x002 2621 - #define XSCALE2_COUNT1_INT_EN 0x004 2622 - #define XSCALE2_COUNT2_INT_EN 0x008 2623 - #define XSCALE2_COUNT3_INT_EN 0x010 2624 - #define XSCALE2_COUNT0_EVT_SHFT 0 2625 - #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) 2626 - #define XSCALE2_COUNT1_EVT_SHFT 8 2627 - #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) 2628 - #define XSCALE2_COUNT2_EVT_SHFT 16 2629 - #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) 2630 - #define XSCALE2_COUNT3_EVT_SHFT 24 2631 - #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) 2632 - 2633 - static inline u32 2634 - xscale2pmu_read_pmnc(void) 2635 - { 2636 - u32 val; 2637 - asm 
volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); 2638 - /* bits 1-2 and 4-23 are read-unpredictable */ 2639 - return val & 0xff000009; 2640 - } 2641 - 2642 - static inline void 2643 - xscale2pmu_write_pmnc(u32 val) 2644 - { 2645 - /* bits 4-23 are write-as-0, 24-31 are write ignored */ 2646 - val &= 0xf; 2647 - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); 2648 - } 2649 - 2650 - static inline u32 2651 - xscale2pmu_read_overflow_flags(void) 2652 - { 2653 - u32 val; 2654 - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); 2655 - return val; 2656 - } 2657 - 2658 - static inline void 2659 - xscale2pmu_write_overflow_flags(u32 val) 2660 - { 2661 - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); 2662 - } 2663 - 2664 - static inline u32 2665 - xscale2pmu_read_event_select(void) 2666 - { 2667 - u32 val; 2668 - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); 2669 - return val; 2670 - } 2671 - 2672 - static inline void 2673 - xscale2pmu_write_event_select(u32 val) 2674 - { 2675 - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); 2676 - } 2677 - 2678 - static inline u32 2679 - xscale2pmu_read_int_enable(void) 2680 - { 2681 - u32 val; 2682 - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); 2683 - return val; 2684 - } 2685 - 2686 - static void 2687 - xscale2pmu_write_int_enable(u32 val) 2688 - { 2689 - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); 2690 - } 2691 - 2692 - static inline int 2693 - xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, 2694 - enum xscale_counters counter) 2695 - { 2696 - int ret = 0; 2697 - 2698 - switch (counter) { 2699 - case XSCALE_CYCLE_COUNTER: 2700 - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; 2701 - break; 2702 - case XSCALE_COUNTER0: 2703 - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; 2704 - break; 2705 - case XSCALE_COUNTER1: 2706 - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; 2707 - break; 2708 - case XSCALE_COUNTER2: 2709 - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; 2710 - break; 2711 - case XSCALE_COUNTER3: 2712 - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; 2713 - break; 2714 - default: 2715 - WARN_ONCE(1, "invalid counter number (%d)\n", counter); 2716 - } 2717 - 2718 - return ret; 2719 - } 2720 - 2721 - static irqreturn_t 2722 - xscale2pmu_handle_irq(int irq_num, void *dev) 2723 - { 2724 - unsigned long pmnc, of_flags; 2725 - struct perf_sample_data data; 2726 - struct cpu_hw_events *cpuc; 2727 - struct pt_regs *regs; 2728 - int idx; 2729 - 2730 - /* Disable the PMU. */ 2731 - pmnc = xscale2pmu_read_pmnc(); 2732 - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); 2733 - 2734 - /* Check the overflow flag register. */ 2735 - of_flags = xscale2pmu_read_overflow_flags(); 2736 - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) 2737 - return IRQ_NONE; 2738 - 2739 - /* Clear the overflow bits. 
*/ 2740 - xscale2pmu_write_overflow_flags(of_flags); 2741 - 2742 - regs = get_irq_regs(); 2743 - 2744 - perf_sample_data_init(&data, 0); 2745 - 2746 - cpuc = &__get_cpu_var(cpu_hw_events); 2747 - for (idx = 0; idx <= armpmu->num_events; ++idx) { 2748 - struct perf_event *event = cpuc->events[idx]; 2749 - struct hw_perf_event *hwc; 2750 - 2751 - if (!test_bit(idx, cpuc->active_mask)) 2752 - continue; 2753 - 2754 - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) 2755 - continue; 2756 - 2757 - hwc = &event->hw; 2758 - armpmu_event_update(event, hwc, idx); 2759 - data.period = event->hw.last_period; 2760 - if (!armpmu_event_set_period(event, hwc, idx)) 2761 - continue; 2762 - 2763 - if (perf_event_overflow(event, 0, &data, regs)) 2764 - armpmu->disable(hwc, idx); 2765 - } 2766 - 2767 - irq_work_run(); 2768 - 2769 - /* 2770 - * Re-enable the PMU. 2771 - */ 2772 - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; 2773 - xscale2pmu_write_pmnc(pmnc); 2774 - 2775 - return IRQ_HANDLED; 2776 - } 2777 - 2778 - static void 2779 - xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) 2780 - { 2781 - unsigned long flags, ien, evtsel; 2782 - 2783 - ien = xscale2pmu_read_int_enable(); 2784 - evtsel = xscale2pmu_read_event_select(); 2785 - 2786 - switch (idx) { 2787 - case XSCALE_CYCLE_COUNTER: 2788 - ien |= XSCALE2_CCOUNT_INT_EN; 2789 - break; 2790 - case XSCALE_COUNTER0: 2791 - ien |= XSCALE2_COUNT0_INT_EN; 2792 - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; 2793 - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; 2794 - break; 2795 - case XSCALE_COUNTER1: 2796 - ien |= XSCALE2_COUNT1_INT_EN; 2797 - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; 2798 - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; 2799 - break; 2800 - case XSCALE_COUNTER2: 2801 - ien |= XSCALE2_COUNT2_INT_EN; 2802 - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; 2803 - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; 2804 - break; 2805 - case XSCALE_COUNTER3: 2806 - ien |= XSCALE2_COUNT3_INT_EN; 2807 - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; 2808 - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; 2809 - break; 2810 - default: 2811 - WARN_ONCE(1, "invalid counter number (%d)\n", idx); 2812 - return; 2813 - } 2814 - 2815 - spin_lock_irqsave(&pmu_lock, flags); 2816 - xscale2pmu_write_event_select(evtsel); 2817 - xscale2pmu_write_int_enable(ien); 2818 - spin_unlock_irqrestore(&pmu_lock, flags); 2819 - } 2820 - 2821 - static void 2822 - xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) 2823 - { 2824 - unsigned long flags, ien, evtsel; 2825 - 2826 - ien = xscale2pmu_read_int_enable(); 2827 - evtsel = xscale2pmu_read_event_select(); 2828 - 2829 - switch (idx) { 2830 - case XSCALE_CYCLE_COUNTER: 2831 - ien &= ~XSCALE2_CCOUNT_INT_EN; 2832 - break; 2833 - case XSCALE_COUNTER0: 2834 - ien &= ~XSCALE2_COUNT0_INT_EN; 2835 - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; 2836 - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; 2837 - break; 2838 - case XSCALE_COUNTER1: 2839 - ien &= ~XSCALE2_COUNT1_INT_EN; 2840 - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; 2841 - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; 2842 - break; 2843 - case XSCALE_COUNTER2: 2844 - ien &= ~XSCALE2_COUNT2_INT_EN; 2845 - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; 2846 - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; 2847 - break; 2848 - case XSCALE_COUNTER3: 2849 - ien &= ~XSCALE2_COUNT3_INT_EN; 2850 - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; 2851 - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; 2852 - break; 2853 - default: 2854 - WARN_ONCE(1, 
"invalid counter number (%d)\n", idx); 2855 - return; 2856 - } 2857 - 2858 - spin_lock_irqsave(&pmu_lock, flags); 2859 - xscale2pmu_write_event_select(evtsel); 2860 - xscale2pmu_write_int_enable(ien); 2861 - spin_unlock_irqrestore(&pmu_lock, flags); 2862 - } 2863 - 2864 - static int 2865 - xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, 2866 - struct hw_perf_event *event) 2867 - { 2868 - int idx = xscale1pmu_get_event_idx(cpuc, event); 2869 - if (idx >= 0) 2870 - goto out; 2871 - 2872 - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) 2873 - idx = XSCALE_COUNTER3; 2874 - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) 2875 - idx = XSCALE_COUNTER2; 2876 - out: 2877 - return idx; 2878 - } 2879 - 2880 - static void 2881 - xscale2pmu_start(void) 2882 - { 2883 - unsigned long flags, val; 2884 - 2885 - spin_lock_irqsave(&pmu_lock, flags); 2886 - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; 2887 - val |= XSCALE_PMU_ENABLE; 2888 - xscale2pmu_write_pmnc(val); 2889 - spin_unlock_irqrestore(&pmu_lock, flags); 2890 - } 2891 - 2892 - static void 2893 - xscale2pmu_stop(void) 2894 - { 2895 - unsigned long flags, val; 2896 - 2897 - spin_lock_irqsave(&pmu_lock, flags); 2898 - val = xscale2pmu_read_pmnc(); 2899 - val &= ~XSCALE_PMU_ENABLE; 2900 - xscale2pmu_write_pmnc(val); 2901 - spin_unlock_irqrestore(&pmu_lock, flags); 2902 - } 2903 - 2904 - static inline u32 2905 - xscale2pmu_read_counter(int counter) 2906 - { 2907 - u32 val = 0; 2908 - 2909 - switch (counter) { 2910 - case XSCALE_CYCLE_COUNTER: 2911 - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); 2912 - break; 2913 - case XSCALE_COUNTER0: 2914 - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); 2915 - break; 2916 - case XSCALE_COUNTER1: 2917 - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); 2918 - break; 2919 - case XSCALE_COUNTER2: 2920 - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); 2921 - break; 2922 - case XSCALE_COUNTER3: 2923 - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); 2924 - break; 2925 - } 2926 - 2927 - return val; 2928 - } 2929 - 2930 - static inline void 2931 - xscale2pmu_write_counter(int counter, u32 val) 2932 - { 2933 - switch (counter) { 2934 - case XSCALE_CYCLE_COUNTER: 2935 - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); 2936 - break; 2937 - case XSCALE_COUNTER0: 2938 - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); 2939 - break; 2940 - case XSCALE_COUNTER1: 2941 - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); 2942 - break; 2943 - case XSCALE_COUNTER2: 2944 - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); 2945 - break; 2946 - case XSCALE_COUNTER3: 2947 - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); 2948 - break; 2949 - } 2950 - } 2951 - 2952 - static const struct arm_pmu xscale2pmu = { 2953 - .id = ARM_PERF_PMU_ID_XSCALE2, 2954 - .handle_irq = xscale2pmu_handle_irq, 2955 - .enable = xscale2pmu_enable_event, 2956 - .disable = xscale2pmu_disable_event, 2957 - .event_map = xscalepmu_event_map, 2958 - .raw_event = xscalepmu_raw_event, 2959 - .read_counter = xscale2pmu_read_counter, 2960 - .write_counter = xscale2pmu_write_counter, 2961 - .get_event_idx = xscale2pmu_get_event_idx, 2962 - .start = xscale2pmu_start, 2963 - .stop = xscale2pmu_stop, 2964 - .num_events = 5, 2965 - .max_period = (1LLU << 32) - 1, 2966 - }; 606 + /* Include the PMU-specific implementations. 
*/ 607 + #include "perf_event_xscale.c" 608 + #include "perf_event_v6.c" 609 + #include "perf_event_v7.c" 2967 610 2968 611 static int __init 2969 612 init_hw_perf_events(void) ··· 622 2977 case 0xB360: /* ARM1136 */ 623 2978 case 0xB560: /* ARM1156 */ 624 2979 case 0xB760: /* ARM1176 */ 625 - armpmu = &armv6pmu; 626 - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, 627 - sizeof(armv6_perf_cache_map)); 2980 + armpmu = armv6pmu_init(); 628 2981 break; 629 2982 case 0xB020: /* ARM11mpcore */ 630 - armpmu = &armv6mpcore_pmu; 631 - memcpy(armpmu_perf_cache_map, 632 - armv6mpcore_perf_cache_map, 633 - sizeof(armv6mpcore_perf_cache_map)); 2983 + armpmu = armv6mpcore_pmu_init(); 634 2984 break; 635 2985 case 0xC080: /* Cortex-A8 */ 636 - armv7pmu.id = ARM_PERF_PMU_ID_CA8; 637 - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, 638 - sizeof(armv7_a8_perf_cache_map)); 639 - armv7pmu.event_map = armv7_a8_pmu_event_map; 640 - armpmu = &armv7pmu; 641 - 642 - /* Reset PMNC and read the nb of CNTx counters 643 - supported */ 644 - armv7pmu.num_events = armv7_reset_read_pmnc(); 2986 + armpmu = armv7_a8_pmu_init(); 645 2987 break; 646 2988 case 0xC090: /* Cortex-A9 */ 647 - armv7pmu.id = ARM_PERF_PMU_ID_CA9; 648 - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, 649 - sizeof(armv7_a9_perf_cache_map)); 650 - armv7pmu.event_map = armv7_a9_pmu_event_map; 651 - armpmu = &armv7pmu; 652 - 653 - /* Reset PMNC and read the nb of CNTx counters 654 - supported */ 655 - armv7pmu.num_events = armv7_reset_read_pmnc(); 2989 + armpmu = armv7_a9_pmu_init(); 656 2990 break; 657 2991 } 658 2992 /* Intel CPUs [xscale]. */ ··· 639 3015 part_number = (cpuid >> 13) & 0x7; 640 3016 switch (part_number) { 641 3017 case 1: 642 - armpmu = &xscale1pmu; 643 - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 644 - sizeof(xscale_perf_cache_map)); 3018 + armpmu = xscale1pmu_init(); 645 3019 break; 646 3020 case 2: 647 - armpmu = &xscale2pmu; 648 - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, 649 - sizeof(xscale_perf_cache_map)); 3021 + armpmu = xscale2pmu_init(); 650 3022 break; 651 3023 } 652 3024 } 653 3025 654 3026 if (armpmu) { 655 3027 pr_info("enabled with %s PMU driver, %d counters available\n", 656 - arm_pmu_names[armpmu->id], armpmu->num_events); 3028 + armpmu->name, armpmu->num_events); 657 3029 } else { 658 3030 pr_info("no hardware support available\n"); 659 3031 }
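The dispatch change above is the heart of this split: instead of per-PMU mapping functions (event_map()/raw_event()) patched into a shared structure at probe time, each back-end now carries const lookup tables plus a raw-event mask, and exposes a single *_init() that hands the core a complete const struct arm_pmu. A minimal host-side sketch of that pattern — not kernel code; the demo_* names are illustrative only:

/* Table-driven event mapping plus init-function dispatch, as a
 * self-contained demo. HW_MAX mirrors PERF_COUNT_HW_MAX (7 generic
 * events at the time of this commit). */
#include <stdio.h>

#define HW_MAX          7
#define HW_UNSUPPORTED  0xFFFF

struct demo_pmu {
        const char *name;
        const unsigned (*event_map)[HW_MAX];    /* pointer to a whole table */
        unsigned raw_event_mask;
};

/* Values echo armv6_perf_map above: cycles, instructions, branches. */
static const unsigned demo_v6_map[HW_MAX] = {
        0xFF, 0x07, HW_UNSUPPORTED, HW_UNSUPPORTED, 0x05, 0x06, HW_UNSUPPORTED
};

static const struct demo_pmu demo_v6 = {
        .name           = "v6",
        .event_map      = &demo_v6_map,
        .raw_event_mask = 0xFF,
};

/* Stands in for armv6pmu_init(): returns the complete const descriptor. */
static const struct demo_pmu *demo_v6_init(void) { return &demo_v6; }

static int demo_map_event(const struct demo_pmu *pmu, unsigned config)
{
        int mapping = (*pmu->event_map)[config];
        return mapping == HW_UNSUPPORTED ? -1 : mapping;
}

int main(void)
{
        const struct demo_pmu *pmu = demo_v6_init();
        printf("%s: HW_INSTRUCTIONS -> %#x\n", pmu->name,
               (unsigned)demo_map_event(pmu, 1));
        printf("%s: raw 0x1234 -> %#x\n", pmu->name,
               0x1234 & pmu->raw_event_mask);
        return 0;
}

Adding a new PMU then means supplying tables and an init function; the CPUID switch in init_hw_perf_events() stays a trivial dispatcher.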
+672 -0
arch/arm/kernel/perf_event_v6.c
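Before the back-end code, it is worth recalling the interface all of this serves: userspace requests generic events via perf_event_open(2), and the per-PMU tables in these files translate each PERF_COUNT_HW_* value into a hardware event number. A minimal consumer sketch using the standard syscall interface — error handling deliberately trimmed, and it assumes a kernel with this PMU support enabled:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size     = sizeof(attr);
        attr.type     = PERF_TYPE_HARDWARE;         /* looked up via event_map */
        attr.config   = PERF_COUNT_HW_CPU_CYCLES;   /* e.g. ARMV6_PERFCTR_CPU_CYCLES */
        attr.disabled = 1;

        /* Count cycles for the calling thread on any CPU; fd < 0 check omitted. */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        read(fd, &count, sizeof(count));
        printf("cycles: %lld\n", count);
        close(fd);
        return 0;
}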
···
1 + /*
2 + * ARMv6 Performance counter handling code.
3 + *
4 + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
5 + *
6 + * ARMv6 has 2 configurable performance counters and a single cycle counter.
7 + * They all share a single reset bit but can be written to zero so we can use
8 + * that for a reset.
9 + *
10 + * The counters can't be individually enabled or disabled so when we remove
11 + * one event and replace it with another we could get spurious counts from the
12 + * wrong event. However, we can take advantage of the fact that the
13 + * performance counters can export events to the event bus, and the event bus
14 + * itself can be monitored. This requires that we *don't* export the events to
15 + * the event bus. The procedure for disabling a configurable counter is:
16 + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
17 + * effectively stops the counter from counting.
18 + * - disable the counter's interrupt generation (each counter has its
19 + * own interrupt enable bit).
20 + * Once stopped, the counter value can be written as 0 to reset.
21 + *
22 + * To enable a counter:
23 + * - enable the counter's interrupt generation.
24 + * - set the new event type.
25 + *
26 + * Note: the dedicated cycle counter only counts cycles and can't be
27 + * enabled/disabled independently of the others. When we want to disable the
28 + * cycle counter, we have to just disable the interrupt reporting and start
29 + * ignoring that counter. When re-enabling, we have to reset the value and
30 + * enable the interrupt.
31 + */
32 +
33 + #ifdef CONFIG_CPU_V6
34 + enum armv6_perf_types {
35 + ARMV6_PERFCTR_ICACHE_MISS = 0x0,
36 + ARMV6_PERFCTR_IBUF_STALL = 0x1,
37 + ARMV6_PERFCTR_DDEP_STALL = 0x2,
38 + ARMV6_PERFCTR_ITLB_MISS = 0x3,
39 + ARMV6_PERFCTR_DTLB_MISS = 0x4,
40 + ARMV6_PERFCTR_BR_EXEC = 0x5,
41 + ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
42 + ARMV6_PERFCTR_INSTR_EXEC = 0x7,
43 + ARMV6_PERFCTR_DCACHE_HIT = 0x9,
44 + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
45 + ARMV6_PERFCTR_DCACHE_MISS = 0xB,
46 + ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
47 + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
48 + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
49 + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
50 + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
51 + ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
52 + ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
53 + ARMV6_PERFCTR_NOP = 0x20,
54 + };
55 +
56 + enum armv6_counters {
57 + ARMV6_CYCLE_COUNTER = 1,
58 + ARMV6_COUNTER0,
59 + ARMV6_COUNTER1,
60 + };
61 +
62 + /*
63 + * The hardware events that we support. We do support cache operations but
64 + * we have harvard caches and no way to combine instruction and data
65 + * accesses/misses in hardware.
66 + */
67 + static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
68 + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
69 + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
70 + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
71 + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
72 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
73 + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
74 + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
75 + };
76 +
77 + static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
78 + [PERF_COUNT_HW_CACHE_OP_MAX]
79 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
80 + [C(L1D)] = {
81 + /*
82 + * The performance counters don't differentiate between read
83 + * and write accesses/misses so this isn't strictly correct,
84 + * but it's the best we can do.
Writes and reads get 85 + * combined. 86 + */ 87 + [C(OP_READ)] = { 88 + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 89 + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 90 + }, 91 + [C(OP_WRITE)] = { 92 + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, 93 + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, 94 + }, 95 + [C(OP_PREFETCH)] = { 96 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 97 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 98 + }, 99 + }, 100 + [C(L1I)] = { 101 + [C(OP_READ)] = { 102 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 103 + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 104 + }, 105 + [C(OP_WRITE)] = { 106 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 107 + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, 108 + }, 109 + [C(OP_PREFETCH)] = { 110 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 111 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 112 + }, 113 + }, 114 + [C(LL)] = { 115 + [C(OP_READ)] = { 116 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 117 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 118 + }, 119 + [C(OP_WRITE)] = { 120 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 121 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 122 + }, 123 + [C(OP_PREFETCH)] = { 124 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 125 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 126 + }, 127 + }, 128 + [C(DTLB)] = { 129 + /* 130 + * The ARM performance counters can count micro DTLB misses, 131 + * micro ITLB misses and main TLB misses. There isn't an event 132 + * for TLB misses, so use the micro misses here and if users 133 + * want the main TLB misses they can use a raw counter. 134 + */ 135 + [C(OP_READ)] = { 136 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 137 + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 138 + }, 139 + [C(OP_WRITE)] = { 140 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 141 + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, 142 + }, 143 + [C(OP_PREFETCH)] = { 144 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 145 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 146 + }, 147 + }, 148 + [C(ITLB)] = { 149 + [C(OP_READ)] = { 150 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 151 + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 152 + }, 153 + [C(OP_WRITE)] = { 154 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 155 + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, 156 + }, 157 + [C(OP_PREFETCH)] = { 158 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 159 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 160 + }, 161 + }, 162 + [C(BPU)] = { 163 + [C(OP_READ)] = { 164 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 165 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 166 + }, 167 + [C(OP_WRITE)] = { 168 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 169 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 170 + }, 171 + [C(OP_PREFETCH)] = { 172 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 173 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 174 + }, 175 + }, 176 + }; 177 + 178 + enum armv6mpcore_perf_types { 179 + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, 180 + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, 181 + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, 182 + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, 183 + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, 184 + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, 185 + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, 186 + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, 187 + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, 188 + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, 189 + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, 190 + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, 191 + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, 192 + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, 193 + 
ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, 194 + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, 195 + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, 196 + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, 197 + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, 198 + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, 199 + }; 200 + 201 + /* 202 + * The hardware events that we support. We do support cache operations but 203 + * we have harvard caches and no way to combine instruction and data 204 + * accesses/misses in hardware. 205 + */ 206 + static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { 207 + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, 208 + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, 209 + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 210 + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 211 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, 212 + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, 213 + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 214 + }; 215 + 216 + static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 217 + [PERF_COUNT_HW_CACHE_OP_MAX] 218 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 219 + [C(L1D)] = { 220 + [C(OP_READ)] = { 221 + [C(RESULT_ACCESS)] = 222 + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, 223 + [C(RESULT_MISS)] = 224 + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, 225 + }, 226 + [C(OP_WRITE)] = { 227 + [C(RESULT_ACCESS)] = 228 + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, 229 + [C(RESULT_MISS)] = 230 + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, 231 + }, 232 + [C(OP_PREFETCH)] = { 233 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 234 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 235 + }, 236 + }, 237 + [C(L1I)] = { 238 + [C(OP_READ)] = { 239 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 240 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 241 + }, 242 + [C(OP_WRITE)] = { 243 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 244 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, 245 + }, 246 + [C(OP_PREFETCH)] = { 247 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 248 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 249 + }, 250 + }, 251 + [C(LL)] = { 252 + [C(OP_READ)] = { 253 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 254 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 255 + }, 256 + [C(OP_WRITE)] = { 257 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 258 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 259 + }, 260 + [C(OP_PREFETCH)] = { 261 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 262 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 263 + }, 264 + }, 265 + [C(DTLB)] = { 266 + /* 267 + * The ARM performance counters can count micro DTLB misses, 268 + * micro ITLB misses and main TLB misses. There isn't an event 269 + * for TLB misses, so use the micro misses here and if users 270 + * want the main TLB misses they can use a raw counter. 
271 + */ 272 + [C(OP_READ)] = { 273 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 274 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 275 + }, 276 + [C(OP_WRITE)] = { 277 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 278 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, 279 + }, 280 + [C(OP_PREFETCH)] = { 281 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 282 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 283 + }, 284 + }, 285 + [C(ITLB)] = { 286 + [C(OP_READ)] = { 287 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 288 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 289 + }, 290 + [C(OP_WRITE)] = { 291 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 292 + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, 293 + }, 294 + [C(OP_PREFETCH)] = { 295 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 296 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 297 + }, 298 + }, 299 + [C(BPU)] = { 300 + [C(OP_READ)] = { 301 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 302 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 303 + }, 304 + [C(OP_WRITE)] = { 305 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 306 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 307 + }, 308 + [C(OP_PREFETCH)] = { 309 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 310 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 311 + }, 312 + }, 313 + }; 314 + 315 + static inline unsigned long 316 + armv6_pmcr_read(void) 317 + { 318 + u32 val; 319 + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); 320 + return val; 321 + } 322 + 323 + static inline void 324 + armv6_pmcr_write(unsigned long val) 325 + { 326 + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); 327 + } 328 + 329 + #define ARMV6_PMCR_ENABLE (1 << 0) 330 + #define ARMV6_PMCR_CTR01_RESET (1 << 1) 331 + #define ARMV6_PMCR_CCOUNT_RESET (1 << 2) 332 + #define ARMV6_PMCR_CCOUNT_DIV (1 << 3) 333 + #define ARMV6_PMCR_COUNT0_IEN (1 << 4) 334 + #define ARMV6_PMCR_COUNT1_IEN (1 << 5) 335 + #define ARMV6_PMCR_CCOUNT_IEN (1 << 6) 336 + #define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) 337 + #define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) 338 + #define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) 339 + #define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 340 + #define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) 341 + #define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 342 + #define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) 343 + 344 + #define ARMV6_PMCR_OVERFLOWED_MASK \ 345 + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ 346 + ARMV6_PMCR_CCOUNT_OVERFLOW) 347 + 348 + static inline int 349 + armv6_pmcr_has_overflowed(unsigned long pmcr) 350 + { 351 + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; 352 + } 353 + 354 + static inline int 355 + armv6_pmcr_counter_has_overflowed(unsigned long pmcr, 356 + enum armv6_counters counter) 357 + { 358 + int ret = 0; 359 + 360 + if (ARMV6_CYCLE_COUNTER == counter) 361 + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; 362 + else if (ARMV6_COUNTER0 == counter) 363 + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; 364 + else if (ARMV6_COUNTER1 == counter) 365 + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; 366 + else 367 + WARN_ONCE(1, "invalid counter number (%d)\n", counter); 368 + 369 + return ret; 370 + } 371 + 372 + static inline u32 373 + armv6pmu_read_counter(int counter) 374 + { 375 + unsigned long value = 0; 376 + 377 + if (ARMV6_CYCLE_COUNTER == counter) 378 + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); 379 + else if (ARMV6_COUNTER0 == counter) 380 + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); 381 + else if (ARMV6_COUNTER1 == counter) 382 + asm 
volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); 383 + else 384 + WARN_ONCE(1, "invalid counter number (%d)\n", counter); 385 + 386 + return value; 387 + } 388 + 389 + static inline void 390 + armv6pmu_write_counter(int counter, 391 + u32 value) 392 + { 393 + if (ARMV6_CYCLE_COUNTER == counter) 394 + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); 395 + else if (ARMV6_COUNTER0 == counter) 396 + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); 397 + else if (ARMV6_COUNTER1 == counter) 398 + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); 399 + else 400 + WARN_ONCE(1, "invalid counter number (%d)\n", counter); 401 + } 402 + 403 + void 404 + armv6pmu_enable_event(struct hw_perf_event *hwc, 405 + int idx) 406 + { 407 + unsigned long val, mask, evt, flags; 408 + 409 + if (ARMV6_CYCLE_COUNTER == idx) { 410 + mask = 0; 411 + evt = ARMV6_PMCR_CCOUNT_IEN; 412 + } else if (ARMV6_COUNTER0 == idx) { 413 + mask = ARMV6_PMCR_EVT_COUNT0_MASK; 414 + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | 415 + ARMV6_PMCR_COUNT0_IEN; 416 + } else if (ARMV6_COUNTER1 == idx) { 417 + mask = ARMV6_PMCR_EVT_COUNT1_MASK; 418 + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | 419 + ARMV6_PMCR_COUNT1_IEN; 420 + } else { 421 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 422 + return; 423 + } 424 + 425 + /* 426 + * Mask out the current event and set the counter to count the event 427 + * that we're interested in. 428 + */ 429 + spin_lock_irqsave(&pmu_lock, flags); 430 + val = armv6_pmcr_read(); 431 + val &= ~mask; 432 + val |= evt; 433 + armv6_pmcr_write(val); 434 + spin_unlock_irqrestore(&pmu_lock, flags); 435 + } 436 + 437 + static irqreturn_t 438 + armv6pmu_handle_irq(int irq_num, 439 + void *dev) 440 + { 441 + unsigned long pmcr = armv6_pmcr_read(); 442 + struct perf_sample_data data; 443 + struct cpu_hw_events *cpuc; 444 + struct pt_regs *regs; 445 + int idx; 446 + 447 + if (!armv6_pmcr_has_overflowed(pmcr)) 448 + return IRQ_NONE; 449 + 450 + regs = get_irq_regs(); 451 + 452 + /* 453 + * The interrupts are cleared by writing the overflow flags back to 454 + * the control register. All of the other bits don't have any effect 455 + * if they are rewritten, so write the whole value back. 456 + */ 457 + armv6_pmcr_write(pmcr); 458 + 459 + perf_sample_data_init(&data, 0); 460 + 461 + cpuc = &__get_cpu_var(cpu_hw_events); 462 + for (idx = 0; idx <= armpmu->num_events; ++idx) { 463 + struct perf_event *event = cpuc->events[idx]; 464 + struct hw_perf_event *hwc; 465 + 466 + if (!test_bit(idx, cpuc->active_mask)) 467 + continue; 468 + 469 + /* 470 + * We have a single interrupt for all counters. Check that 471 + * each counter has overflowed before we process it. 472 + */ 473 + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) 474 + continue; 475 + 476 + hwc = &event->hw; 477 + armpmu_event_update(event, hwc, idx); 478 + data.period = event->hw.last_period; 479 + if (!armpmu_event_set_period(event, hwc, idx)) 480 + continue; 481 + 482 + if (perf_event_overflow(event, 0, &data, regs)) 483 + armpmu->disable(hwc, idx); 484 + } 485 + 486 + /* 487 + * Handle the pending perf events. 488 + * 489 + * Note: this call *must* be run with interrupts disabled. For 490 + * platforms that can have the PMU interrupts raised as an NMI, this 491 + * will not work. 
492 + */ 493 + irq_work_run(); 494 + 495 + return IRQ_HANDLED; 496 + } 497 + 498 + static void 499 + armv6pmu_start(void) 500 + { 501 + unsigned long flags, val; 502 + 503 + spin_lock_irqsave(&pmu_lock, flags); 504 + val = armv6_pmcr_read(); 505 + val |= ARMV6_PMCR_ENABLE; 506 + armv6_pmcr_write(val); 507 + spin_unlock_irqrestore(&pmu_lock, flags); 508 + } 509 + 510 + static void 511 + armv6pmu_stop(void) 512 + { 513 + unsigned long flags, val; 514 + 515 + spin_lock_irqsave(&pmu_lock, flags); 516 + val = armv6_pmcr_read(); 517 + val &= ~ARMV6_PMCR_ENABLE; 518 + armv6_pmcr_write(val); 519 + spin_unlock_irqrestore(&pmu_lock, flags); 520 + } 521 + 522 + static int 523 + armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, 524 + struct hw_perf_event *event) 525 + { 526 + /* Always place a cycle counter into the cycle counter. */ 527 + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { 528 + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) 529 + return -EAGAIN; 530 + 531 + return ARMV6_CYCLE_COUNTER; 532 + } else { 533 + /* 534 + * For anything other than a cycle counter, try and use 535 + * counter0 and counter1. 536 + */ 537 + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) 538 + return ARMV6_COUNTER1; 539 + 540 + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) 541 + return ARMV6_COUNTER0; 542 + 543 + /* The counters are all in use. */ 544 + return -EAGAIN; 545 + } 546 + } 547 + 548 + static void 549 + armv6pmu_disable_event(struct hw_perf_event *hwc, 550 + int idx) 551 + { 552 + unsigned long val, mask, evt, flags; 553 + 554 + if (ARMV6_CYCLE_COUNTER == idx) { 555 + mask = ARMV6_PMCR_CCOUNT_IEN; 556 + evt = 0; 557 + } else if (ARMV6_COUNTER0 == idx) { 558 + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; 559 + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; 560 + } else if (ARMV6_COUNTER1 == idx) { 561 + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; 562 + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; 563 + } else { 564 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 565 + return; 566 + } 567 + 568 + /* 569 + * Mask out the current event and set the counter to count the number 570 + * of ETM bus signal assertion cycles. The external reporting should 571 + * be disabled and so this should never increment. 572 + */ 573 + spin_lock_irqsave(&pmu_lock, flags); 574 + val = armv6_pmcr_read(); 575 + val &= ~mask; 576 + val |= evt; 577 + armv6_pmcr_write(val); 578 + spin_unlock_irqrestore(&pmu_lock, flags); 579 + } 580 + 581 + static void 582 + armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, 583 + int idx) 584 + { 585 + unsigned long val, mask, flags, evt = 0; 586 + 587 + if (ARMV6_CYCLE_COUNTER == idx) { 588 + mask = ARMV6_PMCR_CCOUNT_IEN; 589 + } else if (ARMV6_COUNTER0 == idx) { 590 + mask = ARMV6_PMCR_COUNT0_IEN; 591 + } else if (ARMV6_COUNTER1 == idx) { 592 + mask = ARMV6_PMCR_COUNT1_IEN; 593 + } else { 594 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 595 + return; 596 + } 597 + 598 + /* 599 + * Unlike UP ARMv6, we don't have a way of stopping the counters. We 600 + * simply disable the interrupt reporting. 
601 + */ 602 + spin_lock_irqsave(&pmu_lock, flags); 603 + val = armv6_pmcr_read(); 604 + val &= ~mask; 605 + val |= evt; 606 + armv6_pmcr_write(val); 607 + spin_unlock_irqrestore(&pmu_lock, flags); 608 + } 609 + 610 + static const struct arm_pmu armv6pmu = { 611 + .id = ARM_PERF_PMU_ID_V6, 612 + .name = "v6", 613 + .handle_irq = armv6pmu_handle_irq, 614 + .enable = armv6pmu_enable_event, 615 + .disable = armv6pmu_disable_event, 616 + .read_counter = armv6pmu_read_counter, 617 + .write_counter = armv6pmu_write_counter, 618 + .get_event_idx = armv6pmu_get_event_idx, 619 + .start = armv6pmu_start, 620 + .stop = armv6pmu_stop, 621 + .cache_map = &armv6_perf_cache_map, 622 + .event_map = &armv6_perf_map, 623 + .raw_event_mask = 0xFF, 624 + .num_events = 3, 625 + .max_period = (1LLU << 32) - 1, 626 + }; 627 + 628 + const struct arm_pmu *__init armv6pmu_init(void) 629 + { 630 + return &armv6pmu; 631 + } 632 + 633 + /* 634 + * ARMv6mpcore is almost identical to single core ARMv6 with the exception 635 + * that some of the events have different enumerations and that there is no 636 + * *hack* to stop the programmable counters. To stop the counters we simply 637 + * disable the interrupt reporting and update the event. When unthrottling we 638 + * reset the period and enable the interrupt reporting. 639 + */ 640 + static const struct arm_pmu armv6mpcore_pmu = { 641 + .id = ARM_PERF_PMU_ID_V6MP, 642 + .name = "v6mpcore", 643 + .handle_irq = armv6pmu_handle_irq, 644 + .enable = armv6pmu_enable_event, 645 + .disable = armv6mpcore_pmu_disable_event, 646 + .read_counter = armv6pmu_read_counter, 647 + .write_counter = armv6pmu_write_counter, 648 + .get_event_idx = armv6pmu_get_event_idx, 649 + .start = armv6pmu_start, 650 + .stop = armv6pmu_stop, 651 + .cache_map = &armv6mpcore_perf_cache_map, 652 + .event_map = &armv6mpcore_perf_map, 653 + .raw_event_mask = 0xFF, 654 + .num_events = 3, 655 + .max_period = (1LLU << 32) - 1, 656 + }; 657 + 658 + const struct arm_pmu *__init armv6mpcore_pmu_init(void) 659 + { 660 + return &armv6mpcore_pmu; 661 + } 662 + #else 663 + const struct arm_pmu *__init armv6pmu_init(void) 664 + { 665 + return NULL; 666 + } 667 + 668 + const struct arm_pmu *__init armv6mpcore_pmu_init(void) 669 + { 670 + return NULL; 671 + } 672 + #endif /* CONFIG_CPU_V6 */
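The enable/disable trick described in the file's header comment comes down to a read-modify-write on PMCR, as done by armv6pmu_enable_event() and armv6pmu_disable_event() above. A host-side simulation using the ARMV6_PMCR_* values from this file, with a plain variable standing in for the cp15 register so it runs anywhere:

#include <stdio.h>

/* Values copied from perf_event_v6.c above. */
#define ARMV6_PMCR_COUNT0_IEN        (1 << 4)
#define ARMV6_PMCR_EVT_COUNT0_SHIFT  20
#define ARMV6_PMCR_EVT_COUNT0_MASK   (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PERFCTR_BR_EXEC        0x5
#define ARMV6_PERFCTR_NOP            0x20  /* ETMEXTOUT[0]: counts nothing */

static unsigned long pmcr;  /* stands in for the coprocessor register */

int main(void)
{
        unsigned long mask, evt;

        /* Enable: select the branch event on counter 0, unmask its IRQ. */
        mask = ARMV6_PMCR_EVT_COUNT0_MASK;
        evt  = (ARMV6_PERFCTR_BR_EXEC << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
               ARMV6_PMCR_COUNT0_IEN;
        pmcr = (pmcr & ~mask) | evt;
        printf("enabled:  pmcr=%#lx\n", pmcr);

        /* Disable: point the counter at the NOP event and mask its IRQ,
         * since ARMv6 counters cannot be stopped individually. */
        mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
        evt  = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
        pmcr = (pmcr & ~mask) | evt;
        printf("disabled: pmcr=%#lx\n", pmcr);
        return 0;
}

On real hardware both writes happen under pmu_lock with interrupts disabled, since the one register is shared by all three counters.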
+906 -0
arch/arm/kernel/perf_event_v7.c
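A reading aid for the file that follows, sketched from the defines it introduces further down: perf indexes counters from ARMV7_CYCLE_COUNTER == 1, while the hardware numbers its event counters from 0 and parks the cycle counter at bit 31, so the CNTENS/CNTENC/INTENS/INTENC helpers all translate the index before forming a bit mask. A small host-side sketch of that translation:

#include <stdio.h>

/* Values as defined later in perf_event_v7.c. */
#define ARMV7_CYCLE_COUNTER      1
#define ARMV7_COUNTER0           2
#define ARMV7_CNT0               0
#define ARMV7_CCNT               31
#define ARMV7_EVENT_CNT_TO_CNTx  (ARMV7_COUNTER0 - ARMV7_CNT0)

/* Bit to set in CNTENS/CNTENC/INTENS/INTENC for a given perf index. */
static unsigned int counter_bit(int idx)
{
        if (idx == ARMV7_CYCLE_COUNTER)
                return 1U << ARMV7_CCNT;              /* the ARMV7_*_C case */
        return 1U << (idx - ARMV7_EVENT_CNT_TO_CNTx); /* the ARMV7_*_P(idx) case */
}

int main(void)
{
        printf("cycle counter   -> %#x\n", counter_bit(ARMV7_CYCLE_COUNTER));
        printf("event counter 0 -> %#x\n", counter_bit(ARMV7_COUNTER0));
        printf("event counter 1 -> %#x\n", counter_bit(ARMV7_COUNTER0 + 1));
        return 0;
}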
··· 1 + /* 2 + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. 3 + * 4 + * ARMv7 support: Jean Pihet <jpihet@mvista.com> 5 + * 2010 (c) MontaVista Software, LLC. 6 + * 7 + * Copied from ARMv6 code, with the low level code inspired 8 + * by the ARMv7 Oprofile code. 9 + * 10 + * Cortex-A8 has up to 4 configurable performance counters and 11 + * a single cycle counter. 12 + * Cortex-A9 has up to 31 configurable performance counters and 13 + * a single cycle counter. 14 + * 15 + * All counters can be enabled/disabled and IRQ masked separately. The cycle 16 + * counter and all 4 performance counters together can be reset separately. 17 + */ 18 + 19 + #ifdef CONFIG_CPU_V7 20 + /* Common ARMv7 event types */ 21 + enum armv7_perf_types { 22 + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, 23 + ARMV7_PERFCTR_IFETCH_MISS = 0x01, 24 + ARMV7_PERFCTR_ITLB_MISS = 0x02, 25 + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, 26 + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, 27 + ARMV7_PERFCTR_DTLB_REFILL = 0x05, 28 + ARMV7_PERFCTR_DREAD = 0x06, 29 + ARMV7_PERFCTR_DWRITE = 0x07, 30 + 31 + ARMV7_PERFCTR_EXC_TAKEN = 0x09, 32 + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, 33 + ARMV7_PERFCTR_CID_WRITE = 0x0B, 34 + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. 35 + * It counts: 36 + * - all branch instructions, 37 + * - instructions that explicitly write the PC, 38 + * - exception generating instructions. 39 + */ 40 + ARMV7_PERFCTR_PC_WRITE = 0x0C, 41 + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, 42 + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, 43 + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, 44 + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, 45 + 46 + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, 47 + 48 + ARMV7_PERFCTR_CPU_CYCLES = 0xFF 49 + }; 50 + 51 + /* ARMv7 Cortex-A8 specific event types */ 52 + enum armv7_a8_perf_types { 53 + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, 54 + 55 + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, 56 + 57 + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, 58 + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, 59 + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, 60 + ARMV7_PERFCTR_L2_ACCESS = 0x43, 61 + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, 62 + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, 63 + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, 64 + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, 65 + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, 66 + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, 67 + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, 68 + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, 69 + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, 70 + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, 71 + ARMV7_PERFCTR_L2_NEON = 0x4E, 72 + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, 73 + ARMV7_PERFCTR_L1_INST = 0x50, 74 + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, 75 + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, 76 + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, 77 + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, 78 + ARMV7_PERFCTR_OP_EXECUTED = 0x55, 79 + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, 80 + ARMV7_PERFCTR_CYCLES_INST = 0x57, 81 + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, 82 + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, 83 + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, 84 + 85 + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, 86 + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, 87 + ARMV7_PERFCTR_PMU_EVENTS = 0x72, 88 + }; 89 + 90 + /* ARMv7 Cortex-A9 specific event types */ 91 + enum armv7_a9_perf_types { 92 + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, 93 + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, 94 + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, 95 + 96 + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, 97 + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, 98 + 99 + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, 100 
+ ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, 101 + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, 102 + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, 103 + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, 104 + ARMV7_PERFCTR_DATA_EVICTION = 0x65, 105 + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, 106 + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, 107 + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, 108 + 109 + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, 110 + 111 + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, 112 + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, 113 + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, 114 + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, 115 + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, 116 + 117 + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, 118 + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, 119 + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, 120 + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, 121 + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, 122 + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, 123 + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, 124 + 125 + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, 126 + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, 127 + 128 + ARMV7_PERFCTR_ISB_INST = 0x90, 129 + ARMV7_PERFCTR_DSB_INST = 0x91, 130 + ARMV7_PERFCTR_DMB_INST = 0x92, 131 + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, 132 + 133 + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, 134 + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, 135 + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, 136 + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, 137 + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, 138 + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 139 + }; 140 + 141 + /* 142 + * Cortex-A8 HW events mapping 143 + * 144 + * The hardware events that we support. We do support cache operations but 145 + * we have harvard caches and no way to combine instruction and data 146 + * accesses/misses in hardware. 147 + */ 148 + static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { 149 + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, 150 + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, 151 + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 152 + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 153 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 154 + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 155 + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, 156 + }; 157 + 158 + static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 159 + [PERF_COUNT_HW_CACHE_OP_MAX] 160 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 161 + [C(L1D)] = { 162 + /* 163 + * The performance counters don't differentiate between read 164 + * and write accesses/misses so this isn't strictly correct, 165 + * but it's the best we can do. Writes and reads get 166 + * combined. 
167 + */ 168 + [C(OP_READ)] = { 169 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 170 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 171 + }, 172 + [C(OP_WRITE)] = { 173 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 174 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 175 + }, 176 + [C(OP_PREFETCH)] = { 177 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 178 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 179 + }, 180 + }, 181 + [C(L1I)] = { 182 + [C(OP_READ)] = { 183 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, 184 + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, 185 + }, 186 + [C(OP_WRITE)] = { 187 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, 188 + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, 189 + }, 190 + [C(OP_PREFETCH)] = { 191 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 192 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 193 + }, 194 + }, 195 + [C(LL)] = { 196 + [C(OP_READ)] = { 197 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, 198 + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, 199 + }, 200 + [C(OP_WRITE)] = { 201 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, 202 + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, 203 + }, 204 + [C(OP_PREFETCH)] = { 205 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 206 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 207 + }, 208 + }, 209 + [C(DTLB)] = { 210 + /* 211 + * Only ITLB misses and DTLB refills are supported. 212 + * If users want the DTLB refills misses a raw counter 213 + * must be used. 214 + */ 215 + [C(OP_READ)] = { 216 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 217 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 218 + }, 219 + [C(OP_WRITE)] = { 220 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 221 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 222 + }, 223 + [C(OP_PREFETCH)] = { 224 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 225 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 226 + }, 227 + }, 228 + [C(ITLB)] = { 229 + [C(OP_READ)] = { 230 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 231 + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 232 + }, 233 + [C(OP_WRITE)] = { 234 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 235 + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 236 + }, 237 + [C(OP_PREFETCH)] = { 238 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 239 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 240 + }, 241 + }, 242 + [C(BPU)] = { 243 + [C(OP_READ)] = { 244 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 245 + [C(RESULT_MISS)] 246 + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 247 + }, 248 + [C(OP_WRITE)] = { 249 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 250 + [C(RESULT_MISS)] 251 + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 252 + }, 253 + [C(OP_PREFETCH)] = { 254 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 255 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 256 + }, 257 + }, 258 + }; 259 + 260 + /* 261 + * Cortex-A9 HW events mapping 262 + */ 263 + static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { 264 + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, 265 + [PERF_COUNT_HW_INSTRUCTIONS] = 266 + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, 267 + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, 268 + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, 269 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, 270 + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 271 + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, 272 + }; 273 + 274 + static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 275 + [PERF_COUNT_HW_CACHE_OP_MAX] 276 + 
[PERF_COUNT_HW_CACHE_RESULT_MAX] = { 277 + [C(L1D)] = { 278 + /* 279 + * The performance counters don't differentiate between read 280 + * and write accesses/misses so this isn't strictly correct, 281 + * but it's the best we can do. Writes and reads get 282 + * combined. 283 + */ 284 + [C(OP_READ)] = { 285 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 286 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 287 + }, 288 + [C(OP_WRITE)] = { 289 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, 290 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, 291 + }, 292 + [C(OP_PREFETCH)] = { 293 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 294 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 295 + }, 296 + }, 297 + [C(L1I)] = { 298 + [C(OP_READ)] = { 299 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 300 + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 301 + }, 302 + [C(OP_WRITE)] = { 303 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 304 + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, 305 + }, 306 + [C(OP_PREFETCH)] = { 307 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 308 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 309 + }, 310 + }, 311 + [C(LL)] = { 312 + [C(OP_READ)] = { 313 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 314 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 315 + }, 316 + [C(OP_WRITE)] = { 317 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 318 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 319 + }, 320 + [C(OP_PREFETCH)] = { 321 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 322 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 323 + }, 324 + }, 325 + [C(DTLB)] = { 326 + /* 327 + * Only ITLB misses and DTLB refills are supported. 328 + * If users want the DTLB refills misses a raw counter 329 + * must be used. 330 + */ 331 + [C(OP_READ)] = { 332 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 333 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 334 + }, 335 + [C(OP_WRITE)] = { 336 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 337 + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, 338 + }, 339 + [C(OP_PREFETCH)] = { 340 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 341 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 342 + }, 343 + }, 344 + [C(ITLB)] = { 345 + [C(OP_READ)] = { 346 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 347 + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 348 + }, 349 + [C(OP_WRITE)] = { 350 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 351 + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, 352 + }, 353 + [C(OP_PREFETCH)] = { 354 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 355 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 356 + }, 357 + }, 358 + [C(BPU)] = { 359 + [C(OP_READ)] = { 360 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 361 + [C(RESULT_MISS)] 362 + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 363 + }, 364 + [C(OP_WRITE)] = { 365 + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, 366 + [C(RESULT_MISS)] 367 + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, 368 + }, 369 + [C(OP_PREFETCH)] = { 370 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 371 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 372 + }, 373 + }, 374 + }; 375 + 376 + /* 377 + * Perf Events counters 378 + */ 379 + enum armv7_counters { 380 + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ 381 + ARMV7_COUNTER0 = 2, /* First event counter */ 382 + }; 383 + 384 + /* 385 + * The cycle counter is ARMV7_CYCLE_COUNTER. 386 + * The first event counter is ARMV7_COUNTER0. 387 + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). 
388 + */ 389 + #define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) 390 + 391 + /* 392 + * ARMv7 low level PMNC access 393 + */ 394 + 395 + /* 396 + * Per-CPU PMNC: config reg 397 + */ 398 + #define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ 399 + #define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ 400 + #define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ 401 + #define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ 402 + #define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ 403 + #define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ 404 + #define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ 405 + #define ARMV7_PMNC_N_MASK 0x1f 406 + #define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ 407 + 408 + /* 409 + * Available counters 410 + */ 411 + #define ARMV7_CNT0 0 /* First event counter */ 412 + #define ARMV7_CCNT 31 /* Cycle counter */ 413 + 414 + /* Perf Event to low level counters mapping */ 415 + #define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) 416 + 417 + /* 418 + * CNTENS: counters enable reg 419 + */ 420 + #define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 421 + #define ARMV7_CNTENS_C (1 << ARMV7_CCNT) 422 + 423 + /* 424 + * CNTENC: counters disable reg 425 + */ 426 + #define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 427 + #define ARMV7_CNTENC_C (1 << ARMV7_CCNT) 428 + 429 + /* 430 + * INTENS: counters overflow interrupt enable reg 431 + */ 432 + #define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 433 + #define ARMV7_INTENS_C (1 << ARMV7_CCNT) 434 + 435 + /* 436 + * INTENC: counters overflow interrupt disable reg 437 + */ 438 + #define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 439 + #define ARMV7_INTENC_C (1 << ARMV7_CCNT) 440 + 441 + /* 442 + * EVTSEL: Event selection reg 443 + */ 444 + #define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ 445 + 446 + /* 447 + * SELECT: Counter selection reg 448 + */ 449 + #define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ 450 + 451 + /* 452 + * FLAG: counters overflow flag status reg 453 + */ 454 + #define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) 455 + #define ARMV7_FLAG_C (1 << ARMV7_CCNT) 456 + #define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ 457 + #define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK 458 + 459 + static inline unsigned long armv7_pmnc_read(void) 460 + { 461 + u32 val; 462 + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); 463 + return val; 464 + } 465 + 466 + static inline void armv7_pmnc_write(unsigned long val) 467 + { 468 + val &= ARMV7_PMNC_MASK; 469 + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); 470 + } 471 + 472 + static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) 473 + { 474 + return pmnc & ARMV7_OVERFLOWED_MASK; 475 + } 476 + 477 + static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, 478 + enum armv7_counters counter) 479 + { 480 + int ret = 0; 481 + 482 + if (counter == ARMV7_CYCLE_COUNTER) 483 + ret = pmnc & ARMV7_FLAG_C; 484 + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) 485 + ret = pmnc & ARMV7_FLAG_P(counter); 486 + else 487 + pr_err("CPU%u checking wrong counter %d overflow status\n", 488 + smp_processor_id(), counter); 489 + 490 + return ret; 491 + } 492 + 493 + static inline int armv7_pmnc_select_counter(unsigned int idx) 494 + { 495 + u32 val; 496 + 497 + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { 498 + pr_err("CPU%u selecting wrong 
PMNC counter" 499 + " %d\n", smp_processor_id(), idx); 500 + return -1; 501 + } 502 + 503 + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; 504 + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); 505 + 506 + return idx; 507 + } 508 + 509 + static inline u32 armv7pmu_read_counter(int idx) 510 + { 511 + unsigned long value = 0; 512 + 513 + if (idx == ARMV7_CYCLE_COUNTER) 514 + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); 515 + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { 516 + if (armv7_pmnc_select_counter(idx) == idx) 517 + asm volatile("mrc p15, 0, %0, c9, c13, 2" 518 + : "=r" (value)); 519 + } else 520 + pr_err("CPU%u reading wrong counter %d\n", 521 + smp_processor_id(), idx); 522 + 523 + return value; 524 + } 525 + 526 + static inline void armv7pmu_write_counter(int idx, u32 value) 527 + { 528 + if (idx == ARMV7_CYCLE_COUNTER) 529 + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); 530 + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { 531 + if (armv7_pmnc_select_counter(idx) == idx) 532 + asm volatile("mcr p15, 0, %0, c9, c13, 2" 533 + : : "r" (value)); 534 + } else 535 + pr_err("CPU%u writing wrong counter %d\n", 536 + smp_processor_id(), idx); 537 + } 538 + 539 + static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) 540 + { 541 + if (armv7_pmnc_select_counter(idx) == idx) { 542 + val &= ARMV7_EVTSEL_MASK; 543 + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); 544 + } 545 + } 546 + 547 + static inline u32 armv7_pmnc_enable_counter(unsigned int idx) 548 + { 549 + u32 val; 550 + 551 + if ((idx != ARMV7_CYCLE_COUNTER) && 552 + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 553 + pr_err("CPU%u enabling wrong PMNC counter" 554 + " %d\n", smp_processor_id(), idx); 555 + return -1; 556 + } 557 + 558 + if (idx == ARMV7_CYCLE_COUNTER) 559 + val = ARMV7_CNTENS_C; 560 + else 561 + val = ARMV7_CNTENS_P(idx); 562 + 563 + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); 564 + 565 + return idx; 566 + } 567 + 568 + static inline u32 armv7_pmnc_disable_counter(unsigned int idx) 569 + { 570 + u32 val; 571 + 572 + 573 + if ((idx != ARMV7_CYCLE_COUNTER) && 574 + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 575 + pr_err("CPU%u disabling wrong PMNC counter" 576 + " %d\n", smp_processor_id(), idx); 577 + return -1; 578 + } 579 + 580 + if (idx == ARMV7_CYCLE_COUNTER) 581 + val = ARMV7_CNTENC_C; 582 + else 583 + val = ARMV7_CNTENC_P(idx); 584 + 585 + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); 586 + 587 + return idx; 588 + } 589 + 590 + static inline u32 armv7_pmnc_enable_intens(unsigned int idx) 591 + { 592 + u32 val; 593 + 594 + if ((idx != ARMV7_CYCLE_COUNTER) && 595 + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 596 + pr_err("CPU%u enabling wrong PMNC counter" 597 + " interrupt enable %d\n", smp_processor_id(), idx); 598 + return -1; 599 + } 600 + 601 + if (idx == ARMV7_CYCLE_COUNTER) 602 + val = ARMV7_INTENS_C; 603 + else 604 + val = ARMV7_INTENS_P(idx); 605 + 606 + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); 607 + 608 + return idx; 609 + } 610 + 611 + static inline u32 armv7_pmnc_disable_intens(unsigned int idx) 612 + { 613 + u32 val; 614 + 615 + if ((idx != ARMV7_CYCLE_COUNTER) && 616 + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { 617 + pr_err("CPU%u disabling wrong PMNC counter" 618 + " interrupt enable %d\n", smp_processor_id(), idx); 619 + return -1; 620 + } 621 + 622 + if (idx == ARMV7_CYCLE_COUNTER) 623 + 
val = ARMV7_INTENC_C; 624 + else 625 + val = ARMV7_INTENC_P(idx); 626 + 627 + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); 628 + 629 + return idx; 630 + } 631 + 632 + static inline u32 armv7_pmnc_getreset_flags(void) 633 + { 634 + u32 val; 635 + 636 + /* Read */ 637 + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); 638 + 639 + /* Write to clear flags */ 640 + val &= ARMV7_FLAG_MASK; 641 + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); 642 + 643 + return val; 644 + } 645 + 646 + #ifdef DEBUG 647 + static void armv7_pmnc_dump_regs(void) 648 + { 649 + u32 val; 650 + unsigned int cnt; 651 + 652 + printk(KERN_INFO "PMNC registers dump:\n"); 653 + 654 + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); 655 + printk(KERN_INFO "PMNC =0x%08x\n", val); 656 + 657 + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); 658 + printk(KERN_INFO "CNTENS=0x%08x\n", val); 659 + 660 + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); 661 + printk(KERN_INFO "INTENS=0x%08x\n", val); 662 + 663 + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); 664 + printk(KERN_INFO "FLAGS =0x%08x\n", val); 665 + 666 + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); 667 + printk(KERN_INFO "SELECT=0x%08x\n", val); 668 + 669 + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); 670 + printk(KERN_INFO "CCNT =0x%08x\n", val); 671 + 672 + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { 673 + armv7_pmnc_select_counter(cnt); 674 + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); 675 + printk(KERN_INFO "CNT[%d] count =0x%08x\n", 676 + cnt - ARMV7_EVENT_CNT_TO_CNTx, val); 677 + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); 678 + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", 679 + cnt - ARMV7_EVENT_CNT_TO_CNTx, val); 680 + } 681 + } 682 + #endif 683 + 684 + static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) 685 + { 686 + unsigned long flags; 687 + 688 + /* 689 + * Enable counter and interrupt, and set the counter to count 690 + * the event that we're interested in. 691 + */ 692 + spin_lock_irqsave(&pmu_lock, flags); 693 + 694 + /* 695 + * Disable counter 696 + */ 697 + armv7_pmnc_disable_counter(idx); 698 + 699 + /* 700 + * Set event (if destined for PMNx counters) 701 + * We don't need to set the event if it's a cycle count 702 + */ 703 + if (idx != ARMV7_CYCLE_COUNTER) 704 + armv7_pmnc_write_evtsel(idx, hwc->config_base); 705 + 706 + /* 707 + * Enable interrupt for this counter 708 + */ 709 + armv7_pmnc_enable_intens(idx); 710 + 711 + /* 712 + * Enable counter 713 + */ 714 + armv7_pmnc_enable_counter(idx); 715 + 716 + spin_unlock_irqrestore(&pmu_lock, flags); 717 + } 718 + 719 + static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) 720 + { 721 + unsigned long flags; 722 + 723 + /* 724 + * Disable counter and interrupt 725 + */ 726 + spin_lock_irqsave(&pmu_lock, flags); 727 + 728 + /* 729 + * Disable counter 730 + */ 731 + armv7_pmnc_disable_counter(idx); 732 + 733 + /* 734 + * Disable interrupt for this counter 735 + */ 736 + armv7_pmnc_disable_intens(idx); 737 + 738 + spin_unlock_irqrestore(&pmu_lock, flags); 739 + } 740 + 741 + static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) 742 + { 743 + unsigned long pmnc; 744 + struct perf_sample_data data; 745 + struct cpu_hw_events *cpuc; 746 + struct pt_regs *regs; 747 + int idx; 748 + 749 + /* 750 + * Get and reset the IRQ flags 751 + */ 752 + pmnc = armv7_pmnc_getreset_flags(); 753 + 754 + /* 755 + * Did an overflow occur?
756 + */ 757 + if (!armv7_pmnc_has_overflowed(pmnc)) 758 + return IRQ_NONE; 759 + 760 + /* 761 + * Handle the counter(s) overflow(s) 762 + */ 763 + regs = get_irq_regs(); 764 + 765 + perf_sample_data_init(&data, 0); 766 + 767 + cpuc = &__get_cpu_var(cpu_hw_events); 768 + for (idx = 0; idx <= armpmu->num_events; ++idx) { 769 + struct perf_event *event = cpuc->events[idx]; 770 + struct hw_perf_event *hwc; 771 + 772 + if (!test_bit(idx, cpuc->active_mask)) 773 + continue; 774 + 775 + /* 776 + * We have a single interrupt for all counters. Check that 777 + * each counter has overflowed before we process it. 778 + */ 779 + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) 780 + continue; 781 + 782 + hwc = &event->hw; 783 + armpmu_event_update(event, hwc, idx); 784 + data.period = event->hw.last_period; 785 + if (!armpmu_event_set_period(event, hwc, idx)) 786 + continue; 787 + 788 + if (perf_event_overflow(event, 0, &data, regs)) 789 + armpmu->disable(hwc, idx); 790 + } 791 + 792 + /* 793 + * Handle the pending perf events. 794 + * 795 + * Note: this call *must* be run with interrupts disabled. For 796 + * platforms that can have the PMU interrupts raised as an NMI, this 797 + * will not work. 798 + */ 799 + irq_work_run(); 800 + 801 + return IRQ_HANDLED; 802 + } 803 + 804 + static void armv7pmu_start(void) 805 + { 806 + unsigned long flags; 807 + 808 + spin_lock_irqsave(&pmu_lock, flags); 809 + /* Enable all counters */ 810 + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); 811 + spin_unlock_irqrestore(&pmu_lock, flags); 812 + } 813 + 814 + static void armv7pmu_stop(void) 815 + { 816 + unsigned long flags; 817 + 818 + spin_lock_irqsave(&pmu_lock, flags); 819 + /* Disable all counters */ 820 + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); 821 + spin_unlock_irqrestore(&pmu_lock, flags); 822 + } 823 + 824 + static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, 825 + struct hw_perf_event *event) 826 + { 827 + int idx; 828 + 829 + /* Always place a cycle count event into the cycle counter. */ 830 + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { 831 + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) 832 + return -EAGAIN; 833 + 834 + return ARMV7_CYCLE_COUNTER; 835 + } else { 836 + /* 837 + * For anything other than a cycle count event, try to use 838 + * one of the event counters. 839 + */ 840 + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { 841 + if (!test_and_set_bit(idx, cpuc->used_mask)) 842 + return idx; 843 + } 844 + 845 + /* The counters are all in use.
*/ 846 + return -EAGAIN; 847 + } 848 + } 849 + 850 + static struct arm_pmu armv7pmu = { 851 + .handle_irq = armv7pmu_handle_irq, 852 + .enable = armv7pmu_enable_event, 853 + .disable = armv7pmu_disable_event, 854 + .read_counter = armv7pmu_read_counter, 855 + .write_counter = armv7pmu_write_counter, 856 + .get_event_idx = armv7pmu_get_event_idx, 857 + .start = armv7pmu_start, 858 + .stop = armv7pmu_stop, 859 + .raw_event_mask = 0xFF, 860 + .max_period = (1LLU << 32) - 1, 861 + }; 862 + 863 + static u32 __init armv7_reset_read_pmnc(void) 864 + { 865 + u32 nb_cnt; 866 + 867 + /* Initialize & Reset PMNC: C and P bits */ 868 + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); 869 + 870 + /* Read the number of CNTx counters supported from PMNC */ 871 + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; 872 + 873 + /* Add the CPU cycles counter and return */ 874 + return nb_cnt + 1; 875 + } 876 + 877 + const struct arm_pmu *__init armv7_a8_pmu_init(void) 878 + { 879 + armv7pmu.id = ARM_PERF_PMU_ID_CA8; 880 + armv7pmu.name = "ARMv7 Cortex-A8"; 881 + armv7pmu.cache_map = &armv7_a8_perf_cache_map; 882 + armv7pmu.event_map = &armv7_a8_perf_map; 883 + armv7pmu.num_events = armv7_reset_read_pmnc(); 884 + return &armv7pmu; 885 + } 886 + 887 + const struct arm_pmu *__init armv7_a9_pmu_init(void) 888 + { 889 + armv7pmu.id = ARM_PERF_PMU_ID_CA9; 890 + armv7pmu.name = "ARMv7 Cortex-A9"; 891 + armv7pmu.cache_map = &armv7_a9_perf_cache_map; 892 + armv7pmu.event_map = &armv7_a9_perf_map; 893 + armv7pmu.num_events = armv7_reset_read_pmnc(); 894 + return &armv7pmu; 895 + } 896 + #else 897 + const struct arm_pmu *__init armv7_a8_pmu_init(void) 898 + { 899 + return NULL; 900 + } 901 + 902 + const struct arm_pmu *__init armv7_a9_pmu_init(void) 903 + { 904 + return NULL; 905 + } 906 + #endif /* CONFIG_CPU_V7 */
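A minimal sketch (not part of the patch) of how a PERF_TYPE_HW_CACHE config word selects an entry in the cache_map tables above, assuming the standard perf ABI packing of attr.config (cache id, op and result, one byte each); the helper name decode_cache_config() is hypothetical and simply mirrors the decode done by armpmu_map_cache_event():

static int decode_cache_config(u64 config, unsigned int *type,
			       unsigned int *op, unsigned int *result)
{
	/*
	 * Hypothetical helper: perf packs a generic cache event as
	 * config = id | (op << 8) | (result << 16), and each field
	 * indexes one dimension of the per-PMU cache_map array.
	 */
	*type	= (config >>  0) & 0xff;	/* e.g. C(L1D) */
	*op	= (config >>  8) & 0xff;	/* e.g. C(OP_READ) */
	*result	= (config >> 16) & 0xff;	/* e.g. C(RESULT_MISS) */

	if (*type >= PERF_COUNT_HW_CACHE_MAX ||
	    *op >= PERF_COUNT_HW_CACHE_OP_MAX ||
	    *result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
		return -EINVAL;

	return 0;
}

A slot holding CACHE_OP_UNSUPPORTED (for instance every prefetch slot above) is then reported back to the core as -ENOENT.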
+807
arch/arm/kernel/perf_event_xscale.c
··· 1 + /* 2 + * ARMv5 [xscale] Performance counter handling code. 3 + * 4 + * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> 5 + * 6 + * Based on the previous xscale OProfile code. 7 + * 8 + * There are two variants of the xscale PMU that we support: 9 + * - xscale1pmu: 2 event counters and a cycle counter 10 + * - xscale2pmu: 4 event counters and a cycle counter 11 + * The two variants share event definitions, but have different 12 + * PMU structures. 13 + */ 14 + 15 + #ifdef CONFIG_CPU_XSCALE 16 + enum xscale_perf_types { 17 + XSCALE_PERFCTR_ICACHE_MISS = 0x00, 18 + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, 19 + XSCALE_PERFCTR_DATA_STALL = 0x02, 20 + XSCALE_PERFCTR_ITLB_MISS = 0x03, 21 + XSCALE_PERFCTR_DTLB_MISS = 0x04, 22 + XSCALE_PERFCTR_BRANCH = 0x05, 23 + XSCALE_PERFCTR_BRANCH_MISS = 0x06, 24 + XSCALE_PERFCTR_INSTRUCTION = 0x07, 25 + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, 26 + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, 27 + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, 28 + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, 29 + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, 30 + XSCALE_PERFCTR_PC_CHANGED = 0x0D, 31 + XSCALE_PERFCTR_BCU_REQUEST = 0x10, 32 + XSCALE_PERFCTR_BCU_FULL = 0x11, 33 + XSCALE_PERFCTR_BCU_DRAIN = 0x12, 34 + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, 35 + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, 36 + XSCALE_PERFCTR_RMW = 0x16, 37 + /* XSCALE_PERFCTR_CCNT is not hardware defined */ 38 + XSCALE_PERFCTR_CCNT = 0xFE, 39 + XSCALE_PERFCTR_UNUSED = 0xFF, 40 + }; 41 + 42 + enum xscale_counters { 43 + XSCALE_CYCLE_COUNTER = 1, 44 + XSCALE_COUNTER0, 45 + XSCALE_COUNTER1, 46 + XSCALE_COUNTER2, 47 + XSCALE_COUNTER3, 48 + }; 49 + 50 + static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { 51 + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, 52 + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, 53 + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, 54 + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, 55 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, 56 + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, 57 + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, 58 + }; 59 + 60 + static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] 61 + [PERF_COUNT_HW_CACHE_OP_MAX] 62 + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { 63 + [C(L1D)] = { 64 + [C(OP_READ)] = { 65 + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, 66 + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, 67 + }, 68 + [C(OP_WRITE)] = { 69 + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, 70 + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, 71 + }, 72 + [C(OP_PREFETCH)] = { 73 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 74 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 75 + }, 76 + }, 77 + [C(L1I)] = { 78 + [C(OP_READ)] = { 79 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 80 + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, 81 + }, 82 + [C(OP_WRITE)] = { 83 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 84 + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, 85 + }, 86 + [C(OP_PREFETCH)] = { 87 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 88 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 89 + }, 90 + }, 91 + [C(LL)] = { 92 + [C(OP_READ)] = { 93 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 94 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 95 + }, 96 + [C(OP_WRITE)] = { 97 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 98 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 99 + }, 100 + [C(OP_PREFETCH)] = { 101 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 102 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 103 + }, 104 + 
}, 105 + [C(DTLB)] = { 106 + [C(OP_READ)] = { 107 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 108 + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, 109 + }, 110 + [C(OP_WRITE)] = { 111 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 112 + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, 113 + }, 114 + [C(OP_PREFETCH)] = { 115 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 116 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 117 + }, 118 + }, 119 + [C(ITLB)] = { 120 + [C(OP_READ)] = { 121 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 122 + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, 123 + }, 124 + [C(OP_WRITE)] = { 125 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 126 + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, 127 + }, 128 + [C(OP_PREFETCH)] = { 129 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 130 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 131 + }, 132 + }, 133 + [C(BPU)] = { 134 + [C(OP_READ)] = { 135 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 136 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 137 + }, 138 + [C(OP_WRITE)] = { 139 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 140 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 141 + }, 142 + [C(OP_PREFETCH)] = { 143 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 144 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 145 + }, 146 + }, 147 + }; 148 + 149 + #define XSCALE_PMU_ENABLE 0x001 150 + #define XSCALE_PMN_RESET 0x002 151 + #define XSCALE_CCNT_RESET 0x004 152 + #define XSCALE_PMU_RESET (XSCALE_CCNT_RESET | XSCALE_PMN_RESET) 153 + #define XSCALE_PMU_CNT64 0x008 154 + 155 + #define XSCALE1_OVERFLOWED_MASK 0x700 156 + #define XSCALE1_CCOUNT_OVERFLOW 0x400 157 + #define XSCALE1_COUNT0_OVERFLOW 0x100 158 + #define XSCALE1_COUNT1_OVERFLOW 0x200 159 + #define XSCALE1_CCOUNT_INT_EN 0x040 160 + #define XSCALE1_COUNT0_INT_EN 0x010 161 + #define XSCALE1_COUNT1_INT_EN 0x020 162 + #define XSCALE1_COUNT0_EVT_SHFT 12 163 + #define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) 164 + #define XSCALE1_COUNT1_EVT_SHFT 20 165 + #define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) 166 + 167 + static inline u32 168 + xscale1pmu_read_pmnc(void) 169 + { 170 + u32 val; 171 + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); 172 + return val; 173 + } 174 + 175 + static inline void 176 + xscale1pmu_write_pmnc(u32 val) 177 + { 178 + /* upper 4 bits and bits 7 and 11 are write-as-0 */ 179 + val &= 0xffff77f; 180 + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); 181 + } 182 + 183 + static inline int 184 + xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, 185 + enum xscale_counters counter) 186 + { 187 + int ret = 0; 188 + 189 + switch (counter) { 190 + case XSCALE_CYCLE_COUNTER: 191 + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; 192 + break; 193 + case XSCALE_COUNTER0: 194 + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; 195 + break; 196 + case XSCALE_COUNTER1: 197 + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; 198 + break; 199 + default: 200 + WARN_ONCE(1, "invalid counter number (%d)\n", counter); 201 + } 202 + 203 + return ret; 204 + } 205 + 206 + static irqreturn_t 207 + xscale1pmu_handle_irq(int irq_num, void *dev) 208 + { 209 + unsigned long pmnc; 210 + struct perf_sample_data data; 211 + struct cpu_hw_events *cpuc; 212 + struct pt_regs *regs; 213 + int idx; 214 + 215 + /* 216 + * NOTE: the A stepping has an erratum whereby, if one overflow 217 + * bit is already set and another overflow occurs, the 218 + * previously set bit is cleared. There is no workaround; 219 + * this is fixed in the B stepping and later.
220 + */ 221 + pmnc = xscale1pmu_read_pmnc(); 222 + 223 + /* 224 + * Write the value back to clear the overflow flags. Overflow 225 + * flags remain in pmnc for use below. We also disable the PMU 226 + * while we process the interrupt. 227 + */ 228 + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); 229 + 230 + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) 231 + return IRQ_NONE; 232 + 233 + regs = get_irq_regs(); 234 + 235 + perf_sample_data_init(&data, 0); 236 + 237 + cpuc = &__get_cpu_var(cpu_hw_events); 238 + for (idx = 0; idx <= armpmu->num_events; ++idx) { 239 + struct perf_event *event = cpuc->events[idx]; 240 + struct hw_perf_event *hwc; 241 + 242 + if (!test_bit(idx, cpuc->active_mask)) 243 + continue; 244 + 245 + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) 246 + continue; 247 + 248 + hwc = &event->hw; 249 + armpmu_event_update(event, hwc, idx); 250 + data.period = event->hw.last_period; 251 + if (!armpmu_event_set_period(event, hwc, idx)) 252 + continue; 253 + 254 + if (perf_event_overflow(event, 0, &data, regs)) 255 + armpmu->disable(hwc, idx); 256 + } 257 + 258 + irq_work_run(); 259 + 260 + /* 261 + * Re-enable the PMU. 262 + */ 263 + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; 264 + xscale1pmu_write_pmnc(pmnc); 265 + 266 + return IRQ_HANDLED; 267 + } 268 + 269 + static void 270 + xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) 271 + { 272 + unsigned long val, mask, evt, flags; 273 + 274 + switch (idx) { 275 + case XSCALE_CYCLE_COUNTER: 276 + mask = 0; 277 + evt = XSCALE1_CCOUNT_INT_EN; 278 + break; 279 + case XSCALE_COUNTER0: 280 + mask = XSCALE1_COUNT0_EVT_MASK; 281 + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | 282 + XSCALE1_COUNT0_INT_EN; 283 + break; 284 + case XSCALE_COUNTER1: 285 + mask = XSCALE1_COUNT1_EVT_MASK; 286 + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | 287 + XSCALE1_COUNT1_INT_EN; 288 + break; 289 + default: 290 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 291 + return; 292 + } 293 + 294 + spin_lock_irqsave(&pmu_lock, flags); 295 + val = xscale1pmu_read_pmnc(); 296 + val &= ~mask; 297 + val |= evt; 298 + xscale1pmu_write_pmnc(val); 299 + spin_unlock_irqrestore(&pmu_lock, flags); 300 + } 301 + 302 + static void 303 + xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) 304 + { 305 + unsigned long val, mask, evt, flags; 306 + 307 + switch (idx) { 308 + case XSCALE_CYCLE_COUNTER: 309 + mask = XSCALE1_CCOUNT_INT_EN; 310 + evt = 0; 311 + break; 312 + case XSCALE_COUNTER0: 313 + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; 314 + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; 315 + break; 316 + case XSCALE_COUNTER1: 317 + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; 318 + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; 319 + break; 320 + default: 321 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 322 + return; 323 + } 324 + 325 + spin_lock_irqsave(&pmu_lock, flags); 326 + val = xscale1pmu_read_pmnc(); 327 + val &= ~mask; 328 + val |= evt; 329 + xscale1pmu_write_pmnc(val); 330 + spin_unlock_irqrestore(&pmu_lock, flags); 331 + } 332 + 333 + static int 334 + xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, 335 + struct hw_perf_event *event) 336 + { 337 + if (XSCALE_PERFCTR_CCNT == event->config_base) { 338 + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) 339 + return -EAGAIN; 340 + 341 + return XSCALE_CYCLE_COUNTER; 342 + } else { 343 + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) 344 + return XSCALE_COUNTER1; 345 + 346 + if 
(!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) 347 + return XSCALE_COUNTER0; 348 + 349 + return -EAGAIN; 350 + } 351 + } 352 + 353 + static void 354 + xscale1pmu_start(void) 355 + { 356 + unsigned long flags, val; 357 + 358 + spin_lock_irqsave(&pmu_lock, flags); 359 + val = xscale1pmu_read_pmnc(); 360 + val |= XSCALE_PMU_ENABLE; 361 + xscale1pmu_write_pmnc(val); 362 + spin_unlock_irqrestore(&pmu_lock, flags); 363 + } 364 + 365 + static void 366 + xscale1pmu_stop(void) 367 + { 368 + unsigned long flags, val; 369 + 370 + spin_lock_irqsave(&pmu_lock, flags); 371 + val = xscale1pmu_read_pmnc(); 372 + val &= ~XSCALE_PMU_ENABLE; 373 + xscale1pmu_write_pmnc(val); 374 + spin_unlock_irqrestore(&pmu_lock, flags); 375 + } 376 + 377 + static inline u32 378 + xscale1pmu_read_counter(int counter) 379 + { 380 + u32 val = 0; 381 + 382 + switch (counter) { 383 + case XSCALE_CYCLE_COUNTER: 384 + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); 385 + break; 386 + case XSCALE_COUNTER0: 387 + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); 388 + break; 389 + case XSCALE_COUNTER1: 390 + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); 391 + break; 392 + } 393 + 394 + return val; 395 + } 396 + 397 + static inline void 398 + xscale1pmu_write_counter(int counter, u32 val) 399 + { 400 + switch (counter) { 401 + case XSCALE_CYCLE_COUNTER: 402 + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); 403 + break; 404 + case XSCALE_COUNTER0: 405 + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); 406 + break; 407 + case XSCALE_COUNTER1: 408 + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); 409 + break; 410 + } 411 + } 412 + 413 + static const struct arm_pmu xscale1pmu = { 414 + .id = ARM_PERF_PMU_ID_XSCALE1, 415 + .name = "xscale1", 416 + .handle_irq = xscale1pmu_handle_irq, 417 + .enable = xscale1pmu_enable_event, 418 + .disable = xscale1pmu_disable_event, 419 + .read_counter = xscale1pmu_read_counter, 420 + .write_counter = xscale1pmu_write_counter, 421 + .get_event_idx = xscale1pmu_get_event_idx, 422 + .start = xscale1pmu_start, 423 + .stop = xscale1pmu_stop, 424 + .cache_map = &xscale_perf_cache_map, 425 + .event_map = &xscale_perf_map, 426 + .raw_event_mask = 0xFF, 427 + .num_events = 3, 428 + .max_period = (1LLU << 32) - 1, 429 + }; 430 + 431 + const struct arm_pmu *__init xscale1pmu_init(void) 432 + { 433 + return &xscale1pmu; 434 + } 435 + 436 + #define XSCALE2_OVERFLOWED_MASK 0x01f 437 + #define XSCALE2_CCOUNT_OVERFLOW 0x001 438 + #define XSCALE2_COUNT0_OVERFLOW 0x002 439 + #define XSCALE2_COUNT1_OVERFLOW 0x004 440 + #define XSCALE2_COUNT2_OVERFLOW 0x008 441 + #define XSCALE2_COUNT3_OVERFLOW 0x010 442 + #define XSCALE2_CCOUNT_INT_EN 0x001 443 + #define XSCALE2_COUNT0_INT_EN 0x002 444 + #define XSCALE2_COUNT1_INT_EN 0x004 445 + #define XSCALE2_COUNT2_INT_EN 0x008 446 + #define XSCALE2_COUNT3_INT_EN 0x010 447 + #define XSCALE2_COUNT0_EVT_SHFT 0 448 + #define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) 449 + #define XSCALE2_COUNT1_EVT_SHFT 8 450 + #define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) 451 + #define XSCALE2_COUNT2_EVT_SHFT 16 452 + #define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) 453 + #define XSCALE2_COUNT3_EVT_SHFT 24 454 + #define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) 455 + 456 + static inline u32 457 + xscale2pmu_read_pmnc(void) 458 + { 459 + u32 val; 460 + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); 461 + /* bits 1-2 and 4-23 are read-unpredictable */ 462 + return val & 
0xff000009; 463 + } 464 + 465 + static inline void 466 + xscale2pmu_write_pmnc(u32 val) 467 + { 468 + /* bits 4-23 are write-as-0, 24-31 are write ignored */ 469 + val &= 0xf; 470 + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); 471 + } 472 + 473 + static inline u32 474 + xscale2pmu_read_overflow_flags(void) 475 + { 476 + u32 val; 477 + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); 478 + return val; 479 + } 480 + 481 + static inline void 482 + xscale2pmu_write_overflow_flags(u32 val) 483 + { 484 + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); 485 + } 486 + 487 + static inline u32 488 + xscale2pmu_read_event_select(void) 489 + { 490 + u32 val; 491 + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); 492 + return val; 493 + } 494 + 495 + static inline void 496 + xscale2pmu_write_event_select(u32 val) 497 + { 498 + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); 499 + } 500 + 501 + static inline u32 502 + xscale2pmu_read_int_enable(void) 503 + { 504 + u32 val; 505 + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); 506 + return val; 507 + } 508 + 509 + static void 510 + xscale2pmu_write_int_enable(u32 val) 511 + { 512 + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); 513 + } 514 + 515 + static inline int 516 + xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, 517 + enum xscale_counters counter) 518 + { 519 + int ret = 0; 520 + 521 + switch (counter) { 522 + case XSCALE_CYCLE_COUNTER: 523 + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; 524 + break; 525 + case XSCALE_COUNTER0: 526 + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; 527 + break; 528 + case XSCALE_COUNTER1: 529 + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; 530 + break; 531 + case XSCALE_COUNTER2: 532 + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; 533 + break; 534 + case XSCALE_COUNTER3: 535 + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; 536 + break; 537 + default: 538 + WARN_ONCE(1, "invalid counter number (%d)\n", counter); 539 + } 540 + 541 + return ret; 542 + } 543 + 544 + static irqreturn_t 545 + xscale2pmu_handle_irq(int irq_num, void *dev) 546 + { 547 + unsigned long pmnc, of_flags; 548 + struct perf_sample_data data; 549 + struct cpu_hw_events *cpuc; 550 + struct pt_regs *regs; 551 + int idx; 552 + 553 + /* Disable the PMU. */ 554 + pmnc = xscale2pmu_read_pmnc(); 555 + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); 556 + 557 + /* Check the overflow flag register. */ 558 + of_flags = xscale2pmu_read_overflow_flags(); 559 + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) 560 + return IRQ_NONE; 561 + 562 + /* Clear the overflow bits. */ 563 + xscale2pmu_write_overflow_flags(of_flags); 564 + 565 + regs = get_irq_regs(); 566 + 567 + perf_sample_data_init(&data, 0); 568 + 569 + cpuc = &__get_cpu_var(cpu_hw_events); 570 + for (idx = 0; idx <= armpmu->num_events; ++idx) { 571 + struct perf_event *event = cpuc->events[idx]; 572 + struct hw_perf_event *hwc; 573 + 574 + if (!test_bit(idx, cpuc->active_mask)) 575 + continue; 576 + 577 + if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) 578 + continue; 579 + 580 + hwc = &event->hw; 581 + armpmu_event_update(event, hwc, idx); 582 + data.period = event->hw.last_period; 583 + if (!armpmu_event_set_period(event, hwc, idx)) 584 + continue; 585 + 586 + if (perf_event_overflow(event, 0, &data, regs)) 587 + armpmu->disable(hwc, idx); 588 + } 589 + 590 + irq_work_run(); 591 + 592 + /* 593 + * Re-enable the PMU.
594 + */ 595 + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; 596 + xscale2pmu_write_pmnc(pmnc); 597 + 598 + return IRQ_HANDLED; 599 + } 600 + 601 + static void 602 + xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) 603 + { 604 + unsigned long flags, ien, evtsel; 605 + 606 + ien = xscale2pmu_read_int_enable(); 607 + evtsel = xscale2pmu_read_event_select(); 608 + 609 + switch (idx) { 610 + case XSCALE_CYCLE_COUNTER: 611 + ien |= XSCALE2_CCOUNT_INT_EN; 612 + break; 613 + case XSCALE_COUNTER0: 614 + ien |= XSCALE2_COUNT0_INT_EN; 615 + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; 616 + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; 617 + break; 618 + case XSCALE_COUNTER1: 619 + ien |= XSCALE2_COUNT1_INT_EN; 620 + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; 621 + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; 622 + break; 623 + case XSCALE_COUNTER2: 624 + ien |= XSCALE2_COUNT2_INT_EN; 625 + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; 626 + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; 627 + break; 628 + case XSCALE_COUNTER3: 629 + ien |= XSCALE2_COUNT3_INT_EN; 630 + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; 631 + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; 632 + break; 633 + default: 634 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 635 + return; 636 + } 637 + 638 + spin_lock_irqsave(&pmu_lock, flags); 639 + xscale2pmu_write_event_select(evtsel); 640 + xscale2pmu_write_int_enable(ien); 641 + spin_unlock_irqrestore(&pmu_lock, flags); 642 + } 643 + 644 + static void 645 + xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) 646 + { 647 + unsigned long flags, ien, evtsel; 648 + 649 + ien = xscale2pmu_read_int_enable(); 650 + evtsel = xscale2pmu_read_event_select(); 651 + 652 + switch (idx) { 653 + case XSCALE_CYCLE_COUNTER: 654 + ien &= ~XSCALE2_CCOUNT_INT_EN; 655 + break; 656 + case XSCALE_COUNTER0: 657 + ien &= ~XSCALE2_COUNT0_INT_EN; 658 + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; 659 + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; 660 + break; 661 + case XSCALE_COUNTER1: 662 + ien &= ~XSCALE2_COUNT1_INT_EN; 663 + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; 664 + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; 665 + break; 666 + case XSCALE_COUNTER2: 667 + ien &= ~XSCALE2_COUNT2_INT_EN; 668 + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; 669 + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; 670 + break; 671 + case XSCALE_COUNTER3: 672 + ien &= ~XSCALE2_COUNT3_INT_EN; 673 + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; 674 + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; 675 + break; 676 + default: 677 + WARN_ONCE(1, "invalid counter number (%d)\n", idx); 678 + return; 679 + } 680 + 681 + spin_lock_irqsave(&pmu_lock, flags); 682 + xscale2pmu_write_event_select(evtsel); 683 + xscale2pmu_write_int_enable(ien); 684 + spin_unlock_irqrestore(&pmu_lock, flags); 685 + } 686 + 687 + static int 688 + xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, 689 + struct hw_perf_event *event) 690 + { 691 + int idx = xscale1pmu_get_event_idx(cpuc, event); 692 + if (idx >= 0) 693 + goto out; 694 + 695 + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) 696 + idx = XSCALE_COUNTER3; 697 + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) 698 + idx = XSCALE_COUNTER2; 699 + out: 700 + return idx; 701 + } 702 + 703 + static void 704 + xscale2pmu_start(void) 705 + { 706 + unsigned long flags, val; 707 + 708 + spin_lock_irqsave(&pmu_lock, flags); 709 + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; 710 + val |= XSCALE_PMU_ENABLE; 711 + 
xscale2pmu_write_pmnc(val); 712 + spin_unlock_irqrestore(&pmu_lock, flags); 713 + } 714 + 715 + static void 716 + xscale2pmu_stop(void) 717 + { 718 + unsigned long flags, val; 719 + 720 + spin_lock_irqsave(&pmu_lock, flags); 721 + val = xscale2pmu_read_pmnc(); 722 + val &= ~XSCALE_PMU_ENABLE; 723 + xscale2pmu_write_pmnc(val); 724 + spin_unlock_irqrestore(&pmu_lock, flags); 725 + } 726 + 727 + static inline u32 728 + xscale2pmu_read_counter(int counter) 729 + { 730 + u32 val = 0; 731 + 732 + switch (counter) { 733 + case XSCALE_CYCLE_COUNTER: 734 + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); 735 + break; 736 + case XSCALE_COUNTER0: 737 + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); 738 + break; 739 + case XSCALE_COUNTER1: 740 + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); 741 + break; 742 + case XSCALE_COUNTER2: 743 + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); 744 + break; 745 + case XSCALE_COUNTER3: 746 + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); 747 + break; 748 + } 749 + 750 + return val; 751 + } 752 + 753 + static inline void 754 + xscale2pmu_write_counter(int counter, u32 val) 755 + { 756 + switch (counter) { 757 + case XSCALE_CYCLE_COUNTER: 758 + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); 759 + break; 760 + case XSCALE_COUNTER0: 761 + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); 762 + break; 763 + case XSCALE_COUNTER1: 764 + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); 765 + break; 766 + case XSCALE_COUNTER2: 767 + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); 768 + break; 769 + case XSCALE_COUNTER3: 770 + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); 771 + break; 772 + } 773 + } 774 + 775 + static const struct arm_pmu xscale2pmu = { 776 + .id = ARM_PERF_PMU_ID_XSCALE2, 777 + .name = "xscale2", 778 + .handle_irq = xscale2pmu_handle_irq, 779 + .enable = xscale2pmu_enable_event, 780 + .disable = xscale2pmu_disable_event, 781 + .read_counter = xscale2pmu_read_counter, 782 + .write_counter = xscale2pmu_write_counter, 783 + .get_event_idx = xscale2pmu_get_event_idx, 784 + .start = xscale2pmu_start, 785 + .stop = xscale2pmu_stop, 786 + .cache_map = &xscale_perf_cache_map, 787 + .event_map = &xscale_perf_map, 788 + .raw_event_mask = 0xFF, 789 + .num_events = 5, 790 + .max_period = (1LLU << 32) - 1, 791 + }; 792 + 793 + const struct arm_pmu *__init xscale2pmu_init(void) 794 + { 795 + return &xscale2pmu; 796 + } 797 + #else 798 + const struct arm_pmu *__init xscale1pmu_init(void) 799 + { 800 + return NULL; 801 + } 802 + 803 + const struct arm_pmu *__init xscale2pmu_init(void) 804 + { 805 + return NULL; 806 + } 807 + #endif /* CONFIG_CPU_XSCALE */
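As a userspace sketch of how these tables are exercised (not part of either file, and assuming only the standard perf_event_open(2) ABI): opening a PERF_TYPE_HARDWARE counter for PERF_COUNT_HW_CPU_CYCLES goes through armpmu_map_event(), which yields ARMV7_PERFCTR_CPU_CYCLES on Cortex-A8/A9 and XSCALE_PERFCTR_CCNT on the xscale PMUs. perf_event_open() has no glibc wrapper, so it is invoked via syscall(2):

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_CPU_CYCLES; /* mapped by armpmu_map_event() */
	attr.disabled = 1;

	/* Count cycles for the calling task, on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %lld\n", count);

	close(fd);
	return 0;
}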