Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'csky-for-linus-5.3-rc1' of git://github.com/c-sky/csky-linux

Pull arch/csky updates from Guo Ren:
"This round of csky subsystem gives two features (ASID algorithm
update, Perf pmu record support) and some fixups.

ASID updates:
- Revert mmu ASID mechanism
- Add new asid lib code from arm
- Use generic asid algorithm to implement switch_mm
- Improve tlb operation with help of asid

Perf pmu record support:
- Init pmu as a device
- Add count-width property for csky pmu
- Add pmu interrupt support
- Fix perf record in kernel/user space
- dt-bindings: Add csky PMU bindings

Fixes:
- Fixup no panic in kernel for some traps
- Fixup some error count in 810 & 860.
- Fixup abiv1 memset error"

* tag 'csky-for-linus-5.3-rc1' of git://github.com/c-sky/csky-linux:
csky: Fixup abiv1 memset error
csky: Improve tlb operation with help of asid
csky: Use generic asid algorithm to implement switch_mm
csky: Add new asid lib code from arm
csky: Revert mmu ASID mechanism
dt-bindings: csky: Add csky PMU bindings
dt-bindings: interrupt-controller: Update csky mpintc
csky: Fixup some error count in 810 & 860.
csky: Fix perf record in kernel/user space
csky: Add pmu interrupt support
csky: Add count-width property for csky pmu
csky: Init pmu as a device
csky: Fixup no panic in kernel for some traps
csky: Select intc & timer drivers

+890 -358
+38
Documentation/devicetree/bindings/csky/pmu.txt
··· 1 + =============================== 2 + C-SKY Performance Monitor Units 3 + =============================== 4 + 5 + C-SKY Performance Monitor is designed for ck807/ck810/ck860 SMP soc and 6 + it could count cpu's events for helping analysis performance issues. 7 + 8 + ============================ 9 + PMU node bindings definition 10 + ============================ 11 + 12 + Description: Describes PMU 13 + 14 + PROPERTIES 15 + 16 + - compatible 17 + Usage: required 18 + Value type: <string> 19 + Definition: must be "csky,csky-pmu" 20 + - interrupts 21 + Usage: required 22 + Value type: <u32 IRQ_TYPE_XXX> 23 + Definition: must be pmu irq num defined by soc 24 + - count-width 25 + Usage: optional 26 + Value type: <u32> 27 + Definition: the width of pmu counter 28 + 29 + Examples: 30 + --------- 31 + #include <dt-bindings/interrupt-controller/irq.h> 32 + 33 + pmu: performace-monitor { 34 + compatible = "csky,csky-pmu"; 35 + interrupts = <23 IRQ_TYPE_EDGE_RISING>; 36 + interrupt-parent = <&intc>; 37 + count-width = <48>; 38 + };
+4
arch/csky/Kconfig
··· 10 10 select COMMON_CLK 11 11 select CLKSRC_MMIO 12 12 select CLKSRC_OF 13 + select CSKY_MPINTC if CPU_CK860 14 + select CSKY_MP_TIMER if CPU_CK860 15 + select CSKY_APB_INTC 13 16 select DMA_DIRECT_REMAP 14 17 select IRQ_DOMAIN 15 18 select HANDLE_DOMAIN_IRQ ··· 33 30 select GENERIC_IRQ_MULTI_HANDLER 34 31 select GENERIC_SCHED_CLOCK 35 32 select GENERIC_SMP_IDLE_THREAD 33 + select GX6605S_TIMER if CPU_CK610 36 34 select HAVE_ARCH_TRACEHOOK 37 35 select HAVE_ARCH_AUDITSYSCALL 38 36 select HAVE_DYNAMIC_FTRACE
-1
arch/csky/abiv1/Makefile
··· 5 5 obj-y += cacheflush.o 6 6 obj-y += mmap.o 7 7 obj-y += memcpy.o 8 - obj-y += memset.o 9 8 obj-y += strksyms.o
+6
arch/csky/abiv1/inc/abi/ckmmu.h
··· 78 78 cpwcr("cpcr8", 0x04000000); 79 79 } 80 80 81 + 82 + static inline void local_tlb_invalid_all(void) 83 + { 84 + tlb_invalid_all(); 85 + } 86 + 81 87 static inline void tlb_invalid_indexed(void) 82 88 { 83 89 cpwcr("cpcr8", 0x02000000);
-3
arch/csky/abiv1/inc/abi/string.h
··· 7 7 #define __HAVE_ARCH_MEMCPY 8 8 extern void *memcpy(void *, const void *, __kernel_size_t); 9 9 10 - #define __HAVE_ARCH_MEMSET 11 - extern void *memset(void *, int, __kernel_size_t); 12 - 13 10 #endif /* __ABI_CSKY_STRING_H */
-37
arch/csky/abiv1/memset.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 3 - 4 - #include <linux/types.h> 5 - 6 - void *memset(void *dest, int c, size_t l) 7 - { 8 - char *d = dest; 9 - int ch = c & 0xff; 10 - int tmp = (ch | ch << 8 | ch << 16 | ch << 24); 11 - 12 - while (((uintptr_t)d & 0x3) && l--) 13 - *d++ = ch; 14 - 15 - while (l >= 16) { 16 - *(((u32 *)d)) = tmp; 17 - *(((u32 *)d)+1) = tmp; 18 - *(((u32 *)d)+2) = tmp; 19 - *(((u32 *)d)+3) = tmp; 20 - l -= 16; 21 - d += 16; 22 - } 23 - 24 - while (l > 3) { 25 - *(((u32 *)d)) = tmp; 26 - l -= 4; 27 - d += 4; 28 - } 29 - 30 - while (l) { 31 - *d = ch; 32 - l--; 33 - d++; 34 - } 35 - 36 - return dest; 37 - }
-1
arch/csky/abiv1/strksyms.c
··· 4 4 #include <linux/module.h> 5 5 6 6 EXPORT_SYMBOL(memcpy); 7 - EXPORT_SYMBOL(memset);
+10
arch/csky/abiv2/inc/abi/ckmmu.h
··· 85 85 #endif 86 86 } 87 87 88 + static inline void local_tlb_invalid_all(void) 89 + { 90 + #ifdef CONFIG_CPU_HAS_TLBI 91 + asm volatile("tlbi.all\n":::"memory"); 92 + sync_is(); 93 + #else 94 + tlb_invalid_all(); 95 + #endif 96 + } 97 + 88 98 static inline void tlb_invalid_indexed(void) 89 99 { 90 100 mtcr("cr<8, 15>", 0x02000000);
+78
arch/csky/include/asm/asid.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __ASM_ASM_ASID_H 3 + #define __ASM_ASM_ASID_H 4 + 5 + #include <linux/atomic.h> 6 + #include <linux/compiler.h> 7 + #include <linux/cpumask.h> 8 + #include <linux/percpu.h> 9 + #include <linux/spinlock.h> 10 + 11 + struct asid_info 12 + { 13 + atomic64_t generation; 14 + unsigned long *map; 15 + atomic64_t __percpu *active; 16 + u64 __percpu *reserved; 17 + u32 bits; 18 + /* Lock protecting the structure */ 19 + raw_spinlock_t lock; 20 + /* Which CPU requires context flush on next call */ 21 + cpumask_t flush_pending; 22 + /* Number of ASID allocated by context (shift value) */ 23 + unsigned int ctxt_shift; 24 + /* Callback to locally flush the context. */ 25 + void (*flush_cpu_ctxt_cb)(void); 26 + }; 27 + 28 + #define NUM_ASIDS(info) (1UL << ((info)->bits)) 29 + #define NUM_CTXT_ASIDS(info) (NUM_ASIDS(info) >> (info)->ctxt_shift) 30 + 31 + #define active_asid(info, cpu) *per_cpu_ptr((info)->active, cpu) 32 + 33 + void asid_new_context(struct asid_info *info, atomic64_t *pasid, 34 + unsigned int cpu, struct mm_struct *mm); 35 + 36 + /* 37 + * Check the ASID is still valid for the context. If not generate a new ASID. 38 + * 39 + * @pasid: Pointer to the current ASID batch 40 + * @cpu: current CPU ID. Must have been acquired throught get_cpu() 41 + */ 42 + static inline void asid_check_context(struct asid_info *info, 43 + atomic64_t *pasid, unsigned int cpu, 44 + struct mm_struct *mm) 45 + { 46 + u64 asid, old_active_asid; 47 + 48 + asid = atomic64_read(pasid); 49 + 50 + /* 51 + * The memory ordering here is subtle. 52 + * If our active_asid is non-zero and the ASID matches the current 53 + * generation, then we update the active_asid entry with a relaxed 54 + * cmpxchg. Racing with a concurrent rollover means that either: 55 + * 56 + * - We get a zero back from the cmpxchg and end up waiting on the 57 + * lock. 
Taking the lock synchronises with the rollover and so 58 + * we are forced to see the updated generation. 59 + * 60 + * - We get a valid ASID back from the cmpxchg, which means the 61 + * relaxed xchg in flush_context will treat us as reserved 62 + * because atomic RmWs are totally ordered for a given location. 63 + */ 64 + old_active_asid = atomic64_read(&active_asid(info, cpu)); 65 + if (old_active_asid && 66 + !((asid ^ atomic64_read(&info->generation)) >> info->bits) && 67 + atomic64_cmpxchg_relaxed(&active_asid(info, cpu), 68 + old_active_asid, asid)) 69 + return; 70 + 71 + asid_new_context(info, pasid, cpu, mm); 72 + } 73 + 74 + int asid_allocator_init(struct asid_info *info, 75 + u32 bits, unsigned int asid_per_ctxt, 76 + void (*flush_cpu_ctxt_cb)(void)); 77 + 78 + #endif
+1 -1
arch/csky/include/asm/mmu.h
··· 5 5 #define __ASM_CSKY_MMU_H 6 6 7 7 typedef struct { 8 - unsigned long asid[NR_CPUS]; 8 + atomic64_t asid; 9 9 void *vdso; 10 10 } mm_context_t; 11 11
+13 -103
arch/csky/include/asm/mmu_context.h
··· 16 16 17 17 #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ 18 18 setup_pgd(__pa(pgd), false) 19 + 19 20 #define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \ 20 21 setup_pgd(__pa(pgd), true) 21 22 22 - #define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) 23 - #define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) 24 - #define asid_cache(cpu) (cpu_data[cpu].asid_cache) 23 + #define ASID_MASK ((1 << CONFIG_CPU_ASID_BITS) - 1) 24 + #define cpu_asid(mm) (atomic64_read(&mm->context.asid) & ASID_MASK) 25 25 26 - #define ASID_FIRST_VERSION (1 << CONFIG_CPU_ASID_BITS) 27 - #define ASID_INC 0x1 28 - #define ASID_MASK (ASID_FIRST_VERSION - 1) 29 - #define ASID_VERSION_MASK ~ASID_MASK 26 + #define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.asid, 0); 0; }) 27 + #define activate_mm(prev,next) switch_mm(prev, next, current) 30 28 31 29 #define destroy_context(mm) do {} while (0) 32 30 #define enter_lazy_tlb(mm, tsk) do {} while (0) 33 31 #define deactivate_mm(tsk, mm) do {} while (0) 34 32 35 - /* 36 - * All unused by hardware upper bits will be considered 37 - * as a software asid extension. 38 - */ 33 + void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); 34 + 39 35 static inline void 40 - get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) 41 - { 42 - unsigned long asid = asid_cache(cpu); 43 - 44 - asid += ASID_INC; 45 - if (!(asid & ASID_MASK)) { 46 - flush_tlb_all(); /* start new asid cycle */ 47 - if (!asid) /* fix version if needed */ 48 - asid = ASID_FIRST_VERSION; 49 - } 50 - cpu_context(cpu, mm) = asid_cache(cpu) = asid; 51 - } 52 - 53 - /* 54 - * Initialize the context related info for a new mm_struct 55 - * instance. 
56 - */ 57 - static inline int 58 - init_new_context(struct task_struct *tsk, struct mm_struct *mm) 59 - { 60 - int i; 61 - 62 - for_each_online_cpu(i) 63 - cpu_context(i, mm) = 0; 64 - return 0; 65 - } 66 - 67 - static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 68 - struct task_struct *tsk) 36 + switch_mm(struct mm_struct *prev, struct mm_struct *next, 37 + struct task_struct *tsk) 69 38 { 70 39 unsigned int cpu = smp_processor_id(); 71 - unsigned long flags; 72 40 73 - local_irq_save(flags); 74 - /* Check if our ASID is of an older version and thus invalid */ 75 - if ((cpu_context(cpu, next) ^ asid_cache(cpu)) & ASID_VERSION_MASK) 76 - get_new_mmu_context(next, cpu); 77 - write_mmu_entryhi(cpu_asid(cpu, next)); 41 + if (prev != next) 42 + check_and_switch_context(next, cpu); 43 + 78 44 TLBMISS_HANDLER_SETUP_PGD(next->pgd); 79 - 80 - /* 81 - * Mark current->active_mm as not "active" anymore. 82 - * We don't want to mislead possible IPI tlb flush routines. 83 - */ 84 - cpumask_clear_cpu(cpu, mm_cpumask(prev)); 85 - cpumask_set_cpu(cpu, mm_cpumask(next)); 86 - 87 - local_irq_restore(flags); 45 + write_mmu_entryhi(next->context.asid.counter); 88 46 } 89 - 90 - /* 91 - * After we have set current->mm to a new value, this activates 92 - * the context for the new mm so we see the new mappings. 93 - */ 94 - static inline void 95 - activate_mm(struct mm_struct *prev, struct mm_struct *next) 96 - { 97 - unsigned long flags; 98 - int cpu = smp_processor_id(); 99 - 100 - local_irq_save(flags); 101 - 102 - /* Unconditionally get a new ASID. */ 103 - get_new_mmu_context(next, cpu); 104 - 105 - write_mmu_entryhi(cpu_asid(cpu, next)); 106 - TLBMISS_HANDLER_SETUP_PGD(next->pgd); 107 - 108 - /* mark mmu ownership change */ 109 - cpumask_clear_cpu(cpu, mm_cpumask(prev)); 110 - cpumask_set_cpu(cpu, mm_cpumask(next)); 111 - 112 - local_irq_restore(flags); 113 - } 114 - 115 - /* 116 - * If mm is currently active_mm, we can't really drop it. 
Instead, 117 - * we will get a new one for it. 118 - */ 119 - static inline void 120 - drop_mmu_context(struct mm_struct *mm, unsigned int cpu) 121 - { 122 - unsigned long flags; 123 - 124 - local_irq_save(flags); 125 - 126 - if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { 127 - get_new_mmu_context(mm, cpu); 128 - write_mmu_entryhi(cpu_asid(cpu, mm)); 129 - } else { 130 - /* will get a new context next time */ 131 - cpu_context(cpu, mm) = 0; 132 - } 133 - 134 - local_irq_restore(flags); 135 - } 136 - 137 47 #endif /* __ASM_CSKY_MMU_CONTEXT_H */
-2
arch/csky/include/asm/pgtable.h
··· 290 290 extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; 291 291 extern void paging_init(void); 292 292 293 - extern void show_jtlb_table(void); 294 - 295 293 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 296 294 pte_t *pte); 297 295
+390 -50
arch/csky/kernel/perf_event.c
··· 9 9 #include <linux/platform_device.h> 10 10 11 11 #define CSKY_PMU_MAX_EVENTS 32 12 + #define DEFAULT_COUNT_WIDTH 48 12 13 13 - #define HPCR "<0, 0x0>" /* PMU Control reg */ 14 - #define HPCNTENR "<0, 0x4>" /* Count Enable reg */ 14 + #define HPCR "<0, 0x0>" /* PMU Control reg */ 15 + #define HPSPR "<0, 0x1>" /* Start PC reg */ 16 + #define HPEPR "<0, 0x2>" /* End PC reg */ 17 + #define HPSIR "<0, 0x3>" /* Soft Counter reg */ 18 + #define HPCNTENR "<0, 0x4>" /* Count Enable reg */ 19 + #define HPINTENR "<0, 0x5>" /* Interrupt Enable reg */ 20 + #define HPOFSR "<0, 0x6>" /* Interrupt Status reg */ 21 + 22 + /* The events for a given PMU register set. */ 23 + struct pmu_hw_events { 24 + /* 25 + * The events that are active on the PMU for the given index. 26 + */ 27 + struct perf_event *events[CSKY_PMU_MAX_EVENTS]; 28 + 29 + /* 30 + * A 1 bit for an index indicates that the counter is being used for 31 + * an event. A 0 means that the counter can be used. 32 + */ 33 + unsigned long used_mask[BITS_TO_LONGS(CSKY_PMU_MAX_EVENTS)]; 34 + }; 15 35 16 36 static uint64_t (*hw_raw_read_mapping[CSKY_PMU_MAX_EVENTS])(void); 17 37 static void (*hw_raw_write_mapping[CSKY_PMU_MAX_EVENTS])(uint64_t val); 18 38 19 - struct csky_pmu_t { 20 - struct pmu pmu; 21 - uint32_t hpcr; 39 + static struct csky_pmu_t { 40 + struct pmu pmu; 41 + struct pmu_hw_events __percpu *hw_events; 42 + struct platform_device *plat_device; 43 + uint32_t count_width; 44 + uint32_t hpcr; 45 + u64 max_period; 22 46 } csky_pmu; 47 + static int csky_pmu_irq; 48 + 49 + #define to_csky_pmu(p) (container_of(p, struct csky_pmu, pmu)) 23 50 24 51 #define cprgr(reg) \ 25 52 ({ \ ··· 728 701 #define CACHE_OP_UNSUPPORTED 0xffff 729 702 static const int csky_pmu_cache_map[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { 730 703 [C(L1D)] = { 704 + #ifdef CONFIG_CPU_CK810 705 + [C(OP_READ)] = { 706 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 707 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 708 + }, 709 + [C(OP_WRITE)] = { 710 + 
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 711 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 712 + }, 713 + [C(OP_PREFETCH)] = { 714 + [C(RESULT_ACCESS)] = 0x5, 715 + [C(RESULT_MISS)] = 0x6, 716 + }, 717 + #else 731 718 [C(OP_READ)] = { 732 719 [C(RESULT_ACCESS)] = 0x14, 733 720 [C(RESULT_MISS)] = 0x15, ··· 751 710 [C(RESULT_MISS)] = 0x17, 752 711 }, 753 712 [C(OP_PREFETCH)] = { 754 - [C(RESULT_ACCESS)] = 0x5, 755 - [C(RESULT_MISS)] = 0x6, 713 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 714 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 756 715 }, 716 + #endif 757 717 }, 758 718 [C(L1I)] = { 759 719 [C(OP_READ)] = { ··· 771 729 }, 772 730 }, 773 731 [C(LL)] = { 732 + #ifdef CONFIG_CPU_CK810 733 + [C(OP_READ)] = { 734 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 735 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 736 + }, 737 + [C(OP_WRITE)] = { 738 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 739 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 740 + }, 741 + [C(OP_PREFETCH)] = { 742 + [C(RESULT_ACCESS)] = 0x7, 743 + [C(RESULT_MISS)] = 0x8, 744 + }, 745 + #else 774 746 [C(OP_READ)] = { 775 747 [C(RESULT_ACCESS)] = 0x18, 776 748 [C(RESULT_MISS)] = 0x19, ··· 794 738 [C(RESULT_MISS)] = 0x1b, 795 739 }, 796 740 [C(OP_PREFETCH)] = { 797 - [C(RESULT_ACCESS)] = 0x7, 798 - [C(RESULT_MISS)] = 0x8, 741 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 742 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 799 743 }, 744 + #endif 800 745 }, 801 746 [C(DTLB)] = { 747 + #ifdef CONFIG_CPU_CK810 802 748 [C(OP_READ)] = { 803 - [C(RESULT_ACCESS)] = 0x5, 804 - [C(RESULT_MISS)] = 0xb, 749 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 750 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 805 751 }, 806 752 [C(OP_WRITE)] = { 807 753 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 808 754 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 809 755 }, 756 + #else 757 + [C(OP_READ)] = { 758 + [C(RESULT_ACCESS)] = 0x14, 759 + [C(RESULT_MISS)] = 0xb, 760 + }, 761 + [C(OP_WRITE)] = { 762 + [C(RESULT_ACCESS)] = 0x16, 763 + [C(RESULT_MISS)] = 
0xb, 764 + }, 765 + #endif 810 766 [C(OP_PREFETCH)] = { 811 767 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 812 768 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 813 769 }, 814 770 }, 815 771 [C(ITLB)] = { 772 + #ifdef CONFIG_CPU_CK810 773 + [C(OP_READ)] = { 774 + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 775 + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, 776 + }, 777 + #else 816 778 [C(OP_READ)] = { 817 779 [C(RESULT_ACCESS)] = 0x3, 818 780 [C(RESULT_MISS)] = 0xa, 819 781 }, 782 + #endif 820 783 [C(OP_WRITE)] = { 821 784 [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, 822 785 [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, ··· 875 800 }, 876 801 }; 877 802 803 + int csky_pmu_event_set_period(struct perf_event *event) 804 + { 805 + struct hw_perf_event *hwc = &event->hw; 806 + s64 left = local64_read(&hwc->period_left); 807 + s64 period = hwc->sample_period; 808 + int ret = 0; 809 + 810 + if (unlikely(left <= -period)) { 811 + left = period; 812 + local64_set(&hwc->period_left, left); 813 + hwc->last_period = period; 814 + ret = 1; 815 + } 816 + 817 + if (unlikely(left <= 0)) { 818 + left += period; 819 + local64_set(&hwc->period_left, left); 820 + hwc->last_period = period; 821 + ret = 1; 822 + } 823 + 824 + if (left > (s64)csky_pmu.max_period) 825 + left = csky_pmu.max_period; 826 + 827 + /* 828 + * The hw event starts counting from this event offset, 829 + * mark it to be able to extract future "deltas": 830 + */ 831 + local64_set(&hwc->prev_count, (u64)(-left)); 832 + 833 + if (hw_raw_write_mapping[hwc->idx] != NULL) 834 + hw_raw_write_mapping[hwc->idx]((u64)(-left) & 835 + csky_pmu.max_period); 836 + 837 + cpwcr(HPOFSR, ~BIT(hwc->idx) & cprcr(HPOFSR)); 838 + 839 + perf_event_update_userpage(event); 840 + 841 + return ret; 842 + } 843 + 878 844 static void csky_perf_event_update(struct perf_event *event, 879 845 struct hw_perf_event *hwc) 880 846 { 881 847 uint64_t prev_raw_count = local64_read(&hwc->prev_count); 882 - uint64_t new_raw_count = hw_raw_read_mapping[hwc->idx](); 848 + 
/* 849 + * Sign extend count value to 64bit, otherwise delta calculation 850 + * would be incorrect when overflow occurs. 851 + */ 852 + uint64_t new_raw_count = sign_extend64( 853 + hw_raw_read_mapping[hwc->idx](), csky_pmu.count_width - 1); 883 854 int64_t delta = new_raw_count - prev_raw_count; 884 855 885 856 /* ··· 935 814 local64_set(&hwc->prev_count, new_raw_count); 936 815 local64_add(delta, &event->count); 937 816 local64_sub(delta, &hwc->period_left); 817 + } 818 + 819 + static void csky_pmu_reset(void *info) 820 + { 821 + cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); 938 822 } 939 823 940 824 static void csky_pmu_read(struct perf_event *event) ··· 970 844 struct hw_perf_event *hwc = &event->hw; 971 845 int ret; 972 846 847 + switch (event->attr.type) { 848 + case PERF_TYPE_HARDWARE: 849 + if (event->attr.config >= PERF_COUNT_HW_MAX) 850 + return -ENOENT; 851 + ret = csky_pmu_hw_map[event->attr.config]; 852 + if (ret == HW_OP_UNSUPPORTED) 853 + return -ENOENT; 854 + hwc->idx = ret; 855 + break; 856 + case PERF_TYPE_HW_CACHE: 857 + ret = csky_pmu_cache_event(event->attr.config); 858 + if (ret == CACHE_OP_UNSUPPORTED) 859 + return -ENOENT; 860 + hwc->idx = ret; 861 + break; 862 + case PERF_TYPE_RAW: 863 + if (hw_raw_read_mapping[event->attr.config] == NULL) 864 + return -ENOENT; 865 + hwc->idx = event->attr.config; 866 + break; 867 + default: 868 + return -ENOENT; 869 + } 870 + 973 871 if (event->attr.exclude_user) 974 872 csky_pmu.hpcr = BIT(2); 975 873 else if (event->attr.exclude_kernel) ··· 1003 853 1004 854 csky_pmu.hpcr |= BIT(1) | BIT(0); 1005 855 1006 - switch (event->attr.type) { 1007 - case PERF_TYPE_HARDWARE: 1008 - if (event->attr.config >= PERF_COUNT_HW_MAX) 1009 - return -ENOENT; 1010 - ret = csky_pmu_hw_map[event->attr.config]; 1011 - if (ret == HW_OP_UNSUPPORTED) 1012 - return -ENOENT; 1013 - hwc->idx = ret; 1014 - return 0; 1015 - case PERF_TYPE_HW_CACHE: 1016 - ret = csky_pmu_cache_event(event->attr.config); 1017 - if (ret == 
CACHE_OP_UNSUPPORTED) 1018 - return -ENOENT; 1019 - hwc->idx = ret; 1020 - return 0; 1021 - case PERF_TYPE_RAW: 1022 - if (hw_raw_read_mapping[event->attr.config] == NULL) 1023 - return -ENOENT; 1024 - hwc->idx = event->attr.config; 1025 - return 0; 1026 - default: 1027 - return -ENOENT; 1028 - } 856 + return 0; 1029 857 } 1030 858 1031 859 /* starts all counters */ ··· 1020 892 1021 893 static void csky_pmu_start(struct perf_event *event, int flags) 1022 894 { 895 + unsigned long flg; 1023 896 struct hw_perf_event *hwc = &event->hw; 1024 897 int idx = hwc->idx; 1025 898 ··· 1032 903 1033 904 hwc->state = 0; 1034 905 906 + csky_pmu_event_set_period(event); 907 + 908 + local_irq_save(flg); 909 + 910 + cpwcr(HPINTENR, BIT(idx) | cprcr(HPINTENR)); 1035 911 cpwcr(HPCNTENR, BIT(idx) | cprcr(HPCNTENR)); 912 + 913 + local_irq_restore(flg); 914 + } 915 + 916 + static void csky_pmu_stop_event(struct perf_event *event) 917 + { 918 + unsigned long flg; 919 + struct hw_perf_event *hwc = &event->hw; 920 + int idx = hwc->idx; 921 + 922 + local_irq_save(flg); 923 + 924 + cpwcr(HPINTENR, ~BIT(idx) & cprcr(HPINTENR)); 925 + cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); 926 + 927 + local_irq_restore(flg); 1036 928 } 1037 929 1038 930 static void csky_pmu_stop(struct perf_event *event, int flags) 1039 931 { 1040 - struct hw_perf_event *hwc = &event->hw; 1041 - int idx = hwc->idx; 1042 - 1043 932 if (!(event->hw.state & PERF_HES_STOPPED)) { 1044 - cpwcr(HPCNTENR, ~BIT(idx) & cprcr(HPCNTENR)); 933 + csky_pmu_stop_event(event); 1045 934 event->hw.state |= PERF_HES_STOPPED; 1046 935 } 1047 936 ··· 1072 925 1073 926 static void csky_pmu_del(struct perf_event *event, int flags) 1074 927 { 928 + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); 929 + struct hw_perf_event *hwc = &event->hw; 930 + 1075 931 csky_pmu_stop(event, PERF_EF_UPDATE); 932 + 933 + hw_events->events[hwc->idx] = NULL; 1076 934 1077 935 perf_event_update_userpage(event); 1078 936 } ··· 1085 933 /* 
allocate hardware counter and optionally start counting */ 1086 934 static int csky_pmu_add(struct perf_event *event, int flags) 1087 935 { 936 + struct pmu_hw_events *hw_events = this_cpu_ptr(csky_pmu.hw_events); 1088 937 struct hw_perf_event *hwc = &event->hw; 1089 938 1090 - local64_set(&hwc->prev_count, 0); 1091 - 1092 - if (hw_raw_write_mapping[hwc->idx] != NULL) 1093 - hw_raw_write_mapping[hwc->idx](0); 939 + hw_events->events[hwc->idx] = event; 1094 940 1095 941 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 942 + 1096 943 if (flags & PERF_EF_START) 1097 944 csky_pmu_start(event, PERF_EF_RELOAD); 1098 945 ··· 1100 949 return 0; 1101 950 } 1102 951 1103 - int __init init_hw_perf_events(void) 952 + static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev) 1104 953 { 954 + struct perf_sample_data data; 955 + struct pmu_hw_events *cpuc = this_cpu_ptr(csky_pmu.hw_events); 956 + struct pt_regs *regs; 957 + int idx; 958 + 959 + /* 960 + * Did an overflow occur? 961 + */ 962 + if (!cprcr(HPOFSR)) 963 + return IRQ_NONE; 964 + 965 + /* 966 + * Handle the counter(s) overflow(s) 967 + */ 968 + regs = get_irq_regs(); 969 + 970 + csky_pmu_disable(&csky_pmu.pmu); 971 + 972 + for (idx = 0; idx < CSKY_PMU_MAX_EVENTS; ++idx) { 973 + struct perf_event *event = cpuc->events[idx]; 974 + struct hw_perf_event *hwc; 975 + 976 + /* Ignore if we don't have an event. */ 977 + if (!event) 978 + continue; 979 + /* 980 + * We have a single interrupt for all counters. Check that 981 + * each counter has overflowed before we process it. 982 + */ 983 + if (!(cprcr(HPOFSR) & BIT(idx))) 984 + continue; 985 + 986 + hwc = &event->hw; 987 + csky_perf_event_update(event, &event->hw); 988 + perf_sample_data_init(&data, 0, hwc->last_period); 989 + csky_pmu_event_set_period(event); 990 + 991 + if (perf_event_overflow(event, &data, regs)) 992 + csky_pmu_stop_event(event); 993 + } 994 + 995 + csky_pmu_enable(&csky_pmu.pmu); 996 + 997 + /* 998 + * Handle the pending perf events. 
999 + * 1000 + * Note: this call *must* be run with interrupts disabled. For 1001 + * platforms that can have the PMU interrupts raised as an NMI, this 1002 + * will not work. 1003 + */ 1004 + irq_work_run(); 1005 + 1006 + return IRQ_HANDLED; 1007 + } 1008 + 1009 + static int csky_pmu_request_irq(irq_handler_t handler) 1010 + { 1011 + int err, irqs; 1012 + struct platform_device *pmu_device = csky_pmu.plat_device; 1013 + 1014 + if (!pmu_device) 1015 + return -ENODEV; 1016 + 1017 + irqs = min(pmu_device->num_resources, num_possible_cpus()); 1018 + if (irqs < 1) { 1019 + pr_err("no irqs for PMUs defined\n"); 1020 + return -ENODEV; 1021 + } 1022 + 1023 + csky_pmu_irq = platform_get_irq(pmu_device, 0); 1024 + if (csky_pmu_irq < 0) 1025 + return -ENODEV; 1026 + err = request_percpu_irq(csky_pmu_irq, handler, "csky-pmu", 1027 + this_cpu_ptr(csky_pmu.hw_events)); 1028 + if (err) { 1029 + pr_err("unable to request IRQ%d for CSKY PMU counters\n", 1030 + csky_pmu_irq); 1031 + return err; 1032 + } 1033 + 1034 + return 0; 1035 + } 1036 + 1037 + static void csky_pmu_free_irq(void) 1038 + { 1039 + int irq; 1040 + struct platform_device *pmu_device = csky_pmu.plat_device; 1041 + 1042 + irq = platform_get_irq(pmu_device, 0); 1043 + if (irq >= 0) 1044 + free_percpu_irq(irq, this_cpu_ptr(csky_pmu.hw_events)); 1045 + } 1046 + 1047 + int init_hw_perf_events(void) 1048 + { 1049 + csky_pmu.hw_events = alloc_percpu_gfp(struct pmu_hw_events, 1050 + GFP_KERNEL); 1051 + if (!csky_pmu.hw_events) { 1052 + pr_info("failed to allocate per-cpu PMU data.\n"); 1053 + return -ENOMEM; 1054 + } 1055 + 1105 1056 csky_pmu.pmu = (struct pmu) { 1106 1057 .pmu_enable = csky_pmu_enable, 1107 1058 .pmu_disable = csky_pmu_disable, ··· 1275 1022 hw_raw_write_mapping[0x1a] = csky_pmu_write_l2wac; 1276 1023 hw_raw_write_mapping[0x1b] = csky_pmu_write_l2wmc; 1277 1024 1278 - csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 1279 - 1280 - cpwcr(HPCR, BIT(31) | BIT(30) | BIT(1)); 1281 - 1282 - return 
perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); 1025 + return 0; 1283 1026 } 1284 - arch_initcall(init_hw_perf_events); 1027 + 1028 + static int csky_pmu_starting_cpu(unsigned int cpu) 1029 + { 1030 + enable_percpu_irq(csky_pmu_irq, 0); 1031 + return 0; 1032 + } 1033 + 1034 + static int csky_pmu_dying_cpu(unsigned int cpu) 1035 + { 1036 + disable_percpu_irq(csky_pmu_irq); 1037 + return 0; 1038 + } 1039 + 1040 + int csky_pmu_device_probe(struct platform_device *pdev, 1041 + const struct of_device_id *of_table) 1042 + { 1043 + struct device_node *node = pdev->dev.of_node; 1044 + int ret; 1045 + 1046 + ret = init_hw_perf_events(); 1047 + if (ret) { 1048 + pr_notice("[perf] failed to probe PMU!\n"); 1049 + return ret; 1050 + } 1051 + 1052 + if (of_property_read_u32(node, "count-width", 1053 + &csky_pmu.count_width)) { 1054 + csky_pmu.count_width = DEFAULT_COUNT_WIDTH; 1055 + } 1056 + csky_pmu.max_period = BIT(csky_pmu.count_width) - 1; 1057 + 1058 + csky_pmu.plat_device = pdev; 1059 + 1060 + /* Ensure the PMU has sane values out of reset. 
*/ 1061 + on_each_cpu(csky_pmu_reset, &csky_pmu, 1); 1062 + 1063 + ret = csky_pmu_request_irq(csky_pmu_handle_irq); 1064 + if (ret) { 1065 + csky_pmu.pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 1066 + pr_notice("[perf] PMU request irq fail!\n"); 1067 + } 1068 + 1069 + ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE", 1070 + csky_pmu_starting_cpu, 1071 + csky_pmu_dying_cpu); 1072 + if (ret) { 1073 + csky_pmu_free_irq(); 1074 + free_percpu(csky_pmu.hw_events); 1075 + return ret; 1076 + } 1077 + 1078 + ret = perf_pmu_register(&csky_pmu.pmu, "cpu", PERF_TYPE_RAW); 1079 + if (ret) { 1080 + csky_pmu_free_irq(); 1081 + free_percpu(csky_pmu.hw_events); 1082 + } 1083 + 1084 + return ret; 1085 + } 1086 + 1087 + const static struct of_device_id csky_pmu_of_device_ids[] = { 1088 + {.compatible = "csky,csky-pmu"}, 1089 + {}, 1090 + }; 1091 + 1092 + static int csky_pmu_dev_probe(struct platform_device *pdev) 1093 + { 1094 + return csky_pmu_device_probe(pdev, csky_pmu_of_device_ids); 1095 + } 1096 + 1097 + static struct platform_driver csky_pmu_driver = { 1098 + .driver = { 1099 + .name = "csky-pmu", 1100 + .of_match_table = csky_pmu_of_device_ids, 1101 + }, 1102 + .probe = csky_pmu_dev_probe, 1103 + }; 1104 + 1105 + static int __init csky_pmu_probe(void) 1106 + { 1107 + int ret; 1108 + 1109 + ret = platform_driver_register(&csky_pmu_driver); 1110 + if (ret) 1111 + pr_notice("[perf] PMU initialization failed\n"); 1112 + else 1113 + pr_notice("[perf] PMU initialization done\n"); 1114 + 1115 + return ret; 1116 + } 1117 + 1118 + device_initcall(csky_pmu_probe);
-2
arch/csky/kernel/smp.c
··· 212 212 TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir); 213 213 TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir); 214 214 215 - asid_cache(smp_processor_id()) = ASID_FIRST_VERSION; 216 - 217 215 #ifdef CONFIG_CPU_HAS_FPU 218 216 init_fpu(); 219 217 #endif
+5
arch/csky/kernel/traps.c
··· 120 120 121 121 switch (vector) { 122 122 case VEC_ZERODIV: 123 + die_if_kernel("Kernel mode ZERO DIV", regs, vector); 123 124 sig = SIGFPE; 124 125 break; 125 126 /* ptrace */ ··· 129 128 sig = SIGTRAP; 130 129 break; 131 130 case VEC_ILLEGAL: 131 + die_if_kernel("Kernel mode ILLEGAL", regs, vector); 132 132 #ifndef CONFIG_CPU_NO_USER_BKPT 133 133 if (*(uint16_t *)instruction_pointer(regs) != USR_BKPT) 134 134 #endif ··· 141 139 case VEC_TRAP1: 142 140 /* jtagserver breakpoint */ 143 141 case VEC_BREAKPOINT: 142 + die_if_kernel("Kernel mode BKPT", regs, vector); 144 143 info.si_code = TRAP_BRKPT; 145 144 sig = SIGTRAP; 146 145 break; ··· 153 150 #endif 154 151 #ifdef CONFIG_CPU_HAS_FPU 155 152 case VEC_FPE: 153 + die_if_kernel("Kernel mode FPE", regs, vector); 156 154 return fpu_fpe(regs); 157 155 case VEC_PRIV: 156 + die_if_kernel("Kernel mode PRIV", regs, vector); 158 157 if (fpu_libc_helper(regs)) 159 158 return; 160 159 #endif
+2
arch/csky/mm/Makefile
··· 12 12 obj-y += ioremap.o 13 13 obj-y += syscache.o 14 14 obj-y += tlb.o 15 + obj-y += asid.o 16 + obj-y += context.o
+189
arch/csky/mm/asid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Generic ASID allocator. 4 + * 5 + * Based on arch/arm/mm/context.c 6 + * 7 + * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved. 8 + * Copyright (C) 2012 ARM Ltd. 9 + */ 10 + 11 + #include <linux/slab.h> 12 + #include <linux/mm_types.h> 13 + 14 + #include <asm/asid.h> 15 + 16 + #define reserved_asid(info, cpu) *per_cpu_ptr((info)->reserved, cpu) 17 + 18 + #define ASID_MASK(info) (~GENMASK((info)->bits - 1, 0)) 19 + #define ASID_FIRST_VERSION(info) (1UL << ((info)->bits)) 20 + 21 + #define asid2idx(info, asid) (((asid) & ~ASID_MASK(info)) >> (info)->ctxt_shift) 22 + #define idx2asid(info, idx) (((idx) << (info)->ctxt_shift) & ~ASID_MASK(info)) 23 + 24 + static void flush_context(struct asid_info *info) 25 + { 26 + int i; 27 + u64 asid; 28 + 29 + /* Update the list of reserved ASIDs and the ASID bitmap. */ 30 + bitmap_clear(info->map, 0, NUM_CTXT_ASIDS(info)); 31 + 32 + for_each_possible_cpu(i) { 33 + asid = atomic64_xchg_relaxed(&active_asid(info, i), 0); 34 + /* 35 + * If this CPU has already been through a 36 + * rollover, but hasn't run another task in 37 + * the meantime, we must preserve its reserved 38 + * ASID, as this is the only trace we have of 39 + * the process it is still running. 40 + */ 41 + if (asid == 0) 42 + asid = reserved_asid(info, i); 43 + __set_bit(asid2idx(info, asid), info->map); 44 + reserved_asid(info, i) = asid; 45 + } 46 + 47 + /* 48 + * Queue a TLB invalidation for each CPU to perform on next 49 + * context-switch 50 + */ 51 + cpumask_setall(&info->flush_pending); 52 + } 53 + 54 + static bool check_update_reserved_asid(struct asid_info *info, u64 asid, 55 + u64 newasid) 56 + { 57 + int cpu; 58 + bool hit = false; 59 + 60 + /* 61 + * Iterate over the set of reserved ASIDs looking for a match. 62 + * If we find one, then we can update our mm to use newasid 63 + * (i.e. 
the same ASID in the current generation) but we can't 64 + * exit the loop early, since we need to ensure that all copies 65 + * of the old ASID are updated to reflect the mm. Failure to do 66 + * so could result in us missing the reserved ASID in a future 67 + * generation. 68 + */ 69 + for_each_possible_cpu(cpu) { 70 + if (reserved_asid(info, cpu) == asid) { 71 + hit = true; 72 + reserved_asid(info, cpu) = newasid; 73 + } 74 + } 75 + 76 + return hit; 77 + } 78 + 79 + static u64 new_context(struct asid_info *info, atomic64_t *pasid, 80 + struct mm_struct *mm) 81 + { 82 + static u32 cur_idx = 1; 83 + u64 asid = atomic64_read(pasid); 84 + u64 generation = atomic64_read(&info->generation); 85 + 86 + if (asid != 0) { 87 + u64 newasid = generation | (asid & ~ASID_MASK(info)); 88 + 89 + /* 90 + * If our current ASID was active during a rollover, we 91 + * can continue to use it and this was just a false alarm. 92 + */ 93 + if (check_update_reserved_asid(info, asid, newasid)) 94 + return newasid; 95 + 96 + /* 97 + * We had a valid ASID in a previous life, so try to re-use 98 + * it if possible. 99 + */ 100 + if (!__test_and_set_bit(asid2idx(info, asid), info->map)) 101 + return newasid; 102 + } 103 + 104 + /* 105 + * Allocate a free ASID. If we can't find one, take a note of the 106 + * currently active ASIDs and mark the TLBs as requiring flushes. We 107 + * always count from ASID #2 (index 1), as we use ASID #0 when setting 108 + * a reserved TTBR0 for the init_mm and we allocate ASIDs in even/odd 109 + * pairs. 
110 + */ 111 + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), cur_idx); 112 + if (asid != NUM_CTXT_ASIDS(info)) 113 + goto set_asid; 114 + 115 + /* We're out of ASIDs, so increment the global generation count */ 116 + generation = atomic64_add_return_relaxed(ASID_FIRST_VERSION(info), 117 + &info->generation); 118 + flush_context(info); 119 + 120 + /* We have more ASIDs than CPUs, so this will always succeed */ 121 + asid = find_next_zero_bit(info->map, NUM_CTXT_ASIDS(info), 1); 122 + 123 + set_asid: 124 + __set_bit(asid, info->map); 125 + cur_idx = asid; 126 + cpumask_clear(mm_cpumask(mm)); 127 + return idx2asid(info, asid) | generation; 128 + } 129 + 130 + /* 131 + * Generate a new ASID for the context. 132 + * 133 + * @pasid: Pointer to the current ASID batch allocated. It will be updated 134 + * with the new ASID batch. 135 + * @cpu: current CPU ID. Must have been acquired through get_cpu() 136 + */ 137 + void asid_new_context(struct asid_info *info, atomic64_t *pasid, 138 + unsigned int cpu, struct mm_struct *mm) 139 + { 140 + unsigned long flags; 141 + u64 asid; 142 + 143 + raw_spin_lock_irqsave(&info->lock, flags); 144 + /* Check that our ASID belongs to the current generation. */ 145 + asid = atomic64_read(pasid); 146 + if ((asid ^ atomic64_read(&info->generation)) >> info->bits) { 147 + asid = new_context(info, pasid, mm); 148 + atomic64_set(pasid, asid); 149 + } 150 + 151 + if (cpumask_test_and_clear_cpu(cpu, &info->flush_pending)) 152 + info->flush_cpu_ctxt_cb(); 153 + 154 + atomic64_set(&active_asid(info, cpu), asid); 155 + cpumask_set_cpu(cpu, mm_cpumask(mm)); 156 + raw_spin_unlock_irqrestore(&info->lock, flags); 157 + } 158 + 159 + /* 160 + * Initialize the ASID allocator 161 + * 162 + * @info: Pointer to the asid allocator structure 163 + * @bits: Number of ASIDs available 164 + * @asid_per_ctxt: Number of ASIDs to allocate per-context. ASIDs are 165 + * allocated contiguously for a given context. This value should be a power of 166 + * 2. 
167 + */ 168 + int asid_allocator_init(struct asid_info *info, 169 + u32 bits, unsigned int asid_per_ctxt, 170 + void (*flush_cpu_ctxt_cb)(void)) 171 + { 172 + info->bits = bits; 173 + info->ctxt_shift = ilog2(asid_per_ctxt); 174 + info->flush_cpu_ctxt_cb = flush_cpu_ctxt_cb; 175 + /* 176 + * Expect allocation after rollover to fail if we don't have at least 177 + * one more ASID than CPUs. ASID #0 is always reserved. 178 + */ 179 + WARN_ON(NUM_CTXT_ASIDS(info) - 1 <= num_possible_cpus()); 180 + atomic64_set(&info->generation, ASID_FIRST_VERSION(info)); 181 + info->map = kcalloc(BITS_TO_LONGS(NUM_CTXT_ASIDS(info)), 182 + sizeof(*info->map), GFP_KERNEL); 183 + if (!info->map) 184 + return -ENOMEM; 185 + 186 + raw_spin_lock_init(&info->lock); 187 + 188 + return 0; 189 + }
+46
arch/csky/mm/context.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 3 + 4 + #include <linux/bitops.h> 5 + #include <linux/sched.h> 6 + #include <linux/slab.h> 7 + #include <linux/mm.h> 8 + 9 + #include <asm/asid.h> 10 + #include <asm/mmu_context.h> 11 + #include <asm/smp.h> 12 + #include <asm/tlbflush.h> 13 + 14 + static DEFINE_PER_CPU(atomic64_t, active_asids); 15 + static DEFINE_PER_CPU(u64, reserved_asids); 16 + 17 + struct asid_info asid_info; 18 + 19 + void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) 20 + { 21 + asid_check_context(&asid_info, &mm->context.asid, cpu, mm); 22 + } 23 + 24 + static void asid_flush_cpu_ctxt(void) 25 + { 26 + local_tlb_invalid_all(); 27 + } 28 + 29 + static int asids_init(void) 30 + { 31 + BUG_ON(((1 << CONFIG_CPU_ASID_BITS) - 1) <= num_possible_cpus()); 32 + 33 + if (asid_allocator_init(&asid_info, CONFIG_CPU_ASID_BITS, 1, 34 + asid_flush_cpu_ctxt)) 35 + panic("Unable to initialize ASID allocator for %lu ASIDs\n", 36 + NUM_ASIDS(&asid_info)); 37 + 38 + asid_info.active = &active_asids; 39 + asid_info.reserved = &reserved_asids; 40 + 41 + pr_info("ASID allocator initialised with %lu entries\n", 42 + NUM_CTXT_ASIDS(&asid_info)); 43 + 44 + return 0; 45 + } 46 + early_initcall(asids_init);
-2
arch/csky/mm/init.c
··· 114 114 TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir); 115 115 TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir); 116 116 117 - asid_cache(smp_processor_id()) = ASID_FIRST_VERSION; 118 - 119 117 /* Setup page mask to 4k */ 120 118 write_mmu_pagemask(0); 121 119 }
+108 -156
arch/csky/mm/tlb.c
··· 10 10 #include <asm/pgtable.h> 11 11 #include <asm/setup.h> 12 12 13 - #define CSKY_TLB_SIZE CONFIG_CPU_TLB_SIZE 13 + /* 14 + * One C-SKY MMU TLB entry contain two PFN/page entry, ie: 15 + * 1VPN -> 2PFN 16 + */ 17 + #define TLB_ENTRY_SIZE (PAGE_SIZE * 2) 18 + #define TLB_ENTRY_SIZE_MASK (PAGE_MASK << 1) 14 19 15 20 void flush_tlb_all(void) 16 21 { ··· 24 19 25 20 void flush_tlb_mm(struct mm_struct *mm) 26 21 { 27 - int cpu = smp_processor_id(); 28 - 29 - if (cpu_context(cpu, mm) != 0) 30 - drop_mmu_context(mm, cpu); 31 - 22 + #ifdef CONFIG_CPU_HAS_TLBI 23 + asm volatile("tlbi.asids %0"::"r"(cpu_asid(mm))); 24 + #else 32 25 tlb_invalid_all(); 26 + #endif 33 27 } 34 28 29 + /* 30 + * MMU operation regs only could invalid tlb entry in jtlb and we 31 + * need change asid field to invalid I-utlb & D-utlb. 32 + */ 33 + #ifndef CONFIG_CPU_HAS_TLBI 35 34 #define restore_asid_inv_utlb(oldpid, newpid) \ 36 35 do { \ 37 - if ((oldpid & ASID_MASK) == newpid) \ 36 + if (oldpid == newpid) \ 38 37 write_mmu_entryhi(oldpid + 1); \ 39 38 write_mmu_entryhi(oldpid); \ 40 39 } while (0) 40 + #endif 41 41 42 42 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, 43 - unsigned long end) 43 + unsigned long end) 44 44 { 45 - struct mm_struct *mm = vma->vm_mm; 46 - int cpu = smp_processor_id(); 45 + unsigned long newpid = cpu_asid(vma->vm_mm); 47 46 48 - if (cpu_context(cpu, mm) != 0) { 49 - unsigned long size, flags; 50 - int newpid = cpu_asid(cpu, mm); 47 + start &= TLB_ENTRY_SIZE_MASK; 48 + end += TLB_ENTRY_SIZE - 1; 49 + end &= TLB_ENTRY_SIZE_MASK; 51 50 52 - local_irq_save(flags); 53 - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 54 - size = (size + 1) >> 1; 55 - if (size <= CSKY_TLB_SIZE/2) { 56 - start &= (PAGE_MASK << 1); 57 - end += ((PAGE_SIZE << 1) - 1); 58 - end &= (PAGE_MASK << 1); 59 51 #ifdef CONFIG_CPU_HAS_TLBI 60 - while (start < end) { 61 - asm volatile("tlbi.vaas %0" 62 - ::"r"(start | newpid)); 63 - start += (PAGE_SIZE << 1); 64 - } 65 - 
sync_is(); 66 - #else 67 - { 68 - int oldpid = read_mmu_entryhi(); 69 - 70 - while (start < end) { 71 - int idx; 72 - 73 - write_mmu_entryhi(start | newpid); 74 - start += (PAGE_SIZE << 1); 75 - tlb_probe(); 76 - idx = read_mmu_index(); 77 - if (idx >= 0) 78 - tlb_invalid_indexed(); 79 - } 80 - restore_asid_inv_utlb(oldpid, newpid); 81 - } 82 - #endif 83 - } else { 84 - drop_mmu_context(mm, cpu); 85 - } 86 - local_irq_restore(flags); 52 + while (start < end) { 53 + asm volatile("tlbi.vas %0"::"r"(start | newpid)); 54 + start += 2*PAGE_SIZE; 87 55 } 88 - } 89 - 90 - void flush_tlb_kernel_range(unsigned long start, unsigned long end) 91 - { 92 - unsigned long size, flags; 56 + sync_is(); 57 + #else 58 + { 59 + unsigned long flags, oldpid; 93 60 94 61 local_irq_save(flags); 95 - size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; 96 - if (size <= CSKY_TLB_SIZE) { 97 - start &= (PAGE_MASK << 1); 98 - end += ((PAGE_SIZE << 1) - 1); 99 - end &= (PAGE_MASK << 1); 100 - #ifdef CONFIG_CPU_HAS_TLBI 101 - while (start < end) { 102 - asm volatile("tlbi.vaas %0"::"r"(start)); 103 - start += (PAGE_SIZE << 1); 104 - } 105 - sync_is(); 106 - #else 107 - { 108 - int oldpid = read_mmu_entryhi(); 62 + oldpid = read_mmu_entryhi() & ASID_MASK; 63 + while (start < end) { 64 + int idx; 109 65 110 - while (start < end) { 111 - int idx; 112 - 113 - write_mmu_entryhi(start); 114 - start += (PAGE_SIZE << 1); 115 - tlb_probe(); 116 - idx = read_mmu_index(); 117 - if (idx >= 0) 118 - tlb_invalid_indexed(); 119 - } 120 - restore_asid_inv_utlb(oldpid, 0); 121 - } 122 - #endif 123 - } else { 124 - flush_tlb_all(); 125 - } 126 - 127 - local_irq_restore(flags); 128 - } 129 - 130 - void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) 131 - { 132 - int cpu = smp_processor_id(); 133 - int newpid = cpu_asid(cpu, vma->vm_mm); 134 - 135 - if (!vma || cpu_context(cpu, vma->vm_mm) != 0) { 136 - page &= (PAGE_MASK << 1); 137 - 138 - #ifdef CONFIG_CPU_HAS_TLBI 139 - asm volatile("tlbi.vaas 
%0"::"r"(page | newpid)); 140 - sync_is(); 141 - #else 142 - { 143 - int oldpid, idx; 144 - unsigned long flags; 145 - 146 - local_irq_save(flags); 147 - oldpid = read_mmu_entryhi(); 148 - write_mmu_entryhi(page | newpid); 66 + write_mmu_entryhi(start | newpid); 67 + start += 2*PAGE_SIZE; 149 68 tlb_probe(); 150 69 idx = read_mmu_index(); 151 70 if (idx >= 0) 152 71 tlb_invalid_indexed(); 153 - 154 - restore_asid_inv_utlb(oldpid, newpid); 155 - local_irq_restore(flags); 156 - } 157 - #endif 158 72 } 73 + restore_asid_inv_utlb(oldpid, newpid); 74 + local_irq_restore(flags); 75 + } 76 + #endif 159 77 } 160 78 161 - /* 162 - * Remove one kernel space TLB entry. This entry is assumed to be marked 163 - * global so we don't do the ASID thing. 164 - */ 165 - void flush_tlb_one(unsigned long page) 79 + void flush_tlb_kernel_range(unsigned long start, unsigned long end) 166 80 { 167 - int oldpid; 168 - 169 - oldpid = read_mmu_entryhi(); 170 - page &= (PAGE_MASK << 1); 81 + start &= TLB_ENTRY_SIZE_MASK; 82 + end += TLB_ENTRY_SIZE - 1; 83 + end &= TLB_ENTRY_SIZE_MASK; 171 84 172 85 #ifdef CONFIG_CPU_HAS_TLBI 173 - page = page | (oldpid & 0xfff); 174 - asm volatile("tlbi.vaas %0"::"r"(page)); 86 + while (start < end) { 87 + asm volatile("tlbi.vaas %0"::"r"(start)); 88 + start += 2*PAGE_SIZE; 89 + } 175 90 sync_is(); 176 91 #else 177 92 { 178 - int idx; 179 - unsigned long flags; 180 - 181 - page = page | (oldpid & 0xff); 93 + unsigned long flags, oldpid; 182 94 183 95 local_irq_save(flags); 184 - write_mmu_entryhi(page); 96 + oldpid = read_mmu_entryhi() & ASID_MASK; 97 + while (start < end) { 98 + int idx; 99 + 100 + write_mmu_entryhi(start | oldpid); 101 + start += 2*PAGE_SIZE; 102 + tlb_probe(); 103 + idx = read_mmu_index(); 104 + if (idx >= 0) 105 + tlb_invalid_indexed(); 106 + } 107 + restore_asid_inv_utlb(oldpid, oldpid); 108 + local_irq_restore(flags); 109 + } 110 + #endif 111 + } 112 + 113 + void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) 114 + { 
115 + int newpid = cpu_asid(vma->vm_mm); 116 + 117 + addr &= TLB_ENTRY_SIZE_MASK; 118 + 119 + #ifdef CONFIG_CPU_HAS_TLBI 120 + asm volatile("tlbi.vas %0"::"r"(addr | newpid)); 121 + sync_is(); 122 + #else 123 + { 124 + int oldpid, idx; 125 + unsigned long flags; 126 + 127 + local_irq_save(flags); 128 + oldpid = read_mmu_entryhi() & ASID_MASK; 129 + write_mmu_entryhi(addr | newpid); 185 130 tlb_probe(); 186 131 idx = read_mmu_index(); 187 132 if (idx >= 0) 188 133 tlb_invalid_indexed(); 134 + 135 + restore_asid_inv_utlb(oldpid, newpid); 136 + local_irq_restore(flags); 137 + } 138 + #endif 139 + } 140 + 141 + void flush_tlb_one(unsigned long addr) 142 + { 143 + addr &= TLB_ENTRY_SIZE_MASK; 144 + 145 + #ifdef CONFIG_CPU_HAS_TLBI 146 + asm volatile("tlbi.vaas %0"::"r"(addr)); 147 + sync_is(); 148 + #else 149 + { 150 + int oldpid, idx; 151 + unsigned long flags; 152 + 153 + local_irq_save(flags); 154 + oldpid = read_mmu_entryhi() & ASID_MASK; 155 + write_mmu_entryhi(addr | oldpid); 156 + tlb_probe(); 157 + idx = read_mmu_index(); 158 + if (idx >= 0) 159 + tlb_invalid_indexed(); 160 + 189 161 restore_asid_inv_utlb(oldpid, oldpid); 190 162 local_irq_restore(flags); 191 163 } 192 164 #endif 193 165 } 194 166 EXPORT_SYMBOL(flush_tlb_one); 195 - 196 - /* show current 32 jtlbs */ 197 - void show_jtlb_table(void) 198 - { 199 - unsigned long flags; 200 - int entryhi, entrylo0, entrylo1; 201 - int entry; 202 - int oldpid; 203 - 204 - local_irq_save(flags); 205 - entry = 0; 206 - pr_info("\n\n\n"); 207 - 208 - oldpid = read_mmu_entryhi(); 209 - while (entry < CSKY_TLB_SIZE) { 210 - write_mmu_index(entry); 211 - tlb_read(); 212 - entryhi = read_mmu_entryhi(); 213 - entrylo0 = read_mmu_entrylo0(); 214 - entrylo0 = entrylo0; 215 - entrylo1 = read_mmu_entrylo1(); 216 - entrylo1 = entrylo1; 217 - pr_info("jtlb[%d]: entryhi - 0x%x; entrylo0 - 0x%x;" 218 - " entrylo1 - 0x%x\n", 219 - entry, entryhi, entrylo0, entrylo1); 220 - entry++; 221 - } 222 - write_mmu_entryhi(oldpid); 223 - 
local_irq_restore(flags); 224 - }