Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge patch series "riscv: errata: thead: use riscv_nonstd_cache_ops for CMO"

Jisheng Zhang <jszhang@kernel.org> says:

Previously, we used the alternative mechanism to dynamically patch
the CMO operations for THEAD C906/C910 during boot for performance
reasons. But as pointed out by Arnd, "there is already a significant
cost in accessing the invalidated cache lines afterwards, which is
likely going to be much higher than the cost of an indirect branch".
And indeed, there's no performance difference with GMAC and EMMC per
my test on Sipeed Lichee Pi 4A board.

Use riscv_nonstd_cache_ops for THEAD C906/C910 CMO to simplify
the alternative code, and to achieve Arnd's goal -- "I think
moving the THEAD ops at the same level as all nonstandard operations
makes sense, but I'd still leave CMO as an explicit fast path that
avoids the indirect branch. This seems like the right thing to do both
for readability and for platforms on which the indirect branch has a
noticeable overhead."

To make bisect easy, I use two patches here: patch1 does the conversion,
which just mimics the current CMO behavior via riscv_nonstd_cache_ops; I
assume no functional changes. patch2 uses the T-HEAD PA-based CMO
instructions so that we don't need to convert PA to VA.

* b4-shazam-merge:
riscv: errata: thead: use pa based instructions for CMO
riscv: errata: thead: use riscv_nonstd_cache_ops for CMO

Link: https://lore.kernel.org/r/20231114143338.2406-1-jszhang@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>

+74 -46
+1
arch/riscv/Kconfig.errata
··· 79 79 depends on ERRATA_THEAD && MMU 80 80 select DMA_DIRECT_REMAP 81 81 select RISCV_DMA_NONCOHERENT 82 + select RISCV_NONSTANDARD_CACHE_OPS 82 83 default y 83 84 help 84 85 This will apply the cache management errata to handle the
+67 -2
arch/riscv/errata/thead/errata.c
··· 12 12 #include <asm/alternative.h> 13 13 #include <asm/cacheflush.h> 14 14 #include <asm/cpufeature.h> 15 + #include <asm/dma-noncoherent.h> 15 16 #include <asm/errata_list.h> 16 17 #include <asm/hwprobe.h> 18 + #include <asm/io.h> 17 19 #include <asm/patch.h> 18 20 #include <asm/vendorid_list.h> 19 21 ··· 35 33 return false; 36 34 } 37 35 36 + /* 37 + * th.dcache.ipa rs1 (invalidate, physical address) 38 + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 39 + * 0000001 01010 rs1 000 00000 0001011 40 + * th.dcache.iva rs1 (invalidate, virtual address) 41 + * 0000001 00110 rs1 000 00000 0001011 42 + * 43 + * th.dcache.cpa rs1 (clean, physical address) 44 + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 45 + * 0000001 01001 rs1 000 00000 0001011 46 + * th.dcache.cva rs1 (clean, virtual address) 47 + * 0000001 00101 rs1 000 00000 0001011 48 + * 49 + * th.dcache.cipa rs1 (clean then invalidate, physical address) 50 + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 51 + * 0000001 01011 rs1 000 00000 0001011 52 + * th.dcache.civa rs1 (clean then invalidate, virtual address) 53 + * 0000001 00111 rs1 000 00000 0001011 54 + * 55 + * th.sync.s (make sure all cache operations finished) 56 + * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 57 + * 0000000 11001 00000 000 00000 0001011 58 + */ 59 + #define THEAD_INVAL_A0 ".long 0x02a5000b" 60 + #define THEAD_CLEAN_A0 ".long 0x0295000b" 61 + #define THEAD_FLUSH_A0 ".long 0x02b5000b" 62 + #define THEAD_SYNC_S ".long 0x0190000b" 63 + 64 + #define THEAD_CMO_OP(_op, _start, _size, _cachesize) \ 65 + asm volatile("mv a0, %1\n\t" \ 66 + "j 2f\n\t" \ 67 + "3:\n\t" \ 68 + THEAD_##_op##_A0 "\n\t" \ 69 + "add a0, a0, %0\n\t" \ 70 + "2:\n\t" \ 71 + "bltu a0, %2, 3b\n\t" \ 72 + THEAD_SYNC_S \ 73 + : : "r"(_cachesize), \ 74 + "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ 75 + "r"((unsigned long)(_start) + (_size)) \ 76 + : "a0") 77 + 78 + static void 
thead_errata_cache_inv(phys_addr_t paddr, size_t size) 79 + { 80 + THEAD_CMO_OP(INVAL, paddr, size, riscv_cbom_block_size); 81 + } 82 + 83 + static void thead_errata_cache_wback(phys_addr_t paddr, size_t size) 84 + { 85 + THEAD_CMO_OP(CLEAN, paddr, size, riscv_cbom_block_size); 86 + } 87 + 88 + static void thead_errata_cache_wback_inv(phys_addr_t paddr, size_t size) 89 + { 90 + THEAD_CMO_OP(FLUSH, paddr, size, riscv_cbom_block_size); 91 + } 92 + 93 + static const struct riscv_nonstd_cache_ops thead_errata_cmo_ops = { 94 + .wback = &thead_errata_cache_wback, 95 + .inv = &thead_errata_cache_inv, 96 + .wback_inv = &thead_errata_cache_wback_inv, 97 + }; 98 + 38 99 static bool errata_probe_cmo(unsigned int stage, 39 100 unsigned long arch_id, unsigned long impid) 40 101 { ··· 113 48 if (stage == RISCV_ALTERNATIVES_BOOT) { 114 49 riscv_cbom_block_size = L1_CACHE_BYTES; 115 50 riscv_noncoherent_supported(); 51 + riscv_noncoherent_register_cache_ops(&thead_errata_cmo_ops); 116 52 } 117 53 118 54 return true; ··· 143 77 if (errata_probe_pbmt(stage, archid, impid)) 144 78 cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); 145 79 146 - if (errata_probe_cmo(stage, archid, impid)) 147 - cpu_req_errata |= BIT(ERRATA_THEAD_CMO); 80 + errata_probe_cmo(stage, archid, impid); 148 81 149 82 if (errata_probe_pmu(stage, archid, impid)) 150 83 cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
+6 -44
arch/riscv/include/asm/errata_list.h
··· 24 24 25 25 #ifdef CONFIG_ERRATA_THEAD 26 26 #define ERRATA_THEAD_PBMT 0 27 - #define ERRATA_THEAD_CMO 1 28 - #define ERRATA_THEAD_PMU 2 29 - #define ERRATA_THEAD_NUMBER 3 27 + #define ERRATA_THEAD_PMU 1 28 + #define ERRATA_THEAD_NUMBER 2 30 29 #endif 31 30 32 31 #ifdef __ASSEMBLY__ ··· 93 94 #define ALT_THEAD_PMA(_val) 94 95 #endif 95 96 96 - /* 97 - * th.dcache.ipa rs1 (invalidate, physical address) 98 - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 99 - * 0000001 01010 rs1 000 00000 0001011 100 - * th.dache.iva rs1 (invalida, virtual address) 101 - * 0000001 00110 rs1 000 00000 0001011 102 - * 103 - * th.dcache.cpa rs1 (clean, physical address) 104 - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 105 - * 0000001 01001 rs1 000 00000 0001011 106 - * th.dcache.cva rs1 (clean, virtual address) 107 - * 0000001 00101 rs1 000 00000 0001011 108 - * 109 - * th.dcache.cipa rs1 (clean then invalidate, physical address) 110 - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 111 - * 0000001 01011 rs1 000 00000 0001011 112 - * th.dcache.civa rs1 (... 
virtual address) 113 - * 0000001 00111 rs1 000 00000 0001011 114 - * 115 - * th.sync.s (make sure all cache operations finished) 116 - * | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 | 117 - * 0000000 11001 00000 000 00000 0001011 118 - */ 119 - #define THEAD_INVAL_A0 ".long 0x0265000b" 120 - #define THEAD_CLEAN_A0 ".long 0x0255000b" 121 - #define THEAD_FLUSH_A0 ".long 0x0275000b" 122 - #define THEAD_SYNC_S ".long 0x0190000b" 123 - 124 97 #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ 125 - asm volatile(ALTERNATIVE_2( \ 126 - __nops(6), \ 98 + asm volatile(ALTERNATIVE( \ 99 + __nops(5), \ 127 100 "mv a0, %1\n\t" \ 128 101 "j 2f\n\t" \ 129 102 "3:\n\t" \ 130 103 CBO_##_op(a0) \ 131 104 "add a0, a0, %0\n\t" \ 132 105 "2:\n\t" \ 133 - "bltu a0, %2, 3b\n\t" \ 134 - "nop", 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM, \ 135 - "mv a0, %1\n\t" \ 136 - "j 2f\n\t" \ 137 - "3:\n\t" \ 138 - THEAD_##_op##_A0 "\n\t" \ 139 - "add a0, a0, %0\n\t" \ 140 - "2:\n\t" \ 141 - "bltu a0, %2, 3b\n\t" \ 142 - THEAD_SYNC_S, THEAD_VENDOR_ID, \ 143 - ERRATA_THEAD_CMO, CONFIG_ERRATA_THEAD_CMO) \ 106 + "bltu a0, %2, 3b\n\t", \ 107 + 0, RISCV_ISA_EXT_ZICBOM, CONFIG_RISCV_ISA_ZICBOM) \ 144 108 : : "r"(_cachesize), \ 145 109 "r"((unsigned long)(_start) & ~((_cachesize) - 1UL)), \ 146 110 "r"((unsigned long)(_start) + (_size)) \