Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC updates from Vineet Gupta:
"Finally a big pile of changes for ARC (atomics/mm). These are from our
internal arc64 tree, preparing mainline for eventual arc64 support.
I'm spreading them out to avoid tsunami of patches in one release.

- MM rework:
- Implement up to 4 paging levels
- Enable STRICT_MM_TYPECHECKS
- switch pgtable_t back to 'struct page *'

- Atomics rework / implement relaxed accessors

- Retire legacy MMUv1,v2; ARC750 cores

- A few other build errors, typos"

* tag 'arc-5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc: (33 commits)
ARC: mm: vmalloc sync from kernel to user table to update PMD ...
ARC: mm: support 4 levels of page tables
ARC: mm: support 3 levels of page tables
ARC: mm: switch to asm-generic/pgalloc.h
ARC: mm: switch pgtable_t back to struct page *
ARC: mm: hack to allow 2 level build with 4 level code
ARC: mm: disintegrate pgtable.h into levels and flags
ARC: mm: disintegrate mmu.h (arcv2 bits out)
ARC: mm: move MMU specific bits out of entry code ...
ARC: mm: move MMU specific bits out of ASID allocator
ARC: mm: non-functional code movement/cleanup
ARC: mm: pmd_populate* to use the canonical set_pmd (and drop pmd_set)
ARC: ioremap: use more commonly used PAGE_KERNEL based uncached flag
ARC: mm: Enable STRICT_MM_TYPECHECKS
ARC: mm: Fixes to allow STRICT_MM_TYPECHECKS
ARC: mm: move mmu/cache externs out to setup.h
ARC: mm: remove tlb paranoid code
ARC: mm: use SCRATCH_DATA0 register for caching pgdir in ARCv2 only
ARC: retire MMUv1 and MMUv2 support
ARC: retire ARC750 support
...

+1238 -1848
+8 -33
arch/arc/Kconfig
··· 116 116 default ARC_CPU_770 if ISA_ARCOMPACT 117 117 default ARC_CPU_HS if ISA_ARCV2 118 118 119 - if ISA_ARCOMPACT 120 - 121 - config ARC_CPU_750D 122 - bool "ARC750D" 123 - select ARC_CANT_LLSC 124 - help 125 - Support for ARC750 core 126 - 127 119 config ARC_CPU_770 128 120 bool "ARC770" 121 + depends on ISA_ARCOMPACT 129 122 select ARC_HAS_SWAPE 130 123 help 131 124 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) ··· 127 134 Shared Address Spaces (for sharing TLB entries in MMU) 128 135 -Caches: New Prog Model, Region Flush 129 136 -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr 130 - 131 - endif #ISA_ARCOMPACT 132 137 133 138 config ARC_CPU_HS 134 139 bool "ARC-HS" ··· 265 274 266 275 choice 267 276 prompt "MMU Version" 268 - default ARC_MMU_V3 if ARC_CPU_770 269 - default ARC_MMU_V2 if ARC_CPU_750D 270 - default ARC_MMU_V4 if ARC_CPU_HS 271 - 272 - if ISA_ARCOMPACT 273 - 274 - config ARC_MMU_V1 275 - bool "MMU v1" 276 - help 277 - Orig ARC700 MMU 278 - 279 - config ARC_MMU_V2 280 - bool "MMU v2" 281 - help 282 - Fixed the deficiency of v1 - possible thrashing in memcpy scenario 283 - when 2 D-TLB and 1 I-TLB entries index into same 2way set. 
277 + default ARC_MMU_V3 if ISA_ARCOMPACT 278 + default ARC_MMU_V4 if ISA_ARCV2 284 279 285 280 config ARC_MMU_V3 286 281 bool "MMU v3" 287 - depends on ARC_CPU_770 282 + depends on ISA_ARCOMPACT 288 283 help 289 284 Introduced with ARC700 4.10: New Features 290 285 Variable Page size (1k-16k), var JTLB size 128 x (2 or 4) 291 286 Shared Address Spaces (SASID) 292 - 293 - endif 294 287 295 288 config ARC_MMU_V4 296 289 bool "MMU v4" ··· 294 319 295 320 config ARC_PAGE_SIZE_16K 296 321 bool "16KB" 297 - depends on ARC_MMU_V3 || ARC_MMU_V4 298 322 299 323 config ARC_PAGE_SIZE_4K 300 324 bool "4KB" ··· 313 339 bool "16MB" 314 340 315 341 endchoice 342 + 343 + config PGTABLE_LEVELS 344 + int "Number of Page table levels" 345 + default 2 316 346 317 347 config ARC_COMPACT_IRQ_LEVELS 318 348 depends on ISA_ARCOMPACT ··· 540 562 but not slower, and it will give very useful debugging information. 541 563 If you don't debug the kernel, you can say N, but we may not be able 542 564 to solve problems without frame unwind information 543 - 544 - config ARC_DBG_TLB_PARANOIA 545 - bool "Paranoia Checks in Low Level TLB Handlers" 546 565 547 566 config ARC_DBG_JUMP_LABEL 548 567 bool "Paranoid checks in Static Keys (jump labels) code"
+97
arch/arc/include/asm/atomic-llsc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + #ifndef _ASM_ARC_ATOMIC_LLSC_H 4 + #define _ASM_ARC_ATOMIC_LLSC_H 5 + 6 + #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) 7 + 8 + #define ATOMIC_OP(op, c_op, asm_op) \ 9 + static inline void arch_atomic_##op(int i, atomic_t *v) \ 10 + { \ 11 + unsigned int val; \ 12 + \ 13 + __asm__ __volatile__( \ 14 + "1: llock %[val], [%[ctr]] \n" \ 15 + " " #asm_op " %[val], %[val], %[i] \n" \ 16 + " scond %[val], [%[ctr]] \n" \ 17 + " bnz 1b \n" \ 18 + : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ 19 + : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ 20 + [i] "ir" (i) \ 21 + : "cc"); \ 22 + } \ 23 + 24 + #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ 25 + static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ 26 + { \ 27 + unsigned int val; \ 28 + \ 29 + __asm__ __volatile__( \ 30 + "1: llock %[val], [%[ctr]] \n" \ 31 + " " #asm_op " %[val], %[val], %[i] \n" \ 32 + " scond %[val], [%[ctr]] \n" \ 33 + " bnz 1b \n" \ 34 + : [val] "=&r" (val) \ 35 + : [ctr] "r" (&v->counter), \ 36 + [i] "ir" (i) \ 37 + : "cc"); \ 38 + \ 39 + return val; \ 40 + } 41 + 42 + #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed 43 + #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed 44 + 45 + #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ 46 + static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ 47 + { \ 48 + unsigned int val, orig; \ 49 + \ 50 + __asm__ __volatile__( \ 51 + "1: llock %[orig], [%[ctr]] \n" \ 52 + " " #asm_op " %[val], %[orig], %[i] \n" \ 53 + " scond %[val], [%[ctr]] \n" \ 54 + " bnz 1b \n" \ 55 + : [val] "=&r" (val), \ 56 + [orig] "=&r" (orig) \ 57 + : [ctr] "r" (&v->counter), \ 58 + [i] "ir" (i) \ 59 + : "cc"); \ 60 + \ 61 + return orig; \ 62 + } 63 + 64 + #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed 65 + #define arch_atomic_fetch_sub_relaxed 
arch_atomic_fetch_sub_relaxed 66 + 67 + #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed 68 + #define arch_atomic_fetch_andnot_relaxed arch_atomic_fetch_andnot_relaxed 69 + #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed 70 + #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed 71 + 72 + #define ATOMIC_OPS(op, c_op, asm_op) \ 73 + ATOMIC_OP(op, c_op, asm_op) \ 74 + ATOMIC_OP_RETURN(op, c_op, asm_op) \ 75 + ATOMIC_FETCH_OP(op, c_op, asm_op) 76 + 77 + ATOMIC_OPS(add, +=, add) 78 + ATOMIC_OPS(sub, -=, sub) 79 + 80 + #undef ATOMIC_OPS 81 + #define ATOMIC_OPS(op, c_op, asm_op) \ 82 + ATOMIC_OP(op, c_op, asm_op) \ 83 + ATOMIC_FETCH_OP(op, c_op, asm_op) 84 + 85 + ATOMIC_OPS(and, &=, and) 86 + ATOMIC_OPS(andnot, &= ~, bic) 87 + ATOMIC_OPS(or, |=, or) 88 + ATOMIC_OPS(xor, ^=, xor) 89 + 90 + #define arch_atomic_andnot arch_atomic_andnot 91 + 92 + #undef ATOMIC_OPS 93 + #undef ATOMIC_FETCH_OP 94 + #undef ATOMIC_OP_RETURN 95 + #undef ATOMIC_OP 96 + 97 + #endif
+102
arch/arc/include/asm/atomic-spinlock.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + #ifndef _ASM_ARC_ATOMIC_SPLOCK_H 4 + #define _ASM_ARC_ATOMIC_SPLOCK_H 5 + 6 + /* 7 + * Non hardware assisted Atomic-R-M-W 8 + * Locking would change to irq-disabling only (UP) and spinlocks (SMP) 9 + */ 10 + 11 + static inline void arch_atomic_set(atomic_t *v, int i) 12 + { 13 + /* 14 + * Independent of hardware support, all of the atomic_xxx() APIs need 15 + * to follow the same locking rules to make sure that a "hardware" 16 + * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn 17 + * sequence 18 + * 19 + * Thus atomic_set() despite being 1 insn (and seemingly atomic) 20 + * requires the locking. 21 + */ 22 + unsigned long flags; 23 + 24 + atomic_ops_lock(flags); 25 + WRITE_ONCE(v->counter, i); 26 + atomic_ops_unlock(flags); 27 + } 28 + 29 + #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) 30 + 31 + #define ATOMIC_OP(op, c_op, asm_op) \ 32 + static inline void arch_atomic_##op(int i, atomic_t *v) \ 33 + { \ 34 + unsigned long flags; \ 35 + \ 36 + atomic_ops_lock(flags); \ 37 + v->counter c_op i; \ 38 + atomic_ops_unlock(flags); \ 39 + } 40 + 41 + #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ 42 + static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ 43 + { \ 44 + unsigned long flags; \ 45 + unsigned int temp; \ 46 + \ 47 + /* \ 48 + * spin lock/unlock provides the needed smp_mb() before/after \ 49 + */ \ 50 + atomic_ops_lock(flags); \ 51 + temp = v->counter; \ 52 + temp c_op i; \ 53 + v->counter = temp; \ 54 + atomic_ops_unlock(flags); \ 55 + \ 56 + return temp; \ 57 + } 58 + 59 + #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ 60 + static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ 61 + { \ 62 + unsigned long flags; \ 63 + unsigned int orig; \ 64 + \ 65 + /* \ 66 + * spin lock/unlock provides the needed smp_mb() before/after \ 67 + */ \ 68 + atomic_ops_lock(flags); \ 69 + orig = v->counter; \ 70 + v->counter c_op i; \ 71 + atomic_ops_unlock(flags); \ 72 + 
\ 73 + return orig; \ 74 + } 75 + 76 + #define ATOMIC_OPS(op, c_op, asm_op) \ 77 + ATOMIC_OP(op, c_op, asm_op) \ 78 + ATOMIC_OP_RETURN(op, c_op, asm_op) \ 79 + ATOMIC_FETCH_OP(op, c_op, asm_op) 80 + 81 + ATOMIC_OPS(add, +=, add) 82 + ATOMIC_OPS(sub, -=, sub) 83 + 84 + #undef ATOMIC_OPS 85 + #define ATOMIC_OPS(op, c_op, asm_op) \ 86 + ATOMIC_OP(op, c_op, asm_op) \ 87 + ATOMIC_FETCH_OP(op, c_op, asm_op) 88 + 89 + ATOMIC_OPS(and, &=, and) 90 + ATOMIC_OPS(andnot, &= ~, bic) 91 + ATOMIC_OPS(or, |=, or) 92 + ATOMIC_OPS(xor, ^=, xor) 93 + 94 + #define arch_atomic_andnot arch_atomic_andnot 95 + #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot 96 + 97 + #undef ATOMIC_OPS 98 + #undef ATOMIC_FETCH_OP 99 + #undef ATOMIC_OP_RETURN 100 + #undef ATOMIC_OP 101 + 102 + #endif
+26 -418
arch/arc/include/asm/atomic.h
··· 17 17 #define arch_atomic_read(v) READ_ONCE((v)->counter) 18 18 19 19 #ifdef CONFIG_ARC_HAS_LLSC 20 - 21 - #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) 22 - 23 - #define ATOMIC_OP(op, c_op, asm_op) \ 24 - static inline void arch_atomic_##op(int i, atomic_t *v) \ 25 - { \ 26 - unsigned int val; \ 27 - \ 28 - __asm__ __volatile__( \ 29 - "1: llock %[val], [%[ctr]] \n" \ 30 - " " #asm_op " %[val], %[val], %[i] \n" \ 31 - " scond %[val], [%[ctr]] \n" \ 32 - " bnz 1b \n" \ 33 - : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \ 34 - : [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \ 35 - [i] "ir" (i) \ 36 - : "cc"); \ 37 - } \ 38 - 39 - #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ 40 - static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ 41 - { \ 42 - unsigned int val; \ 43 - \ 44 - /* \ 45 - * Explicit full memory barrier needed before/after as \ 46 - * LLOCK/SCOND themselves don't provide any such semantics \ 47 - */ \ 48 - smp_mb(); \ 49 - \ 50 - __asm__ __volatile__( \ 51 - "1: llock %[val], [%[ctr]] \n" \ 52 - " " #asm_op " %[val], %[val], %[i] \n" \ 53 - " scond %[val], [%[ctr]] \n" \ 54 - " bnz 1b \n" \ 55 - : [val] "=&r" (val) \ 56 - : [ctr] "r" (&v->counter), \ 57 - [i] "ir" (i) \ 58 - : "cc"); \ 59 - \ 60 - smp_mb(); \ 61 - \ 62 - return val; \ 63 - } 64 - 65 - #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ 66 - static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ 67 - { \ 68 - unsigned int val, orig; \ 69 - \ 70 - /* \ 71 - * Explicit full memory barrier needed before/after as \ 72 - * LLOCK/SCOND themselves don't provide any such semantics \ 73 - */ \ 74 - smp_mb(); \ 75 - \ 76 - __asm__ __volatile__( \ 77 - "1: llock %[orig], [%[ctr]] \n" \ 78 - " " #asm_op " %[val], %[orig], %[i] \n" \ 79 - " scond %[val], [%[ctr]] \n" \ 80 - " bnz 1b \n" \ 81 - : [val] "=&r" (val), \ 82 - [orig] "=&r" (orig) \ 83 - : [ctr] "r" (&v->counter), \ 84 - [i] "ir" (i) \ 85 - : "cc"); \ 86 - \ 
87 - smp_mb(); \ 88 - \ 89 - return orig; \ 90 - } 91 - 92 - #else /* !CONFIG_ARC_HAS_LLSC */ 93 - 94 - #ifndef CONFIG_SMP 95 - 96 - /* violating atomic_xxx API locking protocol in UP for optimization sake */ 97 - #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) 98 - 20 + #include <asm/atomic-llsc.h> 99 21 #else 22 + #include <asm/atomic-spinlock.h> 23 + #endif 100 24 101 - static inline void arch_atomic_set(atomic_t *v, int i) 102 - { 103 - /* 104 - * Independent of hardware support, all of the atomic_xxx() APIs need 105 - * to follow the same locking rules to make sure that a "hardware" 106 - * atomic insn (e.g. LD) doesn't clobber an "emulated" atomic insn 107 - * sequence 108 - * 109 - * Thus atomic_set() despite being 1 insn (and seemingly atomic) 110 - * requires the locking. 111 - */ 112 - unsigned long flags; 25 + #define arch_atomic_cmpxchg(v, o, n) \ 26 + ({ \ 27 + arch_cmpxchg(&((v)->counter), (o), (n)); \ 28 + }) 113 29 114 - atomic_ops_lock(flags); 115 - WRITE_ONCE(v->counter, i); 116 - atomic_ops_unlock(flags); 117 - } 30 + #ifdef arch_cmpxchg_relaxed 31 + #define arch_atomic_cmpxchg_relaxed(v, o, n) \ 32 + ({ \ 33 + arch_cmpxchg_relaxed(&((v)->counter), (o), (n)); \ 34 + }) 35 + #endif 118 36 119 - #define arch_atomic_set_release(v, i) arch_atomic_set((v), (i)) 37 + #define arch_atomic_xchg(v, n) \ 38 + ({ \ 39 + arch_xchg(&((v)->counter), (n)); \ 40 + }) 120 41 42 + #ifdef arch_xchg_relaxed 43 + #define arch_atomic_xchg_relaxed(v, n) \ 44 + ({ \ 45 + arch_xchg_relaxed(&((v)->counter), (n)); \ 46 + }) 121 47 #endif 122 48 123 49 /* 124 - * Non hardware assisted Atomic-R-M-W 125 - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) 50 + * 64-bit atomics 126 51 */ 127 - 128 - #define ATOMIC_OP(op, c_op, asm_op) \ 129 - static inline void arch_atomic_##op(int i, atomic_t *v) \ 130 - { \ 131 - unsigned long flags; \ 132 - \ 133 - atomic_ops_lock(flags); \ 134 - v->counter c_op i; \ 135 - atomic_ops_unlock(flags); \ 136 - } 
137 - 138 - #define ATOMIC_OP_RETURN(op, c_op, asm_op) \ 139 - static inline int arch_atomic_##op##_return(int i, atomic_t *v) \ 140 - { \ 141 - unsigned long flags; \ 142 - unsigned long temp; \ 143 - \ 144 - /* \ 145 - * spin lock/unlock provides the needed smp_mb() before/after \ 146 - */ \ 147 - atomic_ops_lock(flags); \ 148 - temp = v->counter; \ 149 - temp c_op i; \ 150 - v->counter = temp; \ 151 - atomic_ops_unlock(flags); \ 152 - \ 153 - return temp; \ 154 - } 155 - 156 - #define ATOMIC_FETCH_OP(op, c_op, asm_op) \ 157 - static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ 158 - { \ 159 - unsigned long flags; \ 160 - unsigned long orig; \ 161 - \ 162 - /* \ 163 - * spin lock/unlock provides the needed smp_mb() before/after \ 164 - */ \ 165 - atomic_ops_lock(flags); \ 166 - orig = v->counter; \ 167 - v->counter c_op i; \ 168 - atomic_ops_unlock(flags); \ 169 - \ 170 - return orig; \ 171 - } 172 - 173 - #endif /* !CONFIG_ARC_HAS_LLSC */ 174 - 175 - #define ATOMIC_OPS(op, c_op, asm_op) \ 176 - ATOMIC_OP(op, c_op, asm_op) \ 177 - ATOMIC_OP_RETURN(op, c_op, asm_op) \ 178 - ATOMIC_FETCH_OP(op, c_op, asm_op) 179 - 180 - ATOMIC_OPS(add, +=, add) 181 - ATOMIC_OPS(sub, -=, sub) 182 - 183 - #undef ATOMIC_OPS 184 - #define ATOMIC_OPS(op, c_op, asm_op) \ 185 - ATOMIC_OP(op, c_op, asm_op) \ 186 - ATOMIC_FETCH_OP(op, c_op, asm_op) 187 - 188 - ATOMIC_OPS(and, &=, and) 189 - ATOMIC_OPS(andnot, &= ~, bic) 190 - ATOMIC_OPS(or, |=, or) 191 - ATOMIC_OPS(xor, ^=, xor) 192 - 193 - #define arch_atomic_andnot arch_atomic_andnot 194 - #define arch_atomic_fetch_andnot arch_atomic_fetch_andnot 195 - 196 - #undef ATOMIC_OPS 197 - #undef ATOMIC_FETCH_OP 198 - #undef ATOMIC_OP_RETURN 199 - #undef ATOMIC_OP 200 - 201 52 #ifdef CONFIG_GENERIC_ATOMIC64 202 - 203 53 #include <asm-generic/atomic64.h> 204 - 205 - #else /* Kconfig ensures this is only enabled with needed h/w assist */ 206 - 207 - /* 208 - * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) 209 - * - The 
address HAS to be 64-bit aligned 210 - * - There are 2 semantics involved here: 211 - * = exclusive implies no interim update between load/store to same addr 212 - * = both words are observed/updated together: this is guaranteed even 213 - * for regular 64-bit load (LDD) / store (STD). Thus atomic64_set() 214 - * is NOT required to use LLOCKD+SCONDD, STD suffices 215 - */ 216 - 217 - typedef struct { 218 - s64 __aligned(8) counter; 219 - } atomic64_t; 220 - 221 - #define ATOMIC64_INIT(a) { (a) } 222 - 223 - static inline s64 arch_atomic64_read(const atomic64_t *v) 224 - { 225 - s64 val; 226 - 227 - __asm__ __volatile__( 228 - " ldd %0, [%1] \n" 229 - : "=r"(val) 230 - : "r"(&v->counter)); 231 - 232 - return val; 233 - } 234 - 235 - static inline void arch_atomic64_set(atomic64_t *v, s64 a) 236 - { 237 - /* 238 - * This could have been a simple assignment in "C" but would need 239 - * explicit volatile. Otherwise gcc optimizers could elide the store 240 - * which borked atomic64 self-test 241 - * In the inline asm version, memory clobber needed for exact same 242 - * reason, to tell gcc about the store. 243 - * 244 - * This however is not needed for sibling atomic64_add() etc since both 245 - * load/store are explicitly done in inline asm. 
As long as API is used 246 - * for each access, gcc has no way to optimize away any load/store 247 - */ 248 - __asm__ __volatile__( 249 - " std %0, [%1] \n" 250 - : 251 - : "r"(a), "r"(&v->counter) 252 - : "memory"); 253 - } 254 - 255 - #define ATOMIC64_OP(op, op1, op2) \ 256 - static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ 257 - { \ 258 - s64 val; \ 259 - \ 260 - __asm__ __volatile__( \ 261 - "1: \n" \ 262 - " llockd %0, [%1] \n" \ 263 - " " #op1 " %L0, %L0, %L2 \n" \ 264 - " " #op2 " %H0, %H0, %H2 \n" \ 265 - " scondd %0, [%1] \n" \ 266 - " bnz 1b \n" \ 267 - : "=&r"(val) \ 268 - : "r"(&v->counter), "ir"(a) \ 269 - : "cc"); \ 270 - } \ 271 - 272 - #define ATOMIC64_OP_RETURN(op, op1, op2) \ 273 - static inline s64 arch_atomic64_##op##_return(s64 a, atomic64_t *v) \ 274 - { \ 275 - s64 val; \ 276 - \ 277 - smp_mb(); \ 278 - \ 279 - __asm__ __volatile__( \ 280 - "1: \n" \ 281 - " llockd %0, [%1] \n" \ 282 - " " #op1 " %L0, %L0, %L2 \n" \ 283 - " " #op2 " %H0, %H0, %H2 \n" \ 284 - " scondd %0, [%1] \n" \ 285 - " bnz 1b \n" \ 286 - : [val] "=&r"(val) \ 287 - : "r"(&v->counter), "ir"(a) \ 288 - : "cc"); /* memory clobber comes from smp_mb() */ \ 289 - \ 290 - smp_mb(); \ 291 - \ 292 - return val; \ 293 - } 294 - 295 - #define ATOMIC64_FETCH_OP(op, op1, op2) \ 296 - static inline s64 arch_atomic64_fetch_##op(s64 a, atomic64_t *v) \ 297 - { \ 298 - s64 val, orig; \ 299 - \ 300 - smp_mb(); \ 301 - \ 302 - __asm__ __volatile__( \ 303 - "1: \n" \ 304 - " llockd %0, [%2] \n" \ 305 - " " #op1 " %L1, %L0, %L3 \n" \ 306 - " " #op2 " %H1, %H0, %H3 \n" \ 307 - " scondd %1, [%2] \n" \ 308 - " bnz 1b \n" \ 309 - : "=&r"(orig), "=&r"(val) \ 310 - : "r"(&v->counter), "ir"(a) \ 311 - : "cc"); /* memory clobber comes from smp_mb() */ \ 312 - \ 313 - smp_mb(); \ 314 - \ 315 - return orig; \ 316 - } 317 - 318 - #define ATOMIC64_OPS(op, op1, op2) \ 319 - ATOMIC64_OP(op, op1, op2) \ 320 - ATOMIC64_OP_RETURN(op, op1, op2) \ 321 - ATOMIC64_FETCH_OP(op, op1, op2) 322 - 323 - 
ATOMIC64_OPS(add, add.f, adc) 324 - ATOMIC64_OPS(sub, sub.f, sbc) 325 - ATOMIC64_OPS(and, and, and) 326 - ATOMIC64_OPS(andnot, bic, bic) 327 - ATOMIC64_OPS(or, or, or) 328 - ATOMIC64_OPS(xor, xor, xor) 329 - 330 - #define arch_atomic64_andnot arch_atomic64_andnot 331 - #define arch_atomic64_fetch_andnot arch_atomic64_fetch_andnot 332 - 333 - #undef ATOMIC64_OPS 334 - #undef ATOMIC64_FETCH_OP 335 - #undef ATOMIC64_OP_RETURN 336 - #undef ATOMIC64_OP 337 - 338 - static inline s64 339 - arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) 340 - { 341 - s64 prev; 342 - 343 - smp_mb(); 344 - 345 - __asm__ __volatile__( 346 - "1: llockd %0, [%1] \n" 347 - " brne %L0, %L2, 2f \n" 348 - " brne %H0, %H2, 2f \n" 349 - " scondd %3, [%1] \n" 350 - " bnz 1b \n" 351 - "2: \n" 352 - : "=&r"(prev) 353 - : "r"(ptr), "ir"(expected), "r"(new) 354 - : "cc"); /* memory clobber comes from smp_mb() */ 355 - 356 - smp_mb(); 357 - 358 - return prev; 359 - } 360 - 361 - static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) 362 - { 363 - s64 prev; 364 - 365 - smp_mb(); 366 - 367 - __asm__ __volatile__( 368 - "1: llockd %0, [%1] \n" 369 - " scondd %2, [%1] \n" 370 - " bnz 1b \n" 371 - "2: \n" 372 - : "=&r"(prev) 373 - : "r"(ptr), "r"(new) 374 - : "cc"); /* memory clobber comes from smp_mb() */ 375 - 376 - smp_mb(); 377 - 378 - return prev; 379 - } 380 - 381 - /** 382 - * arch_atomic64_dec_if_positive - decrement by 1 if old value positive 383 - * @v: pointer of type atomic64_t 384 - * 385 - * The function returns the old value of *v minus 1, even if 386 - * the atomic variable, v, was not decremented. 
387 - */ 388 - 389 - static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) 390 - { 391 - s64 val; 392 - 393 - smp_mb(); 394 - 395 - __asm__ __volatile__( 396 - "1: llockd %0, [%1] \n" 397 - " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" 398 - " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" 399 - " brlt %H0, 0, 2f \n" 400 - " scondd %0, [%1] \n" 401 - " bnz 1b \n" 402 - "2: \n" 403 - : "=&r"(val) 404 - : "r"(&v->counter) 405 - : "cc"); /* memory clobber comes from smp_mb() */ 406 - 407 - smp_mb(); 408 - 409 - return val; 410 - } 411 - #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive 412 - 413 - /** 414 - * arch_atomic64_fetch_add_unless - add unless the number is a given value 415 - * @v: pointer of type atomic64_t 416 - * @a: the amount to add to v... 417 - * @u: ...unless v is equal to u. 418 - * 419 - * Atomically adds @a to @v, if it was not @u. 420 - * Returns the old value of @v 421 - */ 422 - static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) 423 - { 424 - s64 old, temp; 425 - 426 - smp_mb(); 427 - 428 - __asm__ __volatile__( 429 - "1: llockd %0, [%2] \n" 430 - " brne %L0, %L4, 2f # continue to add since v != u \n" 431 - " breq.d %H0, %H4, 3f # return since v == u \n" 432 - "2: \n" 433 - " add.f %L1, %L0, %L3 \n" 434 - " adc %H1, %H0, %H3 \n" 435 - " scondd %1, [%2] \n" 436 - " bnz 1b \n" 437 - "3: \n" 438 - : "=&r"(old), "=&r" (temp) 439 - : "r"(&v->counter), "r"(a), "r"(u) 440 - : "cc"); /* memory clobber comes from smp_mb() */ 441 - 442 - smp_mb(); 443 - 444 - return old; 445 - } 446 - #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless 447 - 448 - #endif /* !CONFIG_GENERIC_ATOMIC64 */ 54 + #else 55 + #include <asm/atomic64-arcv2.h> 56 + #endif 449 57 450 58 #endif /* !__ASSEMBLY__ */ 451 59
+250
arch/arc/include/asm/atomic64-arcv2.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + 3 + /* 4 + * ARCv2 supports 64-bit exclusive load (LLOCKD) / store (SCONDD) 5 + * - The address HAS to be 64-bit aligned 6 + */ 7 + 8 + #ifndef _ASM_ARC_ATOMIC64_ARCV2_H 9 + #define _ASM_ARC_ATOMIC64_ARCV2_H 10 + 11 + typedef struct { 12 + s64 __aligned(8) counter; 13 + } atomic64_t; 14 + 15 + #define ATOMIC64_INIT(a) { (a) } 16 + 17 + static inline s64 arch_atomic64_read(const atomic64_t *v) 18 + { 19 + s64 val; 20 + 21 + __asm__ __volatile__( 22 + " ldd %0, [%1] \n" 23 + : "=r"(val) 24 + : "r"(&v->counter)); 25 + 26 + return val; 27 + } 28 + 29 + static inline void arch_atomic64_set(atomic64_t *v, s64 a) 30 + { 31 + /* 32 + * This could have been a simple assignment in "C" but would need 33 + * explicit volatile. Otherwise gcc optimizers could elide the store 34 + * which borked atomic64 self-test 35 + * In the inline asm version, memory clobber needed for exact same 36 + * reason, to tell gcc about the store. 37 + * 38 + * This however is not needed for sibling atomic64_add() etc since both 39 + * load/store are explicitly done in inline asm. 
As long as API is used 40 + * for each access, gcc has no way to optimize away any load/store 41 + */ 42 + __asm__ __volatile__( 43 + " std %0, [%1] \n" 44 + : 45 + : "r"(a), "r"(&v->counter) 46 + : "memory"); 47 + } 48 + 49 + #define ATOMIC64_OP(op, op1, op2) \ 50 + static inline void arch_atomic64_##op(s64 a, atomic64_t *v) \ 51 + { \ 52 + s64 val; \ 53 + \ 54 + __asm__ __volatile__( \ 55 + "1: \n" \ 56 + " llockd %0, [%1] \n" \ 57 + " " #op1 " %L0, %L0, %L2 \n" \ 58 + " " #op2 " %H0, %H0, %H2 \n" \ 59 + " scondd %0, [%1] \n" \ 60 + " bnz 1b \n" \ 61 + : "=&r"(val) \ 62 + : "r"(&v->counter), "ir"(a) \ 63 + : "cc"); \ 64 + } \ 65 + 66 + #define ATOMIC64_OP_RETURN(op, op1, op2) \ 67 + static inline s64 arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \ 68 + { \ 69 + s64 val; \ 70 + \ 71 + __asm__ __volatile__( \ 72 + "1: \n" \ 73 + " llockd %0, [%1] \n" \ 74 + " " #op1 " %L0, %L0, %L2 \n" \ 75 + " " #op2 " %H0, %H0, %H2 \n" \ 76 + " scondd %0, [%1] \n" \ 77 + " bnz 1b \n" \ 78 + : [val] "=&r"(val) \ 79 + : "r"(&v->counter), "ir"(a) \ 80 + : "cc"); /* memory clobber comes from smp_mb() */ \ 81 + \ 82 + return val; \ 83 + } 84 + 85 + #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed 86 + #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed 87 + 88 + #define ATOMIC64_FETCH_OP(op, op1, op2) \ 89 + static inline s64 arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \ 90 + { \ 91 + s64 val, orig; \ 92 + \ 93 + __asm__ __volatile__( \ 94 + "1: \n" \ 95 + " llockd %0, [%2] \n" \ 96 + " " #op1 " %L1, %L0, %L3 \n" \ 97 + " " #op2 " %H1, %H0, %H3 \n" \ 98 + " scondd %1, [%2] \n" \ 99 + " bnz 1b \n" \ 100 + : "=&r"(orig), "=&r"(val) \ 101 + : "r"(&v->counter), "ir"(a) \ 102 + : "cc"); /* memory clobber comes from smp_mb() */ \ 103 + \ 104 + return orig; \ 105 + } 106 + 107 + #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed 108 + #define arch_atomic64_fetch_sub_relaxed 
arch_atomic64_fetch_sub_relaxed 109 + 110 + #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed 111 + #define arch_atomic64_fetch_andnot_relaxed arch_atomic64_fetch_andnot_relaxed 112 + #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed 113 + #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed 114 + 115 + #define ATOMIC64_OPS(op, op1, op2) \ 116 + ATOMIC64_OP(op, op1, op2) \ 117 + ATOMIC64_OP_RETURN(op, op1, op2) \ 118 + ATOMIC64_FETCH_OP(op, op1, op2) 119 + 120 + ATOMIC64_OPS(add, add.f, adc) 121 + ATOMIC64_OPS(sub, sub.f, sbc) 122 + 123 + #undef ATOMIC64_OPS 124 + #define ATOMIC64_OPS(op, op1, op2) \ 125 + ATOMIC64_OP(op, op1, op2) \ 126 + ATOMIC64_FETCH_OP(op, op1, op2) 127 + 128 + ATOMIC64_OPS(and, and, and) 129 + ATOMIC64_OPS(andnot, bic, bic) 130 + ATOMIC64_OPS(or, or, or) 131 + ATOMIC64_OPS(xor, xor, xor) 132 + 133 + #define arch_atomic64_andnot arch_atomic64_andnot 134 + 135 + #undef ATOMIC64_OPS 136 + #undef ATOMIC64_FETCH_OP 137 + #undef ATOMIC64_OP_RETURN 138 + #undef ATOMIC64_OP 139 + 140 + static inline s64 141 + arch_atomic64_cmpxchg(atomic64_t *ptr, s64 expected, s64 new) 142 + { 143 + s64 prev; 144 + 145 + smp_mb(); 146 + 147 + __asm__ __volatile__( 148 + "1: llockd %0, [%1] \n" 149 + " brne %L0, %L2, 2f \n" 150 + " brne %H0, %H2, 2f \n" 151 + " scondd %3, [%1] \n" 152 + " bnz 1b \n" 153 + "2: \n" 154 + : "=&r"(prev) 155 + : "r"(ptr), "ir"(expected), "r"(new) 156 + : "cc"); /* memory clobber comes from smp_mb() */ 157 + 158 + smp_mb(); 159 + 160 + return prev; 161 + } 162 + 163 + static inline s64 arch_atomic64_xchg(atomic64_t *ptr, s64 new) 164 + { 165 + s64 prev; 166 + 167 + smp_mb(); 168 + 169 + __asm__ __volatile__( 170 + "1: llockd %0, [%1] \n" 171 + " scondd %2, [%1] \n" 172 + " bnz 1b \n" 173 + "2: \n" 174 + : "=&r"(prev) 175 + : "r"(ptr), "r"(new) 176 + : "cc"); /* memory clobber comes from smp_mb() */ 177 + 178 + smp_mb(); 179 + 180 + return prev; 181 + } 182 + 183 + /** 184 + * 
arch_atomic64_dec_if_positive - decrement by 1 if old value positive 185 + * @v: pointer of type atomic64_t 186 + * 187 + * The function returns the old value of *v minus 1, even if 188 + * the atomic variable, v, was not decremented. 189 + */ 190 + 191 + static inline s64 arch_atomic64_dec_if_positive(atomic64_t *v) 192 + { 193 + s64 val; 194 + 195 + smp_mb(); 196 + 197 + __asm__ __volatile__( 198 + "1: llockd %0, [%1] \n" 199 + " sub.f %L0, %L0, 1 # w0 - 1, set C on borrow\n" 200 + " sub.c %H0, %H0, 1 # if C set, w1 - 1\n" 201 + " brlt %H0, 0, 2f \n" 202 + " scondd %0, [%1] \n" 203 + " bnz 1b \n" 204 + "2: \n" 205 + : "=&r"(val) 206 + : "r"(&v->counter) 207 + : "cc"); /* memory clobber comes from smp_mb() */ 208 + 209 + smp_mb(); 210 + 211 + return val; 212 + } 213 + #define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive 214 + 215 + /** 216 + * arch_atomic64_fetch_add_unless - add unless the number is a given value 217 + * @v: pointer of type atomic64_t 218 + * @a: the amount to add to v... 219 + * @u: ...unless v is equal to u. 220 + * 221 + * Atomically adds @a to @v, if it was not @u. 222 + * Returns the old value of @v 223 + */ 224 + static inline s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) 225 + { 226 + s64 old, temp; 227 + 228 + smp_mb(); 229 + 230 + __asm__ __volatile__( 231 + "1: llockd %0, [%2] \n" 232 + " brne %L0, %L4, 2f # continue to add since v != u \n" 233 + " breq.d %H0, %H4, 3f # return since v == u \n" 234 + "2: \n" 235 + " add.f %L1, %L0, %L3 \n" 236 + " adc %H1, %H0, %H3 \n" 237 + " scondd %1, [%2] \n" 238 + " bnz 1b \n" 239 + "3: \n" 240 + : "=&r"(old), "=&r" (temp) 241 + : "r"(&v->counter), "r"(a), "r"(u) 242 + : "cc"); /* memory clobber comes from smp_mb() */ 243 + 244 + smp_mb(); 245 + 246 + return old; 247 + } 248 + #define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless 249 + 250 + #endif
+4 -184
arch/arc/include/asm/bitops.h
··· 14 14 15 15 #include <linux/types.h> 16 16 #include <linux/compiler.h> 17 - #include <asm/barrier.h> 18 - #ifndef CONFIG_ARC_HAS_LLSC 19 - #include <asm/smp.h> 20 - #endif 21 - 22 - #ifdef CONFIG_ARC_HAS_LLSC 23 - 24 - /* 25 - * Hardware assisted Atomic-R-M-W 26 - */ 27 - 28 - #define BIT_OP(op, c_op, asm_op) \ 29 - static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ 30 - { \ 31 - unsigned int temp; \ 32 - \ 33 - m += nr >> 5; \ 34 - \ 35 - nr &= 0x1f; \ 36 - \ 37 - __asm__ __volatile__( \ 38 - "1: llock %0, [%1] \n" \ 39 - " " #asm_op " %0, %0, %2 \n" \ 40 - " scond %0, [%1] \n" \ 41 - " bnz 1b \n" \ 42 - : "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ 43 - : "r"(m), /* Not "m": llock only supports reg direct addr mode */ \ 44 - "ir"(nr) \ 45 - : "cc"); \ 46 - } 47 - 48 - /* 49 - * Semantically: 50 - * Test the bit 51 - * if clear 52 - * set it and return 0 (old value) 53 - * else 54 - * return 1 (old value). 55 - * 56 - * Since ARC lacks a equivalent h/w primitive, the bit is set unconditionally 57 - * and the old value of bit is returned 58 - */ 59 - #define TEST_N_BIT_OP(op, c_op, asm_op) \ 60 - static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ 61 - { \ 62 - unsigned long old, temp; \ 63 - \ 64 - m += nr >> 5; \ 65 - \ 66 - nr &= 0x1f; \ 67 - \ 68 - /* \ 69 - * Explicit full memory barrier needed before/after as \ 70 - * LLOCK/SCOND themselves don't provide any such smenatic \ 71 - */ \ 72 - smp_mb(); \ 73 - \ 74 - __asm__ __volatile__( \ 75 - "1: llock %0, [%2] \n" \ 76 - " " #asm_op " %1, %0, %3 \n" \ 77 - " scond %1, [%2] \n" \ 78 - " bnz 1b \n" \ 79 - : "=&r"(old), "=&r"(temp) \ 80 - : "r"(m), "ir"(nr) \ 81 - : "cc"); \ 82 - \ 83 - smp_mb(); \ 84 - \ 85 - return (old & (1 << nr)) != 0; \ 86 - } 87 - 88 - #else /* !CONFIG_ARC_HAS_LLSC */ 89 - 90 - /* 91 - * Non hardware assisted Atomic-R-M-W 92 - * Locking would change to irq-disabling only (UP) and spinlocks (SMP) 93 - * 94 - * There's 
"significant" micro-optimization in writing our own variants of 95 - * bitops (over generic variants) 96 - * 97 - * (1) The generic APIs have "signed" @nr while we have it "unsigned" 98 - * This avoids extra code to be generated for pointer arithmatic, since 99 - * is "not sure" that index is NOT -ve 100 - * (2) Utilize the fact that ARCompact bit fidding insn (BSET/BCLR/ASL) etc 101 - * only consider bottom 5 bits of @nr, so NO need to mask them off. 102 - * (GCC Quirk: however for constant @nr we still need to do the masking 103 - * at compile time) 104 - */ 105 - 106 - #define BIT_OP(op, c_op, asm_op) \ 107 - static inline void op##_bit(unsigned long nr, volatile unsigned long *m)\ 108 - { \ 109 - unsigned long temp, flags; \ 110 - m += nr >> 5; \ 111 - \ 112 - /* \ 113 - * spin lock/unlock provide the needed smp_mb() before/after \ 114 - */ \ 115 - bitops_lock(flags); \ 116 - \ 117 - temp = *m; \ 118 - *m = temp c_op (1UL << (nr & 0x1f)); \ 119 - \ 120 - bitops_unlock(flags); \ 121 - } 122 - 123 - #define TEST_N_BIT_OP(op, c_op, asm_op) \ 124 - static inline int test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ 125 - { \ 126 - unsigned long old, flags; \ 127 - m += nr >> 5; \ 128 - \ 129 - bitops_lock(flags); \ 130 - \ 131 - old = *m; \ 132 - *m = old c_op (1UL << (nr & 0x1f)); \ 133 - \ 134 - bitops_unlock(flags); \ 135 - \ 136 - return (old & (1UL << (nr & 0x1f))) != 0; \ 137 - } 138 - 139 - #endif 140 - 141 - /*************************************** 142 - * Non atomic variants 143 - **************************************/ 144 - 145 - #define __BIT_OP(op, c_op, asm_op) \ 146 - static inline void __##op##_bit(unsigned long nr, volatile unsigned long *m) \ 147 - { \ 148 - unsigned long temp; \ 149 - m += nr >> 5; \ 150 - \ 151 - temp = *m; \ 152 - *m = temp c_op (1UL << (nr & 0x1f)); \ 153 - } 154 - 155 - #define __TEST_N_BIT_OP(op, c_op, asm_op) \ 156 - static inline int __test_and_##op##_bit(unsigned long nr, volatile unsigned long *m)\ 157 - 
{ \ 158 - unsigned long old; \ 159 - m += nr >> 5; \ 160 - \ 161 - old = *m; \ 162 - *m = old c_op (1UL << (nr & 0x1f)); \ 163 - \ 164 - return (old & (1UL << (nr & 0x1f))) != 0; \ 165 - } 166 - 167 - #define BIT_OPS(op, c_op, asm_op) \ 168 - \ 169 - /* set_bit(), clear_bit(), change_bit() */ \ 170 - BIT_OP(op, c_op, asm_op) \ 171 - \ 172 - /* test_and_set_bit(), test_and_clear_bit(), test_and_change_bit() */\ 173 - TEST_N_BIT_OP(op, c_op, asm_op) \ 174 - \ 175 - /* __set_bit(), __clear_bit(), __change_bit() */ \ 176 - __BIT_OP(op, c_op, asm_op) \ 177 - \ 178 - /* __test_and_set_bit(), __test_and_clear_bit(), __test_and_change_bit() */\ 179 - __TEST_N_BIT_OP(op, c_op, asm_op) 180 - 181 - BIT_OPS(set, |, bset) 182 - BIT_OPS(clear, & ~, bclr) 183 - BIT_OPS(change, ^, bxor) 184 - 185 - /* 186 - * This routine doesn't need to be atomic. 187 - */ 188 - static inline int 189 - test_bit(unsigned int nr, const volatile unsigned long *addr) 190 - { 191 - unsigned long mask; 192 - 193 - addr += nr >> 5; 194 - 195 - mask = 1UL << (nr & 0x1f); 196 - 197 - return ((mask & *addr) != 0); 198 - } 199 17 200 18 #ifdef CONFIG_ISA_ARCOMPACT 201 19 ··· 114 296 * @result: [1-32] 115 297 * fls(1) = 1, fls(0x80000000) = 32, fls(0) = 0 116 298 */ 117 - static inline __attribute__ ((const)) int fls(unsigned long x) 299 + static inline __attribute__ ((const)) int fls(unsigned int x) 118 300 { 119 301 int n; 120 302 ··· 141 323 * ffs = Find First Set in word (LSB to MSB) 142 324 * @result: [1-32], 0 if all 0's 143 325 */ 144 - static inline __attribute__ ((const)) int ffs(unsigned long x) 326 + static inline __attribute__ ((const)) int ffs(unsigned int x) 145 327 { 146 328 int n; 147 329 ··· 186 368 #include <asm-generic/bitops/fls64.h> 187 369 #include <asm-generic/bitops/sched.h> 188 370 #include <asm-generic/bitops/lock.h> 371 + #include <asm-generic/bitops/atomic.h> 372 + #include <asm-generic/bitops/non-atomic.h> 189 373 190 374 #include <asm-generic/bitops/find.h> 191 375 #include 
<asm-generic/bitops/le.h>
-4
arch/arc/include/asm/cache.h
··· 62 62 #define ARCH_SLAB_MINALIGN 8 63 63 #endif 64 64 65 - extern void arc_cache_init(void); 66 - extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); 67 - extern void read_decode_cache_bcr(void); 68 - 69 65 extern int ioc_enable; 70 66 extern unsigned long perip_base, perip_end; 71 67
+115 -130
arch/arc/include/asm/cmpxchg.h
··· 6 6 #ifndef __ASM_ARC_CMPXCHG_H 7 7 #define __ASM_ARC_CMPXCHG_H 8 8 9 + #include <linux/build_bug.h> 9 10 #include <linux/types.h> 10 11 11 12 #include <asm/barrier.h> ··· 14 13 15 14 #ifdef CONFIG_ARC_HAS_LLSC 16 15 17 - static inline unsigned long 18 - __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) 19 - { 20 - unsigned long prev; 21 - 22 - /* 23 - * Explicit full memory barrier needed before/after as 24 - * LLOCK/SCOND themselves don't provide any such semantics 25 - */ 26 - smp_mb(); 27 - 28 - __asm__ __volatile__( 29 - "1: llock %0, [%1] \n" 30 - " brne %0, %2, 2f \n" 31 - " scond %3, [%1] \n" 32 - " bnz 1b \n" 33 - "2: \n" 34 - : "=&r"(prev) /* Early clobber, to prevent reg reuse */ 35 - : "r"(ptr), /* Not "m": llock only supports reg direct addr mode */ 36 - "ir"(expected), 37 - "r"(new) /* can't be "ir". scond can't take LIMM for "b" */ 38 - : "cc", "memory"); /* so that gcc knows memory is being written here */ 39 - 40 - smp_mb(); 41 - 42 - return prev; 43 - } 44 - 45 - #else /* !CONFIG_ARC_HAS_LLSC */ 46 - 47 - static inline unsigned long 48 - __cmpxchg(volatile void *ptr, unsigned long expected, unsigned long new) 49 - { 50 - unsigned long flags; 51 - int prev; 52 - volatile unsigned long *p = ptr; 53 - 54 - /* 55 - * spin lock/unlock provide the needed smp_mb() before/after 56 - */ 57 - atomic_ops_lock(flags); 58 - prev = *p; 59 - if (prev == expected) 60 - *p = new; 61 - atomic_ops_unlock(flags); 62 - return prev; 63 - } 64 - 65 - #endif 66 - 67 - #define arch_cmpxchg(ptr, o, n) ({ \ 68 - (typeof(*(ptr)))__cmpxchg((ptr), \ 69 - (unsigned long)(o), \ 70 - (unsigned long)(n)); \ 16 + /* 17 + * if (*ptr == @old) 18 + * *ptr = @new 19 + */ 20 + #define __cmpxchg(ptr, old, new) \ 21 + ({ \ 22 + __typeof__(*(ptr)) _prev; \ 23 + \ 24 + __asm__ __volatile__( \ 25 + "1: llock %0, [%1] \n" \ 26 + " brne %0, %2, 2f \n" \ 27 + " scond %3, [%1] \n" \ 28 + " bnz 1b \n" \ 29 + "2: \n" \ 30 + : "=&r"(_prev) /* Early clobber prevent reg 
reuse */ \ 31 + : "r"(ptr), /* Not "m": llock only supports reg */ \ 32 + "ir"(old), \ 33 + "r"(new) /* Not "ir": scond can't take LIMM */ \ 34 + : "cc", \ 35 + "memory"); /* gcc knows memory is clobbered */ \ 36 + \ 37 + _prev; \ 71 38 }) 72 39 73 - /* 74 - * atomic_cmpxchg is same as cmpxchg 75 - * LLSC: only different in data-type, semantics are exactly same 76 - * !LLSC: cmpxchg() has to use an external lock atomic_ops_lock to guarantee 77 - * semantics, and this lock also happens to be used by atomic_*() 78 - */ 79 - #define arch_atomic_cmpxchg(v, o, n) ((int)arch_cmpxchg(&((v)->counter), (o), (n))) 80 - 81 - 82 - /* 83 - * xchg (reg with memory) based on "Native atomic" EX insn 84 - */ 85 - static inline unsigned long __xchg(unsigned long val, volatile void *ptr, 86 - int size) 87 - { 88 - extern unsigned long __xchg_bad_pointer(void); 89 - 90 - switch (size) { 91 - case 4: 92 - smp_mb(); 93 - 94 - __asm__ __volatile__( 95 - " ex %0, [%1] \n" 96 - : "+r"(val) 97 - : "r"(ptr) 98 - : "memory"); 99 - 100 - smp_mb(); 101 - 102 - return val; 103 - } 104 - return __xchg_bad_pointer(); 105 - } 106 - 107 - #define _xchg(ptr, with) ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), \ 108 - sizeof(*(ptr)))) 109 - 110 - /* 111 - * xchg() maps directly to ARC EX instruction which guarantees atomicity. 112 - * However in !LLSC config, it also needs to be use @atomic_ops_lock spinlock 113 - * due to a subtle reason: 114 - * - For !LLSC, cmpxchg() needs to use that lock (see above) and there is lot 115 - * of kernel code which calls xchg()/cmpxchg() on same data (see llist.h) 116 - * Hence xchg() needs to follow same locking rules. 
117 - * 118 - * Technically the lock is also needed for UP (boils down to irq save/restore) 119 - * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to 120 - * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() 121 - * Other way around, xchg is one instruction anyways, so can't be interrupted 122 - * as such 123 - */ 124 - 125 - #if !defined(CONFIG_ARC_HAS_LLSC) && defined(CONFIG_SMP) 126 - 127 - #define arch_xchg(ptr, with) \ 128 - ({ \ 129 - unsigned long flags; \ 130 - typeof(*(ptr)) old_val; \ 131 - \ 132 - atomic_ops_lock(flags); \ 133 - old_val = _xchg(ptr, with); \ 134 - atomic_ops_unlock(flags); \ 135 - old_val; \ 40 + #define arch_cmpxchg_relaxed(ptr, old, new) \ 41 + ({ \ 42 + __typeof__(ptr) _p_ = (ptr); \ 43 + __typeof__(*(ptr)) _o_ = (old); \ 44 + __typeof__(*(ptr)) _n_ = (new); \ 45 + __typeof__(*(ptr)) _prev_; \ 46 + \ 47 + switch(sizeof((_p_))) { \ 48 + case 4: \ 49 + _prev_ = __cmpxchg(_p_, _o_, _n_); \ 50 + break; \ 51 + default: \ 52 + BUILD_BUG(); \ 53 + } \ 54 + _prev_; \ 136 55 }) 137 56 138 57 #else 139 58 140 - #define arch_xchg(ptr, with) _xchg(ptr, with) 59 + #define arch_cmpxchg(ptr, old, new) \ 60 + ({ \ 61 + volatile __typeof__(ptr) _p_ = (ptr); \ 62 + __typeof__(*(ptr)) _o_ = (old); \ 63 + __typeof__(*(ptr)) _n_ = (new); \ 64 + __typeof__(*(ptr)) _prev_; \ 65 + unsigned long __flags; \ 66 + \ 67 + BUILD_BUG_ON(sizeof(_p_) != 4); \ 68 + \ 69 + /* \ 70 + * spin lock/unlock provide the needed smp_mb() before/after \ 71 + */ \ 72 + atomic_ops_lock(__flags); \ 73 + _prev_ = *_p_; \ 74 + if (_prev_ == _o_) \ 75 + *_p_ = _n_; \ 76 + atomic_ops_unlock(__flags); \ 77 + _prev_; \ 78 + }) 141 79 142 80 #endif 143 81 144 82 /* 145 - * "atomic" variant of xchg() 146 - * REQ: It needs to follow the same serialization rules as other atomic_xxx() 147 - * Since xchg() doesn't always do that, it would seem that following definition 148 - * is incorrect. 
But here's the rationale: 149 - * SMP : Even xchg() takes the atomic_ops_lock, so OK. 150 - * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC 151 - * is natively "SMP safe", no serialization required). 152 - * UP : other atomics disable IRQ, so no way a difft ctxt atomic_xchg() 153 - * could clobber them. atomic_xchg() itself would be 1 insn, so it 154 - * can't be clobbered by others. Thus no serialization required when 155 - * atomic_xchg is involved. 83 + * xchg 156 84 */ 157 - #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) 85 + #ifdef CONFIG_ARC_HAS_LLSC 86 + 87 + #define __xchg(ptr, val) \ 88 + ({ \ 89 + __asm__ __volatile__( \ 90 + " ex %0, [%1] \n" /* set new value */ \ 91 + : "+r"(val) \ 92 + : "r"(ptr) \ 93 + : "memory"); \ 94 + _val_; /* get old value */ \ 95 + }) 96 + 97 + #define arch_xchg_relaxed(ptr, val) \ 98 + ({ \ 99 + __typeof__(ptr) _p_ = (ptr); \ 100 + __typeof__(*(ptr)) _val_ = (val); \ 101 + \ 102 + switch(sizeof(*(_p_))) { \ 103 + case 4: \ 104 + _val_ = __xchg(_p_, _val_); \ 105 + break; \ 106 + default: \ 107 + BUILD_BUG(); \ 108 + } \ 109 + _val_; \ 110 + }) 111 + 112 + #else /* !CONFIG_ARC_HAS_LLSC */ 113 + 114 + /* 115 + * EX instructions is baseline and present in !LLSC too. But in this 116 + * regime it still needs use @atomic_ops_lock spinlock to allow interop 117 + * with cmpxchg() which uses spinlock in !LLSC 118 + * (llist.h use xchg and cmpxchg on sama data) 119 + */ 120 + 121 + #define arch_xchg(ptr, val) \ 122 + ({ \ 123 + __typeof__(ptr) _p_ = (ptr); \ 124 + __typeof__(*(ptr)) _val_ = (val); \ 125 + \ 126 + unsigned long __flags; \ 127 + \ 128 + atomic_ops_lock(__flags); \ 129 + \ 130 + __asm__ __volatile__( \ 131 + " ex %0, [%1] \n" \ 132 + : "+r"(_val_) \ 133 + : "r"(_p_) \ 134 + : "memory"); \ 135 + \ 136 + atomic_ops_unlock(__flags); \ 137 + _val_; \ 138 + }) 139 + 140 + #endif 158 141 159 142 #endif
-8
arch/arc/include/asm/entry-compact.h
··· 126 126 * to be saved again on kernel mode stack, as part of pt_regs. 127 127 *-------------------------------------------------------------*/ 128 128 .macro PROLOG_FREEUP_REG reg, mem 129 - #ifndef ARC_USE_SCRATCH_REG 130 - sr \reg, [ARC_REG_SCRATCH_DATA0] 131 - #else 132 129 st \reg, [\mem] 133 - #endif 134 130 .endm 135 131 136 132 .macro PROLOG_RESTORE_REG reg, mem 137 - #ifndef ARC_USE_SCRATCH_REG 138 - lr \reg, [ARC_REG_SCRATCH_DATA0] 139 - #else 140 133 ld \reg, [\mem] 141 - #endif 142 134 .endm 143 135 144 136 /*--------------------------------------------------------------
-8
arch/arc/include/asm/hugepage.h
··· 58 58 extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, 59 59 pmd_t *pmd); 60 60 61 - /* Generic variants assume pgtable_t is struct page *, hence need for these */ 62 - #define __HAVE_ARCH_PGTABLE_DEPOSIT 63 - extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 64 - pgtable_t pgtable); 65 - 66 - #define __HAVE_ARCH_PGTABLE_WITHDRAW 67 - extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); 68 - 69 61 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE 70 62 extern void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, 71 63 unsigned long end);
+103
arch/arc/include/asm/mmu-arcv2.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2004, 2007-2010, 2011-2012, 2019-20 Synopsys, Inc. (www.synopsys.com) 4 + * 5 + * MMUv3 (arc700) / MMUv4 (archs) are software page walked and software managed. 6 + * This file contains the TLB access registers and commands 7 + */ 8 + 9 + #ifndef _ASM_ARC_MMU_ARCV2_H 10 + #define _ASM_ARC_MMU_ARCV2_H 11 + 12 + /* 13 + * TLB Management regs 14 + */ 15 + #define ARC_REG_MMU_BCR 0x06f 16 + 17 + #ifdef CONFIG_ARC_MMU_V3 18 + #define ARC_REG_TLBPD0 0x405 19 + #define ARC_REG_TLBPD1 0x406 20 + #define ARC_REG_TLBPD1HI 0 /* Dummy: allows common code */ 21 + #define ARC_REG_TLBINDEX 0x407 22 + #define ARC_REG_TLBCOMMAND 0x408 23 + #define ARC_REG_PID 0x409 24 + #define ARC_REG_SCRATCH_DATA0 0x418 25 + #else 26 + #define ARC_REG_TLBPD0 0x460 27 + #define ARC_REG_TLBPD1 0x461 28 + #define ARC_REG_TLBPD1HI 0x463 29 + #define ARC_REG_TLBINDEX 0x464 30 + #define ARC_REG_TLBCOMMAND 0x465 31 + #define ARC_REG_PID 0x468 32 + #define ARC_REG_SCRATCH_DATA0 0x46c 33 + #endif 34 + 35 + /* Bits in MMU PID reg */ 36 + #define __TLB_ENABLE (1 << 31) 37 + #define __PROG_ENABLE (1 << 30) 38 + #define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) 39 + 40 + /* Bits in TLB Index reg */ 41 + #define TLB_LKUP_ERR 0x80000000 42 + 43 + #ifdef CONFIG_ARC_MMU_V3 44 + #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) 45 + #else 46 + #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) 47 + #endif 48 + 49 + /* 50 + * TLB Commands 51 + */ 52 + #define TLBWrite 0x1 53 + #define TLBRead 0x2 54 + #define TLBGetIndex 0x3 55 + #define TLBProbe 0x4 56 + #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ 57 + #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ 58 + 59 + #ifdef CONFIG_ARC_MMU_V4 60 + #define TLBInsertEntry 0x7 61 + #define TLBDeleteEntry 0x8 62 + #endif 63 + 64 + /* Masks for actual TLB "PD"s */ 65 + #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) 66 + #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE 
| _PAGE_READ) 67 + 68 + #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) 69 + 70 + #ifndef __ASSEMBLY__ 71 + 72 + struct mm_struct; 73 + extern int pae40_exist_but_not_enab(void); 74 + 75 + static inline int is_pae40_enabled(void) 76 + { 77 + return IS_ENABLED(CONFIG_ARC_HAS_PAE40); 78 + } 79 + 80 + static inline void mmu_setup_asid(struct mm_struct *mm, unsigned long asid) 81 + { 82 + write_aux_reg(ARC_REG_PID, asid | MMU_ENABLE); 83 + } 84 + 85 + static inline void mmu_setup_pgd(struct mm_struct *mm, void *pgd) 86 + { 87 + /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ 88 + #ifdef CONFIG_ISA_ARCV2 89 + write_aux_reg(ARC_REG_SCRATCH_DATA0, (unsigned int)pgd); 90 + #endif 91 + } 92 + 93 + #else 94 + 95 + .macro ARC_MMU_REENABLE reg 96 + lr \reg, [ARC_REG_PID] 97 + or \reg, \reg, MMU_ENABLE 98 + sr \reg, [ARC_REG_PID] 99 + .endm 100 + 101 + #endif /* !__ASSEMBLY__ */ 102 + 103 + #endif
+2 -85
arch/arc/include/asm/mmu.h
··· 7 7 #define _ASM_ARC_MMU_H 8 8 9 9 #ifndef __ASSEMBLY__ 10 + 10 11 #include <linux/threads.h> /* NR_CPUS */ 11 - #endif 12 - 13 - #if defined(CONFIG_ARC_MMU_V1) 14 - #define CONFIG_ARC_MMU_VER 1 15 - #elif defined(CONFIG_ARC_MMU_V2) 16 - #define CONFIG_ARC_MMU_VER 2 17 - #elif defined(CONFIG_ARC_MMU_V3) 18 - #define CONFIG_ARC_MMU_VER 3 19 - #elif defined(CONFIG_ARC_MMU_V4) 20 - #define CONFIG_ARC_MMU_VER 4 21 - #endif 22 - 23 - /* MMU Management regs */ 24 - #define ARC_REG_MMU_BCR 0x06f 25 - #if (CONFIG_ARC_MMU_VER < 4) 26 - #define ARC_REG_TLBPD0 0x405 27 - #define ARC_REG_TLBPD1 0x406 28 - #define ARC_REG_TLBPD1HI 0 /* Dummy: allows code sharing with ARC700 */ 29 - #define ARC_REG_TLBINDEX 0x407 30 - #define ARC_REG_TLBCOMMAND 0x408 31 - #define ARC_REG_PID 0x409 32 - #define ARC_REG_SCRATCH_DATA0 0x418 33 - #else 34 - #define ARC_REG_TLBPD0 0x460 35 - #define ARC_REG_TLBPD1 0x461 36 - #define ARC_REG_TLBPD1HI 0x463 37 - #define ARC_REG_TLBINDEX 0x464 38 - #define ARC_REG_TLBCOMMAND 0x465 39 - #define ARC_REG_PID 0x468 40 - #define ARC_REG_SCRATCH_DATA0 0x46c 41 - #endif 42 - 43 - #if defined(CONFIG_ISA_ARCV2) || !defined(CONFIG_SMP) 44 - #define ARC_USE_SCRATCH_REG 45 - #endif 46 - 47 - /* Bits in MMU PID register */ 48 - #define __TLB_ENABLE (1 << 31) 49 - #define __PROG_ENABLE (1 << 30) 50 - #define MMU_ENABLE (__TLB_ENABLE | __PROG_ENABLE) 51 - 52 - /* Error code if probe fails */ 53 - #define TLB_LKUP_ERR 0x80000000 54 - 55 - #if (CONFIG_ARC_MMU_VER < 4) 56 - #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x00000001) 57 - #else 58 - #define TLB_DUP_ERR (TLB_LKUP_ERR | 0x40000000) 59 - #endif 60 - 61 - /* TLB Commands */ 62 - #define TLBWrite 0x1 63 - #define TLBRead 0x2 64 - #define TLBGetIndex 0x3 65 - #define TLBProbe 0x4 66 - 67 - #if (CONFIG_ARC_MMU_VER >= 2) 68 - #define TLBWriteNI 0x5 /* write JTLB without inv uTLBs */ 69 - #define TLBIVUTLB 0x6 /* explicitly inv uTLBs */ 70 - #else 71 - #define TLBWriteNI TLBWrite /* Not present in hardware, fallback */ 72 
- #endif 73 - 74 - #if (CONFIG_ARC_MMU_VER >= 4) 75 - #define TLBInsertEntry 0x7 76 - #define TLBDeleteEntry 0x8 77 - #endif 78 - 79 - #ifndef __ASSEMBLY__ 80 12 81 13 typedef struct { 82 14 unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */ 83 15 } mm_context_t; 84 16 85 - #ifdef CONFIG_ARC_DBG_TLB_PARANOIA 86 - void tlb_paranoid_check(unsigned int mm_asid, unsigned long address); 87 - #else 88 - #define tlb_paranoid_check(a, b) 89 17 #endif 90 18 91 - void arc_mmu_init(void); 92 - extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); 93 - void read_decode_mmu_bcr(void); 94 - 95 - static inline int is_pae40_enabled(void) 96 - { 97 - return IS_ENABLED(CONFIG_ARC_HAS_PAE40); 98 - } 99 - 100 - extern int pae40_exist_but_not_enab(void); 101 - 102 - #endif /* !__ASSEMBLY__ */ 19 + #include <asm/mmu-arcv2.h> 103 20 104 21 #endif
+13 -15
arch/arc/include/asm/mmu_context.h
··· 15 15 #ifndef _ASM_ARC_MMU_CONTEXT_H 16 16 #define _ASM_ARC_MMU_CONTEXT_H 17 17 18 - #include <asm/arcregs.h> 19 - #include <asm/tlb.h> 20 18 #include <linux/sched/mm.h> 21 19 20 + #include <asm/tlb.h> 22 21 #include <asm-generic/mm_hooks.h> 23 22 24 - /* ARC700 ASID Management 23 + /* ARC ASID Management 25 24 * 26 - * ARC MMU provides 8-bit ASID (0..255) to TAG TLB entries, allowing entries 27 - * with same vaddr (different tasks) to co-exit. This provides for 28 - * "Fast Context Switch" i.e. no TLB flush on ctxt-switch 25 + * MMU tags TLBs with an 8-bit ASID, avoiding need to flush the TLB on 26 + * context-switch. 29 27 * 30 - * Linux assigns each task a unique ASID. A simple round-robin allocation 31 - * of H/w ASID is done using software tracker @asid_cpu. 32 - * When it reaches max 255, the allocation cycle starts afresh by flushing 33 - * the entire TLB and wrapping ASID back to zero. 28 + * ASID is managed per cpu, so task threads across CPUs can have different 29 + * ASID. Global ASID management is needed if hardware supports TLB shootdown 30 + * and/or shared TLB across cores, which ARC doesn't. 31 + * 32 + * Each task is assigned unique ASID, with a simple round-robin allocator 33 + * tracked in @asid_cpu. When 8-bit value rolls over,a new cycle is started 34 + * over from 0, and TLB is flushed 34 35 * 35 36 * A new allocation cycle, post rollover, could potentially reassign an ASID 36 37 * to a different task. Thus the rule is to refresh the ASID in a new cycle. 
··· 94 93 asid_mm(mm, cpu) = asid_cpu(cpu); 95 94 96 95 set_hw: 97 - write_aux_reg(ARC_REG_PID, hw_pid(mm, cpu) | MMU_ENABLE); 96 + mmu_setup_asid(mm, hw_pid(mm, cpu)); 98 97 99 98 local_irq_restore(flags); 100 99 } ··· 147 146 */ 148 147 cpumask_set_cpu(cpu, mm_cpumask(next)); 149 148 150 - #ifdef ARC_USE_SCRATCH_REG 151 - /* PGD cached in MMU reg to avoid 3 mem lookups: task->mm->pgd */ 152 - write_aux_reg(ARC_REG_SCRATCH_DATA0, next->pgd); 153 - #endif 149 + mmu_setup_pgd(next, next->pgd); 154 150 155 151 get_new_mmu_context(next); 156 152 }
+38 -40
arch/arc/include/asm/page.h
··· 34 34 unsigned long u_vaddr, struct vm_area_struct *vma); 35 35 void clear_user_page(void *to, unsigned long u_vaddr, struct page *page); 36 36 37 - #undef STRICT_MM_TYPECHECKS 37 + typedef struct { 38 + unsigned long pgd; 39 + } pgd_t; 38 40 39 - #ifdef STRICT_MM_TYPECHECKS 40 - /* 41 - * These are used to make use of C type-checking.. 42 - */ 41 + #define pgd_val(x) ((x).pgd) 42 + #define __pgd(x) ((pgd_t) { (x) }) 43 + 44 + #if CONFIG_PGTABLE_LEVELS > 3 45 + 46 + typedef struct { 47 + unsigned long pud; 48 + } pud_t; 49 + 50 + #define pud_val(x) ((x).pud) 51 + #define __pud(x) ((pud_t) { (x) }) 52 + 53 + #endif 54 + 55 + #if CONFIG_PGTABLE_LEVELS > 2 56 + 57 + typedef struct { 58 + unsigned long pmd; 59 + } pmd_t; 60 + 61 + #define pmd_val(x) ((x).pmd) 62 + #define __pmd(x) ((pmd_t) { (x) }) 63 + 64 + #endif 65 + 43 66 typedef struct { 44 67 #ifdef CONFIG_ARC_HAS_PAE40 45 68 unsigned long long pte; ··· 70 47 unsigned long pte; 71 48 #endif 72 49 } pte_t; 73 - typedef struct { 74 - unsigned long pgd; 75 - } pgd_t; 50 + 51 + #define pte_val(x) ((x).pte) 52 + #define __pte(x) ((pte_t) { (x) }) 53 + 76 54 typedef struct { 77 55 unsigned long pgprot; 78 56 } pgprot_t; 79 57 80 - #define pte_val(x) ((x).pte) 81 - #define pgd_val(x) ((x).pgd) 82 - #define pgprot_val(x) ((x).pgprot) 58 + #define pgprot_val(x) ((x).pgprot) 59 + #define __pgprot(x) ((pgprot_t) { (x) }) 60 + #define pte_pgprot(x) __pgprot(pte_val(x)) 83 61 84 - #define __pte(x) ((pte_t) { (x) }) 85 - #define __pgd(x) ((pgd_t) { (x) }) 86 - #define __pgprot(x) ((pgprot_t) { (x) }) 87 - 88 - #define pte_pgprot(x) __pgprot(pte_val(x)) 89 - 90 - #else /* !STRICT_MM_TYPECHECKS */ 91 - 92 - #ifdef CONFIG_ARC_HAS_PAE40 93 - typedef unsigned long long pte_t; 94 - #else 95 - typedef unsigned long pte_t; 96 - #endif 97 - typedef unsigned long pgd_t; 98 - typedef unsigned long pgprot_t; 99 - 100 - #define pte_val(x) (x) 101 - #define pgd_val(x) (x) 102 - #define pgprot_val(x) (x) 103 - #define __pte(x) (x) 104 - 
#define __pgd(x) (x) 105 - #define __pgprot(x) (x) 106 - #define pte_pgprot(x) (x) 107 - 108 - #endif 109 - 110 - typedef pte_t * pgtable_t; 62 + typedef struct page *pgtable_t; 111 63 112 64 /* 113 65 * Use virt_to_pfn with caution: ··· 120 122 * virt here means link-address/program-address as embedded in object code. 121 123 * And for ARC, link-addr = physical address 122 124 */ 123 - #define __pa(vaddr) ((unsigned long)(vaddr)) 124 - #define __va(paddr) ((void *)((unsigned long)(paddr))) 125 + #define __pa(vaddr) ((unsigned long)(vaddr)) 126 + #define __va(paddr) ((void *)((unsigned long)(paddr))) 125 127 126 128 #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) 127 129 #define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
+25 -60
arch/arc/include/asm/pgalloc.h
··· 31 31 32 32 #include <linux/mm.h> 33 33 #include <linux/log2.h> 34 + #include <asm-generic/pgalloc.h> 34 35 35 36 static inline void 36 37 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) 37 38 { 38 - pmd_set(pmd, pte); 39 + /* 40 + * The cast to long below is OK in 32-bit PAE40 regime with long long pte 41 + * Despite "wider" pte, the pte table needs to be in non-PAE low memory 42 + * as all higher levels can only hold long pointers. 43 + * 44 + * The cast itself is needed given simplistic definition of set_pmd() 45 + */ 46 + set_pmd(pmd, __pmd((unsigned long)pte)); 39 47 } 40 48 41 - static inline void 42 - pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep) 49 + static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte_page) 43 50 { 44 - pmd_set(pmd, (pte_t *) ptep); 45 - } 46 - 47 - static inline int __get_order_pgd(void) 48 - { 49 - return get_order(PTRS_PER_PGD * sizeof(pgd_t)); 51 + set_pmd(pmd, __pmd((unsigned long)page_address(pte_page))); 50 52 } 51 53 52 54 static inline pgd_t *pgd_alloc(struct mm_struct *mm) 53 55 { 54 - int num, num2; 55 - pgd_t *ret = (pgd_t *) __get_free_pages(GFP_KERNEL, __get_order_pgd()); 56 + pgd_t *ret = (pgd_t *) __get_free_page(GFP_KERNEL); 56 57 57 58 if (ret) { 59 + int num, num2; 58 60 num = USER_PTRS_PER_PGD + USER_KERNEL_GUTTER / PGDIR_SIZE; 59 61 memzero(ret, num * sizeof(pgd_t)); 60 62 ··· 70 68 return ret; 71 69 } 72 70 73 - static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) 71 + #if CONFIG_PGTABLE_LEVELS > 3 72 + 73 + static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) 74 74 { 75 - free_pages((unsigned long)pgd, __get_order_pgd()); 75 + set_p4d(p4dp, __p4d((unsigned long)pudp)); 76 76 } 77 77 78 + #define __pud_free_tlb(tlb, pmd, addr) pud_free((tlb)->mm, pmd) 78 79 79 - /* 80 - * With software-only page-tables, addr-split for traversal is tweakable and 81 - * that directly governs how big tables would be at each level. 
82 - * Further, the MMU page size is configurable. 83 - * Thus we need to programatically assert the size constraint 84 - * All of this is const math, allowing gcc to do constant folding/propagation. 85 - */ 80 + #endif 86 81 87 - static inline int __get_order_pte(void) 82 + #if CONFIG_PGTABLE_LEVELS > 2 83 + 84 + static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) 88 85 { 89 - return get_order(PTRS_PER_PTE * sizeof(pte_t)); 86 + set_pud(pudp, __pud((unsigned long)pmdp)); 90 87 } 91 88 92 - static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) 93 - { 94 - pte_t *pte; 89 + #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) 95 90 96 - pte = (pte_t *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, 97 - __get_order_pte()); 98 - 99 - return pte; 100 - } 101 - 102 - static inline pgtable_t 103 - pte_alloc_one(struct mm_struct *mm) 104 - { 105 - pgtable_t pte_pg; 106 - struct page *page; 107 - 108 - pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL, __get_order_pte()); 109 - if (!pte_pg) 110 - return 0; 111 - memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t)); 112 - page = virt_to_page(pte_pg); 113 - if (!pgtable_pte_page_ctor(page)) { 114 - __free_page(page); 115 - return 0; 116 - } 117 - 118 - return pte_pg; 119 - } 120 - 121 - static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) 122 - { 123 - free_pages((unsigned long)pte, __get_order_pte()); /* takes phy addr */ 124 - } 125 - 126 - static inline void pte_free(struct mm_struct *mm, pgtable_t ptep) 127 - { 128 - pgtable_pte_page_dtor(virt_to_page(ptep)); 129 - free_pages((unsigned long)ptep, __get_order_pte()); 130 - } 91 + #endif 131 92 132 93 #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) 133 94
+149
arch/arc/include/asm/pgtable-bits-arcv2.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 4 + */ 5 + 6 + /* 7 + * page table flags for software walked/managed MMUv3 (ARC700) and MMUv4 (HS) 8 + * There correspond to the corresponding bits in the TLB 9 + */ 10 + 11 + #ifndef _ASM_ARC_PGTABLE_BITS_ARCV2_H 12 + #define _ASM_ARC_PGTABLE_BITS_ARCV2_H 13 + 14 + #ifdef CONFIG_ARC_CACHE_PAGES 15 + #define _PAGE_CACHEABLE (1 << 0) /* Cached (H) */ 16 + #else 17 + #define _PAGE_CACHEABLE 0 18 + #endif 19 + 20 + #define _PAGE_EXECUTE (1 << 1) /* User Execute (H) */ 21 + #define _PAGE_WRITE (1 << 2) /* User Write (H) */ 22 + #define _PAGE_READ (1 << 3) /* User Read (H) */ 23 + #define _PAGE_ACCESSED (1 << 4) /* Accessed (s) */ 24 + #define _PAGE_DIRTY (1 << 5) /* Modified (s) */ 25 + #define _PAGE_SPECIAL (1 << 6) 26 + #define _PAGE_GLOBAL (1 << 8) /* ASID agnostic (H) */ 27 + #define _PAGE_PRESENT (1 << 9) /* PTE/TLB Valid (H) */ 28 + 29 + #ifdef CONFIG_ARC_MMU_V4 30 + #define _PAGE_HW_SZ (1 << 10) /* Normal/super (H) */ 31 + #else 32 + #define _PAGE_HW_SZ 0 33 + #endif 34 + 35 + /* Defaults for every user page */ 36 + #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) 37 + 38 + /* Set of bits not changed in pte_modify */ 39 + #define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ 40 + _PAGE_SPECIAL) 41 + 42 + /* More Abbrevaited helpers */ 43 + #define PAGE_U_NONE __pgprot(___DEF) 44 + #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) 45 + #define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) 46 + #define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) 47 + #define PAGE_U_X_W_R __pgprot(___DEF \ 48 + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) 49 + #define PAGE_KERNEL __pgprot(___DEF | _PAGE_GLOBAL \ 50 + | _PAGE_READ | _PAGE_WRITE | _PAGE_EXECUTE) 51 + 52 + #define PAGE_SHARED PAGE_U_W_R 53 + 54 + #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) 55 + 56 + /* 57 + * 
Mapping of vm_flags (Generic VM) to PTE flags (arch specific) 58 + * 59 + * Certain cases have 1:1 mapping 60 + * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED 61 + * which directly corresponds to PAGE_U_X_R 62 + * 63 + * Other rules which cause the divergence from 1:1 mapping 64 + * 65 + * 1. Although ARC700 can do exclusive execute/write protection (meaning R 66 + * can be tracked independet of X/W unlike some other CPUs), still to 67 + * keep things consistent with other archs: 68 + * -Write implies Read: W => R 69 + * -Execute implies Read: X => R 70 + * 71 + * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W 72 + * This is to enable COW mechanism 73 + */ 74 + /* xwr */ 75 + #define __P000 PAGE_U_NONE 76 + #define __P001 PAGE_U_R 77 + #define __P010 PAGE_U_R /* Pvt-W => !W */ 78 + #define __P011 PAGE_U_R /* Pvt-W => !W */ 79 + #define __P100 PAGE_U_X_R /* X => R */ 80 + #define __P101 PAGE_U_X_R 81 + #define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ 82 + #define __P111 PAGE_U_X_R /* Pvt-W => !W */ 83 + 84 + #define __S000 PAGE_U_NONE 85 + #define __S001 PAGE_U_R 86 + #define __S010 PAGE_U_W_R /* W => R */ 87 + #define __S011 PAGE_U_W_R 88 + #define __S100 PAGE_U_X_R /* X => R */ 89 + #define __S101 PAGE_U_X_R 90 + #define __S110 PAGE_U_X_W_R /* X => R */ 91 + #define __S111 PAGE_U_X_W_R 92 + 93 + #ifndef __ASSEMBLY__ 94 + 95 + #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) 96 + #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) 97 + #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) 98 + #define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) 99 + 100 + #define PTE_BIT_FUNC(fn, op) \ 101 + static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } 102 + 103 + PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); 104 + PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); 105 + PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); 106 + PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); 107 + PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); 108 + 
PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); 109 + PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); 110 + PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); 111 + PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); 112 + 113 + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 114 + { 115 + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); 116 + } 117 + 118 + static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 119 + pte_t *ptep, pte_t pteval) 120 + { 121 + set_pte(ptep, pteval); 122 + } 123 + 124 + void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 125 + pte_t *ptep); 126 + 127 + /* Encode swap {type,off} tuple into PTE 128 + * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that 129 + * PAGE_PRESENT is zero in a PTE holding swap "identifier" 130 + */ 131 + #define __swp_entry(type, off) ((swp_entry_t) \ 132 + { ((type) & 0x1f) | ((off) << 13) }) 133 + 134 + /* Decode a PTE containing swap "identifier "into constituents */ 135 + #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) 136 + #define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) 137 + 138 + #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 139 + #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 140 + 141 + #define kern_addr_valid(addr) (1) 142 + 143 + #ifdef CONFIG_TRANSPARENT_HUGEPAGE 144 + #include <asm/hugepage.h> 145 + #endif 146 + 147 + #endif /* __ASSEMBLY__ */ 148 + 149 + #endif
+189
arch/arc/include/asm/pgtable-levels.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * Copyright (C) 2020 Synopsys, Inc. (www.synopsys.com) 4 + */ 5 + 6 + /* 7 + * Helpers for implemenintg paging levels 8 + */ 9 + 10 + #ifndef _ASM_ARC_PGTABLE_LEVELS_H 11 + #define _ASM_ARC_PGTABLE_LEVELS_H 12 + 13 + #if CONFIG_PGTABLE_LEVELS == 2 14 + 15 + /* 16 + * 2 level paging setup for software walked MMUv3 (ARC700) and MMUv4 (HS) 17 + * 18 + * [31] 32 bit virtual address [0] 19 + * ------------------------------------------------------- 20 + * | | <---------- PGDIR_SHIFT ----------> | 21 + * | | | <-- PAGE_SHIFT --> | 22 + * ------------------------------------------------------- 23 + * | | | 24 + * | | --> off in page frame 25 + * | ---> index into Page Table 26 + * ----> index into Page Directory 27 + * 28 + * Given software walk, the vaddr split is arbitrary set to 11:8:13 29 + * However enabling of super page in a 2 level regime pegs PGDIR_SHIFT to 30 + * super page size. 31 + */ 32 + 33 + #if defined(CONFIG_ARC_HUGEPAGE_16M) 34 + #define PGDIR_SHIFT 24 35 + #elif defined(CONFIG_ARC_HUGEPAGE_2M) 36 + #define PGDIR_SHIFT 21 37 + #else 38 + /* 39 + * No Super page case 40 + * Default value provides 11:8:13 (8K), 10:10:12 (4K) 41 + * Limits imposed by pgtable_t only PAGE_SIZE long 42 + * (so 4K page can only have 1K entries: or 10 bits) 43 + */ 44 + #ifdef CONFIG_ARC_PAGE_SIZE_4K 45 + #define PGDIR_SHIFT 22 46 + #else 47 + #define PGDIR_SHIFT 21 48 + #endif 49 + 50 + #endif 51 + 52 + #else /* CONFIG_PGTABLE_LEVELS != 2 */ 53 + 54 + /* 55 + * A default 3 level paging testing setup in software walked MMU 56 + * MMUv4 (8K page): <4> : <7> : <8> : <13> 57 + * A default 4 level paging testing setup in software walked MMU 58 + * MMUv4 (8K page): <4> : <3> : <4> : <8> : <13> 59 + */ 60 + #define PGDIR_SHIFT 28 61 + #if CONFIG_PGTABLE_LEVELS > 3 62 + #define PUD_SHIFT 25 63 + #endif 64 + #if CONFIG_PGTABLE_LEVELS > 2 65 + #define PMD_SHIFT 21 66 + #endif 67 + 68 + #endif /* CONFIG_PGTABLE_LEVELS */ 69 + 
70 + #define PGDIR_SIZE BIT(PGDIR_SHIFT) 71 + #define PGDIR_MASK (~(PGDIR_SIZE - 1)) 72 + #define PTRS_PER_PGD BIT(32 - PGDIR_SHIFT) 73 + 74 + #if CONFIG_PGTABLE_LEVELS > 3 75 + #define PUD_SIZE BIT(PUD_SHIFT) 76 + #define PUD_MASK (~(PUD_SIZE - 1)) 77 + #define PTRS_PER_PUD BIT(PGDIR_SHIFT - PUD_SHIFT) 78 + #endif 79 + 80 + #if CONFIG_PGTABLE_LEVELS > 2 81 + #define PMD_SIZE BIT(PMD_SHIFT) 82 + #define PMD_MASK (~(PMD_SIZE - 1)) 83 + #define PTRS_PER_PMD BIT(PUD_SHIFT - PMD_SHIFT) 84 + #endif 85 + 86 + #define PTRS_PER_PTE BIT(PMD_SHIFT - PAGE_SHIFT) 87 + 88 + #ifndef __ASSEMBLY__ 89 + 90 + #if CONFIG_PGTABLE_LEVELS > 3 91 + #include <asm-generic/pgtable-nop4d.h> 92 + #elif CONFIG_PGTABLE_LEVELS > 2 93 + #include <asm-generic/pgtable-nopud.h> 94 + #else 95 + #include <asm-generic/pgtable-nopmd.h> 96 + #endif 97 + 98 + /* 99 + * 1st level paging: pgd 100 + */ 101 + #define pgd_index(addr) ((addr) >> PGDIR_SHIFT) 102 + #define pgd_offset(mm, addr) (((mm)->pgd) + pgd_index(addr)) 103 + #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) 104 + #define pgd_ERROR(e) \ 105 + pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) 106 + 107 + #if CONFIG_PGTABLE_LEVELS > 3 108 + 109 + /* In 4 level paging, p4d_* macros work on pgd */ 110 + #define p4d_none(x) (!p4d_val(x)) 111 + #define p4d_bad(x) ((p4d_val(x) & ~PAGE_MASK)) 112 + #define p4d_present(x) (p4d_val(x)) 113 + #define p4d_clear(xp) do { p4d_val(*(xp)) = 0; } while (0) 114 + #define p4d_pgtable(p4d) ((pud_t *)(p4d_val(p4d) & PAGE_MASK)) 115 + #define p4d_page(p4d) virt_to_page(p4d_pgtable(p4d)) 116 + #define set_p4d(p4dp, p4d) (*(p4dp) = p4d) 117 + 118 + /* 119 + * 2nd level paging: pud 120 + */ 121 + #define pud_ERROR(e) \ 122 + pr_crit("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e)) 123 + 124 + #endif 125 + 126 + #if CONFIG_PGTABLE_LEVELS > 2 127 + 128 + /* 129 + * In 3 level paging, pud_* macros work on pgd 130 + * In 4 level paging, pud_* macros work on pud 131 + */ 132 + #define 
pud_none(x) (!pud_val(x)) 133 + #define pud_bad(x) ((pud_val(x) & ~PAGE_MASK)) 134 + #define pud_present(x) (pud_val(x)) 135 + #define pud_clear(xp) do { pud_val(*(xp)) = 0; } while (0) 136 + #define pud_pgtable(pud) ((pmd_t *)(pud_val(pud) & PAGE_MASK)) 137 + #define pud_page(pud) virt_to_page(pud_pgtable(pud)) 138 + #define set_pud(pudp, pud) (*(pudp) = pud) 139 + 140 + /* 141 + * 3rd level paging: pmd 142 + */ 143 + #define pmd_ERROR(e) \ 144 + pr_crit("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) 145 + 146 + #define pmd_pfn(pmd) ((pmd_val(pmd) & PMD_MASK) >> PAGE_SHIFT) 147 + #define pfn_pmd(pfn,prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) 148 + #define mk_pmd(page,prot) pfn_pmd(page_to_pfn(page),prot) 149 + 150 + #endif 151 + 152 + /* 153 + * Due to the strange way generic pgtable level folding works, the pmd_* macros 154 + * - are valid even for 2 levels (which supposedly only has pgd - pte) 155 + * - behave differently for 2 vs. 3 156 + * In 2 level paging (pgd -> pte), pmd_* macros work on pgd 157 + * In 3+ level paging (pgd -> pmd -> pte), pmd_* macros work on pmd 158 + */ 159 + #define pmd_none(x) (!pmd_val(x)) 160 + #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) 161 + #define pmd_present(x) (pmd_val(x)) 162 + #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) 163 + #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) 164 + #define pmd_page(pmd) virt_to_page(pmd_page_vaddr(pmd)) 165 + #define set_pmd(pmdp, pmd) (*(pmdp) = pmd) 166 + #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) 167 + 168 + /* 169 + * 4th level paging: pte 170 + */ 171 + #define pte_ERROR(e) \ 172 + pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) 173 + 174 + #define pte_none(x) (!pte_val(x)) 175 + #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) 176 + #define pte_clear(mm,addr,ptep) set_pte_at(mm, addr, ptep, __pte(0)) 177 + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) 178 + #define set_pte(ptep, pte) ((*(ptep)) = (pte)) 
179 + #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) 180 + #define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) 181 + #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) 182 + 183 + #ifdef CONFIG_ISA_ARCV2 184 + #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) 185 + #endif 186 + 187 + #endif /* !__ASSEMBLY__ */ 188 + 189 + #endif
+5 -334
arch/arc/include/asm/pgtable.h
··· 1 1 /* SPDX-License-Identifier: GPL-2.0-only */ 2 2 /* 3 3 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 4 - * 5 - * vineetg: May 2011 6 - * -Folded PAGE_PRESENT (used by VM) and PAGE_VALID (used by MMU) into 1. 7 - * They are semantically the same although in different contexts 8 - * VALID marks a TLB entry exists and it will only happen if PRESENT 9 - * - Utilise some unused free bits to confine PTE flags to 12 bits 10 - * This is a must for 4k pg-sz 11 - * 12 - * vineetg: Mar 2011 - changes to accommodate MMU TLB Page Descriptor mods 13 - * -TLB Locking never really existed, except for initial specs 14 - * -SILENT_xxx not needed for our port 15 - * -Per my request, MMU V3 changes the layout of some of the bits 16 - * to avoid a few shifts in TLB Miss handlers. 17 - * 18 - * vineetg: April 2010 19 - * -PGD entry no longer contains any flags. If empty it is 0, otherwise has 20 - * Pg-Tbl ptr. Thus pmd_present(), pmd_valid(), pmd_set( ) become simpler 21 - * 22 - * vineetg: April 2010 23 - * -Switched form 8:11:13 split for page table lookup to 11:8:13 24 - * -this speeds up page table allocation itself as we now have to memset 1K 25 - * instead of 8k per page table. 26 - * -TODO: Right now page table alloc is 8K and rest 7K is unused 27 - * need to optimise it 28 - * 29 - * Amit Bhor, Sameer Dhavale: Codito Technologies 2004 30 4 */ 31 5 32 6 #ifndef _ASM_ARC_PGTABLE_H 33 7 #define _ASM_ARC_PGTABLE_H 34 8 35 9 #include <linux/bits.h> 36 - #include <asm-generic/pgtable-nopmd.h> 10 + 11 + #include <asm/pgtable-levels.h> 12 + #include <asm/pgtable-bits-arcv2.h> 37 13 #include <asm/page.h> 38 - #include <asm/mmu.h> /* to propagate CONFIG_ARC_MMU_VER <n> */ 39 - 40 - /************************************************************************** 41 - * Page Table Flags 42 - * 43 - * ARC700 MMU only deals with softare managed TLB entries. 
44 - * Page Tables are purely for Linux VM's consumption and the bits below are 45 - * suited to that (uniqueness). Hence some are not implemented in the TLB and 46 - * some have different value in TLB. 47 - * e.g. MMU v2: K_READ bit is 8 and so is GLOBAL (possible because they live in 48 - * seperate PD0 and PD1, which combined forms a translation entry) 49 - * while for PTE perspective, they are 8 and 9 respectively 50 - * with MMU v3: Most bits (except SHARED) represent the exact hardware pos 51 - * (saves some bit shift ops in TLB Miss hdlrs) 52 - */ 53 - 54 - #if (CONFIG_ARC_MMU_VER <= 2) 55 - 56 - #define _PAGE_ACCESSED (1<<1) /* Page is accessed (S) */ 57 - #define _PAGE_CACHEABLE (1<<2) /* Page is cached (H) */ 58 - #define _PAGE_EXECUTE (1<<3) /* Page has user execute perm (H) */ 59 - #define _PAGE_WRITE (1<<4) /* Page has user write perm (H) */ 60 - #define _PAGE_READ (1<<5) /* Page has user read perm (H) */ 61 - #define _PAGE_DIRTY (1<<6) /* Page modified (dirty) (S) */ 62 - #define _PAGE_SPECIAL (1<<7) 63 - #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ 64 - #define _PAGE_PRESENT (1<<10) /* TLB entry is valid (H) */ 65 - 66 - #else /* MMU v3 onwards */ 67 - 68 - #define _PAGE_CACHEABLE (1<<0) /* Page is cached (H) */ 69 - #define _PAGE_EXECUTE (1<<1) /* Page has user execute perm (H) */ 70 - #define _PAGE_WRITE (1<<2) /* Page has user write perm (H) */ 71 - #define _PAGE_READ (1<<3) /* Page has user read perm (H) */ 72 - #define _PAGE_ACCESSED (1<<4) /* Page is accessed (S) */ 73 - #define _PAGE_DIRTY (1<<5) /* Page modified (dirty) (S) */ 74 - #define _PAGE_SPECIAL (1<<6) 75 - 76 - #if (CONFIG_ARC_MMU_VER >= 4) 77 - #define _PAGE_WTHRU (1<<7) /* Page cache mode write-thru (H) */ 78 - #endif 79 - 80 - #define _PAGE_GLOBAL (1<<8) /* Page is global (H) */ 81 - #define _PAGE_PRESENT (1<<9) /* TLB entry is valid (H) */ 82 - 83 - #if (CONFIG_ARC_MMU_VER >= 4) 84 - #define _PAGE_HW_SZ (1<<10) /* Page Size indicator (H): 0 normal, 1 super */ 85 - #endif 
86 - 87 - #define _PAGE_SHARED_CODE (1<<11) /* Shared Code page with cmn vaddr 88 - usable for shared TLB entries (H) */ 89 - 90 - #define _PAGE_UNUSED_BIT (1<<12) 91 - #endif 92 - 93 - /* vmalloc permissions */ 94 - #define _K_PAGE_PERMS (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ | \ 95 - _PAGE_GLOBAL | _PAGE_PRESENT) 96 - 97 - #ifndef CONFIG_ARC_CACHE_PAGES 98 - #undef _PAGE_CACHEABLE 99 - #define _PAGE_CACHEABLE 0 100 - #endif 101 - 102 - #ifndef _PAGE_HW_SZ 103 - #define _PAGE_HW_SZ 0 104 - #endif 105 - 106 - /* Defaults for every user page */ 107 - #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) 108 - 109 - /* Set of bits not changed in pte_modify */ 110 - #define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ 111 - _PAGE_SPECIAL) 112 - /* More Abbrevaited helpers */ 113 - #define PAGE_U_NONE __pgprot(___DEF) 114 - #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) 115 - #define PAGE_U_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE) 116 - #define PAGE_U_X_R __pgprot(___DEF | _PAGE_READ | _PAGE_EXECUTE) 117 - #define PAGE_U_X_W_R __pgprot(___DEF | _PAGE_READ | _PAGE_WRITE | \ 118 - _PAGE_EXECUTE) 119 - 120 - #define PAGE_SHARED PAGE_U_W_R 121 - 122 - /* While kernel runs out of unstranslated space, vmalloc/modules use a chunk of 123 - * user vaddr space - visible in all addr spaces, but kernel mode only 124 - * Thus Global, all-kernel-access, no-user-access, cached 125 - */ 126 - #define PAGE_KERNEL __pgprot(_K_PAGE_PERMS | _PAGE_CACHEABLE) 127 - 128 - /* ioremap */ 129 - #define PAGE_KERNEL_NO_CACHE __pgprot(_K_PAGE_PERMS) 130 - 131 - /* Masks for actual TLB "PD"s */ 132 - #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) 133 - #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) 134 - 135 - #define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) 136 - 137 - /************************************************************************** 138 - * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) 139 
- * 140 - * Certain cases have 1:1 mapping 141 - * e.g. __P101 means VM_READ, VM_EXEC and !VM_SHARED 142 - * which directly corresponds to PAGE_U_X_R 143 - * 144 - * Other rules which cause the divergence from 1:1 mapping 145 - * 146 - * 1. Although ARC700 can do exclusive execute/write protection (meaning R 147 - * can be tracked independet of X/W unlike some other CPUs), still to 148 - * keep things consistent with other archs: 149 - * -Write implies Read: W => R 150 - * -Execute implies Read: X => R 151 - * 152 - * 2. Pvt Writable doesn't have Write Enabled initially: Pvt-W => !W 153 - * This is to enable COW mechanism 154 - */ 155 - /* xwr */ 156 - #define __P000 PAGE_U_NONE 157 - #define __P001 PAGE_U_R 158 - #define __P010 PAGE_U_R /* Pvt-W => !W */ 159 - #define __P011 PAGE_U_R /* Pvt-W => !W */ 160 - #define __P100 PAGE_U_X_R /* X => R */ 161 - #define __P101 PAGE_U_X_R 162 - #define __P110 PAGE_U_X_R /* Pvt-W => !W and X => R */ 163 - #define __P111 PAGE_U_X_R /* Pvt-W => !W */ 164 - 165 - #define __S000 PAGE_U_NONE 166 - #define __S001 PAGE_U_R 167 - #define __S010 PAGE_U_W_R /* W => R */ 168 - #define __S011 PAGE_U_W_R 169 - #define __S100 PAGE_U_X_R /* X => R */ 170 - #define __S101 PAGE_U_X_R 171 - #define __S110 PAGE_U_X_W_R /* X => R */ 172 - #define __S111 PAGE_U_X_W_R 173 - 174 - /**************************************************************** 175 - * 2 tier (PGD:PTE) software page walker 176 - * 177 - * [31] 32 bit virtual address [0] 178 - * ------------------------------------------------------- 179 - * | | <------------ PGDIR_SHIFT ----------> | 180 - * | | | 181 - * | BITS_FOR_PGD | BITS_FOR_PTE | <-- PAGE_SHIFT --> | 182 - * ------------------------------------------------------- 183 - * | | | 184 - * | | --> off in page frame 185 - * | ---> index into Page Table 186 - * ----> index into Page Directory 187 - * 188 - * In a single page size configuration, only PAGE_SHIFT is fixed 189 - * So both PGD and PTE sizing can be tweaked 190 - * e.g. 
8K page (PAGE_SHIFT 13) can have 191 - * - PGDIR_SHIFT 21 -> 11:8:13 address split 192 - * - PGDIR_SHIFT 24 -> 8:11:13 address split 193 - * 194 - * If Super Page is configured, PGDIR_SHIFT becomes fixed too, 195 - * so the sizing flexibility is gone. 196 - */ 197 - 198 - #if defined(CONFIG_ARC_HUGEPAGE_16M) 199 - #define PGDIR_SHIFT 24 200 - #elif defined(CONFIG_ARC_HUGEPAGE_2M) 201 - #define PGDIR_SHIFT 21 202 - #else 203 - /* 204 - * Only Normal page support so "hackable" (see comment above) 205 - * Default value provides 11:8:13 (8K), 11:9:12 (4K) 206 - */ 207 - #define PGDIR_SHIFT 21 208 - #endif 209 - 210 - #define BITS_FOR_PTE (PGDIR_SHIFT - PAGE_SHIFT) 211 - #define BITS_FOR_PGD (32 - PGDIR_SHIFT) 212 - 213 - #define PGDIR_SIZE BIT(PGDIR_SHIFT) /* vaddr span, not PDG sz */ 214 - #define PGDIR_MASK (~(PGDIR_SIZE-1)) 215 - 216 - #define PTRS_PER_PTE BIT(BITS_FOR_PTE) 217 - #define PTRS_PER_PGD BIT(BITS_FOR_PGD) 14 + #include <asm/mmu.h> 218 15 219 16 /* 220 17 * Number of entries a user land program use. 
··· 19 222 */ 20 223 #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) 21 224 22 - 23 - /**************************************************************** 24 - * Bucket load of VM Helpers 25 - */ 26 - 27 225 #ifndef __ASSEMBLY__ 28 226 29 - #define pte_ERROR(e) \ 30 - pr_crit("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) 31 - #define pgd_ERROR(e) \ 32 - pr_crit("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) 33 - 34 - /* the zero page used for uninitialized and anonymous pages */ 35 227 extern char empty_zero_page[PAGE_SIZE]; 36 228 #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) 37 229 38 - #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) 39 - #define set_pmd(pmdptr, pmdval) (*(pmdptr) = pmdval) 40 - 41 - /* find the page descriptor of the Page Tbl ref by PMD entry */ 42 - #define pmd_page(pmd) virt_to_page(pmd_val(pmd) & PAGE_MASK) 43 - 44 - /* find the logical addr (phy for ARC) of the Page Tbl ref by PMD entry */ 45 - #define pmd_page_vaddr(pmd) (pmd_val(pmd) & PAGE_MASK) 46 - 47 - /* In a 2 level sys, setup the PGD entry with PTE value */ 48 - static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) 49 - { 50 - pmd_val(*pmdp) = (unsigned long)ptep; 51 - } 52 - 53 - #define pte_none(x) (!pte_val(x)) 54 - #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) 55 - #define pte_clear(mm, addr, ptep) set_pte_at(mm, addr, ptep, __pte(0)) 56 - 57 - #define pmd_none(x) (!pmd_val(x)) 58 - #define pmd_bad(x) ((pmd_val(x) & ~PAGE_MASK)) 59 - #define pmd_present(x) (pmd_val(x)) 60 - #define pmd_leaf(x) (pmd_val(x) & _PAGE_HW_SZ) 61 - #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) 62 - 63 - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) 64 - #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) 65 - #define pfn_pte(pfn, prot) __pte(__pfn_to_phys(pfn) | pgprot_val(prot)) 66 - 67 - /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ 68 - #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) 69 - 70 - 
/* Zoo of pte_xxx function */ 71 - #define pte_read(pte) (pte_val(pte) & _PAGE_READ) 72 - #define pte_write(pte) (pte_val(pte) & _PAGE_WRITE) 73 - #define pte_dirty(pte) (pte_val(pte) & _PAGE_DIRTY) 74 - #define pte_young(pte) (pte_val(pte) & _PAGE_ACCESSED) 75 - #define pte_special(pte) (pte_val(pte) & _PAGE_SPECIAL) 76 - 77 - #define PTE_BIT_FUNC(fn, op) \ 78 - static inline pte_t pte_##fn(pte_t pte) { pte_val(pte) op; return pte; } 79 - 80 - PTE_BIT_FUNC(mknotpresent, &= ~(_PAGE_PRESENT)); 81 - PTE_BIT_FUNC(wrprotect, &= ~(_PAGE_WRITE)); 82 - PTE_BIT_FUNC(mkwrite, |= (_PAGE_WRITE)); 83 - PTE_BIT_FUNC(mkclean, &= ~(_PAGE_DIRTY)); 84 - PTE_BIT_FUNC(mkdirty, |= (_PAGE_DIRTY)); 85 - PTE_BIT_FUNC(mkold, &= ~(_PAGE_ACCESSED)); 86 - PTE_BIT_FUNC(mkyoung, |= (_PAGE_ACCESSED)); 87 - PTE_BIT_FUNC(exprotect, &= ~(_PAGE_EXECUTE)); 88 - PTE_BIT_FUNC(mkexec, |= (_PAGE_EXECUTE)); 89 - PTE_BIT_FUNC(mkspecial, |= (_PAGE_SPECIAL)); 90 - PTE_BIT_FUNC(mkhuge, |= (_PAGE_HW_SZ)); 91 - 92 - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 93 - { 94 - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); 95 - } 230 + extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); 96 231 97 232 /* Macro to mark a page protection as uncacheable */ 98 233 #define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) & ~_PAGE_CACHEABLE)) 99 234 100 - static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, 101 - pte_t *ptep, pte_t pteval) 102 - { 103 - set_pte(ptep, pteval); 104 - } 105 - 106 - /* 107 - * Macro to quickly access the PGD entry, utlising the fact that some 108 - * arch may cache the pointer to Page Directory of "current" task 109 - * in a MMU register 110 - * 111 - * Thus task->mm->pgd (3 pointer dereferences, cache misses etc simply 112 - * becomes read a register 113 - * 114 - * ********CAUTION*******: 115 - * Kernel code might be dealing with some mm_struct of NON "current" 116 - * Thus use this macro only when you are certain that "current" is 
current 117 - * e.g. when dealing with signal frame setup code etc 118 - */ 119 - #ifdef ARC_USE_SCRATCH_REG 120 - #define pgd_offset_fast(mm, addr) \ 121 - ({ \ 122 - pgd_t *pgd_base = (pgd_t *) read_aux_reg(ARC_REG_SCRATCH_DATA0); \ 123 - pgd_base + pgd_index(addr); \ 124 - }) 125 - #else 126 - #define pgd_offset_fast(mm, addr) pgd_offset(mm, addr) 127 - #endif 128 - 129 235 extern pgd_t swapper_pg_dir[] __aligned(PAGE_SIZE); 130 - void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, 131 - pte_t *ptep); 132 - 133 - /* Encode swap {type,off} tuple into PTE 134 - * We reserve 13 bits for 5-bit @type, keeping bits 12-5 zero, ensuring that 135 - * PAGE_PRESENT is zero in a PTE holding swap "identifier" 136 - */ 137 - #define __swp_entry(type, off) ((swp_entry_t) { \ 138 - ((type) & 0x1f) | ((off) << 13) }) 139 - 140 - /* Decode a PTE containing swap "identifier "into constituents */ 141 - #define __swp_type(pte_lookalike) (((pte_lookalike).val) & 0x1f) 142 - #define __swp_offset(pte_lookalike) ((pte_lookalike).val >> 13) 143 - 144 - /* NOPs, to keep generic kernel happy */ 145 - #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) 146 - #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) 147 - 148 - #define kern_addr_valid(addr) (1) 149 - 150 - #define pmd_pgtable(pmd) ((pgtable_t) pmd_page_vaddr(pmd)) 151 - 152 - /* 153 - * remap a physical page `pfn' of size `size' with page protection `prot' 154 - * into virtual address `from' 155 - */ 156 - #ifdef CONFIG_TRANSPARENT_HUGEPAGE 157 - #include <asm/hugepage.h> 158 - #endif 159 236 160 237 /* to cope with aliasing VIPT cache */ 161 238 #define HAVE_ARCH_UNMAPPED_AREA
+1 -1
arch/arc/include/asm/processor.h
··· 93 93 #define VMALLOC_START (PAGE_OFFSET - (CONFIG_ARC_KVADDR_SIZE << 20)) 94 94 95 95 /* 1 PGDIR_SIZE each for fixmap/pkmap, 2 PGDIR_SIZE gutter (see asm/highmem.h) */ 96 - #define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PGDIR_SIZE * 4) 96 + #define VMALLOC_SIZE ((CONFIG_ARC_KVADDR_SIZE << 20) - PMD_SIZE * 4) 97 97 98 98 #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) 99 99
+10 -2
arch/arc/include/asm/setup.h
··· 2 2 /* 3 3 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 4 4 */ 5 - #ifndef __ASMARC_SETUP_H 6 - #define __ASMARC_SETUP_H 5 + #ifndef __ASM_ARC_SETUP_H 6 + #define __ASM_ARC_SETUP_H 7 7 8 8 9 9 #include <linux/types.h> ··· 33 33 #define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) 34 34 #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) 35 35 #define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) 36 + 37 + extern void arc_mmu_init(void); 38 + extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len); 39 + extern void read_decode_mmu_bcr(void); 40 + 41 + extern void arc_cache_init(void); 42 + extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); 43 + extern void read_decode_cache_bcr(void); 36 44 37 45 #endif /* __ASMARC_SETUP_H */
-14
arch/arc/include/asm/smp.h
··· 105 105 #include <asm/spinlock.h> 106 106 107 107 extern arch_spinlock_t smp_atomic_ops_lock; 108 - extern arch_spinlock_t smp_bitops_lock; 109 108 110 109 #define atomic_ops_lock(flags) do { \ 111 110 local_irq_save(flags); \ ··· 116 117 local_irq_restore(flags); \ 117 118 } while (0) 118 119 119 - #define bitops_lock(flags) do { \ 120 - local_irq_save(flags); \ 121 - arch_spin_lock(&smp_bitops_lock); \ 122 - } while (0) 123 - 124 - #define bitops_unlock(flags) do { \ 125 - arch_spin_unlock(&smp_bitops_lock); \ 126 - local_irq_restore(flags); \ 127 - } while (0) 128 - 129 120 #else /* !CONFIG_SMP */ 130 121 131 122 #define atomic_ops_lock(flags) local_irq_save(flags) 132 123 #define atomic_ops_unlock(flags) local_irq_restore(flags) 133 - 134 - #define bitops_lock(flags) local_irq_save(flags) 135 - #define bitops_unlock(flags) local_irq_restore(flags) 136 124 137 125 #endif /* !CONFIG_SMP */ 138 126
-101
arch/arc/include/asm/tlb-mmu1.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0-only */ 2 - /* 3 - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 4 - */ 5 - 6 - #ifndef __ASM_TLB_MMU_V1_H__ 7 - #define __ASM_TLB_MMU_V1_H__ 8 - 9 - #include <asm/mmu.h> 10 - 11 - #if defined(__ASSEMBLY__) && (CONFIG_ARC_MMU_VER == 1) 12 - 13 - .macro TLB_WRITE_HEURISTICS 14 - 15 - #define JH_HACK1 16 - #undef JH_HACK2 17 - #undef JH_HACK3 18 - 19 - #ifdef JH_HACK3 20 - ; Calculate set index for 2-way MMU 21 - ; -avoiding use of GetIndex from MMU 22 - ; and its unpleasant LFSR pseudo-random sequence 23 - ; 24 - ; r1 = TLBPD0 from TLB_RELOAD above 25 - ; 26 - ; -- jh_ex_way_set not cleared on startup 27 - ; didn't want to change setup.c 28 - ; hence extra instruction to clean 29 - ; 30 - ; -- should be in cache since in same line 31 - ; as r0/r1 saves above 32 - ; 33 - ld r0,[jh_ex_way_sel] ; victim pointer 34 - and r0,r0,1 ; clean 35 - xor.f r0,r0,1 ; flip 36 - st r0,[jh_ex_way_sel] ; store back 37 - asr r0,r1,12 ; get set # <<1, note bit 12=R=0 38 - or.nz r0,r0,1 ; set way bit 39 - and r0,r0,0xff ; clean 40 - sr r0,[ARC_REG_TLBINDEX] 41 - #endif 42 - 43 - #ifdef JH_HACK2 44 - ; JH hack #2 45 - ; Faster than hack #1 in non-thrash case, but hard-coded for 2-way MMU 46 - ; Slower in thrash case (where it matters) because more code is executed 47 - ; Inefficient due to two-register paradigm of this miss handler 48 - ; 49 - /* r1 = data TLBPD0 at this point */ 50 - lr r0,[eret] /* instruction address */ 51 - xor r0,r0,r1 /* compare set # */ 52 - and.f r0,r0,0x000fe000 /* 2-way MMU mask */ 53 - bne 88f /* not in same set - no need to probe */ 54 - 55 - lr r0,[eret] /* instruction address */ 56 - and r0,r0,PAGE_MASK /* VPN of instruction address */ 57 - ; lr r1,[ARC_REG_TLBPD0] /* Data VPN+ASID - already in r1 from TLB_RELOAD*/ 58 - and r1,r1,0xff /* Data ASID */ 59 - or r0,r0,r1 /* Instruction address + Data ASID */ 60 - 61 - lr r1,[ARC_REG_TLBPD0] /* save TLBPD0 containing data TLB*/ 62 - sr 
r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ 63 - sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ 64 - lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ 65 - sr r1,[ARC_REG_TLBPD0] /* restore TLBPD0 */ 66 - 67 - xor r0,r0,1 /* flip bottom bit of data index */ 68 - b.d 89f 69 - sr r0,[ARC_REG_TLBINDEX] /* and put it back */ 70 - 88: 71 - sr TLBGetIndex, [ARC_REG_TLBCOMMAND] 72 - 89: 73 - #endif 74 - 75 - #ifdef JH_HACK1 76 - ; 77 - ; Always checks whether instruction will be kicked out by dtlb miss 78 - ; 79 - mov_s r3, r1 ; save PD0 prepared by TLB_RELOAD in r3 80 - lr r0,[eret] /* instruction address */ 81 - and r0,r0,PAGE_MASK /* VPN of instruction address */ 82 - bmsk r1,r3,7 /* Data ASID, bits 7-0 */ 83 - or_s r0,r0,r1 /* Instruction address + Data ASID */ 84 - 85 - sr r0,[ARC_REG_TLBPD0] /* write instruction address to TLBPD0 */ 86 - sr TLBProbe, [ARC_REG_TLBCOMMAND] /* Look for instruction */ 87 - lr r0,[ARC_REG_TLBINDEX] /* r0 = index where instruction is, if at all */ 88 - sr r3,[ARC_REG_TLBPD0] /* restore TLBPD0 */ 89 - 90 - sr TLBGetIndex, [ARC_REG_TLBCOMMAND] 91 - lr r1,[ARC_REG_TLBINDEX] /* r1 = index where MMU wants to put data */ 92 - cmp r0,r1 /* if no match on indices, go around */ 93 - xor.eq r1,r1,1 /* flip bottom bit of data index */ 94 - sr r1,[ARC_REG_TLBINDEX] /* and put it back */ 95 - #endif 96 - 97 - .endm 98 - 99 - #endif 100 - 101 - #endif
+1
arch/arc/kernel/entry-arcv2.S
··· 10 10 #include <asm/errno.h> 11 11 #include <asm/arcregs.h> 12 12 #include <asm/irqflags.h> 13 + #include <asm/mmu.h> 13 14 14 15 ; A maximum number of supported interrupts in the core interrupt controller. 15 16 ; This number is not equal to the maximum interrupt number (256) because
+2 -5
arch/arc/kernel/entry.S
··· 101 101 lr r0, [efa] 102 102 mov r1, sp 103 103 104 - ; hardware auto-disables MMU, re-enable it to allow kernel vaddr 105 - ; access for say stack unwinding of modules for crash dumps 106 - lr r3, [ARC_REG_PID] 107 - or r3, r3, MMU_ENABLE 108 - sr r3, [ARC_REG_PID] 104 + ; MC excpetions disable MMU 105 + ARC_MMU_REENABLE r3 109 106 110 107 lsr r3, r2, 8 111 108 bmsk r3, r3, 7
+1 -1
arch/arc/kernel/intc-compact.c
··· 142 142 * Time hard-ISR, timer_interrupt( ) calls spin_unlock_irq several times. 143 143 * Here local_irq_enable( ) shd not re-enable lower priority interrupts 144 144 * -If called from soft-ISR, it must re-enable all interrupts 145 - * soft ISR are low prioity jobs which can be very slow, thus all IRQs 145 + * soft ISR are low priority jobs which can be very slow, thus all IRQs 146 146 * must be enabled while they run. 147 147 * Now hardware context wise we may still be in L2 ISR (not done rtie) 148 148 * still we must re-enable both L1 and L2 IRQs
+1 -3
arch/arc/kernel/smp.c
··· 29 29 30 30 #ifndef CONFIG_ARC_HAS_LLSC 31 31 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED; 32 - arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED; 33 32 34 33 EXPORT_SYMBOL_GPL(smp_atomic_ops_lock); 35 - EXPORT_SYMBOL_GPL(smp_bitops_lock); 36 34 #endif 37 35 38 36 struct plat_smp_ops __weak plat_smp_ops; ··· 281 283 /* 282 284 * Call the platform specific IPI kick function, but avoid if possible: 283 285 * Only do so if there's no pending msg from other concurrent sender(s). 284 - * Otherwise, recevier will see this msg as well when it takes the 286 + * Otherwise, receiver will see this msg as well when it takes the 285 287 * IPI corresponding to that msg. This is true, even if it is already in 286 288 * IPI handler, because !@old means it has not yet dequeued the msg(s) 287 289 * so @new msg can be a free-loader
+1 -1
arch/arc/kernel/stacktrace.c
··· 149 149 #else 150 150 /* On ARC, only Dward based unwinder works. fp based backtracing is 151 151 * not possible (-fno-omit-frame-pointer) because of the way function 152 - * prelogue is setup (callee regs saved and then fp set and not other 152 + * prologue is setup (callee regs saved and then fp set and not other 153 153 * way around 154 154 */ 155 155 pr_warn_once("CONFIG_ARC_DW2_UNWIND needs to be enabled\n");
+15 -97
arch/arc/mm/cache.c
··· 205 205 #define OP_INV_IC 0x4 206 206 207 207 /* 208 - * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) 208 + * Cache Flush programming model 209 209 * 210 - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. 211 - * The orig Cache Management Module "CDU" only required paddr to invalidate a 212 - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. 213 - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching 214 - * the exact same line. 210 + * ARC700 MMUv3 I$ and D$ are both VIPT and can potentially alias. 211 + * Programming model requires both paddr and vaddr irrespecive of aliasing 212 + * considerations: 213 + * - vaddr in {I,D}C_IV?L 214 + * - paddr in {I,D}C_PTAG 215 215 * 216 - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, 217 - * paddr alone could not be used to correctly index the cache. 216 + * In HS38x (MMUv4), D$ is PIPT, I$ is VIPT and can still alias. 217 + * Programming model is different for aliasing vs. non-aliasing I$ 218 + * - D$ / Non-aliasing I$: only paddr in {I,D}C_IV?L 219 + * - Aliasing I$: same as ARC700 above (so MMUv3 routine used for MMUv4 I$) 218 220 * 219 - * ------------------ 220 - * MMU v1/v2 (Fixed Page Size 8k) 221 - * ------------------ 222 - * The solution was to provide CDU with these additonal vaddr bits. These 223 - * would be bits [x:13], x would depend on cache-geometry, 13 comes from 224 - * standard page size of 8k. 225 - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits 226 - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the 227 - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they 228 - * represent the offset within cache-line. The adv of using this "clumsy" 229 - * interface for additional info was no new reg was needed in CDU programming 230 - * model. 
231 - * 232 - * 17:13 represented the max num of bits passable, actual bits needed were 233 - * fewer, based on the num-of-aliases possible. 234 - * -for 2 alias possibility, only bit 13 needed (32K cache) 235 - * -for 4 alias possibility, bits 14:13 needed (64K cache) 236 - * 237 - * ------------------ 238 - * MMU v3 239 - * ------------------ 240 - * This ver of MMU supports variable page sizes (1k-16k): although Linux will 241 - * only support 8k (default), 16k and 4k. 242 - * However from hardware perspective, smaller page sizes aggravate aliasing 243 - * meaning more vaddr bits needed to disambiguate the cache-line-op ; 244 - * the existing scheme of piggybacking won't work for certain configurations. 245 - * Two new registers IC_PTAG and DC_PTAG inttoduced. 246 - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs 221 + * - If PAE40 is enabled, independent of aliasing considerations, the higher 222 + * bits needs to be written into PTAG_HI 247 223 */ 248 224 249 - static inline 250 - void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, 251 - unsigned long sz, const int op, const int full_page) 252 - { 253 - unsigned int aux_cmd; 254 - int num_lines; 255 - 256 - if (op == OP_INV_IC) { 257 - aux_cmd = ARC_REG_IC_IVIL; 258 - } else { 259 - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ 260 - aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; 261 - } 262 - 263 - /* Ensure we properly floor/ceil the non-line aligned/sized requests 264 - * and have @paddr - aligned to cache line and integral @num_lines. 265 - * This however can be avoided for page sized since: 266 - * -@paddr will be cache-line aligned already (being page aligned) 267 - * -@sz will be integral multiple of line size (being page sized). 
268 - */ 269 - if (!full_page) { 270 - sz += paddr & ~CACHE_LINE_MASK; 271 - paddr &= CACHE_LINE_MASK; 272 - vaddr &= CACHE_LINE_MASK; 273 - } 274 - 275 - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); 276 - 277 - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ 278 - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; 279 - 280 - while (num_lines-- > 0) { 281 - write_aux_reg(aux_cmd, paddr); 282 - paddr += L1_CACHE_BYTES; 283 - } 284 - } 285 - 286 - /* 287 - * For ARC700 MMUv3 I-cache and D-cache flushes 288 - * - ARC700 programming model requires paddr and vaddr be passed in seperate 289 - * AUX registers (*_IV*L and *_PTAG respectively) irrespective of whether the 290 - * caches actually alias or not. 291 - * - For HS38, only the aliasing I-cache configuration uses the PTAG reg 292 - * (non aliasing I-cache version doesn't; while D-cache can't possibly alias) 293 - */ 294 225 static inline 295 226 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, 296 227 unsigned long sz, const int op, const int full_page) ··· 281 350 #ifndef USE_RGN_FLSH 282 351 283 352 /* 284 - * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT 285 - * Here's how cache ops are implemented 286 - * 287 - * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) 288 - * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) 289 - * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG 290 - * respectively, similar to MMU v3 programming model, hence 291 - * __cache_line_loop_v3() is used) 292 - * 293 - * If PAE40 is enabled, independent of aliasing considerations, the higher bits 294 - * needs to be written into PTAG_HI 295 353 */ 296 354 static inline 297 355 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, ··· 380 460 381 461 #endif 382 462 383 - #if (CONFIG_ARC_MMU_VER < 3) 384 - #define __cache_line_loop __cache_line_loop_v2 385 - #elif (CONFIG_ARC_MMU_VER == 3) 463 + #ifdef CONFIG_ARC_MMU_V3 386 464 #define __cache_line_loop 
__cache_line_loop_v3 387 - #elif (CONFIG_ARC_MMU_VER > 3) 465 + #else 388 466 #define __cache_line_loop __cache_line_loop_v4 389 467 #endif 390 468 ··· 1041 1123 clear_page(to); 1042 1124 clear_bit(PG_dc_clean, &page->flags); 1043 1125 } 1044 - 1126 + EXPORT_SYMBOL(clear_user_page); 1045 1127 1046 1128 /********************************************************************** 1047 1129 * Explicit Cache flush request from user space via syscall
+13 -7
arch/arc/mm/fault.c
··· 33 33 pud_t *pud, *pud_k; 34 34 pmd_t *pmd, *pmd_k; 35 35 36 - pgd = pgd_offset_fast(current->active_mm, address); 36 + pgd = pgd_offset(current->active_mm, address); 37 37 pgd_k = pgd_offset_k(address); 38 38 39 - if (!pgd_present(*pgd_k)) 39 + if (pgd_none (*pgd_k)) 40 40 goto bad_area; 41 + if (!pgd_present(*pgd)) 42 + set_pgd(pgd, *pgd_k); 41 43 42 44 p4d = p4d_offset(pgd, address); 43 45 p4d_k = p4d_offset(pgd_k, address); 44 - if (!p4d_present(*p4d_k)) 46 + if (p4d_none(*p4d_k)) 45 47 goto bad_area; 48 + if (!p4d_present(*p4d)) 49 + set_p4d(p4d, *p4d_k); 46 50 47 51 pud = pud_offset(p4d, address); 48 52 pud_k = pud_offset(p4d_k, address); 49 - if (!pud_present(*pud_k)) 53 + if (pud_none(*pud_k)) 50 54 goto bad_area; 55 + if (!pud_present(*pud)) 56 + set_pud(pud, *pud_k); 51 57 52 58 pmd = pmd_offset(pud, address); 53 59 pmd_k = pmd_offset(pud_k, address); 54 - if (!pmd_present(*pmd_k)) 60 + if (pmd_none(*pmd_k)) 55 61 goto bad_area; 56 - 57 - set_pmd(pmd, *pmd_k); 62 + if (!pmd_present(*pmd)) 63 + set_pmd(pmd, *pmd_k); 58 64 59 65 /* XXX: create the TLB entry here */ 60 66 return 0;
+5
arch/arc/mm/init.c
··· 189 189 { 190 190 memblock_free_all(); 191 191 highmem_init(); 192 + 193 + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); 194 + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); 195 + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); 196 + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); 192 197 } 193 198 194 199 #ifdef CONFIG_HIGHMEM
+2 -1
arch/arc/mm/ioremap.c
··· 39 39 if (arc_uncached_addr_space(paddr)) 40 40 return (void __iomem *)(u32)paddr; 41 41 42 - return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); 42 + return ioremap_prot(paddr, size, 43 + pgprot_val(pgprot_noncached(PAGE_KERNEL))); 43 44 } 44 45 EXPORT_SYMBOL(ioremap); 45 46
+32 -236
arch/arc/mm/tlb.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0-only 2 2 /* 3 - * TLB Management (flush/create/diagnostics) for ARC700 3 + * TLB Management (flush/create/diagnostics) for MMUv3 and MMUv4 4 4 * 5 5 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 6 6 * 7 - * vineetg: Aug 2011 8 - * -Reintroduce duplicate PD fixup - some customer chips still have the issue 9 - * 10 - * vineetg: May 2011 11 - * -No need to flush_cache_page( ) for each call to update_mmu_cache() 12 - * some of the LMBench tests improved amazingly 13 - * = page-fault thrice as fast (75 usec to 28 usec) 14 - * = mmap twice as fast (9.6 msec to 4.6 msec), 15 - * = fork (5.3 msec to 3.7 msec) 16 - * 17 - * vineetg: April 2011 : 18 - * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, 19 - * helps avoid a shift when preparing PD0 from PTE 20 - * 21 - * vineetg: April 2011 : Preparing for MMU V3 22 - * -MMU v2/v3 BCRs decoded differently 23 - * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 24 - * -tlb_entry_erase( ) can be void 25 - * -local_flush_tlb_range( ): 26 - * = need not "ceil" @end 27 - * = walks MMU only if range spans < 32 entries, as opposed to 256 28 - * 29 - * Vineetg: Sept 10th 2008 30 - * -Changes related to MMU v2 (Rel 4.8) 31 - * 32 - * Vineetg: Aug 29th 2008 33 - * -In TLB Flush operations (Metal Fix MMU) there is a explicit command to 34 - * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, 35 - * it fails. Thus need to load it with ANY valid value before invoking 36 - * TLBIVUTLB cmd 37 - * 38 - * Vineetg: Aug 21th 2008: 39 - * -Reduced the duration of IRQ lockouts in TLB Flush routines 40 - * -Multiple copies of TLB erase code separated into a "single" function 41 - * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID 42 - * in interrupt-safe region. 
43 - * 44 - * Vineetg: April 23rd Bug #93131 45 - * Problem: tlb_flush_kernel_range() doesn't do anything if the range to 46 - * flush is more than the size of TLB itself. 47 - * 48 - * Rahul Trivedi : Codito Technologies 2004 49 7 */ 50 8 51 9 #include <linux/module.h> ··· 14 56 #include <asm/setup.h> 15 57 #include <asm/mmu_context.h> 16 58 #include <asm/mmu.h> 17 - 18 - /* Need for ARC MMU v2 19 - * 20 - * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. 21 - * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages 22 - * map into same set, there would be contention for the 2 ways causing severe 23 - * Thrashing. 24 - * 25 - * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has 26 - * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. 27 - * Given this, the thrashing problem should never happen because once the 3 28 - * J-TLB entries are created (even though 3rd will knock out one of the prev 29 - * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy 30 - * 31 - * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. 32 - * This is a simple design for keeping them in sync. So what do we do? 33 - * The solution which James came up was pretty neat. It utilised the assoc 34 - * of uTLBs by not invalidating always but only when absolutely necessary. 35 - * 36 - * - Existing TLB commands work as before 37 - * - New command (TLBWriteNI) for TLB write without clearing uTLBs 38 - * - New command (TLBIVUTLB) to invalidate uTLBs. 39 - * 40 - * The uTLBs need only be invalidated when pages are being removed from the 41 - * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB 42 - * as a result of a miss, the removed entry is still allowed to exist in the 43 - * uTLBs as it is still valid and present in the OS page table. 
This allows the 44 - * full associativity of the uTLBs to hide the limited associativity of the main 45 - * TLB. 46 - * 47 - * During a miss handler, the new "TLBWriteNI" command is used to load 48 - * entries without clearing the uTLBs. 49 - * 50 - * When the OS page table is updated, TLB entries that may be associated with a 51 - * removed page are removed (flushed) from the TLB using TLBWrite. In this 52 - * circumstance, the uTLBs must also be cleared. This is done by using the 53 - * existing TLBWrite command. An explicit IVUTLB is also required for those 54 - * corner cases when TLBWrite was not executed at all because the corresp 55 - * J-TLB entry got evicted/replaced. 56 - */ 57 - 58 59 59 60 /* A copy of the ASID from the PID reg is kept in asid_cache */ 60 61 DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; ··· 37 120 38 121 static void utlb_invalidate(void) 39 122 { 40 - #if (CONFIG_ARC_MMU_VER >= 2) 41 - 42 - #if (CONFIG_ARC_MMU_VER == 2) 43 - /* MMU v2 introduced the uTLB Flush command. 44 - * There was however an obscure hardware bug, where uTLB flush would 45 - * fail when a prior probe for J-TLB (both totally unrelated) would 46 - * return lkup err - because the entry didn't exist in MMU. 47 - * The Workaround was to set Index reg with some valid value, prior to 48 - * flush. 
This was fixed in MMU v3 49 - */ 50 - unsigned int idx; 51 - 52 - /* make sure INDEX Reg is valid */ 53 - idx = read_aux_reg(ARC_REG_TLBINDEX); 54 - 55 - /* If not write some dummy val */ 56 - if (unlikely(idx & TLB_LKUP_ERR)) 57 - write_aux_reg(ARC_REG_TLBINDEX, 0xa); 58 - #endif 59 - 60 123 write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); 61 - #endif 62 - 63 124 } 64 125 65 - #if (CONFIG_ARC_MMU_VER < 4) 126 + #ifdef CONFIG_ARC_MMU_V3 66 127 67 128 static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) 68 129 { ··· 71 176 } 72 177 } 73 178 74 - static void tlb_entry_insert(unsigned int pd0, pte_t pd1) 179 + static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) 75 180 { 76 181 unsigned int idx; 77 182 ··· 101 206 write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); 102 207 } 103 208 104 - #else /* CONFIG_ARC_MMU_VER >= 4) */ 209 + #else /* MMUv4 */ 105 210 106 211 static void tlb_entry_erase(unsigned int vaddr_n_asid) 107 212 { ··· 109 214 write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); 110 215 } 111 216 112 - static void tlb_entry_insert(unsigned int pd0, pte_t pd1) 217 + static void tlb_entry_insert(unsigned int pd0, phys_addr_t pd1) 113 218 { 114 219 write_aux_reg(ARC_REG_TLBPD0, pd0); 115 - write_aux_reg(ARC_REG_TLBPD1, pd1); 116 220 117 - if (is_pae40_enabled()) 221 + if (!is_pae40_enabled()) { 222 + write_aux_reg(ARC_REG_TLBPD1, pd1); 223 + } else { 224 + write_aux_reg(ARC_REG_TLBPD1, pd1 & 0xFFFFFFFF); 118 225 write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); 226 + } 119 227 120 228 write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); 121 229 } ··· 394 496 unsigned long flags; 395 497 unsigned int asid_or_sasid, rwx; 396 498 unsigned long pd0; 397 - pte_t pd1; 499 + phys_addr_t pd1; 398 500 399 501 /* 400 502 * create_tlb() assumes that current->mm == vma->mm, since ··· 403 505 * 404 506 * Removing the assumption involves 405 507 * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. 
406 - * -Fix the TLB paranoid debug code to not trigger false negatives. 407 508 * -More importantly it makes this handler inconsistent with fast-path 408 509 * TLB Refill handler which always deals with "current" 409 510 * ··· 424 527 return; 425 528 426 529 local_irq_save(flags); 427 - 428 - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); 429 530 430 531 vaddr &= PAGE_MASK; 431 532 ··· 534 639 update_mmu_cache(vma, addr, &pte); 535 640 } 536 641 537 - void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, 538 - pgtable_t pgtable) 539 - { 540 - struct list_head *lh = (struct list_head *) pgtable; 541 - 542 - assert_spin_locked(&mm->page_table_lock); 543 - 544 - /* FIFO */ 545 - if (!pmd_huge_pte(mm, pmdp)) 546 - INIT_LIST_HEAD(lh); 547 - else 548 - list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); 549 - pmd_huge_pte(mm, pmdp) = pgtable; 550 - } 551 - 552 - pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) 553 - { 554 - struct list_head *lh; 555 - pgtable_t pgtable; 556 - 557 - assert_spin_locked(&mm->page_table_lock); 558 - 559 - pgtable = pmd_huge_pte(mm, pmdp); 560 - lh = (struct list_head *) pgtable; 561 - if (list_empty(lh)) 562 - pmd_huge_pte(mm, pmdp) = NULL; 563 - else { 564 - pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; 565 - list_del(lh); 566 - } 567 - 568 - pte_val(pgtable[0]) = 0; 569 - pte_val(pgtable[1]) = 0; 570 - 571 - return pgtable; 572 - } 573 - 574 642 void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, 575 643 unsigned long end) 576 644 { ··· 564 706 { 565 707 struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; 566 708 unsigned int tmp; 567 - struct bcr_mmu_1_2 { 568 - #ifdef CONFIG_CPU_BIG_ENDIAN 569 - unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; 570 - #else 571 - unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; 572 - #endif 573 - } *mmu2; 574 - 575 709 struct bcr_mmu_3 { 576 710 #ifdef CONFIG_CPU_BIG_ENDIAN 577 
711 unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, ··· 588 738 tmp = read_aux_reg(ARC_REG_MMU_BCR); 589 739 mmu->ver = (tmp >> 24); 590 740 591 - if (is_isa_arcompact()) { 592 - if (mmu->ver <= 2) { 593 - mmu2 = (struct bcr_mmu_1_2 *)&tmp; 594 - mmu->pg_sz_k = TO_KB(0x2000); 595 - mmu->sets = 1 << mmu2->sets; 596 - mmu->ways = 1 << mmu2->ways; 597 - mmu->u_dtlb = mmu2->u_dtlb; 598 - mmu->u_itlb = mmu2->u_itlb; 599 - } else { 600 - mmu3 = (struct bcr_mmu_3 *)&tmp; 601 - mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); 602 - mmu->sets = 1 << mmu3->sets; 603 - mmu->ways = 1 << mmu3->ways; 604 - mmu->u_dtlb = mmu3->u_dtlb; 605 - mmu->u_itlb = mmu3->u_itlb; 606 - mmu->sasid = mmu3->sasid; 607 - } 741 + if (is_isa_arcompact() && mmu->ver == 3) { 742 + mmu3 = (struct bcr_mmu_3 *)&tmp; 743 + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); 744 + mmu->sets = 1 << mmu3->sets; 745 + mmu->ways = 1 << mmu3->ways; 746 + mmu->u_dtlb = mmu3->u_dtlb; 747 + mmu->u_itlb = mmu3->u_itlb; 748 + mmu->sasid = mmu3->sasid; 608 749 } else { 609 750 mmu4 = (struct bcr_mmu_4 *)&tmp; 610 751 mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); ··· 621 780 IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); 622 781 623 782 n += scnprintf(buf + n, len - n, 624 - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", 625 - p_mmu->ver, p_mmu->pg_sz_k, super_pg, 783 + "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", 784 + p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS, 626 785 p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, 627 786 p_mmu->u_dtlb, p_mmu->u_itlb, 628 787 IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); ··· 656 815 657 816 /* 658 817 * Ensure that MMU features assumed by kernel exist in hardware. 659 - * For older ARC700 cpus, it has to be exact match, since the MMU 660 - * revisions were not backwards compatible (MMUv3 TLB layout changed 661 - * so even if kernel for v2 didn't use any new cmds of v3, it would 662 - * still not work. 
663 - * For HS cpus, MMUv4 was baseline and v5 is backwards compatible 664 - * (will run older software). 818 + * - For older ARC700 cpus, only v3 supported 819 + * - For HS cpus, v4 was baseline and v5 is backwards compatible 820 + * (will run older software). 665 821 */ 666 - if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER) 822 + if (is_isa_arcompact() && mmu->ver == 3) 667 823 compat = 1; 668 - else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER) 824 + else if (is_isa_arcv2() && mmu->ver >= 4) 669 825 compat = 1; 670 826 671 - if (!compat) { 672 - panic("MMU ver %d doesn't match kernel built for %d...\n", 673 - mmu->ver, CONFIG_ARC_MMU_VER); 674 - } 827 + if (!compat) 828 + panic("MMU ver %d doesn't match kernel built for\n", mmu->ver); 675 829 676 830 if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) 677 831 panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); ··· 679 843 if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) 680 844 panic("Hardware doesn't support PAE40\n"); 681 845 682 - /* Enable the MMU */ 683 - write_aux_reg(ARC_REG_PID, MMU_ENABLE); 846 + /* Enable the MMU with ASID 0 */ 847 + mmu_setup_asid(NULL, 0); 684 848 685 - /* In smp we use this reg for interrupt 1 scratch */ 686 - #ifdef ARC_USE_SCRATCH_REG 687 - /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ 688 - write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); 689 - #endif 849 + /* cache the pgd pointer in MMU SCRATCH reg (ARCv2 only) */ 850 + mmu_setup_pgd(NULL, swapper_pg_dir); 690 851 691 852 if (pae40_exist_but_not_enab()) 692 853 write_aux_reg(ARC_REG_TLBPD1HI, 0); ··· 778 945 779 946 local_irq_restore(flags); 780 947 } 781 - 782 - /*********************************************************************** 783 - * Diagnostic Routines 784 - * -Called from Low Level TLB Handlers if things don;t look good 785 - **********************************************************************/ 786 - 787 - #ifdef CONFIG_ARC_DBG_TLB_PARANOIA 788 - 789 - /* 790 - * Low Level ASM 
TLB handler calls this if it finds that HW and SW ASIDS 791 - * don't match 792 - */ 793 - void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) 794 - { 795 - pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", 796 - is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid); 797 - 798 - __asm__ __volatile__("flag 1"); 799 - } 800 - 801 - void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) 802 - { 803 - unsigned int mmu_asid; 804 - 805 - mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; 806 - 807 - /* 808 - * At the time of a TLB miss/installation 809 - * - HW version needs to match SW version 810 - * - SW needs to have a valid ASID 811 - */ 812 - if (addr < 0x70000000 && 813 - ((mm_asid == MM_CTXT_NO_ASID) || 814 - (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) 815 - print_asid_mismatch(mm_asid, mmu_asid, 0); 816 - } 817 - #endif
+28 -60
arch/arc/mm/tlbex.S
··· 39 39 #include <asm/arcregs.h> 40 40 #include <asm/cache.h> 41 41 #include <asm/processor.h> 42 - #include <asm/tlb-mmu1.h> 43 42 44 43 #ifdef CONFIG_ISA_ARCOMPACT 45 44 ;----------------------------------------------------------------- ··· 93 94 st_s r1, [r0, 4] 94 95 st_s r2, [r0, 8] 95 96 st_s r3, [r0, 12] 96 - 97 - ; VERIFY if the ASID in MMU-PID Reg is same as 98 - ; one in Linux data structures 99 - 100 - tlb_paranoid_check_asm 101 97 .endm 102 98 103 99 .macro TLBMISS_RESTORE_REGS ··· 142 148 #endif 143 149 144 150 ;============================================================================ 145 - ; Troubleshooting Stuff 146 - ;============================================================================ 147 - 148 - ; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid 149 - ; When Creating TLB Entries, instead of doing 3 dependent loads from memory, 150 - ; we use the MMU PID Reg to get current ASID. 151 - ; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. 
152 - ; So we try to detect this in TLB Mis shandler 153 - 154 - .macro tlb_paranoid_check_asm 155 - 156 - #ifdef CONFIG_ARC_DBG_TLB_PARANOIA 157 - 158 - GET_CURR_TASK_ON_CPU r3 159 - ld r0, [r3, TASK_ACT_MM] 160 - ld r0, [r0, MM_CTXT+MM_CTXT_ASID] 161 - breq r0, 0, 55f ; Error if no ASID allocated 162 - 163 - lr r1, [ARC_REG_PID] 164 - and r1, r1, 0xFF 165 - 166 - and r2, r0, 0xFF ; MMU PID bits only for comparison 167 - breq r1, r2, 5f 168 - 169 - 55: 170 - ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode 171 - lr r2, [erstatus] 172 - bbit0 r2, STATUS_U_BIT, 5f 173 - 174 - ; We sure are in troubled waters, Flag the error, but to do so 175 - ; need to switch to kernel mode stack to call error routine 176 - GET_TSK_STACK_BASE r3, sp 177 - 178 - ; Call printk to shoutout aloud 179 - mov r2, 1 180 - j print_asid_mismatch 181 - 182 - 5: ; ASIDs match so proceed normally 183 - nop 184 - 185 - #endif 186 - 187 - .endm 188 - 189 - ;============================================================================ 190 151 ;TLB Miss handling Code 191 152 ;============================================================================ 153 + 154 + #ifndef PMD_SHIFT 155 + #define PMD_SHIFT PUD_SHIFT 156 + #endif 157 + 158 + #ifndef PUD_SHIFT 159 + #define PUD_SHIFT PGDIR_SHIFT 160 + #endif 192 161 193 162 ;----------------------------------------------------------------------------- 194 163 ; This macro does the page-table lookup for the faulting address. 
··· 160 203 161 204 lr r2, [efa] 162 205 163 - #ifdef ARC_USE_SCRATCH_REG 206 + #ifdef CONFIG_ISA_ARCV2 164 207 lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd 165 208 #else 166 209 GET_CURR_TASK_ON_CPU r1 ··· 172 215 ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr 173 216 tst r3, r3 174 217 bz do_slow_path_pf ; if no Page Table, do page fault 218 + 219 + #if CONFIG_PGTABLE_LEVELS > 3 220 + lsr r0, r2, PUD_SHIFT ; Bits for indexing into PUD 221 + and r0, r0, (PTRS_PER_PUD - 1) 222 + ld.as r1, [r3, r0] ; PMD entry 223 + tst r1, r1 224 + bz do_slow_path_pf 225 + mov r3, r1 226 + #endif 227 + 228 + #if CONFIG_PGTABLE_LEVELS > 2 229 + lsr r0, r2, PMD_SHIFT ; Bits for indexing into PMD 230 + and r0, r0, (PTRS_PER_PMD - 1) 231 + ld.as r1, [r3, r0] ; PMD entry 232 + tst r1, r1 233 + bz do_slow_path_pf 234 + mov r3, r1 235 + #endif 175 236 176 237 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 177 238 and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) ··· 254 279 ; Commit the TLB entry into MMU 255 280 256 281 .macro COMMIT_ENTRY_TO_MMU 257 - #if (CONFIG_ARC_MMU_VER < 4) 282 + #ifdef CONFIG_ARC_MMU_V3 258 283 259 284 /* Get free TLB slot: Set = computed from vaddr, way = random */ 260 285 sr TLBGetIndex, [ARC_REG_TLBCOMMAND] ··· 349 374 st_s r0, [r1] ; Write back PTE 350 375 351 376 CONV_PTE_TO_TLB 352 - 353 - #if (CONFIG_ARC_MMU_VER == 1) 354 - ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of 355 - ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. 356 - ; But only for old MMU or one with Metal Fix 357 - TLB_WRITE_HEURISTICS 358 - #endif 359 377 360 378 COMMIT_ENTRY_TO_MMU 361 379 TLBMISS_RESTORE_REGS