Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux

Pull arch/csky updates from Guo Ren:

- Three atomic optimizations

- memcpy/memcpy_io optimization

- Some coding conventions for Kbuild, removing warnings

* tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux:
csky: Move $(core-y) into arch/csky/Kbuild
csky: Remove unused core-y for dts
csky: Remove unused $(dtb-y) from boot/Makefile
csky: atomic: Add conditional atomic operations' optimization
csky: atomic: Add custom atomic.h implementation
csky: atomic: Optimize cmpxchg with acquire & release
csky: optimize memcpy_{from,to}io() and memset_io()
csky: Add C based string functions
csky: Fix versioncheck warnings
csky: patch_text: Fixup last cpu should be master
csky: fix typos in comments

+562 -375
+2
arch/csky/Kbuild
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 3 + obj-y += kernel/ mm/ 4 + 3 5 # for cleaning 4 6 subdir- += boot
+8
arch/csky/Kconfig
··· 320 320 controlled through /sys/devices/system/cpu/cpu1/hotplug/target. 321 321 322 322 Say N if you want to disable CPU hotplug. 323 + 324 + config HAVE_EFFICIENT_UNALIGNED_STRING_OPS 325 + bool "Enable EFFICIENT_UNALIGNED_STRING_OPS for abiv2" 326 + depends on CPU_CK807 || CPU_CK810 || CPU_CK860 327 + help 328 + Say Y here to enable EFFICIENT_UNALIGNED_STRING_OPS. Some CPU models could 329 + deal with unaligned access by hardware. 330 + 323 331 endmenu 324 332 325 333 source "arch/csky/Kconfig.platforms"
-3
arch/csky/Makefile
··· 61 61 62 62 head-y := arch/csky/kernel/head.o 63 63 64 - core-y += arch/csky/kernel/ 65 - core-y += arch/csky/mm/ 66 64 core-y += arch/csky/$(CSKYABI)/ 67 65 68 66 libs-y += arch/csky/lib/ \ 69 67 $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) 70 68 71 69 boot := arch/csky/boot 72 - core-y += $(boot)/dts/ 73 70 74 71 all: zImage 75 72
-2
arch/csky/abiv1/Makefile
··· 4 4 obj-y += bswapsi.o 5 5 obj-y += cacheflush.o 6 6 obj-y += mmap.o 7 - obj-y += memcpy.o 8 - obj-y += strksyms.o
-347
arch/csky/abiv1/memcpy.S
··· 1 - /* SPDX-License-Identifier: GPL-2.0 */ 2 - // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 3 - 4 - #include <linux/linkage.h> 5 - 6 - .macro GET_FRONT_BITS rx y 7 - #ifdef __cskyLE__ 8 - lsri \rx, \y 9 - #else 10 - lsli \rx, \y 11 - #endif 12 - .endm 13 - 14 - .macro GET_AFTER_BITS rx y 15 - #ifdef __cskyLE__ 16 - lsli \rx, \y 17 - #else 18 - lsri \rx, \y 19 - #endif 20 - .endm 21 - 22 - /* void *memcpy(void *dest, const void *src, size_t n); */ 23 - ENTRY(memcpy) 24 - mov r7, r2 25 - cmplti r4, 4 26 - bt .L_copy_by_byte 27 - mov r6, r2 28 - andi r6, 3 29 - cmpnei r6, 0 30 - jbt .L_dest_not_aligned 31 - mov r6, r3 32 - andi r6, 3 33 - cmpnei r6, 0 34 - jbt .L_dest_aligned_but_src_not_aligned 35 - .L0: 36 - cmplti r4, 16 37 - jbt .L_aligned_and_len_less_16bytes 38 - subi sp, 8 39 - stw r8, (sp, 0) 40 - .L_aligned_and_len_larger_16bytes: 41 - ldw r1, (r3, 0) 42 - ldw r5, (r3, 4) 43 - ldw r8, (r3, 8) 44 - stw r1, (r7, 0) 45 - ldw r1, (r3, 12) 46 - stw r5, (r7, 4) 47 - stw r8, (r7, 8) 48 - stw r1, (r7, 12) 49 - subi r4, 16 50 - addi r3, 16 51 - addi r7, 16 52 - cmplti r4, 16 53 - jbf .L_aligned_and_len_larger_16bytes 54 - ldw r8, (sp, 0) 55 - addi sp, 8 56 - cmpnei r4, 0 57 - jbf .L_return 58 - 59 - .L_aligned_and_len_less_16bytes: 60 - cmplti r4, 4 61 - bt .L_copy_by_byte 62 - .L1: 63 - ldw r1, (r3, 0) 64 - stw r1, (r7, 0) 65 - subi r4, 4 66 - addi r3, 4 67 - addi r7, 4 68 - cmplti r4, 4 69 - jbf .L1 70 - br .L_copy_by_byte 71 - 72 - .L_return: 73 - rts 74 - 75 - .L_copy_by_byte: /* len less than 4 bytes */ 76 - cmpnei r4, 0 77 - jbf .L_return 78 - .L4: 79 - ldb r1, (r3, 0) 80 - stb r1, (r7, 0) 81 - addi r3, 1 82 - addi r7, 1 83 - decne r4 84 - jbt .L4 85 - rts 86 - 87 - /* 88 - * If dest is not aligned, just copying some bytes makes the dest align. 89 - * Afther that, we judge whether the src is aligned. 90 - */ 91 - .L_dest_not_aligned: 92 - mov r5, r3 93 - rsub r5, r5, r7 94 - abs r5, r5 95 - cmplt r5, r4 96 - bt .L_copy_by_byte 97 - mov r5, r7 98 - sub r5, r3 99 - cmphs r5, r4 100 - bf .L_copy_by_byte 101 - mov r5, r6 102 - .L5: 103 - ldb r1, (r3, 0) /* makes the dest align. */ 104 - stb r1, (r7, 0) 105 - addi r5, 1 106 - subi r4, 1 107 - addi r3, 1 108 - addi r7, 1 109 - cmpnei r5, 4 110 - jbt .L5 111 - cmplti r4, 4 112 - jbt .L_copy_by_byte 113 - mov r6, r3 /* judge whether the src is aligned. */ 114 - andi r6, 3 115 - cmpnei r6, 0 116 - jbf .L0 117 - 118 - /* Judge the number of misaligned, 1, 2, 3? */ 119 - .L_dest_aligned_but_src_not_aligned: 120 - mov r5, r3 121 - rsub r5, r5, r7 122 - abs r5, r5 123 - cmplt r5, r4 124 - bt .L_copy_by_byte 125 - bclri r3, 0 126 - bclri r3, 1 127 - ldw r1, (r3, 0) 128 - addi r3, 4 129 - cmpnei r6, 2 130 - bf .L_dest_aligned_but_src_not_aligned_2bytes 131 - cmpnei r6, 3 132 - bf .L_dest_aligned_but_src_not_aligned_3bytes 133 - 134 - .L_dest_aligned_but_src_not_aligned_1byte: 135 - mov r5, r7 136 - sub r5, r3 137 - cmphs r5, r4 138 - bf .L_copy_by_byte 139 - cmplti r4, 16 140 - bf .L11 141 - .L10: /* If the len is less than 16 bytes */ 142 - GET_FRONT_BITS r1 8 143 - mov r5, r1 144 - ldw r6, (r3, 0) 145 - mov r1, r6 146 - GET_AFTER_BITS r6 24 147 - or r5, r6 148 - stw r5, (r7, 0) 149 - subi r4, 4 150 - addi r3, 4 151 - addi r7, 4 152 - cmplti r4, 4 153 - bf .L10 154 - subi r3, 3 155 - br .L_copy_by_byte 156 - .L11: 157 - subi sp, 16 158 - stw r8, (sp, 0) 159 - stw r9, (sp, 4) 160 - stw r10, (sp, 8) 161 - stw r11, (sp, 12) 162 - .L12: 163 - ldw r5, (r3, 0) 164 - ldw r11, (r3, 4) 165 - ldw r8, (r3, 8) 166 - ldw r9, (r3, 12) 167 - 168 - GET_FRONT_BITS r1 8 /* little or big endian? */ 169 - mov r10, r5 170 - GET_AFTER_BITS r5 24 171 - or r5, r1 172 - 173 - GET_FRONT_BITS r10 8 174 - mov r1, r11 175 - GET_AFTER_BITS r11 24 176 - or r11, r10 177 - 178 - GET_FRONT_BITS r1 8 179 - mov r10, r8 180 - GET_AFTER_BITS r8 24 181 - or r8, r1 182 - 183 - GET_FRONT_BITS r10 8 184 - mov r1, r9 185 - GET_AFTER_BITS r9 24 186 - or r9, r10 187 - 188 - stw r5, (r7, 0) 189 - stw r11, (r7, 4) 190 - stw r8, (r7, 8) 191 - stw r9, (r7, 12) 192 - subi r4, 16 193 - addi r3, 16 194 - addi r7, 16 195 - cmplti r4, 16 196 - jbf .L12 197 - ldw r8, (sp, 0) 198 - ldw r9, (sp, 4) 199 - ldw r10, (sp, 8) 200 - ldw r11, (sp, 12) 201 - addi sp , 16 202 - cmplti r4, 4 203 - bf .L10 204 - subi r3, 3 205 - br .L_copy_by_byte 206 - 207 - .L_dest_aligned_but_src_not_aligned_2bytes: 208 - cmplti r4, 16 209 - bf .L21 210 - .L20: 211 - GET_FRONT_BITS r1 16 212 - mov r5, r1 213 - ldw r6, (r3, 0) 214 - mov r1, r6 215 - GET_AFTER_BITS r6 16 216 - or r5, r6 217 - stw r5, (r7, 0) 218 - subi r4, 4 219 - addi r3, 4 220 - addi r7, 4 221 - cmplti r4, 4 222 - bf .L20 223 - subi r3, 2 224 - br .L_copy_by_byte 225 - rts 226 - 227 - .L21: /* n > 16 */ 228 - subi sp, 16 229 - stw r8, (sp, 0) 230 - stw r9, (sp, 4) 231 - stw r10, (sp, 8) 232 - stw r11, (sp, 12) 233 - 234 - .L22: 235 - ldw r5, (r3, 0) 236 - ldw r11, (r3, 4) 237 - ldw r8, (r3, 8) 238 - ldw r9, (r3, 12) 239 - 240 - GET_FRONT_BITS r1 16 241 - mov r10, r5 242 - GET_AFTER_BITS r5 16 243 - or r5, r1 244 - 245 - GET_FRONT_BITS r10 16 246 - mov r1, r11 247 - GET_AFTER_BITS r11 16 248 - or r11, r10 249 - 250 - GET_FRONT_BITS r1 16 251 - mov r10, r8 252 - GET_AFTER_BITS r8 16 253 - or r8, r1 254 - 255 - GET_FRONT_BITS r10 16 256 - mov r1, r9 257 - GET_AFTER_BITS r9 16 258 - or r9, r10 259 - 260 - stw r5, (r7, 0) 261 - stw r11, (r7, 4) 262 - stw r8, (r7, 8) 263 - stw r9, (r7, 12) 264 - subi r4, 16 265 - addi r3, 16 266 - addi r7, 16 267 - cmplti r4, 16 268 - jbf .L22 269 - ldw r8, (sp, 0) 270 - ldw r9, (sp, 4) 271 - ldw r10, (sp, 8) 272 - ldw r11, (sp, 12) 273 - addi sp, 16 274 - cmplti r4, 4 275 - bf .L20 276 - subi r3, 2 277 - br .L_copy_by_byte 278 - 279 - 280 - .L_dest_aligned_but_src_not_aligned_3bytes: 281 - cmplti r4, 16 282 - bf .L31 283 - .L30: 284 - GET_FRONT_BITS r1 24 285 - mov r5, r1 286 - ldw r6, (r3, 0) 287 - mov r1, r6 288 - GET_AFTER_BITS r6 8 289 - or r5, r6 290 - stw r5, (r7, 0) 291 - subi r4, 4 292 - addi r3, 4 293 - addi r7, 4 294 - cmplti r4, 4 295 - bf .L30 296 - subi r3, 1 297 - br .L_copy_by_byte 298 - .L31: 299 - subi sp, 16 300 - stw r8, (sp, 0) 301 - stw r9, (sp, 4) 302 - stw r10, (sp, 8) 303 - stw r11, (sp, 12) 304 - .L32: 305 - ldw r5, (r3, 0) 306 - ldw r11, (r3, 4) 307 - ldw r8, (r3, 8) 308 - ldw r9, (r3, 12) 309 - 310 - GET_FRONT_BITS r1 24 311 - mov r10, r5 312 - GET_AFTER_BITS r5 8 313 - or r5, r1 314 - 315 - GET_FRONT_BITS r10 24 316 - mov r1, r11 317 - GET_AFTER_BITS r11 8 318 - or r11, r10 319 - 320 - GET_FRONT_BITS r1 24 321 - mov r10, r8 322 - GET_AFTER_BITS r8 8 323 - or r8, r1 324 - 325 - GET_FRONT_BITS r10 24 326 - mov r1, r9 327 - GET_AFTER_BITS r9 8 328 - or r9, r10 329 - 330 - stw r5, (r7, 0) 331 - stw r11, (r7, 4) 332 - stw r8, (r7, 8) 333 - stw r9, (r7, 12) 334 - subi r4, 16 335 - addi r3, 16 336 - addi r7, 16 337 - cmplti r4, 16 338 - jbf .L32 339 - ldw r8, (sp, 0) 340 - ldw r9, (sp, 4) 341 - ldw r10, (sp, 8) 342 - ldw r11, (sp, 12) 343 - addi sp, 16 344 - cmplti r4, 4 345 - bf .L30 346 - subi r3, 1 347 - br .L_copy_by_byte
-6
arch/csky/abiv1/strksyms.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 3 - 4 - #include <linux/module.h> 5 - 6 - EXPORT_SYMBOL(memcpy);
+2
arch/csky/abiv2/Makefile
··· 2 2 obj-y += cacheflush.o 3 3 obj-$(CONFIG_CPU_HAS_FPU) += fpu.o 4 4 obj-y += memcmp.o 5 + ifeq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y) 5 6 obj-y += memcpy.o 6 7 obj-y += memmove.o 7 8 obj-y += memset.o 9 + endif 8 10 obj-y += strcmp.o 9 11 obj-y += strcpy.o 10 12 obj-y += strlen.o
+3 -1
arch/csky/abiv2/strksyms.c
··· 3 3 4 4 #include <linux/module.h> 5 5 6 + #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS 6 7 EXPORT_SYMBOL(memcpy); 7 8 EXPORT_SYMBOL(memset); 8 - EXPORT_SYMBOL(memcmp); 9 9 EXPORT_SYMBOL(memmove); 10 + #endif 11 + EXPORT_SYMBOL(memcmp); 10 12 EXPORT_SYMBOL(strcmp); 11 13 EXPORT_SYMBOL(strcpy); 12 14 EXPORT_SYMBOL(strlen);
-1
arch/csky/boot/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 targets := Image zImage uImage 3 - targets += $(dtb-y) 4 3 5 4 $(obj)/Image: vmlinux FORCE 6 5 $(call if_changed,objcopy)
+237
arch/csky/include/asm/atomic.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef __ASM_CSKY_ATOMIC_H 4 + #define __ASM_CSKY_ATOMIC_H 5 + 6 + #ifdef CONFIG_SMP 7 + #include <asm-generic/atomic64.h> 8 + 9 + #include <asm/cmpxchg.h> 10 + #include <asm/barrier.h> 11 + 12 + #define __atomic_acquire_fence() __bar_brarw() 13 + 14 + #define __atomic_release_fence() __bar_brwaw() 15 + 16 + static __always_inline int arch_atomic_read(const atomic_t *v) 17 + { 18 + return READ_ONCE(v->counter); 19 + } 20 + static __always_inline void arch_atomic_set(atomic_t *v, int i) 21 + { 22 + WRITE_ONCE(v->counter, i); 23 + } 24 + 25 + #define ATOMIC_OP(op) \ 26 + static __always_inline \ 27 + void arch_atomic_##op(int i, atomic_t *v) \ 28 + { \ 29 + unsigned long tmp; \ 30 + __asm__ __volatile__ ( \ 31 + "1: ldex.w %0, (%2) \n" \ 32 + " " #op " %0, %1 \n" \ 33 + " stex.w %0, (%2) \n" \ 34 + " bez %0, 1b \n" \ 35 + : "=&r" (tmp) \ 36 + : "r" (i), "r" (&v->counter) \ 37 + : "memory"); \ 38 + } 39 + 40 + ATOMIC_OP(add) 41 + ATOMIC_OP(sub) 42 + ATOMIC_OP(and) 43 + ATOMIC_OP( or) 44 + ATOMIC_OP(xor) 45 + 46 + #undef ATOMIC_OP 47 + 48 + #define ATOMIC_FETCH_OP(op) \ 49 + static __always_inline \ 50 + int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ 51 + { \ 52 + register int ret, tmp; \ 53 + __asm__ __volatile__ ( \ 54 + "1: ldex.w %0, (%3) \n" \ 55 + " mov %1, %0 \n" \ 56 + " " #op " %0, %2 \n" \ 57 + " stex.w %0, (%3) \n" \ 58 + " bez %0, 1b \n" \ 59 + : "=&r" (tmp), "=&r" (ret) \ 60 + : "r" (i), "r"(&v->counter) \ 61 + : "memory"); \ 62 + return ret; \ 63 + } 64 + 65 + #define ATOMIC_OP_RETURN(op, c_op) \ 66 + static __always_inline \ 67 + int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ 68 + { \ 69 + return arch_atomic_fetch_##op##_relaxed(i, v) c_op i; \ 70 + } 71 + 72 + #define ATOMIC_OPS(op, c_op) \ 73 + ATOMIC_FETCH_OP(op) \ 74 + ATOMIC_OP_RETURN(op, c_op) 75 + 76 + ATOMIC_OPS(add, +) 77 + ATOMIC_OPS(sub, -) 78 + 79 + #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed 80 + #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed 81 + 82 + #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed 83 + #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed 84 + 85 + #undef ATOMIC_OPS 86 + #undef ATOMIC_OP_RETURN 87 + 88 + #define ATOMIC_OPS(op) \ 89 + ATOMIC_FETCH_OP(op) 90 + 91 + ATOMIC_OPS(and) 92 + ATOMIC_OPS( or) 93 + ATOMIC_OPS(xor) 94 + 95 + #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed 96 + #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed 97 + #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed 98 + 99 + #undef ATOMIC_OPS 100 + 101 + #undef ATOMIC_FETCH_OP 102 + 103 + static __always_inline int 104 + arch_atomic_fetch_add_unless(atomic_t *v, int a, int u) 105 + { 106 + int prev, tmp; 107 + 108 + __asm__ __volatile__ ( 109 + RELEASE_FENCE 110 + "1: ldex.w %0, (%3) \n" 111 + " cmpne %0, %4 \n" 112 + " bf 2f \n" 113 + " mov %1, %0 \n" 114 + " add %1, %2 \n" 115 + " stex.w %1, (%3) \n" 116 + " bez %1, 1b \n" 117 + FULL_FENCE 118 + "2:\n" 119 + : "=&r" (prev), "=&r" (tmp) 120 + : "r" (a), "r" (&v->counter), "r" (u) 121 + : "memory"); 122 + 123 + return prev; 124 + } 125 + #define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless 126 + 127 + static __always_inline bool 128 + arch_atomic_inc_unless_negative(atomic_t *v) 129 + { 130 + int rc, tmp; 131 + 132 + __asm__ __volatile__ ( 133 + RELEASE_FENCE 134 + "1: ldex.w %0, (%2) \n" 135 + " movi %1, 0 \n" 136 + " blz %0, 2f \n" 137 + " movi %1, 1 \n" 138 + " addi %0, 1 \n" 139 + " stex.w %0, (%2) \n" 140 + " bez %0, 1b \n" 141 + FULL_FENCE 142 + "2:\n" 143 + : "=&r" (tmp), "=&r" (rc) 144 + : "r" (&v->counter) 145 + : "memory"); 146 + 147 + return tmp ? true : false; 148 + 149 + } 150 + #define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative 151 + 152 + static __always_inline bool 153 + arch_atomic_dec_unless_positive(atomic_t *v) 154 + { 155 + int rc, tmp; 156 + 157 + __asm__ __volatile__ ( 158 + RELEASE_FENCE 159 + "1: ldex.w %0, (%2) \n" 160 + " movi %1, 0 \n" 161 + " bhz %0, 2f \n" 162 + " movi %1, 1 \n" 163 + " subi %0, 1 \n" 164 + " stex.w %0, (%2) \n" 165 + " bez %0, 1b \n" 166 + FULL_FENCE 167 + "2:\n" 168 + : "=&r" (tmp), "=&r" (rc) 169 + : "r" (&v->counter) 170 + : "memory"); 171 + 172 + return tmp ? true : false; 173 + } 174 + #define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive 175 + 176 + static __always_inline int 177 + arch_atomic_dec_if_positive(atomic_t *v) 178 + { 179 + int dec, tmp; 180 + 181 + __asm__ __volatile__ ( 182 + RELEASE_FENCE 183 + "1: ldex.w %0, (%2) \n" 184 + " subi %1, %0, 1 \n" 185 + " blz %1, 2f \n" 186 + " stex.w %1, (%2) \n" 187 + " bez %1, 1b \n" 188 + FULL_FENCE 189 + "2:\n" 190 + : "=&r" (dec), "=&r" (tmp) 191 + : "r" (&v->counter) 192 + : "memory"); 193 + 194 + return dec - 1; 195 + } 196 + #define arch_atomic_dec_if_positive arch_atomic_dec_if_positive 197 + 198 + #define ATOMIC_OP() \ 199 + static __always_inline \ 200 + int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ 201 + { \ 202 + return __xchg_relaxed(n, &(v->counter), 4); \ 203 + } \ 204 + static __always_inline \ 205 + int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \ 206 + { \ 207 + return __cmpxchg_relaxed(&(v->counter), o, n, 4); \ 208 + } \ 209 + static __always_inline \ 210 + int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n) \ 211 + { \ 212 + return __cmpxchg_acquire(&(v->counter), o, n, 4); \ 213 + } \ 214 + static __always_inline \ 215 + int arch_atomic_cmpxchg(atomic_t *v, int o, int n) \ 216 + { \ 217 + return __cmpxchg(&(v->counter), o, n, 4); \ 218 + } 219 + 220 + #define ATOMIC_OPS() \ 221 + ATOMIC_OP() 222 + 223 + ATOMIC_OPS() 224 + 225 + #define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed 226 + #define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed 227 + #define arch_atomic_cmpxchg_acquire arch_atomic_cmpxchg_acquire 228 + #define arch_atomic_cmpxchg arch_atomic_cmpxchg 229 + 230 + #undef ATOMIC_OPS 231 + #undef ATOMIC_OP 232 + 233 + #else 234 + #include <asm-generic/atomic.h> 235 + #endif 236 + 237 + #endif /* __ASM_CSKY_ATOMIC_H */
+7 -4
arch/csky/include/asm/barrier.h
··· 37 37 * bar.brar 38 38 * bar.bwaw 39 39 */ 40 + #define FULL_FENCE ".long 0x842fc000\n" 41 + #define ACQUIRE_FENCE ".long 0x8427c000\n" 42 + #define RELEASE_FENCE ".long 0x842ec000\n" 43 + 40 44 #define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory") 41 45 #define __bar_br() asm volatile (".long 0x8424c000\n":::"memory") 42 46 #define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory") 43 47 #define __bar_arw() asm volatile (".long 0x8423c000\n":::"memory") 44 48 #define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory") 45 49 #define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory") 46 - #define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory") 47 - #define __bar_brarw() asm volatile (".long 0x8427c000\n":::"memory") 50 + #define __bar_brwarw() asm volatile (FULL_FENCE:::"memory") 51 + #define __bar_brarw() asm volatile (ACQUIRE_FENCE:::"memory") 48 52 #define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory") 49 53 #define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory") 50 - #define __bar_brwaw() asm volatile (".long 0x842ec000\n":::"memory") 54 + #define __bar_brwaw() asm volatile (RELEASE_FENCE:::"memory") 51 55 #define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory") 52 56 #define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory") 53 57 #define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory") ··· 60 56 #define __smp_rmb() __bar_brar() 61 57 #define __smp_wmb() __bar_bwaw() 62 58 63 - #define ACQUIRE_FENCE ".long 0x8427c000\n" 64 59 #define __smp_acquire_fence() __bar_brarw() 65 60 #define __smp_release_fence() __bar_brwaw() 66 61
+60 -4
arch/csky/include/asm/cmpxchg.h
··· 64 64 #define arch_cmpxchg_relaxed(ptr, o, n) \ 65 65 (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) 66 66 67 - #define arch_cmpxchg(ptr, o, n) \ 67 + #define __cmpxchg_acquire(ptr, old, new, size) \ 68 68 ({ \ 69 + __typeof__(ptr) __ptr = (ptr); \ 70 + __typeof__(new) __new = (new); \ 71 + __typeof__(new) __tmp; \ 72 + __typeof__(old) __old = (old); \ 69 73 __typeof__(*(ptr)) __ret; \ 70 - __smp_release_fence(); \ 71 - __ret = arch_cmpxchg_relaxed(ptr, o, n); \ 72 - __smp_acquire_fence(); \ 74 + switch (size) { \ 75 + case 4: \ 76 + asm volatile ( \ 77 + "1: ldex.w %0, (%3) \n" \ 78 + " cmpne %0, %4 \n" \ 79 + " bt 2f \n" \ 80 + " mov %1, %2 \n" \ 81 + " stex.w %1, (%3) \n" \ 82 + " bez %1, 1b \n" \ 83 + ACQUIRE_FENCE \ 84 + "2: \n" \ 85 + : "=&r" (__ret), "=&r" (__tmp) \ 86 + : "r" (__new), "r"(__ptr), "r"(__old) \ 87 + :); \ 88 + break; \ 89 + default: \ 90 + __bad_xchg(); \ 91 + } \ 73 92 __ret; \ 74 93 }) 75 94 95 + #define arch_cmpxchg_acquire(ptr, o, n) \ 96 + (__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr)))) 97 + 98 + #define __cmpxchg(ptr, old, new, size) \ 99 + ({ \ 100 + __typeof__(ptr) __ptr = (ptr); \ 101 + __typeof__(new) __new = (new); \ 102 + __typeof__(new) __tmp; \ 103 + __typeof__(old) __old = (old); \ 104 + __typeof__(*(ptr)) __ret; \ 105 + switch (size) { \ 106 + case 4: \ 107 + asm volatile ( \ 108 + RELEASE_FENCE \ 109 + "1: ldex.w %0, (%3) \n" \ 110 + " cmpne %0, %4 \n" \ 111 + " bt 2f \n" \ 112 + " mov %1, %2 \n" \ 113 + " stex.w %1, (%3) \n" \ 114 + " bez %1, 1b \n" \ 115 + FULL_FENCE \ 116 + "2: \n" \ 117 + : "=&r" (__ret), "=&r" (__tmp) \ 118 + : "r" (__new), "r"(__ptr), "r"(__old) \ 119 + :); \ 120 + break; \ 121 + default: \ 122 + __bad_xchg(); \ 123 + } \ 124 + __ret; \ 125 + }) 126 + 127 + #define arch_cmpxchg(ptr, o, n) \ 128 + (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)))) 129 + 130 + #define arch_cmpxchg_local(ptr, o, n) \ 131 + (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) 76 132 #else 77 133 #include <asm-generic/cmpxchg.h> 78 134 #endif
+11 -1
arch/csky/include/asm/io.h
··· 5 5 6 6 #include <linux/pgtable.h> 7 7 #include <linux/types.h> 8 - #include <linux/version.h> 9 8 10 9 /* 11 10 * I/O memory access primitives. Reads are ordered relative to any ··· 30 31 #define writew(v,c) ({ wmb(); writew_relaxed((v),(c)); mb(); }) 31 32 #define writel(v,c) ({ wmb(); writel_relaxed((v),(c)); mb(); }) 32 33 #endif 34 + 35 + /* 36 + * String version of I/O memory access operations. 37 + */ 38 + extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t); 39 + extern void __memcpy_toio(volatile void __iomem *, const void *, size_t); 40 + extern void __memset_io(volatile void __iomem *, int, size_t); 41 + 42 + #define memset_io(c,v,l) __memset_io((c),(v),(l)) 43 + #define memcpy_fromio(a,c,l) __memcpy_fromio((a),(c),(l)) 44 + #define memcpy_toio(c,a,l) __memcpy_toio((c),(a),(l)) 33 45 34 46 /* 35 47 * I/O memory mapping functions.
+1 -1
arch/csky/kernel/Makefile
··· 2 2 extra-y := head.o vmlinux.lds 3 3 4 4 obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/ 5 - obj-y += power.o syscall.o syscall_table.o setup.o 5 + obj-y += power.o syscall.o syscall_table.o setup.o io.o 6 6 obj-y += process.o cpu-probe.o ptrace.o stacktrace.o 7 7 obj-y += probes/ 8 8
+91
arch/csky/kernel/io.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/export.h> 4 + #include <linux/types.h> 5 + #include <linux/io.h> 6 + 7 + /* 8 + * Copy data from IO memory space to "real" memory space. 9 + */ 10 + void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) 11 + { 12 + while (count && !IS_ALIGNED((unsigned long)from, 4)) { 13 + *(u8 *)to = __raw_readb(from); 14 + from++; 15 + to++; 16 + count--; 17 + } 18 + 19 + while (count >= 4) { 20 + *(u32 *)to = __raw_readl(from); 21 + from += 4; 22 + to += 4; 23 + count -= 4; 24 + } 25 + 26 + while (count) { 27 + *(u8 *)to = __raw_readb(from); 28 + from++; 29 + to++; 30 + count--; 31 + } 32 + } 33 + EXPORT_SYMBOL(__memcpy_fromio); 34 + 35 + /* 36 + * Copy data from "real" memory space to IO memory space. 37 + */ 38 + void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) 39 + { 40 + while (count && !IS_ALIGNED((unsigned long)to, 4)) { 41 + __raw_writeb(*(u8 *)from, to); 42 + from++; 43 + to++; 44 + count--; 45 + } 46 + 47 + while (count >= 4) { 48 + __raw_writel(*(u32 *)from, to); 49 + from += 4; 50 + to += 4; 51 + count -= 4; 52 + } 53 + 54 + while (count) { 55 + __raw_writeb(*(u8 *)from, to); 56 + from++; 57 + to++; 58 + count--; 59 + } 60 + } 61 + EXPORT_SYMBOL(__memcpy_toio); 62 + 63 + /* 64 + * "memset" on IO memory space. 65 + */ 66 + void __memset_io(volatile void __iomem *dst, int c, size_t count) 67 + { 68 + u32 qc = (u8)c; 69 + 70 + qc |= qc << 8; 71 + qc |= qc << 16; 72 + 73 + while (count && !IS_ALIGNED((unsigned long)dst, 4)) { 74 + __raw_writeb(c, dst); 75 + dst++; 76 + count--; 77 + } 78 + 79 + while (count >= 4) { 80 + __raw_writel(qc, dst); 81 + dst += 4; 82 + count -= 4; 83 + } 84 + 85 + while (count) { 86 + __raw_writeb(c, dst); 87 + dst++; 88 + count--; 89 + } 90 + } 91 + EXPORT_SYMBOL(__memset_io);
+1 -1
arch/csky/kernel/module.c
··· 68 68 *location = rel[i].r_addend + sym->st_value; 69 69 break; 70 70 case R_CSKY_PC32: 71 - /* Add the value, subtract its postition */ 71 + /* Add the value, subtract its position */ 72 72 *location = rel[i].r_addend + sym->st_value 73 73 - (uint32_t)location; 74 74 break;
+1 -1
arch/csky/kernel/probes/kprobes.c
··· 30 30 struct csky_insn_patch *param = priv; 31 31 unsigned int addr = (unsigned int)param->addr; 32 32 33 - if (atomic_inc_return(&param->cpu_count) == 1) { 33 + if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) { 34 34 *(u16 *) addr = cpu_to_le16(param->opcode); 35 35 dcache_wb_range(addr, addr + 2); 36 36 atomic_inc(&param->cpu_count);
+1 -1
arch/csky/kernel/probes/uprobes.c
··· 102 102 struct uprobe_task *utask = current->utask; 103 103 104 104 /* 105 - * Task has received a fatal signal, so reset back to probbed 105 + * Task has received a fatal signal, so reset back to probed 106 106 * address. 107 107 */ 108 108 instruction_pointer_set(regs, utask->vaddr);
-1
arch/csky/kernel/process.c
··· 2 2 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. 3 3 4 4 #include <linux/module.h> 5 - #include <linux/version.h> 6 5 #include <linux/sched.h> 7 6 #include <linux/sched/task_stack.h> 8 7 #include <linux/sched/debug.h>
+3
arch/csky/lib/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 lib-y := usercopy.o delay.o 3 3 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o 4 + ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y) 5 + lib-y += string.o 6 + endif
+134
arch/csky/lib/string.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * String functions optimized for hardware which doesn't 4 + * handle unaligned memory accesses efficiently. 5 + * 6 + * Copyright (C) 2021 Matteo Croce 7 + */ 8 + 9 + #include <linux/types.h> 10 + #include <linux/module.h> 11 + 12 + /* Minimum size for a word copy to be convenient */ 13 + #define BYTES_LONG sizeof(long) 14 + #define WORD_MASK (BYTES_LONG - 1) 15 + #define MIN_THRESHOLD (BYTES_LONG * 2) 16 + 17 + /* convenience union to avoid cast between different pointer types */ 18 + union types { 19 + u8 *as_u8; 20 + unsigned long *as_ulong; 21 + uintptr_t as_uptr; 22 + }; 23 + 24 + union const_types { 25 + const u8 *as_u8; 26 + unsigned long *as_ulong; 27 + uintptr_t as_uptr; 28 + }; 29 + 30 + void *memcpy(void *dest, const void *src, size_t count) 31 + { 32 + union const_types s = { .as_u8 = src }; 33 + union types d = { .as_u8 = dest }; 34 + int distance = 0; 35 + 36 + if (count < MIN_THRESHOLD) 37 + goto copy_remainder; 38 + 39 + /* Copy a byte at time until destination is aligned. */ 40 + for (; d.as_uptr & WORD_MASK; count--) 41 + *d.as_u8++ = *s.as_u8++; 42 + 43 + distance = s.as_uptr & WORD_MASK; 44 + 45 + if (distance) { 46 + unsigned long last, next; 47 + 48 + /* 49 + * s is distance bytes ahead of d, and d just reached 50 + * the alignment boundary. Move s backward to word align it 51 + * and shift data to compensate for distance, in order to do 52 + * word-by-word copy. 53 + */ 54 + s.as_u8 -= distance; 55 + 56 + next = s.as_ulong[0]; 57 + for (; count >= BYTES_LONG; count -= BYTES_LONG) { 58 + last = next; 59 + next = s.as_ulong[1]; 60 + 61 + d.as_ulong[0] = last >> (distance * 8) | 62 + next << ((BYTES_LONG - distance) * 8); 63 + 64 + d.as_ulong++; 65 + s.as_ulong++; 66 + } 67 + 68 + /* Restore s with the original offset. */ 69 + s.as_u8 += distance; 70 + } else { 71 + /* 72 + * If the source and dest lower bits are the same, do a simple 73 + * 32/64 bit wide copy. 74 + */ 75 + for (; count >= BYTES_LONG; count -= BYTES_LONG) 76 + *d.as_ulong++ = *s.as_ulong++; 77 + } 78 + 79 + copy_remainder: 80 + while (count--) 81 + *d.as_u8++ = *s.as_u8++; 82 + 83 + return dest; 84 + } 85 + EXPORT_SYMBOL(memcpy); 86 + 87 + /* 88 + * Simply check if the buffer overlaps an call memcpy() in case, 89 + * otherwise do a simple one byte at time backward copy. 90 + */ 91 + void *memmove(void *dest, const void *src, size_t count) 92 + { 93 + if (dest < src || src + count <= dest) 94 + return memcpy(dest, src, count); 95 + 96 + if (dest > src) { 97 + const char *s = src + count; 98 + char *tmp = dest + count; 99 + 100 + while (count--) 101 + *--tmp = *--s; 102 + } 103 + return dest; 104 + } 105 + EXPORT_SYMBOL(memmove); 106 + 107 + void *memset(void *s, int c, size_t count) 108 + { 109 + union types dest = { .as_u8 = s }; 110 + 111 + if (count >= MIN_THRESHOLD) { 112 + unsigned long cu = (unsigned long)c; 113 + 114 + /* Compose an ulong with 'c' repeated 4/8 times */ 115 + cu |= cu << 8; 116 + cu |= cu << 16; 117 + /* Suppress warning on 32 bit machines */ 118 + cu |= (cu << 16) << 16; 119 + 120 + for (; count && dest.as_uptr & WORD_MASK; count--) 121 + *dest.as_u8++ = c; 122 + 123 + /* Copy using the largest size allowed */ 124 + for (; count >= BYTES_LONG; count -= BYTES_LONG) 125 + *dest.as_ulong++ = cu; 126 + } 127 + 128 + /* copy the remainder */ 129 + while (count--) 130 + *dest.as_u8++ = c; 131 + 132 + return s; 133 + } 134 + EXPORT_SYMBOL(memset);
-1
arch/csky/mm/dma-mapping.c
··· 9 9 #include <linux/mm.h> 10 10 #include <linux/scatterlist.h> 11 11 #include <linux/types.h> 12 - #include <linux/version.h> 13 12 #include <asm/cache.h> 14 13 15 14 static inline void cache_op(phys_addr_t paddr, size_t size,