Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xtensa: clean up word alignment macros in assembly code

Remove duplicate definitions of ALIGN/src_b/__src_b and SSA8/ssa8/__ssa8
from assembly sources and put a single definition into asm/asmmacro.h.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>

+59 -52
+33
arch/xtensa/include/asm/asmmacro.h
··· 158 158 .previous \ 159 159 97: 160 160 161 + 162 + /* 163 + * Extract unaligned word that is split between two registers w0 and w1 164 + * into r regardless of machine endianness. SAR must be loaded with the 165 + * starting bit of the word (see __ssa8). 166 + */ 167 + 168 + .macro __src_b r, w0, w1 169 + #ifdef __XTENSA_EB__ 170 + src \r, \w0, \w1 171 + #else 172 + src \r, \w1, \w0 173 + #endif 174 + .endm 175 + 176 + /* 177 + * Load 2 lowest address bits of r into SAR for __src_b to extract unaligned 178 + * word starting at r from two registers loaded from consecutive aligned 179 + * addresses covering r regardless of machine endianness. 180 + * 181 + * r 0 1 2 3 182 + * LE SAR 0 8 16 24 183 + * BE SAR 32 24 16 8 184 + */ 185 + 186 + .macro __ssa8 r 187 + #ifdef __XTENSA_EB__ 188 + ssa8b \r 189 + #else 190 + ssa8l \r 191 + #endif 192 + .endm 193 + 161 194 #endif /* _XTENSA_ASMMACRO_H */
+1 -4
arch/xtensa/kernel/align.S
··· 19 19 #include <linux/linkage.h> 20 20 #include <asm/current.h> 21 21 #include <asm/asm-offsets.h> 22 + #include <asm/asmmacro.h> 22 23 #include <asm/processor.h> 23 24 24 25 #if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION ··· 67 66 #define INSN_T 24 68 67 #define INSN_OP1 16 69 68 70 - .macro __src_b r, w0, w1; src \r, \w0, \w1; .endm 71 - .macro __ssa8 r; ssa8b \r; .endm 72 69 .macro __ssa8r r; ssa8l \r; .endm 73 70 .macro __sh r, s; srl \r, \s; .endm 74 71 .macro __sl r, s; sll \r, \s; .endm ··· 80 81 #define INSN_T 4 81 82 #define INSN_OP1 12 82 83 83 - .macro __src_b r, w0, w1; src \r, \w1, \w0; .endm 84 - .macro __ssa8 r; ssa8l \r; .endm 85 84 .macro __ssa8r r; ssa8b \r; .endm 86 85 .macro __sh r, s; sll \r, \s; .endm 87 86 .macro __sl r, s; srl \r, \s; .endm
+17 -32
arch/xtensa/lib/memcopy.S
··· 10 10 */ 11 11 12 12 #include <variant/core.h> 13 - 14 - .macro src_b r, w0, w1 15 - #ifdef __XTENSA_EB__ 16 - src \r, \w0, \w1 17 - #else 18 - src \r, \w1, \w0 19 - #endif 20 - .endm 21 - 22 - .macro ssa8 r 23 - #ifdef __XTENSA_EB__ 24 - ssa8b \r 25 - #else 26 - ssa8l \r 27 - #endif 28 - .endm 13 + #include <asm/asmmacro.h> 29 14 30 15 /* 31 16 * void *memcpy(void *dst, const void *src, size_t len); ··· 194 209 .Lsrcunaligned: 195 210 _beqz a4, .Ldone # avoid loading anything for zero-length copies 196 211 # copy 16 bytes per iteration for word-aligned dst and unaligned src 197 - ssa8 a3 # set shift amount from byte offset 212 + __ssa8 a3 # set shift amount from byte offset 198 213 199 214 /* set to 1 when running on ISS (simulator) with the 200 215 lint or ferret client, or 0 to save a few cycles */ ··· 214 229 .Loop2: 215 230 l32i a7, a3, 4 216 231 l32i a8, a3, 8 217 - src_b a6, a6, a7 232 + __src_b a6, a6, a7 218 233 s32i a6, a5, 0 219 234 l32i a9, a3, 12 220 - src_b a7, a7, a8 235 + __src_b a7, a7, a8 221 236 s32i a7, a5, 4 222 237 l32i a6, a3, 16 223 - src_b a8, a8, a9 238 + __src_b a8, a8, a9 224 239 s32i a8, a5, 8 225 240 addi a3, a3, 16 226 - src_b a9, a9, a6 241 + __src_b a9, a9, a6 227 242 s32i a9, a5, 12 228 243 addi a5, a5, 16 229 244 #if !XCHAL_HAVE_LOOPS ··· 234 249 # copy 8 bytes 235 250 l32i a7, a3, 4 236 251 l32i a8, a3, 8 237 - src_b a6, a6, a7 252 + __src_b a6, a6, a7 238 253 s32i a6, a5, 0 239 254 addi a3, a3, 8 240 - src_b a7, a7, a8 255 + __src_b a7, a7, a8 241 256 s32i a7, a5, 4 242 257 addi a5, a5, 8 243 258 mov a6, a8 ··· 246 261 # copy 4 bytes 247 262 l32i a7, a3, 4 248 263 addi a3, a3, 4 249 - src_b a6, a6, a7 264 + __src_b a6, a6, a7 250 265 s32i a6, a5, 0 251 266 addi a5, a5, 4 252 267 mov a6, a7 ··· 470 485 .Lbacksrcunaligned: 471 486 _beqz a4, .Lbackdone # avoid loading anything for zero-length copies 472 487 # copy 16 bytes per iteration for word-aligned dst and unaligned src 473 - ssa8 a3 # set shift amount from byte offset 488 + __ssa8 a3 # set shift amount from byte offset 474 489 #define SIM_CHECKS_ALIGNMENT 1 /* set to 1 when running on ISS with 475 490 * the lint or ferret client, or 0 476 491 * to save a few cycles */ ··· 491 506 l32i a7, a3, 12 492 507 l32i a8, a3, 8 493 508 addi a5, a5, -16 494 - src_b a6, a7, a6 509 + __src_b a6, a7, a6 495 510 s32i a6, a5, 12 496 511 l32i a9, a3, 4 497 - src_b a7, a8, a7 512 + __src_b a7, a8, a7 498 513 s32i a7, a5, 8 499 514 l32i a6, a3, 0 500 - src_b a8, a9, a8 515 + __src_b a8, a9, a8 501 516 s32i a8, a5, 4 502 - src_b a9, a6, a9 517 + __src_b a9, a6, a9 503 518 s32i a9, a5, 0 504 519 #if !XCHAL_HAVE_LOOPS 505 520 bne a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start ··· 511 526 l32i a7, a3, 4 512 527 l32i a8, a3, 0 513 528 addi a5, a5, -8 514 - src_b a6, a7, a6 529 + __src_b a6, a7, a6 515 530 s32i a6, a5, 4 516 - src_b a7, a8, a7 531 + __src_b a7, a8, a7 517 532 s32i a7, a5, 0 518 533 mov a6, a8 519 534 .Lback12: ··· 522 537 addi a3, a3, -4 523 538 l32i a7, a3, 0 524 539 addi a5, a5, -4 525 - src_b a6, a7, a6 540 + __src_b a6, a7, a6 526 541 s32i a6, a5, 0 527 542 mov a6, a7 528 543 .Lback13:
+8 -16
arch/xtensa/lib/usercopy.S
··· 56 56 #include <variant/core.h> 57 57 #include <asm/asmmacro.h> 58 58 59 - #ifdef __XTENSA_EB__ 60 - #define ALIGN(R, W0, W1) src R, W0, W1 61 - #define SSA8(R) ssa8b R 62 - #else 63 - #define ALIGN(R, W0, W1) src R, W1, W0 64 - #define SSA8(R) ssa8l R 65 - #endif 66 - 67 59 .text 68 60 .align 4 69 61 .global __xtensa_copy_user ··· 73 81 # per iteration 74 82 movi a8, 3 # if source is also aligned, 75 83 bnone a3, a8, .Laligned # then use word copy 76 - SSA8( a3) # set shift amount from byte offset 84 + __ssa8 a3 # set shift amount from byte offset 77 85 bnez a4, .Lsrcunaligned 78 86 movi a2, 0 # return success for len==0 79 87 retw ··· 212 220 .Loop2: 213 221 EX(10f) l32i a7, a3, 4 214 222 EX(10f) l32i a8, a3, 8 215 - ALIGN( a6, a6, a7) 223 + __src_b a6, a6, a7 216 224 EX(10f) s32i a6, a5, 0 217 225 EX(10f) l32i a9, a3, 12 218 - ALIGN( a7, a7, a8) 226 + __src_b a7, a7, a8 219 227 EX(10f) s32i a7, a5, 4 220 228 EX(10f) l32i a6, a3, 16 221 - ALIGN( a8, a8, a9) 229 + __src_b a8, a8, a9 222 230 EX(10f) s32i a8, a5, 8 223 231 addi a3, a3, 16 224 - ALIGN( a9, a9, a6) 232 + __src_b a9, a9, a6 225 233 EX(10f) s32i a9, a5, 12 226 234 addi a5, a5, 16 227 235 #if !XCHAL_HAVE_LOOPS ··· 232 240 # copy 8 bytes 233 241 EX(10f) l32i a7, a3, 4 234 242 EX(10f) l32i a8, a3, 8 235 - ALIGN( a6, a6, a7) 243 + __src_b a6, a6, a7 236 244 EX(10f) s32i a6, a5, 0 237 245 addi a3, a3, 8 238 - ALIGN( a7, a7, a8) 246 + __src_b a7, a7, a8 239 247 EX(10f) s32i a7, a5, 4 240 248 addi a5, a5, 8 241 249 mov a6, a8 ··· 244 252 # copy 4 bytes 245 253 EX(10f) l32i a7, a3, 4 246 254 addi a3, a3, 4 247 - ALIGN( a6, a6, a7) 255 + __src_b a6, a6, a7 248 256 EX(10f) s32i a6, a5, 0 249 257 addi a5, a5, 4 250 258 mov a6, a7