Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[POWERPC] Use mtocrf instruction in asm when CONFIG_POWER4_ONLY=y

mtocrf is a faster single-field mtcrf (move to condition register
fields) instruction available in POWER4 and later processors. It can
make quite a difference in performance on some implementations, so use
it for CONFIG_POWER4_ONLY builds.

Signed-off-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Paul Mackerras <paulus@samba.org>

Authored by Olof Johansson and committed by Paul Mackerras
3467bfd3 56997559

+19 -9
+3 -3
arch/powerpc/lib/copyuser_64.S
···
24 24 dcbt 0,r4
25 25 beq .Lcopy_page_4K
26 26 andi. r6,r6,7
27 - mtcrf 0x01,r5
27 + PPC_MTOCRF 0x01,r5
28 28 blt cr1,.Lshort_copy
29 29 bne .Ldst_unaligned
30 30 .Ldst_aligned:
···
135 135 b .Ldo_tail
136 136
137 137 .Ldst_unaligned:
138 - mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */
138 + PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */
139 139 subf r5,r6,r5
140 140 li r7,0
141 141 cmpldi r1,r5,16
···
150 150 2: bf cr7*4+1,3f
151 151 37: lwzx r0,r7,r4
152 152 83: stwx r0,r7,r3
153 - 3: mtcrf 0x01,r5
153 + 3: PPC_MTOCRF 0x01,r5
154 154 add r4,r6,r4
155 155 add r3,r6,r3
156 156 b .Ldst_aligned
+3 -3
arch/powerpc/lib/mem_64.S
···
19 19 rlwimi r4,r4,16,0,15
20 20 cmplw cr1,r5,r0 /* do we get that far? */
21 21 rldimi r4,r4,32,0
22 - mtcrf 1,r0
22 + PPC_MTOCRF 1,r0
23 23 mr r6,r3
24 24 blt cr1,8f
25 25 beq+ 3f /* if already 8-byte aligned */
···
49 49 bdnz 4b
50 50 5: srwi. r0,r5,3
51 51 clrlwi r5,r5,29
52 - mtcrf 1,r0
52 + PPC_MTOCRF 1,r0
53 53 beq 8f
54 54 bf 29,6f
55 55 std r4,0(r6)
···
65 65 std r4,0(r6)
66 66 addi r6,r6,8
67 67 8: cmpwi r5,0
68 - mtcrf 1,r5
68 + PPC_MTOCRF 1,r5
69 69 beqlr+
70 70 bf 29,9f
71 71 stw r4,0(r6)
+3 -3
arch/powerpc/lib/memcpy_64.S
···
12 12 .align 7
13 13 _GLOBAL(memcpy)
14 14 std r3,48(r1) /* save destination pointer for return value */
15 - mtcrf 0x01,r5
15 + PPC_MTOCRF 0x01,r5
16 16 cmpldi cr1,r5,16
17 17 neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry
18 18 andi. r6,r6,7
···
128 128 b .Ldo_tail
129 129
130 130 .Ldst_unaligned:
131 - mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7
131 + PPC_MTOCRF 0x01,r6 # put #bytes to 8B bdry into cr7
132 132 subf r5,r6,r5
133 133 li r7,0
134 134 cmpldi r1,r5,16
···
143 143 2: bf cr7*4+1,3f
144 144 lwzx r0,r7,r4
145 145 stwx r0,r7,r3
146 - 3: mtcrf 0x01,r5
146 + 3: PPC_MTOCRF 0x01,r5
147 147 add r4,r6,r4
148 148 add r3,r6,r3
149 149 b .Ldst_aligned
+10
include/asm-powerpc/asm-compat.h
···
78 78 #define PPC_STLCX stringify_in_c(stdcx.)
79 79 #define PPC_CNTLZL stringify_in_c(cntlzd)
80 80
81 + /* Move to CR, single-entry optimized version. Only available
82 + * on POWER4 and later.
83 + */
84 + #ifdef CONFIG_POWER4_ONLY
85 + #define PPC_MTOCRF stringify_in_c(mtocrf)
86 + #else
87 + #define PPC_MTOCRF stringify_in_c(mtcrf)
88 + #endif
89 +
81 90 #else /* 32-bit */
82 91
83 92 /* operations for longs and pointers */
···
98 89 #define PPC_LLARX stringify_in_c(lwarx)
99 90 #define PPC_STLCX stringify_in_c(stwcx.)
100 91 #define PPC_CNTLZL stringify_in_c(cntlzw)
92 + #define PPC_MTOCRF stringify_in_c(mtcrf)
101 93
102 94 #endif
103 95