Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Remove duplicate cacheable_memcpy/memzero functions

These functions are only used from one place each. If the cacheable_*
versions really are more efficient, then those changes should be
migrated into the common code instead.

NOTE: The old routines are just flat buggy on kernels that support
hardware with different cacheline sizes.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Authored by Kyle Moffett and committed by Benjamin Herrenschmidt.
b05ae4ee 9eccca08

+3 -145
-3
arch/powerpc/include/asm/cache.h
··· 76 76 #define _set_L3CR(val) do { } while(0) 77 77 #endif 78 78 79 - extern void cacheable_memzero(void *p, unsigned int nb); 80 - extern void *cacheable_memcpy(void *, const void *, unsigned int); 81 - 82 79 #endif /* !__ASSEMBLY__ */ 83 80 #endif /* __KERNEL__ */ 84 81 #endif /* _ASM_POWERPC_CACHE_H */
-127
arch/powerpc/lib/copy_32.S
··· 69 69 LG_CACHELINE_BYTES = L1_CACHE_SHIFT 70 70 CACHELINE_MASK = (L1_CACHE_BYTES-1) 71 71 72 - /* 73 - * Use dcbz on the complete cache lines in the destination 74 - * to set them to zero. This requires that the destination 75 - * area is cacheable. -- paulus 76 - */ 77 - _GLOBAL(cacheable_memzero) 78 - mr r5,r4 79 - li r4,0 80 - addi r6,r3,-4 81 - cmplwi 0,r5,4 82 - blt 7f 83 - stwu r4,4(r6) 84 - beqlr 85 - andi. r0,r6,3 86 - add r5,r0,r5 87 - subf r6,r0,r6 88 - clrlwi r7,r6,32-LG_CACHELINE_BYTES 89 - add r8,r7,r5 90 - srwi r9,r8,LG_CACHELINE_BYTES 91 - addic. r9,r9,-1 /* total number of complete cachelines */ 92 - ble 2f 93 - xori r0,r7,CACHELINE_MASK & ~3 94 - srwi. r0,r0,2 95 - beq 3f 96 - mtctr r0 97 - 4: stwu r4,4(r6) 98 - bdnz 4b 99 - 3: mtctr r9 100 - li r7,4 101 - 10: dcbz r7,r6 102 - addi r6,r6,CACHELINE_BYTES 103 - bdnz 10b 104 - clrlwi r5,r8,32-LG_CACHELINE_BYTES 105 - addi r5,r5,4 106 - 2: srwi r0,r5,2 107 - mtctr r0 108 - bdz 6f 109 - 1: stwu r4,4(r6) 110 - bdnz 1b 111 - 6: andi. r5,r5,3 112 - 7: cmpwi 0,r5,0 113 - beqlr 114 - mtctr r5 115 - addi r6,r6,3 116 - 8: stbu r4,1(r6) 117 - bdnz 8b 118 - blr 119 - 120 72 _GLOBAL(memset) 121 73 rlwimi r4,r4,8,16,23 122 74 rlwimi r4,r4,16,0,15 ··· 93 141 8: stbu r4,1(r6) 94 142 bdnz 8b 95 143 blr 96 - 97 - /* 98 - * This version uses dcbz on the complete cache lines in the 99 - * destination area to reduce memory traffic. This requires that 100 - * the destination area is cacheable. 101 - * We only use this version if the source and dest don't overlap. 102 - * -- paulus. 103 - */ 104 - _GLOBAL(cacheable_memcpy) 105 - add r7,r3,r5 /* test if the src & dst overlap */ 106 - add r8,r4,r5 107 - cmplw 0,r4,r7 108 - cmplw 1,r3,r8 109 - crand 0,0,4 /* cr0.lt &= cr1.lt */ 110 - blt memcpy /* if regions overlap */ 111 - 112 - addi r4,r4,-4 113 - addi r6,r3,-4 114 - neg r0,r3 115 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ 116 - beq 58f 117 - 118 - cmplw 0,r5,r0 /* is this more than total to do? 
*/ 119 - blt 63f /* if not much to do */ 120 - andi. r8,r0,3 /* get it word-aligned first */ 121 - subf r5,r0,r5 122 - mtctr r8 123 - beq+ 61f 124 - 70: lbz r9,4(r4) /* do some bytes */ 125 - stb r9,4(r6) 126 - addi r4,r4,1 127 - addi r6,r6,1 128 - bdnz 70b 129 - 61: srwi. r0,r0,2 130 - mtctr r0 131 - beq 58f 132 - 72: lwzu r9,4(r4) /* do some words */ 133 - stwu r9,4(r6) 134 - bdnz 72b 135 - 136 - 58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ 137 - clrlwi r5,r5,32-LG_CACHELINE_BYTES 138 - li r11,4 139 - mtctr r0 140 - beq 63f 141 - 53: 142 - dcbz r11,r6 143 - COPY_16_BYTES 144 - #if L1_CACHE_BYTES >= 32 145 - COPY_16_BYTES 146 - #if L1_CACHE_BYTES >= 64 147 - COPY_16_BYTES 148 - COPY_16_BYTES 149 - #if L1_CACHE_BYTES >= 128 150 - COPY_16_BYTES 151 - COPY_16_BYTES 152 - COPY_16_BYTES 153 - COPY_16_BYTES 154 - #endif 155 - #endif 156 - #endif 157 - bdnz 53b 158 - 159 - 63: srwi. r0,r5,2 160 - mtctr r0 161 - beq 64f 162 - 30: lwzu r0,4(r4) 163 - stwu r0,4(r6) 164 - bdnz 30b 165 - 166 - 64: andi. r0,r5,3 167 - mtctr r0 168 - beq+ 65f 169 - 40: lbz r0,4(r4) 170 - stb r0,4(r6) 171 - addi r4,r4,1 172 - addi r6,r6,1 173 - bdnz 40b 174 - 65: blr 175 144 176 145 _GLOBAL(memmove) 177 146 cmplw 0,r3,r4
-4
arch/powerpc/lib/ppc_ksyms.c
··· 8 8 EXPORT_SYMBOL(memmove); 9 9 EXPORT_SYMBOL(memcmp); 10 10 EXPORT_SYMBOL(memchr); 11 - #ifdef CONFIG_PPC32 12 - EXPORT_SYMBOL(cacheable_memcpy); 13 - EXPORT_SYMBOL(cacheable_memzero); 14 - #endif 15 11 16 12 EXPORT_SYMBOL(strcpy); 17 13 EXPORT_SYMBOL(strncpy);
+1 -1
arch/powerpc/mm/ppc_mmu_32.c
··· 224 224 */ 225 225 if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); 226 226 Hash = __va(memblock_alloc(Hash_size, Hash_size)); 227 - cacheable_memzero(Hash, Hash_size); 227 + memset(Hash, 0, Hash_size); 228 228 _SDR1 = __pa(Hash) | SDR1_LOW_BITS; 229 229 230 230 Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+2 -10
drivers/net/ethernet/ibm/emac/core.c
··· 79 79 ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>"); 80 80 MODULE_LICENSE("GPL"); 81 81 82 - /* 83 - * PPC64 doesn't (yet) have a cacheable_memcpy 84 - */ 85 - #ifdef CONFIG_PPC64 86 - #define cacheable_memcpy(d,s,n) memcpy((d),(s),(n)) 87 - #endif 88 - 89 82 /* minimum number of free TX descriptors required to wake up TX process */ 90 83 #define EMAC_TX_WAKEUP_THRESH (NUM_TX_BUFF / 4) 91 84 ··· 1666 1673 dev_kfree_skb(dev->rx_sg_skb); 1667 1674 dev->rx_sg_skb = NULL; 1668 1675 } else { 1669 - cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb), 1676 + memcpy(skb_tail_pointer(dev->rx_sg_skb), 1670 1677 dev->rx_skb[slot]->data, len); 1671 1678 skb_put(dev->rx_sg_skb, len); 1672 1679 emac_recycle_rx_skb(dev, slot, len); ··· 1723 1730 goto oom; 1724 1731 1725 1732 skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2); 1726 - cacheable_memcpy(copy_skb->data - 2, skb->data - 2, 1727 - len + 2); 1733 + memcpy(copy_skb->data - 2, skb->data - 2, len + 2); 1728 1734 emac_recycle_rx_skb(dev, slot, len); 1729 1735 skb = copy_skb; 1730 1736 } else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))