Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.15 186 lines 4.9 kB view raw
! SPDX-License-Identifier: GPL-2.0
! Copyright (C) 2008-2012 Imagination Technologies Ltd.
!
! void *_memcpy(void *dst, const void *src, size_t cnt)
!
! Optimised memcpy for the META architecture.  Strategy:
!   * < 16 bytes: plain byte-copy loop.
!   * >= 16 bytes: byte-copy until dst is 8-byte aligned, then
!       - src also 8-byte aligned: copy 32-byte blocks with GETL/SETL pairs;
!       - src unaligned: round src down to an 8-byte boundary and rebuild
!         each output quadword by shifting/merging two input quadwords
!         (separate loops for 1-3, exactly 4, and 5-7 bytes of skew).
!   Any tail bytes fall back to the byte-copy loop.
!
! NOTE(review): loops use the META hardware repeat counter: writing TXRPT
! arms the loop and BR branches while the counter runs down, falling
! through when exhausted — assumed from usage here; confirm against the
! META ISA manual.

	.text
	.global	_memcpy
	.type	_memcpy,function
! Register roles on entry / exit:
! D1Ar1 dst
! D0Ar2 src
! D1Ar3 cnt
! D0Re0 dst			! return value (dst, unchanged)
_memcpy:
	CMP	D1Ar3, #16
	MOV	A1.2, D0Ar2		! source pointer
	MOV	A0.2, D1Ar1		! destination pointer
	MOV	A0.3, D1Ar1		! for return value
! If there are less than 16 bytes to copy use the byte copy loop
	BGE	$Llong_copy

$Lbyte_copy:
! Simply copy a byte at a time.  SUBS sets flags: cnt == 0 gives a
! negative repeat count, so skip straight to the exit path.
	SUBS	TXRPT, D1Ar3, #1
	BLT	$Lend
$Lloop_byte:
	GETB	D1Re0, [A1.2++]		! load byte, post-increment src
	SETB	[A0.2++], D1Re0		! store byte, post-increment dst
	BR	$Lloop_byte

$Lend:
! Finally set return value and return
	MOV	D0Re0, A0.3		! return original dst
	MOV	PC, D1RtP

$Llong_copy:
	ANDS	D1Ar5, D1Ar1, #7	! test destination alignment
	BZ	$Laligned_dst

! The destination address is not 8 byte aligned. We will copy bytes from
! the source to the destination until the remaining data has an 8 byte
! destination address alignment (i.e we should never copy more than 7
! bytes here).
$Lalign_dst:
	GETB	D0Re0, [A1.2++]
	ADD	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
	SUB	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
	SETB	[A0.2++], D0Re0
	CMP	D1Ar5, #8
	BNE	$Lalign_dst

! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
! blocks, then jump to the unaligned copy loop or fall through to the aligned
! copy loop as appropriate.
$Laligned_dst:
	MOV	D0Ar4, A1.2
	LSR	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
	ANDS	D0Ar4, D0Ar4, #7	! test source alignment
	BNZ	$Lunaligned_copy	! if unaligned, use unaligned copy loop

! Both source and destination are 8 byte aligned - the easy case.
$Laligned_copy:
	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
	BZ	$Lbyte_copy		! < 32 bytes left: finish byte-wise
	SUB	TXRPT, D1Ar5, #1

$Laligned_32:
! Each GETL/SETL moves one 64-bit quadword; four pairs = 32 bytes/iteration.
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	BR	$Laligned_32

! If there are any remaining bytes use the byte copy loop, otherwise we are done
! (reached by fall-through once the repeat loop above exhausts).
	ANDS	D1Ar3, D1Ar3, #0x1f	! cnt mod 32 = tail bytes
	BNZ	$Lbyte_copy
	B	$Lend

! The destination is 8 byte aligned but the source is not, and there are 8
! or more bytes to be copied.
$Lunaligned_copy:
! Adjust the source pointer (A1.2) to the 8 byte boundary before its
! current value
	MOV	D0Ar4, A1.2
	MOV	D0Ar6, A1.2
	ANDMB	D0Ar4, D0Ar4, #0xfff8	! round down to 8-byte boundary
					! (masks the bottom 16 bits only —
					! assumed sufficient; see ANDMB in
					! the META ISA manual)
	MOV	A1.2, D0Ar4
! Save the number of bytes of mis-alignment in D0Ar4 for use later
	SUBS	D0Ar6, D0Ar6, D0Ar4
	MOV	D0Ar4, D0Ar6
! if there is no mis-alignment after all, use the aligned copy loop
	BZ	$Laligned_copy

! prefetch 8 bytes: primes D0Re0/D1Re0 so each loop iteration below can
! merge the previous quadword with the next one.
	GETL	D0Re0, D1Re0, [A1.2]

	SUB	TXRPT, D1Ar5, #1	! D1Ar5 = number of 8 byte blocks

! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
! 4 bytes, and more than 4 bytes.
	CMP	D0Ar6, #4
	BLT	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
	BZ	$Lunaligned_4		! use 4 byte mis-alignment loop

! The mis-alignment is more than 4 bytes
$Lunaligned_5_6_7:
	SUB	D0Ar6, D0Ar6, #4	! reduce to a 1-3 byte skew within
					! the high word of each quadword
! Calculate the bit offsets required for the shift operations necesssary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6
! Move data 4 bytes before we enter the main loop
	MOV	D0Re0, D1Re0

$Lloop_5_6_7:
	GETL	D0Ar2, D1Ar1, [++A1.2]	! pre-increment: fetch next quadword
! form 64-bit data in D0Re0, D1Re0 by shifting the old low word right and
! filling from the newly fetched words
	LSR	D0Re0, D0Re0, D0Ar6
	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Re0

	LSR	D0Ar2, D0Ar2, D0Ar6
	LSL	D1Re0, D1Ar1, D1Ar5
	ADD	D1Re0, D1Re0, D0Ar2

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1		! carry high word into next iteration
	BR	$Lloop_5_6_7

	B	$Lunaligned_end

$Lunaligned_1_2_3:
! Calculate the bit offsets required for the shift operations necesssary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6

$Lloop_1_2_3:
! form 64-bit data in D0Re0,D1Re0 from the previously fetched quadword
! (D0Re0,D1Re0) and the next one; D0FrT holds the shifted-out carry bits
	LSR	D0Re0, D0Re0, D0Ar6
	LSL	D1Ar1, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Ar1
	MOV	D0Ar2, D1Re0
	LSR	D0FrT, D0Ar2, D0Ar6
	GETL	D0Ar2, D1Ar1, [++A1.2]	! pre-increment: fetch next quadword

	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D1Re0, D1Re0, D0FrT

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D0Ar2		! carry fetched quadword into next
	MOV	D1Re0, D1Ar1		! iteration
	BR	$Lloop_1_2_3

	B	$Lunaligned_end

! The 4 byte mis-alignment case - this does not require any shifting, just a
! shuffling of registers.
$Lunaligned_4:
	MOV	D0Re0, D1Re0		! pair old high word with new low word
$Lloop_4:
	GETL	D0Ar2, D1Ar1, [++A1.2]
	MOV	D1Re0, D0Ar2
	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1
	BR	$Lloop_4

$Lunaligned_end:
! If there are no remaining bytes to copy, we are done.
	ANDS	D1Ar3, D1Ar3, #7	! cnt mod 8 = tail bytes
	BZ	$Lend
! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
! address of the remaining bytes, and fall through to the byte copy loop.
! D0Ar4 still holds the mis-alignment saved at $Lunaligned_copy.
	MOV	D0Ar6, A1.2
	ADD	D1Ar5, D0Ar4, D0Ar6
	MOV	A1.2, D1Ar5
	B	$Lbyte_copy

	.size _memcpy,.-_memcpy