Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.15 186 lines 4.9 kB view raw
! SPDX-License-Identifier: GPL-2.0
! Copyright (C) 2008-2012 Imagination Technologies Ltd.
!
! void *_memcpy(void *dst, const void *src, size_t cnt)
!
! Optimised memcpy for the META architecture.  Strategy:
!   * < 16 bytes: plain byte-copy loop.
!   * >= 16 bytes: byte-copy until dst is 8-byte aligned, then
!       - src also 8-byte aligned: copy 32-byte blocks with GETL/SETL pairs;
!       - src unaligned: round src down to an 8-byte boundary and rebuild
!         each output quadword by shifting/merging two input quadwords
!         (separate loops for 1-3, exactly 4, and 5-7 bytes of skew).
!   Any tail bytes fall back to the byte-copy loop.
!
! NOTE(review): loops use the META hardware repeat counter: writing TXRPT
! arms the loop and BR branches while the counter runs down, falling
! through when exhausted — assumed from usage here; confirm against the
! META ISA manual.

	.text
	.global	_memcpy
	.type	_memcpy,function
! Register roles on entry / exit:
! D1Ar1 dst
! D0Ar2 src
! D1Ar3 cnt
! D0Re0 dst			! return value (dst, unchanged)
_memcpy:
	CMP	D1Ar3, #16
	MOV	A1.2, D0Ar2		! source pointer
	MOV	A0.2, D1Ar1		! destination pointer
	MOV	A0.3, D1Ar1		! for return value
! If there are less than 16 bytes to copy use the byte copy loop
	BGE	$Llong_copy

$Lbyte_copy:
! Simply copy a byte at a time.  SUBS sets flags: cnt == 0 gives a
! negative repeat count, so skip straight to the exit path.
	SUBS	TXRPT, D1Ar3, #1
	BLT	$Lend
$Lloop_byte:
	GETB	D1Re0, [A1.2++]		! load byte, post-increment src
	SETB	[A0.2++], D1Re0		! store byte, post-increment dst
	BR	$Lloop_byte

$Lend:
! Finally set return value and return
	MOV	D0Re0, A0.3		! return original dst
	MOV	PC, D1RtP

$Llong_copy:
	ANDS	D1Ar5, D1Ar1, #7	! test destination alignment
	BZ	$Laligned_dst

! The destination address is not 8 byte aligned. We will copy bytes from
! the source to the destination until the remaining data has an 8 byte
! destination address alignment (i.e we should never copy more than 7
! bytes here).
$Lalign_dst:
	GETB	D0Re0, [A1.2++]
	ADD	D1Ar5, D1Ar5, #1	! dest is aligned when D1Ar5 reaches #8
	SUB	D1Ar3, D1Ar3, #1	! decrement count of remaining bytes
	SETB	[A0.2++], D0Re0
	CMP	D1Ar5, #8
	BNE	$Lalign_dst

! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
! blocks, then jump to the unaligned copy loop or fall through to the aligned
! copy loop as appropriate.
$Laligned_dst:
	MOV	D0Ar4, A1.2
	LSR	D1Ar5, D1Ar3, #3	! D1Ar5 = number of 8 byte blocks
	ANDS	D0Ar4, D0Ar4, #7	! test source alignment
	BNZ	$Lunaligned_copy	! if unaligned, use unaligned copy loop

! Both source and destination are 8 byte aligned - the easy case.
$Laligned_copy:
	LSRS	D1Ar5, D1Ar3, #5	! D1Ar5 = number of 32 byte blocks
	BZ	$Lbyte_copy		! < 32 bytes left: finish byte-wise
	SUB	TXRPT, D1Ar5, #1

$Laligned_32:
! Each GETL/SETL moves one 64-bit quadword; four pairs = 32 bytes/iteration.
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	GETL	D0Re0, D1Re0, [A1.2++]
	GETL	D0Ar6, D1Ar5, [A1.2++]
	SETL	[A0.2++], D0Re0, D1Re0
	SETL	[A0.2++], D0Ar6, D1Ar5
	BR	$Laligned_32

! If there are any remaining bytes use the byte copy loop, otherwise we are done
! (reached by fall-through once the repeat loop above exhausts).
	ANDS	D1Ar3, D1Ar3, #0x1f	! cnt mod 32 = tail bytes
	BNZ	$Lbyte_copy
	B	$Lend

! The destination is 8 byte aligned but the source is not, and there are 8
! or more bytes to be copied.
$Lunaligned_copy:
! Adjust the source pointer (A1.2) to the 8 byte boundary before its
! current value
	MOV	D0Ar4, A1.2
	MOV	D0Ar6, A1.2
	ANDMB	D0Ar4, D0Ar4, #0xfff8	! round down to 8-byte boundary
					! (masks the bottom 16 bits only —
					! assumed sufficient; see ANDMB in
					! the META ISA manual)
	MOV	A1.2, D0Ar4
! Save the number of bytes of mis-alignment in D0Ar4 for use later
	SUBS	D0Ar6, D0Ar6, D0Ar4
	MOV	D0Ar4, D0Ar6
! if there is no mis-alignment after all, use the aligned copy loop
	BZ	$Laligned_copy

! prefetch 8 bytes: primes D0Re0/D1Re0 so each loop iteration below can
! merge the previous quadword with the next one.
	GETL	D0Re0, D1Re0, [A1.2]

	SUB	TXRPT, D1Ar5, #1	! D1Ar5 = number of 8 byte blocks

! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
! 4 bytes, and more than 4 bytes.
	CMP	D0Ar6, #4
	BLT	$Lunaligned_1_2_3	! use 1-3 byte mis-alignment loop
	BZ	$Lunaligned_4		! use 4 byte mis-alignment loop

! The mis-alignment is more than 4 bytes
$Lunaligned_5_6_7:
	SUB	D0Ar6, D0Ar6, #4	! reduce to a 1-3 byte skew within
					! the high word of each quadword
! Calculate the bit offsets required for the shift operations necesssary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6
! Move data 4 bytes before we enter the main loop
	MOV	D0Re0, D1Re0

$Lloop_5_6_7:
	GETL	D0Ar2, D1Ar1, [++A1.2]	! pre-increment: fetch next quadword
! form 64-bit data in D0Re0, D1Re0 by shifting the old low word right and
! filling from the newly fetched words
	LSR	D0Re0, D0Re0, D0Ar6
	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Re0

	LSR	D0Ar2, D0Ar2, D0Ar6
	LSL	D1Re0, D1Ar1, D1Ar5
	ADD	D1Re0, D1Re0, D0Ar2

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1		! carry high word into next iteration
	BR	$Lloop_5_6_7

	B	$Lunaligned_end

$Lunaligned_1_2_3:
! Calculate the bit offsets required for the shift operations necesssary
! to align the data.
! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
	MULW	D0Ar6, D0Ar6, #8
	MOV	D1Ar5, #32
	SUB	D1Ar5, D1Ar5, D0Ar6

$Lloop_1_2_3:
! form 64-bit data in D0Re0,D1Re0 from the previously fetched quadword
! (D0Re0,D1Re0) and the next one; D0FrT holds the shifted-out carry bits
	LSR	D0Re0, D0Re0, D0Ar6
	LSL	D1Ar1, D1Re0, D1Ar5
	ADD	D0Re0, D0Re0, D1Ar1
	MOV	D0Ar2, D1Re0
	LSR	D0FrT, D0Ar2, D0Ar6
	GETL	D0Ar2, D1Ar1, [++A1.2]	! pre-increment: fetch next quadword

	MOV	D1Re0, D0Ar2
	LSL	D1Re0, D1Re0, D1Ar5
	ADD	D1Re0, D1Re0, D0FrT

	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D0Ar2		! carry fetched quadword into next
	MOV	D1Re0, D1Ar1		! iteration
	BR	$Lloop_1_2_3

	B	$Lunaligned_end

! The 4 byte mis-alignment case - this does not require any shifting, just a
! shuffling of registers.
$Lunaligned_4:
	MOV	D0Re0, D1Re0		! pair old high word with new low word
$Lloop_4:
	GETL	D0Ar2, D1Ar1, [++A1.2]
	MOV	D1Re0, D0Ar2
	SETL	[A0.2++], D0Re0, D1Re0
	MOV	D0Re0, D1Ar1
	BR	$Lloop_4

$Lunaligned_end:
! If there are no remaining bytes to copy, we are done.
	ANDS	D1Ar3, D1Ar3, #7	! cnt mod 8 = tail bytes
	BZ	$Lend
! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
! address of the remaining bytes, and fall through to the byte copy loop.
! D0Ar4 still holds the mis-alignment saved at $Lunaligned_copy.
	MOV	D0Ar6, A1.2
	ADD	D1Ar5, D0Ar4, D0Ar6
	MOV	A1.2, D1Ar5
	B	$Lbyte_copy

	.size _memcpy,.-_memcpy