Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * Copyright (C) 2013 ARM Ltd.
4 * Copyright (C) 2013 Linaro.
5 *
6 * This code is based on glibc cortex strings work originally authored by Linaro
 * and can be found @
8 *
9 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
10 * files/head:/src/aarch64/
11 */
12
13#include <linux/linkage.h>
14#include <asm/assembler.h>
15#include <asm/cache.h>
16
17/*
 * Move a buffer from src to dst (alignment handled by the hardware).
19 * If dest <= src, call memcpy, otherwise copy in reverse order.
20 *
21 * Parameters:
22 * x0 - dest
23 * x1 - src
24 * x2 - n
25 * Returns:
26 * x0 - dest
27 */
/*
 * Register aliases.  Arguments arrive per AAPCS64 in x0-x2; the
 * remaining aliases are caller-saved scratch registers.
 */
dstin .req x0	/* dest as passed in; left untouched so it doubles as the return value */
src .req x1	/* current source pointer (decremented during the backward copy) */
count .req x2	/* bytes remaining to copy */
tmp1 .req x3	/* scratch */
tmp1w .req w3	/* 32-bit view of tmp1 */
tmp2 .req x4	/* scratch: bytes needed to align src */
tmp2w .req w4	/* 32-bit view of tmp2 */
tmp3 .req x5	/* scratch; not referenced in this routine */
tmp3w .req w5	/* 32-bit view of tmp3; not referenced in this routine */
dst .req x6	/* current destination pointer (decremented during the backward copy) */

/* Four 16-byte data lanes (lo/hi halves) for the bulk ldp/stp copy. */
A_l .req x7
A_h .req x8
B_l .req x9
B_h .req x10
C_l .req x11
C_h .req x12
D_l .req x13
D_h .req x14
47
SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_WEAK_PI(memmove)
	/*
	 * A forward copy is safe when dst is below src, or when the two
	 * regions do not overlap at all: tail-call __memcpy for both cases.
	 */
	cmp	dstin, src
	b.lo	__memcpy
	add	tmp1, src, count	/* tmp1 = one past the end of src */
	cmp	dstin, tmp1
	b.hs	__memcpy		/* No overlap. */

	/*
	 * Overlapping, with dst above src: copy backwards.  Point src and
	 * dst one past the end of each buffer and pre-decrement as we go.
	 */
	add	dst, dstin, count
	add	src, src, count
	cmp	count, #16
	b.lo	.Ltail15	/* tiny copy: probably unaligned accesses */

	ands	tmp2, src, #15	/* Bytes to reach alignment. */
	b.eq	.LSrcAligned
	sub	count, count, tmp2
	/*
	 * Copy the misaligned head first so that src becomes 16-byte
	 * aligned.  The cost of these extra instructions is acceptable,
	 * and it makes all subsequent accesses use aligned addresses.
	 * Each tbz tests one bit of the 0-15 byte misalignment.
	 */
	tbz	tmp2, #0, 1f
	ldrb	tmp1w, [src, #-1]!
	strb	tmp1w, [dst, #-1]!
1:
	tbz	tmp2, #1, 2f
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
2:
	tbz	tmp2, #2, 3f
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
3:
	tbz	tmp2, #3, .LSrcAligned
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!

.LSrcAligned:
	cmp	count, #64
	b.ge	.Lcpy_over64

	/*
	 * Deal with small copies quickly by dropping straight into the
	 * exit block.
	 */
.Ltail63:
	/*
	 * Copy up to 48 bytes of data.  At this point we only need the
	 * bottom 6 bits of count to be accurate.  Bits 5:4 select how many
	 * 16-byte chunks to copy by entering the chunk sequence below at
	 * the right point (0x30: all three; 0x20: two; 0x10: one).
	 */
	ands	tmp1, count, #0x30
	b.eq	.Ltail15
	cmp	tmp1w, #0x20
	b.eq	1f		/* 0x20: copy two chunks */
	b.lt	2f		/* 0x10: copy one chunk */
	ldp	A_l, A_h, [src, #-16]!	/* 0x30: copy all three chunks */
	stp	A_l, A_h, [dst, #-16]!
1:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!
2:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!

.Ltail15:
	/* Copy the remaining 0-15 bytes, one bit of count at a time. */
	tbz	count, #3, 1f
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!
1:
	tbz	count, #2, 2f
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
2:
	tbz	count, #1, 3f
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
3:
	tbz	count, #0, .Lexitfunc
	ldrb	tmp1w, [src, #-1]
	strb	tmp1w, [dst, #-1]

.Lexitfunc:
	ret		/* x0 (dstin) was never modified: return dest */

.Lcpy_over64:
	subs	count, count, #128
	b.ge	.Lcpy_body_large
	/*
	 * Less than 128 bytes to copy, so handle 64 bytes here and then jump
	 * to the tail.
	 */
	ldp	A_l, A_h, [src, #-16]
	stp	A_l, A_h, [dst, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	D_l, D_h, [src, #-64]!
	stp	D_l, D_h, [dst, #-64]!

	tst	count, #0x3f	/* any 0-63 byte remainder left? */
	b.ne	.Ltail63
	ret

	/*
	 * Critical loop.  Start at a new cache line boundary.  Assuming
	 * 64 bytes per line this ensures the entire loop is in one line.
	 */
	.p2align	L1_CACHE_SHIFT
.Lcpy_body_large:
	/* Pre-load the first 64 bytes of data. */
	ldp	A_l, A_h, [src, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	ldp	D_l, D_h, [src, #-64]!
1:
	/*
	 * Interleave the loads of the next 64-byte block with the stores
	 * of the previously loaded 64 bytes, to overlap memory latency.
	 */
	stp	A_l, A_h, [dst, #-16]
	ldp	A_l, A_h, [src, #-16]
	stp	B_l, B_h, [dst, #-32]
	ldp	B_l, B_h, [src, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	C_l, C_h, [src, #-48]
	stp	D_l, D_h, [dst, #-64]!
	ldp	D_l, D_h, [src, #-64]!
	subs	count, count, #64
	b.ge	1b
	/* Drain the pipeline: store the final pre-loaded 64 bytes. */
	stp	A_l, A_h, [dst, #-16]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	stp	D_l, D_h, [dst, #-64]!

	tst	count, #0x3f	/* count is now negative; its low bits hold the remainder */
	b.ne	.Ltail63
	ret
SYM_FUNC_END_PI(memmove)
EXPORT_SYMBOL(memmove)
SYM_FUNC_END_ALIAS(__memmove)
EXPORT_SYMBOL(__memmove)