Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v5.9-rc2 190 lines 3.9 kB view raw
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 ARM Ltd.
 * Copyright (C) 2013 Linaro.
 *
 * This code is based on glibc cortex strings work originally authored by Linaro
 * and re-licensed under GPLv2 for the Linux kernel. The original code can
 * be found @
 *
 * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
 * files/head:/src/aarch64/
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

/*
 * Move a buffer from src to dest (alignment handled by the hardware).
 * If dest <= src, call memcpy, otherwise copy in reverse order.
 *
 * Parameters:
 *	x0 - dest
 *	x1 - src
 *	x2 - n
 * Returns:
 *	x0 - dest
 */
dstin	.req	x0		/* original dest pointer; preserved as return value */
src	.req	x1
count	.req	x2
tmp1	.req	x3
tmp1w	.req	w3
tmp2	.req	x4
tmp2w	.req	w4
tmp3	.req	x5
tmp3w	.req	w5
dst	.req	x6		/* working dest cursor, so dstin survives for the return */

/* Eight data registers: four 16-byte ldp/stp pairs in flight per iteration. */
A_l	.req	x7
A_h	.req	x8
B_l	.req	x9
B_h	.req	x10
C_l	.req	x11
C_h	.req	x12
D_l	.req	x13
D_h	.req	x14

	.weak memmove
SYM_FUNC_START_ALIAS(__memmove)
SYM_FUNC_START_PI(memmove)
	cmp	dstin, src
	b.lo	__memcpy		/* dest below src: forward copy is safe */
	add	tmp1, src, count
	cmp	dstin, tmp1
	b.hs	__memcpy		/* No overlap.  */

	/*
	 * Overlapping with dest > src: copy backwards. Point both cursors
	 * one past the end and pre-decrement on every access below.
	 */
	add	dst, dstin, count
	add	src, src, count
	cmp	count, #16
	b.lo	.Ltail15  /*probably non-alignment accesses.*/

	ands	tmp2, src, #15     /* Bytes to reach alignment.  */
	b.eq	.LSrcAligned
	sub	count, count, tmp2
	/*
	 * process the aligned offset length to make the src aligned firstly.
	 * those extra instructions' cost is acceptable. It also make the
	 * coming accesses are based on aligned address.
	 */
	tbz	tmp2, #0, 1f		/* copy 1 byte if alignment bit 0 set */
	ldrb	tmp1w, [src, #-1]!
	strb	tmp1w, [dst, #-1]!
1:
	tbz	tmp2, #1, 2f		/* copy 2 bytes if bit 1 set */
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
2:
	tbz	tmp2, #2, 3f		/* copy 4 bytes if bit 2 set */
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
3:
	tbz	tmp2, #3, .LSrcAligned	/* copy 8 bytes if bit 3 set */
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!

.LSrcAligned:
	cmp	count, #64
	b.ge	.Lcpy_over64

	/*
	 * Deal with small copies quickly by dropping straight into the
	 * exit block.
	 */
.Ltail63:
	/*
	 * Copy up to 48 bytes of data. At this point we only need the
	 * bottom 6 bits of count to be accurate.
	 */
	ands	tmp1, count, #0x30
	b.eq	.Ltail15
	cmp	tmp1w, #0x20
	b.eq	1f			/* 0x20: two 16-byte chunks */
	b.lt	2f			/* 0x10: one 16-byte chunk */
	ldp	A_l, A_h, [src, #-16]!	/* 0x30: fall through all three */
	stp	A_l, A_h, [dst, #-16]!
1:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!
2:
	ldp	A_l, A_h, [src, #-16]!
	stp	A_l, A_h, [dst, #-16]!

.Ltail15:
	/* Copy the final 0-15 bytes, driven by the low 4 bits of count. */
	tbz	count, #3, 1f
	ldr	tmp1, [src, #-8]!
	str	tmp1, [dst, #-8]!
1:
	tbz	count, #2, 2f
	ldr	tmp1w, [src, #-4]!
	str	tmp1w, [dst, #-4]!
2:
	tbz	count, #1, 3f
	ldrh	tmp1w, [src, #-2]!
	strh	tmp1w, [dst, #-2]!
3:
	tbz	count, #0, .Lexitfunc
	ldrb	tmp1w, [src, #-1]
	strb	tmp1w, [dst, #-1]

.Lexitfunc:
	ret				/* x0 (dstin) untouched: returns dest */

.Lcpy_over64:
	subs	count, count, #128
	b.ge	.Lcpy_body_large
	/*
	 * Less than 128 bytes to copy, so handle 64 bytes here and then jump
	 * to the tail.
	 */
	ldp	A_l, A_h, [src, #-16]
	stp	A_l, A_h, [dst, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	D_l, D_h, [src, #-64]!
	stp	D_l, D_h, [dst, #-64]!

	tst	count, #0x3f		/* count is now n-128; low 6 bits = leftover */
	b.ne	.Ltail63
	ret

	/*
	 * Critical loop. Start at a new cache line boundary. Assuming
	 * 64 bytes per line this ensures the entire loop is in one line.
	 */
	.p2align	L1_CACHE_SHIFT
.Lcpy_body_large:
	/* pre-load 64 bytes data. */
	ldp	A_l, A_h, [src, #-16]
	ldp	B_l, B_h, [src, #-32]
	ldp	C_l, C_h, [src, #-48]
	ldp	D_l, D_h, [src, #-64]!
1:
	/*
	 * interlace the load of next 64 bytes data block with store of the last
	 * loaded 64 bytes data.
	 */
	stp	A_l, A_h, [dst, #-16]
	ldp	A_l, A_h, [src, #-16]
	stp	B_l, B_h, [dst, #-32]
	ldp	B_l, B_h, [src, #-32]
	stp	C_l, C_h, [dst, #-48]
	ldp	C_l, C_h, [src, #-48]
	stp	D_l, D_h, [dst, #-64]!
	ldp	D_l, D_h, [src, #-64]!
	subs	count, count, #64
	b.ge	1b
	/* Drain the last pre-loaded 64 bytes. */
	stp	A_l, A_h, [dst, #-16]
	stp	B_l, B_h, [dst, #-32]
	stp	C_l, C_h, [dst, #-48]
	stp	D_l, D_h, [dst, #-64]!

	tst	count, #0x3f
	b.ne	.Ltail63
	ret
SYM_FUNC_END_PI(memmove)
EXPORT_SYMBOL(memmove)
SYM_FUNC_END_ALIAS(__memmove)
EXPORT_SYMBOL(__memmove)