Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch/tile: minor whitespace/naming changes for string support files

Our internal process shares memcpy, memset, etc., with libc, and
we did some minor tweaking as part of moving from uClibc to glibc,
which is now reflected in the kernel versions of these files.

There are no semantic changes in this commit, just whitespace
(memcpy_32.S now properly uses tabs), naming (memmove.c instead
of memmove_32.c, since TILE-Gx shares the file with TILEPro),
and a couple of other minor tweaks.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

+108 -103
+2 -2
arch/tile/lib/Makefile
··· 3 3 # 4 4 5 5 lib-y = cacheflush.o checksum.o cpumask.o delay.o \ 6 - mb_incoherent.o uaccess.o \ 7 - memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ 6 + mb_incoherent.o uaccess.o memmove.o \ 7 + memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ 8 8 strchr_$(BITS).o strlen_$(BITS).o 9 9 10 10 ifeq ($(CONFIG_TILEGX),y)
+103 -101
arch/tile/lib/memcpy_32.S
··· 10 10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 11 * NON INFRINGEMENT. See the GNU General Public License for 12 12 * more details. 13 - * 14 - * This file shares the implementation of the userspace memcpy and 15 - * the kernel's memcpy, copy_to_user and copy_from_user. 16 13 */ 17 14 18 15 #include <arch/chip.h> 19 16 17 + 18 + /* 19 + * This file shares the implementation of the userspace memcpy and 20 + * the kernel's memcpy, copy_to_user and copy_from_user. 21 + */ 20 22 21 23 #include <linux/linkage.h> 22 24 ··· 55 53 */ 56 54 ENTRY(__copy_from_user_inatomic) 57 55 .type __copy_from_user_inatomic, @function 58 - FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ 56 + FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ 59 57 .text.memcpy_common, \ 60 - .Lend_memcpy_common - __copy_from_user_inatomic) 58 + .Lend_memcpy_common - __copy_from_user_inatomic) 61 59 { movei r29, IS_COPY_FROM_USER; j memcpy_common } 62 60 .size __copy_from_user_inatomic, . - __copy_from_user_inatomic 63 61 ··· 66 64 */ 67 65 ENTRY(__copy_from_user_zeroing) 68 66 .type __copy_from_user_zeroing, @function 69 - FEEDBACK_REENTER(__copy_from_user_inatomic) 67 + FEEDBACK_REENTER(__copy_from_user_inatomic) 70 68 { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } 71 69 .size __copy_from_user_zeroing, . - __copy_from_user_zeroing 72 70 ··· 76 74 */ 77 75 ENTRY(__copy_to_user_inatomic) 78 76 .type __copy_to_user_inatomic, @function 79 - FEEDBACK_REENTER(__copy_from_user_inatomic) 77 + FEEDBACK_REENTER(__copy_from_user_inatomic) 80 78 { movei r29, IS_COPY_TO_USER; j memcpy_common } 81 79 .size __copy_to_user_inatomic, . - __copy_to_user_inatomic 82 80 83 81 ENTRY(memcpy) 84 82 .type memcpy, @function 85 - FEEDBACK_REENTER(__copy_from_user_inatomic) 83 + FEEDBACK_REENTER(__copy_from_user_inatomic) 86 84 { movei r29, IS_MEMCPY } 87 85 .size memcpy, . 
- memcpy 88 86 /* Fall through */ ··· 159 157 { addi r3, r1, 60; andi r9, r9, -64 } 160 158 161 159 #if CHIP_HAS_WH64() 162 - /* No need to prefetch dst, we'll just do the wh64 163 - * right before we copy a line. 160 + /* No need to prefetch dst, we'll just do the wh64 161 + * right before we copy a line. 164 162 */ 165 163 #endif 166 164 167 165 EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } 168 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 169 - { bnzt zero, .; move r27, lr } 166 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 167 + { bnzt zero, .; move r27, lr } 170 168 EX: { lw r6, r3; addi r3, r3, 64 } 171 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 172 - { bnzt zero, . } 169 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 170 + { bnzt zero, . } 173 171 EX: { lw r7, r3; addi r3, r3, 64 } 174 172 #if !CHIP_HAS_WH64() 175 - /* Prefetch the dest */ 176 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 177 - { bnzt zero, . } 178 - /* Use a real load to cause a TLB miss if necessary. We aren't using 179 - * r28, so this should be fine. 180 - */ 173 + /* Prefetch the dest */ 174 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 175 + { bnzt zero, . } 176 + /* Use a real load to cause a TLB miss if necessary. We aren't using 177 + * r28, so this should be fine. 178 + */ 181 179 EX: { lw r28, r9; addi r9, r9, 64 } 182 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 183 - { bnzt zero, . } 184 - { prefetch r9; addi r9, r9, 64 } 185 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 186 - { bnzt zero, . } 187 - { prefetch r9; addi r9, r9, 64 } 180 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 181 + { bnzt zero, . } 182 + { prefetch r9; addi r9, r9, 64 } 183 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 184 + { bnzt zero, . 
} 185 + { prefetch r9; addi r9, r9, 64 } 188 186 #endif 189 - /* Intentionally stall for a few cycles to leave L2 cache alone. */ 190 - { bz zero, .Lbig_loop2 } 187 + /* Intentionally stall for a few cycles to leave L2 cache alone. */ 188 + { bz zero, .Lbig_loop2 } 191 189 192 190 /* On entry to this loop: 193 191 * - r0 points to the start of dst line 0 ··· 199 197 * to some "safe" recently loaded address. 200 198 * - r5 contains *(r1 + 60) [i.e. last word of source line 0] 201 199 * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] 202 - * - r9 contains ((r0 + 63) & -64) 200 + * - r9 contains ((r0 + 63) & -64) 203 201 * [start of next dst cache line.] 204 202 */ 205 203 ··· 210 208 /* Copy line 0, first stalling until r5 is ready. */ 211 209 EX: { move r12, r5; lw r16, r1 } 212 210 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } 213 - /* Prefetch several lines ahead. */ 211 + /* Prefetch several lines ahead. */ 214 212 EX: { lw r5, r3; addi r3, r3, 64 } 215 - { jal .Lcopy_line } 213 + { jal .Lcopy_line } 216 214 217 215 /* Copy line 1, first stalling until r6 is ready. */ 218 216 EX: { move r12, r6; lw r16, r1 } 219 217 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } 220 - /* Prefetch several lines ahead. */ 218 + /* Prefetch several lines ahead. */ 221 219 EX: { lw r6, r3; addi r3, r3, 64 } 222 220 { jal .Lcopy_line } 223 221 224 222 /* Copy line 2, first stalling until r7 is ready. */ 225 223 EX: { move r12, r7; lw r16, r1 } 226 224 { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } 227 - /* Prefetch several lines ahead. */ 225 + /* Prefetch several lines ahead. */ 228 226 EX: { lw r7, r3; addi r3, r3, 64 } 229 - /* Use up a caches-busy cycle by jumping back to the top of the 230 - * loop. Might as well get it out of the way now. 231 - */ 232 - { j .Lbig_loop } 227 + /* Use up a caches-busy cycle by jumping back to the top of the 228 + * loop. Might as well get it out of the way now. 
229 + */ 230 + { j .Lbig_loop } 233 231 234 232 235 233 /* On entry: 236 234 * - r0 points to the destination line. 237 235 * - r1 points to the source line. 238 - * - r3 is the next prefetch address. 236 + * - r3 is the next prefetch address. 239 237 * - r9 holds the last address used for wh64. 240 238 * - r12 = WORD_15 241 - * - r16 = WORD_0. 242 - * - r17 == r1 + 16. 243 - * - r27 holds saved lr to restore. 239 + * - r16 = WORD_0. 240 + * - r17 == r1 + 16. 241 + * - r27 holds saved lr to restore. 244 242 * 245 243 * On exit: 246 244 * - r0 is incremented by 64. 247 245 * - r1 is incremented by 64, unless that would point to a word 248 - * beyond the end of the source array, in which case it is redirected 249 - * to point to an arbitrary word already in the cache. 246 + * beyond the end of the source array, in which case it is redirected 247 + * to point to an arbitrary word already in the cache. 250 248 * - r2 is decremented by 64. 251 - * - r3 is unchanged, unless it points to a word beyond the 252 - * end of the source array, in which case it is redirected 253 - * to point to an arbitrary word already in the cache. 254 - * Redirecting is OK since if we are that close to the end 255 - * of the array we will not come back to this subroutine 256 - * and use the contents of the prefetched address. 249 + * - r3 is unchanged, unless it points to a word beyond the 250 + * end of the source array, in which case it is redirected 251 + * to point to an arbitrary word already in the cache. 252 + * Redirecting is OK since if we are that close to the end 253 + * of the array we will not come back to this subroutine 254 + * and use the contents of the prefetched address. 257 255 * - r4 is nonzero iff r2 >= 64. 
258 - * - r9 is incremented by 64, unless it points beyond the 259 - * end of the last full destination cache line, in which 260 - * case it is redirected to a "safe address" that can be 261 - * clobbered (sp - 64) 256 + * - r9 is incremented by 64, unless it points beyond the 257 + * end of the last full destination cache line, in which 258 + * case it is redirected to a "safe address" that can be 259 + * clobbered (sp - 64) 262 260 * - lr contains the value in r27. 263 261 */ 264 262 265 263 /* r26 unused */ 266 264 267 265 .Lcopy_line: 268 - /* TODO: when r3 goes past the end, we would like to redirect it 269 - * to prefetch the last partial cache line (if any) just once, for the 270 - * benefit of the final cleanup loop. But we don't want to 271 - * prefetch that line more than once, or subsequent prefetches 272 - * will go into the RTF. But then .Lbig_loop should unconditionally 273 - * branch to top of loop to execute final prefetch, and its 274 - * nop should become a conditional branch. 275 - */ 266 + /* TODO: when r3 goes past the end, we would like to redirect it 267 + * to prefetch the last partial cache line (if any) just once, for the 268 + * benefit of the final cleanup loop. But we don't want to 269 + * prefetch that line more than once, or subsequent prefetches 270 + * will go into the RTF. But then .Lbig_loop should unconditionally 271 + * branch to top of loop to execute final prefetch, and its 272 + * nop should become a conditional branch. 273 + */ 276 274 277 - /* We need two non-memory cycles here to cover the resources 278 - * used by the loads initiated by the caller. 279 - */ 280 - { add r15, r1, r2 } 275 + /* We need two non-memory cycles here to cover the resources 276 + * used by the loads initiated by the caller. 277 + */ 278 + { add r15, r1, r2 } 281 279 .Lcopy_line2: 282 - { slt_u r13, r3, r15; addi r17, r1, 16 } 280 + { slt_u r13, r3, r15; addi r17, r1, 16 } 283 281 284 - /* NOTE: this will stall for one cycle as L1 is busy. 
*/ 282 + /* NOTE: this will stall for one cycle as L1 is busy. */ 285 283 286 - /* Fill second L1D line. */ 284 + /* Fill second L1D line. */ 287 285 EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ 288 286 289 287 #if CHIP_HAS_WH64() 290 - /* Prepare destination line for writing. */ 288 + /* Prepare destination line for writing. */ 291 289 EX: { wh64 r9; addi r9, r9, 64 } 292 290 #else 293 - /* Prefetch dest line */ 291 + /* Prefetch dest line */ 294 292 { prefetch r9; addi r9, r9, 64 } 295 293 #endif 296 - /* Load seven words that are L1D hits to cover wh64 L2 usage. */ 294 + /* Load seven words that are L1D hits to cover wh64 L2 usage. */ 297 295 298 - /* Load the three remaining words from the last L1D line, which 299 - * we know has already filled the L1D. 300 - */ 296 + /* Load the three remaining words from the last L1D line, which 297 + * we know has already filled the L1D. 298 + */ 301 299 EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ 302 300 EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ 303 301 EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ 304 302 305 - /* Load the three remaining words from the first L1D line, first 306 - * stalling until it has filled by "looking at" r16. 307 - */ 303 + /* Load the three remaining words from the first L1D line, first 304 + * stalling until it has filled by "looking at" r16. 305 + */ 308 306 EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ 309 307 EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ 310 308 EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ 311 309 312 - /* Load second word from the second L1D line, first 313 - * stalling until it has filled by "looking at" r17. 314 - */ 310 + /* Load second word from the second L1D line, first 311 + * stalling until it has filled by "looking at" r17. 
312 + */ 315 313 EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ 316 314 317 - /* Store last word to the destination line, potentially dirtying it 318 - * for the first time, which keeps the L2 busy for two cycles. 319 - */ 315 + /* Store last word to the destination line, potentially dirtying it 316 + * for the first time, which keeps the L2 busy for two cycles. 317 + */ 320 318 EX: { sw r10, r12 } /* store(WORD_15) */ 321 319 322 - /* Use two L1D hits to cover the sw L2 access above. */ 320 + /* Use two L1D hits to cover the sw L2 access above. */ 323 321 EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ 324 322 EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ 325 323 326 - /* Fill third L1D line. */ 324 + /* Fill third L1D line. */ 327 325 EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ 328 326 329 - /* Store first L1D line. */ 327 + /* Store first L1D line. */ 330 328 EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ 331 329 EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ 332 330 EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ 333 331 #if CHIP_HAS_WH64() 334 332 EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ 335 333 #else 336 - /* Back up the r9 to a cache line we are already storing to 334 + /* Back up the r9 to a cache line we are already storing to 337 335 * if it gets past the end of the dest vector. Strictly speaking, 338 336 * we don't need to back up to the start of a cache line, but it's free 339 337 * and tidy, so why not? 340 - */ 338 + */ 341 339 EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ 342 340 #endif 343 - /* Store second L1D line. */ 341 + /* Store second L1D line. 
*/ 344 342 EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ 345 343 EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ 346 344 EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ ··· 350 348 EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ 351 349 EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ 352 350 353 - /* Store third L1D line. */ 351 + /* Store third L1D line. */ 354 352 EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ 355 353 EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ 356 354 EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ 357 355 EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ 358 356 359 - /* Store rest of fourth L1D line. */ 357 + /* Store rest of fourth L1D line. */ 360 358 EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ 361 - { 359 + { 362 360 EX: sw r0, r8 /* store(WORD_13) */ 363 - addi r0, r0, 4 361 + addi r0, r0, 4 364 362 /* Will r2 be > 64 after we subtract 64 below? */ 365 - shri r4, r2, 7 366 - } 367 - { 363 + shri r4, r2, 7 364 + } 365 + { 368 366 EX: sw r0, r11 /* store(WORD_14) */ 369 - addi r0, r0, 8 370 - /* Record 64 bytes successfully copied. */ 371 - addi r2, r2, -64 372 - } 367 + addi r0, r0, 8 368 + /* Record 64 bytes successfully copied. */ 369 + addi r2, r2, -64 370 + } 373 371 374 372 { jrp lr; move lr, r27 } 375 373 376 - /* Convey to the backtrace library that the stack frame is size 374 + /* Convey to the backtrace library that the stack frame is size 377 375 * zero, and the real return address is on the stack rather than 378 376 * in 'lr'. 379 377 */
arch/tile/lib/memmove_32.c → arch/tile/lib/memmove.c
+1
arch/tile/lib/memset_32.c
··· 18 18 #include <linux/string.h> 19 19 #include <linux/module.h> 20 20 21 + #undef memset 21 22 22 23 void *memset(void *s, int c, size_t n) 23 24 {
+2
arch/tile/lib/strlen_32.c
··· 16 16 #include <linux/string.h> 17 17 #include <linux/module.h> 18 18 19 + #undef strlen 20 + 19 21 size_t strlen(const char *s) 20 22 { 21 23 /* Get an aligned pointer. */