Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

alpha: Use new generic strncpy_from_user() and strnlen_user()

Similar to x86/sparc/powerpc implementations except:
1) we implement an extremely efficient has_zero()/find_zero()
sequence with both prep_zero_mask() and create_zero_mask()
no-operations.
2) Our output from prep_zero_mask() differs in that only the
lowest eight bits are used to represent the zero bytes;
nevertheless, it can be safely ORed with other similar masks
from prep_zero_mask() and forms valid input to create_zero_mask() —
the two fundamental properties prep_zero_mask() must satisfy.

Tests on EV67 and EV68 CPUs revealed that the generic code is
essentially as fast (to within 0.5% of CPU cycles) as the old
Alpha specific code for large quadword-aligned strings, despite
the 30% extra CPU instructions executed. In contrast, the
generic code for unaligned strings is substantially slower (by
more than a factor of 3) than the old Alpha specific code.

Signed-off-by: Michael Cree <mcree@orcon.net.nz>
Acked-by: Matt Turner <mattst88@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Michael Cree and committed by
Linus Torvalds
f2db633d d8d5da12

+62 -994
+2
arch/alpha/Kconfig
··· 18 18 select ARCH_HAVE_NMI_SAFE_CMPXCHG 19 19 select GENERIC_SMP_IDLE_THREAD 20 20 select GENERIC_CMOS_UPDATE 21 + select GENERIC_STRNCPY_FROM_USER 22 + select GENERIC_STRNLEN_USER 21 23 help 22 24 The Alpha is a 64-bit general-purpose processor designed and 23 25 marketed by the Digital Equipment Corporation of blessed memory,
+5 -29
arch/alpha/include/asm/uaccess.h
··· 433 433 #undef __module_address 434 434 #undef __module_call 435 435 436 - /* Returns: -EFAULT if exception before terminator, N if the entire 437 - buffer filled, else strlen. */ 436 + #define user_addr_max() \ 437 + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) 438 438 439 - extern long __strncpy_from_user(char *__to, const char __user *__from, long __to_len); 440 - 441 - extern inline long 442 - strncpy_from_user(char *to, const char __user *from, long n) 443 - { 444 - long ret = -EFAULT; 445 - if (__access_ok((unsigned long)from, 0, get_fs())) 446 - ret = __strncpy_from_user(to, from, n); 447 - return ret; 448 - } 449 - 450 - /* Returns: 0 if bad, string length+1 (memory size) of string if ok */ 451 - extern long __strlen_user(const char __user *); 452 - 453 - extern inline long strlen_user(const char __user *str) 454 - { 455 - return access_ok(VERIFY_READ,str,0) ? __strlen_user(str) : 0; 456 - } 457 - 458 - /* Returns: 0 if exception before NUL or reaching the supplied limit (N), 459 - * a value greater than N if the limit would be exceeded, else strlen. */ 460 - extern long __strnlen_user(const char __user *, long); 461 - 462 - extern inline long strnlen_user(const char __user *str, long n) 463 - { 464 - return access_ok(VERIFY_READ,str,0) ? __strnlen_user(str, n) : 0; 465 - } 439 + extern long strncpy_from_user(char *dest, const char __user *src, long count); 440 + extern __must_check long strlen_user(const char __user *str); 441 + extern __must_check long strnlen_user(const char __user *str, long n); 466 442 467 443 /* 468 444 * About the exception table:
+55
arch/alpha/include/asm/word-at-a-time.h
··· 1 + #ifndef _ASM_WORD_AT_A_TIME_H 2 + #define _ASM_WORD_AT_A_TIME_H 3 + 4 + #include <asm/compiler.h> 5 + 6 + /* 7 + * word-at-a-time interface for Alpha. 8 + */ 9 + 10 + /* 11 + * We do not use the word_at_a_time struct on Alpha, but it needs to be 12 + * implemented to humour the generic code. 13 + */ 14 + struct word_at_a_time { 15 + const unsigned long unused; 16 + }; 17 + 18 + #define WORD_AT_A_TIME_CONSTANTS { 0 } 19 + 20 + /* Return nonzero if val has a zero */ 21 + static inline unsigned long has_zero(unsigned long val, unsigned long *bits, const struct word_at_a_time *c) 22 + { 23 + unsigned long zero_locations = __kernel_cmpbge(0, val); 24 + *bits = zero_locations; 25 + return zero_locations; 26 + } 27 + 28 + static inline unsigned long prep_zero_mask(unsigned long val, unsigned long bits, const struct word_at_a_time *c) 29 + { 30 + return bits; 31 + } 32 + 33 + #define create_zero_mask(bits) (bits) 34 + 35 + static inline unsigned long find_zero(unsigned long bits) 36 + { 37 + #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67) 38 + /* Simple if have CIX instructions */ 39 + return __kernel_cttz(bits); 40 + #else 41 + unsigned long t1, t2, t3; 42 + /* Retain lowest set bit only */ 43 + bits &= -bits; 44 + /* Binary search for lowest set bit */ 45 + t1 = bits & 0xf0; 46 + t2 = bits & 0xcc; 47 + t3 = bits & 0xaa; 48 + if (t1) t1 = 4; 49 + if (t2) t2 = 2; 50 + if (t3) t3 = 1; 51 + return t1 + t2 + t3; 52 + #endif 53 + } 54 + 55 + #endif /* _ASM_WORD_AT_A_TIME_H */
-2
arch/alpha/kernel/alpha_ksyms.c
··· 74 74 */ 75 75 EXPORT_SYMBOL(__copy_user); 76 76 EXPORT_SYMBOL(__do_clear_user); 77 - EXPORT_SYMBOL(__strncpy_from_user); 78 - EXPORT_SYMBOL(__strnlen_user); 79 77 80 78 /* 81 79 * SMP-specific symbols.
-2
arch/alpha/lib/Makefile
··· 31 31 $(ev6-y)memchr.o \ 32 32 $(ev6-y)copy_user.o \ 33 33 $(ev6-y)clear_user.o \ 34 - $(ev6-y)strncpy_from_user.o \ 35 - $(ev67-y)strlen_user.o \ 36 34 $(ev6-y)csum_ipv6_magic.o \ 37 35 $(ev6-y)clear_page.o \ 38 36 $(ev6-y)copy_page.o \
-424
arch/alpha/lib/ev6-strncpy_from_user.S
··· 1 - /* 2 - * arch/alpha/lib/ev6-strncpy_from_user.S 3 - * 21264 version contributed by Rick Gorton <rick.gorton@alpha-processor.com> 4 - * 5 - * Just like strncpy except in the return value: 6 - * 7 - * -EFAULT if an exception occurs before the terminator is copied. 8 - * N if the buffer filled. 9 - * 10 - * Otherwise the length of the string is returned. 11 - * 12 - * Much of the information about 21264 scheduling/coding comes from: 13 - * Compiler Writer's Guide for the Alpha 21264 14 - * abbreviated as 'CWG' in other comments here 15 - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html 16 - * Scheduling notation: 17 - * E - either cluster 18 - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 19 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 20 - * A bunch of instructions got moved and temp registers were changed 21 - * to aid in scheduling. Control flow was also re-arranged to eliminate 22 - * branches, and to provide longer code sequences to enable better scheduling. 23 - * A total rewrite (using byte load/stores for start & tail sequences) 24 - * is desirable, but very difficult to do without a from-scratch rewrite. 25 - * Save that for the future. 26 - */ 27 - 28 - 29 - #include <asm/errno.h> 30 - #include <asm/regdef.h> 31 - 32 - 33 - /* Allow an exception for an insn; exit if we get one. */ 34 - #define EX(x,y...) \ 35 - 99: x,##y; \ 36 - .section __ex_table,"a"; \ 37 - .long 99b - .; \ 38 - lda $31, $exception-99b($0); \ 39 - .previous 40 - 41 - 42 - .set noat 43 - .set noreorder 44 - .text 45 - 46 - .globl __strncpy_from_user 47 - .ent __strncpy_from_user 48 - .frame $30, 0, $26 49 - .prologue 0 50 - 51 - .align 4 52 - __strncpy_from_user: 53 - and a0, 7, t3 # E : find dest misalignment 54 - beq a2, $zerolength # U : 55 - 56 - /* Are source and destination co-aligned? 
*/ 57 - mov a0, v0 # E : save the string start 58 - xor a0, a1, t4 # E : 59 - EX( ldq_u t1, 0(a1) ) # L : Latency=3 load first quadword 60 - ldq_u t0, 0(a0) # L : load first (partial) aligned dest quadword 61 - 62 - addq a2, t3, a2 # E : bias count by dest misalignment 63 - subq a2, 1, a3 # E : 64 - addq zero, 1, t10 # E : 65 - and t4, 7, t4 # E : misalignment between the two 66 - 67 - and a3, 7, t6 # E : number of tail bytes 68 - sll t10, t6, t10 # E : t10 = bitmask of last count byte 69 - bne t4, $unaligned # U : 70 - lda t2, -1 # E : build a mask against false zero 71 - 72 - /* 73 - * We are co-aligned; take care of a partial first word. 74 - * On entry to this basic block: 75 - * t0 == the first destination word for masking back in 76 - * t1 == the first source word. 77 - */ 78 - 79 - srl a3, 3, a2 # E : a2 = loop counter = (count - 1)/8 80 - addq a1, 8, a1 # E : 81 - mskqh t2, a1, t2 # U : detection in the src word 82 - nop 83 - 84 - /* Create the 1st output word and detect 0's in the 1st input word. */ 85 - mskqh t1, a1, t3 # U : 86 - mskql t0, a1, t0 # U : assemble the first output word 87 - ornot t1, t2, t2 # E : 88 - nop 89 - 90 - cmpbge zero, t2, t8 # E : bits set iff null found 91 - or t0, t3, t0 # E : 92 - beq a2, $a_eoc # U : 93 - bne t8, $a_eos # U : 2nd branch in a quad. Bad. 94 - 95 - /* On entry to this basic block: 96 - * t0 == a source quad not containing a null. 97 - * a0 - current aligned destination address 98 - * a1 - current aligned source address 99 - * a2 - count of quadwords to move. 100 - * NOTE: Loop improvement - unrolling this is going to be 101 - * a huge win, since we're going to stall otherwise. 102 - * Fix this later. For _really_ large copies, look 103 - * at using wh64 on a look-ahead basis. See the code 104 - * in clear_user.S and copy_user.S. 
105 - * Presumably, since (a0) and (a1) do not overlap (by C definition) 106 - * Lots of nops here: 107 - * - Separate loads from stores 108 - * - Keep it to 1 branch/quadpack so the branch predictor 109 - * can train. 110 - */ 111 - $a_loop: 112 - stq_u t0, 0(a0) # L : 113 - addq a0, 8, a0 # E : 114 - nop 115 - subq a2, 1, a2 # E : 116 - 117 - EX( ldq_u t0, 0(a1) ) # L : 118 - addq a1, 8, a1 # E : 119 - cmpbge zero, t0, t8 # E : Stall 2 cycles on t0 120 - beq a2, $a_eoc # U : 121 - 122 - beq t8, $a_loop # U : 123 - nop 124 - nop 125 - nop 126 - 127 - /* Take care of the final (partial) word store. At this point 128 - * the end-of-count bit is set in t8 iff it applies. 129 - * 130 - * On entry to this basic block we have: 131 - * t0 == the source word containing the null 132 - * t8 == the cmpbge mask that found it. 133 - */ 134 - $a_eos: 135 - negq t8, t12 # E : find low bit set 136 - and t8, t12, t12 # E : 137 - 138 - /* We're doing a partial word store and so need to combine 139 - our source and original destination words. */ 140 - ldq_u t1, 0(a0) # L : 141 - subq t12, 1, t6 # E : 142 - 143 - or t12, t6, t8 # E : 144 - zapnot t0, t8, t0 # U : clear src bytes > null 145 - zap t1, t8, t1 # U : clear dst bytes <= null 146 - or t0, t1, t0 # E : 147 - 148 - stq_u t0, 0(a0) # L : 149 - br $finish_up # L0 : 150 - nop 151 - nop 152 - 153 - /* Add the end-of-count bit to the eos detection bitmask. */ 154 - .align 4 155 - $a_eoc: 156 - or t10, t8, t8 157 - br $a_eos 158 - nop 159 - nop 160 - 161 - 162 - /* The source and destination are not co-aligned. Align the destination 163 - and cope. We have to be very careful about not reading too much and 164 - causing a SEGV. */ 165 - 166 - .align 4 167 - $u_head: 168 - /* We know just enough now to be able to assemble the first 169 - full source word. We can still find a zero at the end of it 170 - that prevents us from outputting the whole thing. 
171 - 172 - On entry to this basic block: 173 - t0 == the first dest word, unmasked 174 - t1 == the shifted low bits of the first source word 175 - t6 == bytemask that is -1 in dest word bytes */ 176 - 177 - EX( ldq_u t2, 8(a1) ) # L : load second src word 178 - addq a1, 8, a1 # E : 179 - mskql t0, a0, t0 # U : mask trailing garbage in dst 180 - extqh t2, a1, t4 # U : 181 - 182 - or t1, t4, t1 # E : first aligned src word complete 183 - mskqh t1, a0, t1 # U : mask leading garbage in src 184 - or t0, t1, t0 # E : first output word complete 185 - or t0, t6, t6 # E : mask original data for zero test 186 - 187 - cmpbge zero, t6, t8 # E : 188 - beq a2, $u_eocfin # U : 189 - bne t8, $u_final # U : bad news - 2nd branch in a quad 190 - lda t6, -1 # E : mask out the bits we have 191 - 192 - mskql t6, a1, t6 # U : already seen 193 - stq_u t0, 0(a0) # L : store first output word 194 - or t6, t2, t2 # E : 195 - cmpbge zero, t2, t8 # E : find nulls in second partial 196 - 197 - addq a0, 8, a0 # E : 198 - subq a2, 1, a2 # E : 199 - bne t8, $u_late_head_exit # U : 200 - nop 201 - 202 - /* Finally, we've got all the stupid leading edge cases taken care 203 - of and we can set up to enter the main loop. */ 204 - 205 - extql t2, a1, t1 # U : position hi-bits of lo word 206 - EX( ldq_u t2, 8(a1) ) # L : read next high-order source word 207 - addq a1, 8, a1 # E : 208 - cmpbge zero, t2, t8 # E : 209 - 210 - beq a2, $u_eoc # U : 211 - bne t8, $u_eos # U : 212 - nop 213 - nop 214 - 215 - /* Unaligned copy main loop. In order to avoid reading too much, 216 - the loop is structured to detect zeros in aligned source words. 217 - This has, unfortunately, effectively pulled half of a loop 218 - iteration out into the head and half into the tail, but it does 219 - prevent nastiness from accumulating in the very thing we want 220 - to run as fast as possible. 
221 - 222 - On entry to this basic block: 223 - t1 == the shifted high-order bits from the previous source word 224 - t2 == the unshifted current source word 225 - 226 - We further know that t2 does not contain a null terminator. */ 227 - 228 - /* 229 - * Extra nops here: 230 - * separate load quads from store quads 231 - * only one branch/quad to permit predictor training 232 - */ 233 - 234 - .align 4 235 - $u_loop: 236 - extqh t2, a1, t0 # U : extract high bits for current word 237 - addq a1, 8, a1 # E : 238 - extql t2, a1, t3 # U : extract low bits for next time 239 - addq a0, 8, a0 # E : 240 - 241 - or t0, t1, t0 # E : current dst word now complete 242 - EX( ldq_u t2, 0(a1) ) # L : load high word for next time 243 - subq a2, 1, a2 # E : 244 - nop 245 - 246 - stq_u t0, -8(a0) # L : save the current word 247 - mov t3, t1 # E : 248 - cmpbge zero, t2, t8 # E : test new word for eos 249 - beq a2, $u_eoc # U : 250 - 251 - beq t8, $u_loop # U : 252 - nop 253 - nop 254 - nop 255 - 256 - /* We've found a zero somewhere in the source word we just read. 257 - If it resides in the lower half, we have one (probably partial) 258 - word to write out, and if it resides in the upper half, we 259 - have one full and one partial word left to write out. 260 - 261 - On entry to this basic block: 262 - t1 == the shifted high-order bits from the previous source word 263 - t2 == the unshifted current source word. */ 264 - .align 4 265 - $u_eos: 266 - extqh t2, a1, t0 # U : 267 - or t0, t1, t0 # E : first (partial) source word complete 268 - cmpbge zero, t0, t8 # E : is the null in this first bit? 269 - nop 270 - 271 - bne t8, $u_final # U : 272 - stq_u t0, 0(a0) # L : the null was in the high-order bits 273 - addq a0, 8, a0 # E : 274 - subq a2, 1, a2 # E : 275 - 276 - .align 4 277 - $u_late_head_exit: 278 - extql t2, a1, t0 # U : 279 - cmpbge zero, t0, t8 # E : 280 - or t8, t10, t6 # E : 281 - cmoveq a2, t6, t8 # E : 282 - 283 - /* Take care of a final (probably partial) result word. 
284 - On entry to this basic block: 285 - t0 == assembled source word 286 - t8 == cmpbge mask that found the null. */ 287 - .align 4 288 - $u_final: 289 - negq t8, t6 # E : isolate low bit set 290 - and t6, t8, t12 # E : 291 - ldq_u t1, 0(a0) # L : 292 - subq t12, 1, t6 # E : 293 - 294 - or t6, t12, t8 # E : 295 - zapnot t0, t8, t0 # U : kill source bytes > null 296 - zap t1, t8, t1 # U : kill dest bytes <= null 297 - or t0, t1, t0 # E : 298 - 299 - stq_u t0, 0(a0) # E : 300 - br $finish_up # U : 301 - nop 302 - nop 303 - 304 - .align 4 305 - $u_eoc: # end-of-count 306 - extqh t2, a1, t0 # U : 307 - or t0, t1, t0 # E : 308 - cmpbge zero, t0, t8 # E : 309 - nop 310 - 311 - .align 4 312 - $u_eocfin: # end-of-count, final word 313 - or t10, t8, t8 # E : 314 - br $u_final # U : 315 - nop 316 - nop 317 - 318 - /* Unaligned copy entry point. */ 319 - .align 4 320 - $unaligned: 321 - 322 - srl a3, 3, a2 # U : a2 = loop counter = (count - 1)/8 323 - and a0, 7, t4 # E : find dest misalignment 324 - and a1, 7, t5 # E : find src misalignment 325 - mov zero, t0 # E : 326 - 327 - /* Conditionally load the first destination word and a bytemask 328 - with 0xff indicating that the destination byte is sacrosanct. */ 329 - 330 - mov zero, t6 # E : 331 - beq t4, 1f # U : 332 - ldq_u t0, 0(a0) # L : 333 - lda t6, -1 # E : 334 - 335 - mskql t6, a0, t6 # E : 336 - nop 337 - nop 338 - nop 339 - 340 - .align 4 341 - 1: 342 - subq a1, t4, a1 # E : sub dest misalignment from src addr 343 - /* If source misalignment is larger than dest misalignment, we need 344 - extra startup checks to avoid SEGV. */ 345 - cmplt t4, t5, t12 # E : 346 - extql t1, a1, t1 # U : shift src into place 347 - lda t2, -1 # E : for creating masks later 348 - 349 - beq t12, $u_head # U : 350 - mskqh t2, t5, t2 # U : begin src byte validity mask 351 - cmpbge zero, t1, t8 # E : is there a zero? 
352 - nop 353 - 354 - extql t2, a1, t2 # U : 355 - or t8, t10, t5 # E : test for end-of-count too 356 - cmpbge zero, t2, t3 # E : 357 - cmoveq a2, t5, t8 # E : Latency=2, extra map slot 358 - 359 - nop # E : goes with cmov 360 - andnot t8, t3, t8 # E : 361 - beq t8, $u_head # U : 362 - nop 363 - 364 - /* At this point we've found a zero in the first partial word of 365 - the source. We need to isolate the valid source data and mask 366 - it into the original destination data. (Incidentally, we know 367 - that we'll need at least one byte of that original dest word.) */ 368 - 369 - ldq_u t0, 0(a0) # L : 370 - negq t8, t6 # E : build bitmask of bytes <= zero 371 - mskqh t1, t4, t1 # U : 372 - and t6, t8, t12 # E : 373 - 374 - subq t12, 1, t6 # E : 375 - or t6, t12, t8 # E : 376 - zapnot t2, t8, t2 # U : prepare source word; mirror changes 377 - zapnot t1, t8, t1 # U : to source validity mask 378 - 379 - andnot t0, t2, t0 # E : zero place for source to reside 380 - or t0, t1, t0 # E : and put it there 381 - stq_u t0, 0(a0) # L : 382 - nop 383 - 384 - .align 4 385 - $finish_up: 386 - zapnot t0, t12, t4 # U : was last byte written null? 
387 - and t12, 0xf0, t3 # E : binary search for the address of the 388 - cmovne t4, 1, t4 # E : Latency=2, extra map slot 389 - nop # E : with cmovne 390 - 391 - and t12, 0xcc, t2 # E : last byte written 392 - and t12, 0xaa, t1 # E : 393 - cmovne t3, 4, t3 # E : Latency=2, extra map slot 394 - nop # E : with cmovne 395 - 396 - bic a0, 7, t0 397 - cmovne t2, 2, t2 # E : Latency=2, extra map slot 398 - nop # E : with cmovne 399 - nop 400 - 401 - cmovne t1, 1, t1 # E : Latency=2, extra map slot 402 - nop # E : with cmovne 403 - addq t0, t3, t0 # E : 404 - addq t1, t2, t1 # E : 405 - 406 - addq t0, t1, t0 # E : 407 - addq t0, t4, t0 # add one if we filled the buffer 408 - subq t0, v0, v0 # find string length 409 - ret # L0 : 410 - 411 - .align 4 412 - $zerolength: 413 - nop 414 - nop 415 - nop 416 - clr v0 417 - 418 - $exception: 419 - nop 420 - nop 421 - nop 422 - ret 423 - 424 - .end __strncpy_from_user
-107
arch/alpha/lib/ev67-strlen_user.S
··· 1 - /* 2 - * arch/alpha/lib/ev67-strlen_user.S 3 - * 21264 version contributed by Rick Gorton <rick.gorton@api-networks.com> 4 - * 5 - * Return the length of the string including the NULL terminator 6 - * (strlen+1) or zero if an error occurred. 7 - * 8 - * In places where it is critical to limit the processing time, 9 - * and the data is not trusted, strnlen_user() should be used. 10 - * It will return a value greater than its second argument if 11 - * that limit would be exceeded. This implementation is allowed 12 - * to access memory beyond the limit, but will not cross a page 13 - * boundary when doing so. 14 - * 15 - * Much of the information about 21264 scheduling/coding comes from: 16 - * Compiler Writer's Guide for the Alpha 21264 17 - * abbreviated as 'CWG' in other comments here 18 - * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html 19 - * Scheduling notation: 20 - * E - either cluster 21 - * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1 22 - * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1 23 - * Try not to change the actual algorithm if possible for consistency. 24 - */ 25 - 26 - #include <asm/regdef.h> 27 - 28 - 29 - /* Allow an exception for an insn; exit if we get one. */ 30 - #define EX(x,y...) 
\ 31 - 99: x,##y; \ 32 - .section __ex_table,"a"; \ 33 - .long 99b - .; \ 34 - lda v0, $exception-99b(zero); \ 35 - .previous 36 - 37 - 38 - .set noreorder 39 - .set noat 40 - .text 41 - 42 - .globl __strlen_user 43 - .ent __strlen_user 44 - .frame sp, 0, ra 45 - 46 - .align 4 47 - __strlen_user: 48 - ldah a1, 32767(zero) # do not use plain strlen_user() for strings 49 - # that might be almost 2 GB long; you should 50 - # be using strnlen_user() instead 51 - nop 52 - nop 53 - nop 54 - 55 - .globl __strnlen_user 56 - 57 - .align 4 58 - __strnlen_user: 59 - .prologue 0 60 - EX( ldq_u t0, 0(a0) ) # L : load first quadword (a0 may be misaligned) 61 - lda t1, -1(zero) # E : 62 - 63 - insqh t1, a0, t1 # U : 64 - andnot a0, 7, v0 # E : 65 - or t1, t0, t0 # E : 66 - subq a0, 1, a0 # E : get our +1 for the return 67 - 68 - cmpbge zero, t0, t1 # E : t1 <- bitmask: bit i == 1 <==> i-th byte == 0 69 - subq a1, 7, t2 # E : 70 - subq a0, v0, t0 # E : 71 - bne t1, $found # U : 72 - 73 - addq t2, t0, t2 # E : 74 - addq a1, 1, a1 # E : 75 - nop # E : 76 - nop # E : 77 - 78 - .align 4 79 - $loop: ble t2, $limit # U : 80 - EX( ldq t0, 8(v0) ) # L : 81 - nop # E : 82 - nop # E : 83 - 84 - cmpbge zero, t0, t1 # E : 85 - subq t2, 8, t2 # E : 86 - addq v0, 8, v0 # E : addr += 8 87 - beq t1, $loop # U : 88 - 89 - $found: cttz t1, t2 # U0 : 90 - addq v0, t2, v0 # E : 91 - subq v0, a0, v0 # E : 92 - ret # L0 : 93 - 94 - $exception: 95 - nop 96 - nop 97 - nop 98 - ret 99 - 100 - .align 4 # currently redundant 101 - $limit: 102 - nop 103 - nop 104 - subq a1, t2, v0 105 - ret 106 - 107 - .end __strlen_user
-91
arch/alpha/lib/strlen_user.S
··· 1 - /* 2 - * arch/alpha/lib/strlen_user.S 3 - * 4 - * Return the length of the string including the NUL terminator 5 - * (strlen+1) or zero if an error occurred. 6 - * 7 - * In places where it is critical to limit the processing time, 8 - * and the data is not trusted, strnlen_user() should be used. 9 - * It will return a value greater than its second argument if 10 - * that limit would be exceeded. This implementation is allowed 11 - * to access memory beyond the limit, but will not cross a page 12 - * boundary when doing so. 13 - */ 14 - 15 - #include <asm/regdef.h> 16 - 17 - 18 - /* Allow an exception for an insn; exit if we get one. */ 19 - #define EX(x,y...) \ 20 - 99: x,##y; \ 21 - .section __ex_table,"a"; \ 22 - .long 99b - .; \ 23 - lda v0, $exception-99b(zero); \ 24 - .previous 25 - 26 - 27 - .set noreorder 28 - .set noat 29 - .text 30 - 31 - .globl __strlen_user 32 - .ent __strlen_user 33 - .frame sp, 0, ra 34 - 35 - .align 3 36 - __strlen_user: 37 - ldah a1, 32767(zero) # do not use plain strlen_user() for strings 38 - # that might be almost 2 GB long; you should 39 - # be using strnlen_user() instead 40 - 41 - .globl __strnlen_user 42 - 43 - .align 3 44 - __strnlen_user: 45 - .prologue 0 46 - 47 - EX( ldq_u t0, 0(a0) ) # load first quadword (a0 may be misaligned) 48 - lda t1, -1(zero) 49 - insqh t1, a0, t1 50 - andnot a0, 7, v0 51 - or t1, t0, t0 52 - subq a0, 1, a0 # get our +1 for the return 53 - cmpbge zero, t0, t1 # t1 <- bitmask: bit i == 1 <==> i-th byte == 0 54 - subq a1, 7, t2 55 - subq a0, v0, t0 56 - bne t1, $found 57 - 58 - addq t2, t0, t2 59 - addq a1, 1, a1 60 - 61 - .align 3 62 - $loop: ble t2, $limit 63 - EX( ldq t0, 8(v0) ) 64 - subq t2, 8, t2 65 - addq v0, 8, v0 # addr += 8 66 - cmpbge zero, t0, t1 67 - beq t1, $loop 68 - 69 - $found: negq t1, t2 # clear all but least set bit 70 - and t1, t2, t1 71 - 72 - and t1, 0xf0, t2 # binary search for that set bit 73 - and t1, 0xcc, t3 74 - and t1, 0xaa, t4 75 - cmovne t2, 4, t2 76 - cmovne 
t3, 2, t3 77 - cmovne t4, 1, t4 78 - addq t2, t3, t2 79 - addq v0, t4, v0 80 - addq v0, t2, v0 81 - nop # dual issue next two on ev4 and ev5 82 - subq v0, a0, v0 83 - $exception: 84 - ret 85 - 86 - .align 3 # currently redundant 87 - $limit: 88 - subq a1, t2, v0 89 - ret 90 - 91 - .end __strlen_user
-339
arch/alpha/lib/strncpy_from_user.S
··· 1 - /* 2 - * arch/alpha/lib/strncpy_from_user.S 3 - * Contributed by Richard Henderson (rth@tamu.edu) 4 - * 5 - * Just like strncpy except in the return value: 6 - * 7 - * -EFAULT if an exception occurs before the terminator is copied. 8 - * N if the buffer filled. 9 - * 10 - * Otherwise the length of the string is returned. 11 - */ 12 - 13 - 14 - #include <asm/errno.h> 15 - #include <asm/regdef.h> 16 - 17 - 18 - /* Allow an exception for an insn; exit if we get one. */ 19 - #define EX(x,y...) \ 20 - 99: x,##y; \ 21 - .section __ex_table,"a"; \ 22 - .long 99b - .; \ 23 - lda $31, $exception-99b($0); \ 24 - .previous 25 - 26 - 27 - .set noat 28 - .set noreorder 29 - .text 30 - 31 - .globl __strncpy_from_user 32 - .ent __strncpy_from_user 33 - .frame $30, 0, $26 34 - .prologue 0 35 - 36 - .align 3 37 - $aligned: 38 - /* On entry to this basic block: 39 - t0 == the first destination word for masking back in 40 - t1 == the first source word. */ 41 - 42 - /* Create the 1st output word and detect 0's in the 1st input word. */ 43 - lda t2, -1 # e1 : build a mask against false zero 44 - mskqh t2, a1, t2 # e0 : detection in the src word 45 - mskqh t1, a1, t3 # e0 : 46 - ornot t1, t2, t2 # .. e1 : 47 - mskql t0, a1, t0 # e0 : assemble the first output word 48 - cmpbge zero, t2, t8 # .. e1 : bits set iff null found 49 - or t0, t3, t0 # e0 : 50 - beq a2, $a_eoc # .. e1 : 51 - bne t8, $a_eos # .. e1 : 52 - 53 - /* On entry to this basic block: 54 - t0 == a source word not containing a null. */ 55 - 56 - $a_loop: 57 - stq_u t0, 0(a0) # e0 : 58 - addq a0, 8, a0 # .. e1 : 59 - EX( ldq_u t0, 0(a1) ) # e0 : 60 - addq a1, 8, a1 # .. e1 : 61 - subq a2, 1, a2 # e0 : 62 - cmpbge zero, t0, t8 # .. e1 (stall) 63 - beq a2, $a_eoc # e1 : 64 - beq t8, $a_loop # e1 : 65 - 66 - /* Take care of the final (partial) word store. At this point 67 - the end-of-count bit is set in t8 iff it applies. 
68 - 69 - On entry to this basic block we have: 70 - t0 == the source word containing the null 71 - t8 == the cmpbge mask that found it. */ 72 - 73 - $a_eos: 74 - negq t8, t12 # e0 : find low bit set 75 - and t8, t12, t12 # e1 (stall) 76 - 77 - /* For the sake of the cache, don't read a destination word 78 - if we're not going to need it. */ 79 - and t12, 0x80, t6 # e0 : 80 - bne t6, 1f # .. e1 (zdb) 81 - 82 - /* We're doing a partial word store and so need to combine 83 - our source and original destination words. */ 84 - ldq_u t1, 0(a0) # e0 : 85 - subq t12, 1, t6 # .. e1 : 86 - or t12, t6, t8 # e0 : 87 - unop # 88 - zapnot t0, t8, t0 # e0 : clear src bytes > null 89 - zap t1, t8, t1 # .. e1 : clear dst bytes <= null 90 - or t0, t1, t0 # e1 : 91 - 92 - 1: stq_u t0, 0(a0) 93 - br $finish_up 94 - 95 - /* Add the end-of-count bit to the eos detection bitmask. */ 96 - $a_eoc: 97 - or t10, t8, t8 98 - br $a_eos 99 - 100 - /*** The Function Entry Point ***/ 101 - .align 3 102 - __strncpy_from_user: 103 - mov a0, v0 # save the string start 104 - beq a2, $zerolength 105 - 106 - /* Are source and destination co-aligned? */ 107 - xor a0, a1, t1 # e0 : 108 - and a0, 7, t0 # .. e1 : find dest misalignment 109 - and t1, 7, t1 # e0 : 110 - addq a2, t0, a2 # .. e1 : bias count by dest misalignment 111 - subq a2, 1, a2 # e0 : 112 - and a2, 7, t2 # e1 : 113 - srl a2, 3, a2 # e0 : a2 = loop counter = (count - 1)/8 114 - addq zero, 1, t10 # .. e1 : 115 - sll t10, t2, t10 # e0 : t10 = bitmask of last count byte 116 - bne t1, $unaligned # .. e1 : 117 - 118 - /* We are co-aligned; take care of a partial first word. */ 119 - 120 - EX( ldq_u t1, 0(a1) ) # e0 : load first src word 121 - addq a1, 8, a1 # .. e1 : 122 - 123 - beq t0, $aligned # avoid loading dest word if not needed 124 - ldq_u t0, 0(a0) # e0 : 125 - br $aligned # .. e1 : 126 - 127 - 128 - /* The source and destination are not co-aligned. Align the destination 129 - and cope. 
We have to be very careful about not reading too much and 130 - causing a SEGV. */ 131 - 132 - .align 3 133 - $u_head: 134 - /* We know just enough now to be able to assemble the first 135 - full source word. We can still find a zero at the end of it 136 - that prevents us from outputting the whole thing. 137 - 138 - On entry to this basic block: 139 - t0 == the first dest word, unmasked 140 - t1 == the shifted low bits of the first source word 141 - t6 == bytemask that is -1 in dest word bytes */ 142 - 143 - EX( ldq_u t2, 8(a1) ) # e0 : load second src word 144 - addq a1, 8, a1 # .. e1 : 145 - mskql t0, a0, t0 # e0 : mask trailing garbage in dst 146 - extqh t2, a1, t4 # e0 : 147 - or t1, t4, t1 # e1 : first aligned src word complete 148 - mskqh t1, a0, t1 # e0 : mask leading garbage in src 149 - or t0, t1, t0 # e0 : first output word complete 150 - or t0, t6, t6 # e1 : mask original data for zero test 151 - cmpbge zero, t6, t8 # e0 : 152 - beq a2, $u_eocfin # .. e1 : 153 - bne t8, $u_final # e1 : 154 - 155 - lda t6, -1 # e1 : mask out the bits we have 156 - mskql t6, a1, t6 # e0 : already seen 157 - stq_u t0, 0(a0) # e0 : store first output word 158 - or t6, t2, t2 # .. e1 : 159 - cmpbge zero, t2, t8 # e0 : find nulls in second partial 160 - addq a0, 8, a0 # .. e1 : 161 - subq a2, 1, a2 # e0 : 162 - bne t8, $u_late_head_exit # .. e1 : 163 - 164 - /* Finally, we've got all the stupid leading edge cases taken care 165 - of and we can set up to enter the main loop. */ 166 - 167 - extql t2, a1, t1 # e0 : position hi-bits of lo word 168 - EX( ldq_u t2, 8(a1) ) # .. e1 : read next high-order source word 169 - addq a1, 8, a1 # e0 : 170 - cmpbge zero, t2, t8 # e1 (stall) 171 - beq a2, $u_eoc # e1 : 172 - bne t8, $u_eos # e1 : 173 - 174 - /* Unaligned copy main loop. In order to avoid reading too much, 175 - the loop is structured to detect zeros in aligned source words. 
176 - This has, unfortunately, effectively pulled half of a loop 177 - iteration out into the head and half into the tail, but it does 178 - prevent nastiness from accumulating in the very thing we want 179 - to run as fast as possible. 180 - 181 - On entry to this basic block: 182 - t1 == the shifted high-order bits from the previous source word 183 - t2 == the unshifted current source word 184 - 185 - We further know that t2 does not contain a null terminator. */ 186 - 187 - .align 3 188 - $u_loop: 189 - extqh t2, a1, t0 # e0 : extract high bits for current word 190 - addq a1, 8, a1 # .. e1 : 191 - extql t2, a1, t3 # e0 : extract low bits for next time 192 - addq a0, 8, a0 # .. e1 : 193 - or t0, t1, t0 # e0 : current dst word now complete 194 - EX( ldq_u t2, 0(a1) ) # .. e1 : load high word for next time 195 - stq_u t0, -8(a0) # e0 : save the current word 196 - mov t3, t1 # .. e1 : 197 - subq a2, 1, a2 # e0 : 198 - cmpbge zero, t2, t8 # .. e1 : test new word for eos 199 - beq a2, $u_eoc # e1 : 200 - beq t8, $u_loop # e1 : 201 - 202 - /* We've found a zero somewhere in the source word we just read. 203 - If it resides in the lower half, we have one (probably partial) 204 - word to write out, and if it resides in the upper half, we 205 - have one full and one partial word left to write out. 206 - 207 - On entry to this basic block: 208 - t1 == the shifted high-order bits from the previous source word 209 - t2 == the unshifted current source word. */ 210 - $u_eos: 211 - extqh t2, a1, t0 # e0 : 212 - or t0, t1, t0 # e1 : first (partial) source word complete 213 - 214 - cmpbge zero, t0, t8 # e0 : is the null in this first bit? 215 - bne t8, $u_final # .. e1 (zdb) 216 - 217 - stq_u t0, 0(a0) # e0 : the null was in the high-order bits 218 - addq a0, 8, a0 # .. e1 : 219 - subq a2, 1, a2 # e1 : 220 - 221 - $u_late_head_exit: 222 - extql t2, a1, t0 # .. e0 : 223 - cmpbge zero, t0, t8 # e0 : 224 - or t8, t10, t6 # e1 : 225 - cmoveq a2, t6, t8 # e0 : 226 - nop # .. 
e1 : 227 - 228 - /* Take care of a final (probably partial) result word. 229 - On entry to this basic block: 230 - t0 == assembled source word 231 - t8 == cmpbge mask that found the null. */ 232 - $u_final: 233 - negq t8, t6 # e0 : isolate low bit set 234 - and t6, t8, t12 # e1 : 235 - 236 - and t12, 0x80, t6 # e0 : avoid dest word load if we can 237 - bne t6, 1f # .. e1 (zdb) 238 - 239 - ldq_u t1, 0(a0) # e0 : 240 - subq t12, 1, t6 # .. e1 : 241 - or t6, t12, t8 # e0 : 242 - zapnot t0, t8, t0 # .. e1 : kill source bytes > null 243 - zap t1, t8, t1 # e0 : kill dest bytes <= null 244 - or t0, t1, t0 # e1 : 245 - 246 - 1: stq_u t0, 0(a0) # e0 : 247 - br $finish_up 248 - 249 - $u_eoc: # end-of-count 250 - extqh t2, a1, t0 251 - or t0, t1, t0 252 - cmpbge zero, t0, t8 253 - 254 - $u_eocfin: # end-of-count, final word 255 - or t10, t8, t8 256 - br $u_final 257 - 258 - /* Unaligned copy entry point. */ 259 - .align 3 260 - $unaligned: 261 - 262 - EX( ldq_u t1, 0(a1) ) # e0 : load first source word 263 - 264 - and a0, 7, t4 # .. e1 : find dest misalignment 265 - and a1, 7, t5 # e0 : find src misalignment 266 - 267 - /* Conditionally load the first destination word and a bytemask 268 - with 0xff indicating that the destination byte is sacrosanct. */ 269 - 270 - mov zero, t0 # .. e1 : 271 - mov zero, t6 # e0 : 272 - beq t4, 1f # .. e1 : 273 - ldq_u t0, 0(a0) # e0 : 274 - lda t6, -1 # .. e1 : 275 - mskql t6, a0, t6 # e0 : 276 - 1: 277 - subq a1, t4, a1 # .. e1 : sub dest misalignment from src addr 278 - 279 - /* If source misalignment is larger than dest misalignment, we need 280 - extra startup checks to avoid SEGV. */ 281 - 282 - cmplt t4, t5, t12 # e1 : 283 - extql t1, a1, t1 # .. e0 : shift src into place 284 - lda t2, -1 # e0 : for creating masks later 285 - beq t12, $u_head # e1 : 286 - 287 - mskqh t2, t5, t2 # e0 : begin src byte validity mask 288 - cmpbge zero, t1, t8 # .. e1 : is there a zero? 289 - extql t2, a1, t2 # e0 : 290 - or t8, t10, t5 # .. 
e1 : test for end-of-count too 291 - cmpbge zero, t2, t3 # e0 : 292 - cmoveq a2, t5, t8 # .. e1 : 293 - andnot t8, t3, t8 # e0 : 294 - beq t8, $u_head # .. e1 (zdb) 295 - 296 - /* At this point we've found a zero in the first partial word of 297 - the source. We need to isolate the valid source data and mask 298 - it into the original destination data. (Incidentally, we know 299 - that we'll need at least one byte of that original dest word.) */ 300 - 301 - ldq_u t0, 0(a0) # e0 : 302 - negq t8, t6 # .. e1 : build bitmask of bytes <= zero 303 - mskqh t1, t4, t1 # e0 : 304 - and t6, t8, t12 # .. e1 : 305 - subq t12, 1, t6 # e0 : 306 - or t6, t12, t8 # e1 : 307 - 308 - zapnot t2, t8, t2 # e0 : prepare source word; mirror changes 309 - zapnot t1, t8, t1 # .. e1 : to source validity mask 310 - 311 - andnot t0, t2, t0 # e0 : zero place for source to reside 312 - or t0, t1, t0 # e1 : and put it there 313 - stq_u t0, 0(a0) # e0 : 314 - 315 - $finish_up: 316 - zapnot t0, t12, t4 # was last byte written null? 317 - cmovne t4, 1, t4 318 - 319 - and t12, 0xf0, t3 # binary search for the address of the 320 - and t12, 0xcc, t2 # last byte written 321 - and t12, 0xaa, t1 322 - bic a0, 7, t0 323 - cmovne t3, 4, t3 324 - cmovne t2, 2, t2 325 - cmovne t1, 1, t1 326 - addq t0, t3, t0 327 - addq t1, t2, t1 328 - addq t0, t1, t0 329 - addq t0, t4, t0 # add one if we filled the buffer 330 - 331 - subq t0, v0, v0 # find string length 332 - ret 333 - 334 - $zerolength: 335 - clr v0 336 - $exception: 337 - ret 338 - 339 - .end __strncpy_from_user