/* this repo has no description
 * at fixPythonPipStalling 438 lines 14 kB view raw */
/*
 *  ilogb.s
 *
 *      by Ian Ollmann
 *
 *  Copyright 2007 Apple Inc. All Rights Reserved.
 */

/*
 *  Syntax:  AT&T operand order (source, destination), run through the C
 *           preprocessor before assembly.
 *  Targets: i386 (selected by __i386__) and a 64-bit path (the #else arms).
 *
 *  ENTRY, FRAME_SIZE, STACKP, SUBP and ADDP are not defined in this file;
 *  presumably they come from <machine/asm.h> / "abi.h" -- confirm there.
 *  From their use below: FRAME_SIZE(STACKP) addresses the first stack
 *  argument on entry, and SUBP / ADDP adjust the stack pointer.
 *
 *  All six routines share one classification trick: mask out the biased
 *  exponent, add one exponent unit to it, and do a signed compare against
 *  one exponent unit.
 *      greater  -> normal number, handled on the fall-through path
 *      equal    -> exponent was 0 (zero or denormal), handled at label 2
 *      less     -> exponent was all ones (Inf or NaN), the add carried into
 *                  the sign bit, handled at label 1
 *
 *  The special-case paths deliberately execute SSE conversions or x87
 *  divides on constructed operands so that the hardware raises the
 *  floating-point status flags (invalid, divide-by-zero). Instruction order
 *  matters there; do not reorder without re-checking the flags.
 */

#include <machine/asm.h>
#include "abi.h"

//-----------------------------------------------------------------------
// ilogbf -- unbiased binary exponent of a float, as an int in %eax.
// C-equivalent (presumed): int ilogbf( float x )
// In:    i386: float at FRAME_SIZE(STACKP).   otherwise: %xmm0
// Out:   %eax = exponent for normals and denormals
//               0x80000000 for zero and for NaN (invalid is raised)
//               0x7fffffff for +/-Inf          (invalid is raised)
// Uses:  %eax, %ecx, %edx, %xmm0, %xmm1, flags
//-----------------------------------------------------------------------
ENTRY( ilogbf )
//move argument to eax
#if defined( __i386__ )
    movl        FRAME_SIZE(STACKP), %eax
#else
    movd        %xmm0, %eax
#endif

    andl        $0x7f800000, %eax       //expose exponent
    addl        $0x00800000, %eax       //add 1 to exponent, promotes NaN/Inf to 2**31
    cmpl        $0x00800000, %eax       //test for exceptional case
    jle         1f                      //jump to special code for zeros, NaNs, Infs and denorms
    sarl        $23, %eax               //move exponent to unit precision
    subl        $128, %eax              //remove (bias+1)
    ret

1:  je          2f                      //handle zeros/denorms at 2

    //Inf / NaN
    //load initial value
#if defined( __i386__ )
    movss       FRAME_SIZE(STACKP), %xmm0
#endif

    cvtss2si    %xmm0, %eax             //set invalid and produce a 0x80000000
    cmpeqss     %xmm0, %xmm0            // x == x : all ones for Inf, zero for NaN
    movd        %xmm0, %edx
    addl        %edx, %eax              // c = |x| == inf ? 0x7fffffff : 0x80000000
    ret

2:  //zero / denorm
    //reload initial value
#if defined( __i386__ )
    movl        FRAME_SIZE(STACKP), %edx
    movss       FRAME_SIZE(STACKP), %xmm0
#else
    movd        %xmm0, %edx
#endif

    //set invalid for zero
    xorps       %xmm1, %xmm1
    cmpeqss     %xmm1, %xmm0            //x == 0 ? all ones (a NaN pattern) : 0
    cvtss2si    %xmm0, %ecx             //zero: invalid + 0x80000000.  denorm: 0

    andl        $0x7fffffff, %edx       //take absolute value
    bsrl        %edx, %eax              //get position of leading set bit
    subl        $(126+23), %eax         //subtract out bias for denorm
    cmp         $0, %edx
    cmove       %ecx, %eax              //x was zero: replace the bsr result with 0x80000000
    ret

//-----------------------------------------------------------------------
// ilogb -- unbiased binary exponent of a double, as an int in %eax.
// C-equivalent (presumed): int ilogb( double x )
// In:    i386: double at FRAME_SIZE(STACKP).   otherwise: %xmm0
// Out:   %eax = exponent for normals and denormals
//               0x80000000 for zero and for NaN (invalid is raised)
//               0x7fffffff for +/-Inf          (invalid is raised)
// Uses:  %eax/%rax, %ecx, %edx/%rdx, %xmm0, %xmm1, flags
//-----------------------------------------------------------------------
ENTRY( ilogb )
//move high 32-bits of argument to eax
#if defined( __i386__ )
    //We take this roundabout route to avoid a store forwarding stall, which costs us 8 cycles on Yonah.
    movq        FRAME_SIZE(STACKP), %xmm0
    psrlq       $32, %xmm0
    movd        %xmm0, %eax
#else
    movd        %xmm0, %rax
    shrq        $32, %rax
#endif

    andl        $0x7ff00000, %eax       //expose exponent
    addl        $0x00100000, %eax       //add 1 to exponent, promotes NaN/Inf to 2**31
    cmpl        $0x00100000, %eax       //test for exceptional cases
    jle         1f                      //jump to special code for zeros, NaNs, Infs and denorms
    sarl        $20, %eax               //move exponent to unit precision
    subl        $1024, %eax             //remove (bias+1)
    ret

1:  je          2f                      //handle zeros/denorms at 2

    //Inf / NaN
    //reload initial value (the i386 entry sequence shifted xmm0)
#if defined( __i386__ )
    movsd       FRAME_SIZE(STACKP), %xmm0
#endif
    cvtsd2si    %xmm0, %eax             //set invalid and produce a 0x80000000
    cmpeqsd     %xmm0, %xmm0            // x == x : all ones for Inf, zero for NaN
    movd        %xmm0, %edx             //low 32 bits of the compare mask
    addl        %edx, %eax              // c = |x| == inf ? 0x7fffffff : 0x80000000
    ret

2:  // zero/denorm case
    //reload initial value
#if defined( __i386__ )
    movsd       FRAME_SIZE(STACKP), %xmm0
    movl        4+FRAME_SIZE(STACKP), %edx  //high 32 bits of x
#else
    movd        %xmm0, %rdx
#endif
    xorpd       %xmm1, %xmm1
    cmpeqsd     %xmm1, %xmm0
    cvtsd2si    %xmm0, %ecx             //set invalid for zero, and produce ILOGB_ZERO. Denorms produce 0 here.

#if defined( __i386__ )
    //Locate the leading set bit of the 52-bit fraction using 32-bit scans:
    //scan the high word if it is non-zero (bit index offset 32), else the low word.
    andl        $0x7fffffff, %edx       //take absolute value
    movl        $32, %eax               //offset of the high word
    cmp         $0, %edx
    cmove       %edx, %eax              //high word empty: offset = 0
    cmove       FRAME_SIZE(STACKP), %edx    //high word empty: scan the low word instead
    bsrl        %edx, %edx
    subl        $(1022+52), %eax        //subtract out bias for denorm
    addl        %edx, %eax
    cmp         $0, %ecx
    cmovne      %ecx, %eax              //x was zero: return 0x80000000
    ret

#else
    //take absolute value
    salq        $1, %rdx
    shrq        $1, %rdx
    bsrq        %rdx, %rax              //position of leading set bit
    subl        $(1022+52), %eax        //subtract out bias for denorm
    cmp         $0, %rdx
    cmove       %ecx, %eax              //x was zero: return 0x80000000
    ret
#endif

//-----------------------------------------------------------------------
// ilogbl -- unbiased binary exponent of an x87 extended value, as an int.
// C-equivalent (presumed): int ilogbl( long double x )
// In:    10-byte value at FRAME_SIZE(STACKP) on every target (no #if here)
// Out:   %eax = exponent for normals and denormals
//               0x80000000 for zero and for NaN (invalid is raised)
//               0x7fffffff for +/-Inf          (invalid is raised)
// Uses:  %eax, %edx, %xmm0, %xmm1, x87 stack (left balanced), flags,
//        and 32-FRAME_SIZE bytes of temporary stack on the zero/denorm path
//-----------------------------------------------------------------------
ENTRY( ilogbl )
    movw        8+FRAME_SIZE( STACKP ), %ax //sign + 15-bit exponent word
    andw        $0x7fff, %ax            //drop the sign
    addw        $1, %ax                 //add 1 to exponent, pushes NaN/Inf into the 16-bit sign position
    cmpw        $1, %ax                 //test for exceptional cases
    jle         1f
    andl        $0xffff, %eax           //clear stale upper half of eax
    subl        $16384, %eax            //remove (bias+1)
    ret

1:  je          2f

    //Inf / NaN
    fldt        FRAME_SIZE(STACKP)      // {x}
    pcmpeqb     %xmm0, %xmm0            // -1  (all ones, a NaN bit pattern)
    xor         %eax, %eax
    cvtss2si    %xmm0, %edx             // set invalid
    fucomip     %st(0), %st(0)          // test for NaN, pops x
    setp        %al                     // isnan(x)
    addl        $0x7fffffff, %eax       // isnan(x) ? 0x80000000 : 0x7fffffff
    ret


2:  //zeros / denorms
    fildll      FRAME_SIZE(STACKP)      // load in 0/denormal mantissa as integer
    SUBP        $(32-FRAME_SIZE), STACKP
    fstpt       (STACKP)                // write it back out
    xorl        %eax, %eax
    movw        8(STACKP), %ax          // exponent of the integer-valued mantissa
    subl        $(16382+63+16383), %eax // subtract out the bias
    movl        $0x80000000, %edx
    cmp         $(-16382+-63+-16383), %eax  // stored exponent was 0, i.e. x was zero
    cmove       %edx, %eax
    ADDP        $(32-FRAME_SIZE), STACKP

    //set invalid if necessary
    //The integer result is reinterpreted as a float: 0x80000000 is -0.0f and
    //compares equal to zero, which yields an all-ones operand for cvtss2si.
    xorps       %xmm1, %xmm1
    movd        %eax, %xmm0
    cmpeqss     %xmm1, %xmm0
    cvtss2si    %xmm0, %edx
    ret


//-----------------------------------------------------------------------
// logbf -- unbiased binary exponent of a float, returned as a float.
// C-equivalent (presumed): float logbf( float x )
// In:    i386: float at FRAME_SIZE(STACKP).   otherwise: %xmm0
// Out:   i386: x87 %st(0).   otherwise: %xmm0
//        Inf returns +Inf, NaN returns NaN, zero returns -Inf and raises
//        divide-by-zero.
// Uses:  %eax, %ecx, %edx, %xmm0, %xmm1, flags, 4 bytes of temporary
//        stack on i386
//-----------------------------------------------------------------------
ENTRY( logbf )
    //move argument to eax
#if defined( __i386__ )
    movl        FRAME_SIZE(STACKP), %eax
#else
    movd        %xmm0, %eax
#endif

    andl        $0x7f800000, %eax       //expose exponent
    addl        $0x00800000, %eax       //add 1 to exponent, promotes NaN/Inf to 2**31
    cmpl        $0x00800000, %eax       //test for exceptional case
    jle         1f                      //jump to special code for zeros, NaNs, Infs and denorms
    sarl        $23, %eax               //move exponent to unit precision
    subl        $128, %eax              //remove (bias+1)

    //convert the integer exponent to floating point in the return register
#if defined( __i386__ )
    SUBP        $4, STACKP
    movl        %eax, (STACKP)
    fildl       (STACKP)
    ADDP        $4, STACKP
#else
    cvtsi2ss    %eax, %xmm0
#endif
    ret

1:  je          2f                      //handle zeros/denorms at 2

    //Infinity and NaN just return |x|
#if defined( __i386__ )
    flds        FRAME_SIZE(STACKP)
    fabs
#else
    movaps      %xmm0, %xmm1
    cmpeqps     %xmm0, %xmm0            //all ones where x == x (not NaN)
    pslld       $31, %xmm0              //sign-bit mask for the non-NaN case
    andnps      %xmm1, %xmm0            //clear the sign of Inf, leave NaN untouched
#endif
    ret

2:
    //load |x| as if it was an int (equivalent to multiplying denorm by 2**(23+126))
#if defined( __i386__ )
    movl        FRAME_SIZE(STACKP), %edx
#else
    movd        %xmm0, %edx
#endif
    andl        $0x7fffffff, %edx
    cvtsi2ss    %edx, %xmm1             //read the denorm/0 in as an integer

    //move a = x==0 ? 1.0f: 0.0f to xmm
    movl        $0x3f800000, %eax
    xorl        %ecx, %ecx
    cmp         $0, %edx
    cmovne      %ecx, %eax
    movd        %eax, %xmm0             //load x == 0 ? 1.0f : 0.0f  (0.0f is to avoid inexact flag on denorms)

    // do isdenorm(x) ? 0.0f / denorm*2**(23+126) : 1.0 / 0
    divss       %xmm1, %xmm0            //set div/0 flag if x is zero

    //extract the exponent of the scaled denorm
    movd        %xmm1, %eax             //move the bits back to gpr, formatted as float now
    andl        $0x7f800000, %eax       //extract exponent
    sarl        $23, %eax               //reduce to unit precision
    subl        $(126+23+127), %eax     //correct for new bias after int->float conversion

    //convert to float
    cvtsi2ss    %eax, %xmm0             //logb requires its result in floating point format, so convert the unbiased exponent to float
    movd        %xmm0, %edx             //and bring it back

    //if( x == 0 ) return -inf
    movl        $0xff800000, %ecx
    cmp         $(-126-23-127), %eax
    cmove       %ecx, %edx

#if defined( __i386__ )
    SUBP        $4, STACKP
    movl        %edx, (STACKP)
    flds        (STACKP)
    ADDP        $4, STACKP
#else
    movd        %edx, %xmm0
#endif
    ret

//-----------------------------------------------------------------------
// logb -- unbiased binary exponent of a double, returned as a double.
// C-equivalent (presumed): double logb( double x )
// In:    i386: double at FRAME_SIZE(STACKP).   otherwise: %xmm0
// Out:   i386: x87 %st(0).   otherwise: %xmm0
//        Inf returns +Inf, NaN returns NaN, zero returns -Inf and raises
//        divide-by-zero.
// Uses:  %eax/%rax, %xmm0, %xmm1, x87 stack on i386, flags, temporary stack
//-----------------------------------------------------------------------
ENTRY( logb )
    //move argument to eax
#if defined( __i386__ )
    //We take this roundabout route to avoid a store forwarding stall, which costs us 8 cycles on Yonah.
    movq        FRAME_SIZE(STACKP), %xmm0
    psrlq       $32, %xmm0
    movd        %xmm0, %eax
#else
    movd        %xmm0, %rax
    shrq        $32, %rax
#endif

    andl        $0x7ff00000, %eax       //expose exponent
    addl        $0x00100000, %eax       //add 1 to exponent, promotes NaN/Inf to 2**31
    cmpl        $0x00100000, %eax       //test for exceptional case
    jle         1f                      //jump to special code for zeros, NaNs, Infs and denorms
    sarl        $20, %eax               //move exponent to unit precision
    subl        $1024, %eax             //remove (bias+1)

    //convert the integer exponent to floating point in the return register
#if defined( __i386__ )
    SUBP        $4, STACKP
    movl        %eax, (STACKP)
    fildl       (STACKP)
    ADDP        $4, STACKP
#else
    cvtsi2sd    %eax, %xmm0
#endif
    ret

1:  je          2f                      //handle zeros/denorms at 2

    //Infinity and NaN just return |x|
#if defined( __i386__ )
    fldl        FRAME_SIZE(STACKP)
    fabs
#else
    movapd      %xmm0, %xmm1
    cmpeqpd     %xmm0, %xmm0            //all ones where x == x (not NaN)
    psllq       $63, %xmm0              //sign-bit mask for the non-NaN case
    andnpd      %xmm1, %xmm0            //clear the sign of Inf, leave NaN untouched
#endif
    ret

2:
    //zero / denorm. Both arms below release this adjustment before returning.
    SUBP        $(16 - FRAME_SIZE), STACKP

#if defined( __i386__ )
    //take absolute value of x
    movq        (16-FRAME_SIZE+FRAME_SIZE)(STACKP), %xmm0   //the argument, relative to the adjusted stack pointer
    pcmpeqb     %xmm1, %xmm1
    psllq       $63, %xmm1              //sign-bit mask
    pandn       %xmm0, %xmm1            //|x|
    movq        %xmm1, (STACKP)

    //convert to double (interpreting bits as int -- same as multiplying denormal by 2**(1022+52))
    fildll      (STACKP)                // { x * 2**(1022+52) }
    fstpt       (STACKP)

    //load in the exponent
    xorl        %eax, %eax
    movw        8(STACKP), %ax

    //subtract the bias
    subl        $(1022+52+16383), %eax
    cmp         $(-1022-52-16383), %eax //ZF set when x was zero; consumed by fcmovne below
    movl        %eax, (STACKP)          //mov leaves the flags from the cmp intact

    //deal with the div/0 flag we need to set for the 0 case
    fld1                                // { 1 }
    fldz                                // { 0, 1 }
    fcmovne     %st(1), %st(0)          // { x != 0 ? 1 : 0, 1 }
    //NOTE(review): logbl spells the same divide as "fdivrp %st(0), %st(1)";
    //the operand order differs here -- confirm which form the assembler expects.
    fdivrp      %st(1), %st(0)          // set div/0 if necessary, gives either 1 or Inf

    fildl       (STACKP)                //{ denormal result, 1 or Inf }
    fmulp                               //{ result }

    ADDP        $(16 - FRAME_SIZE), STACKP
    ret

#else
    //Fetch original value.
    movd        %xmm0, %rax

    //Check to see if original value is zero
    xorpd       %xmm1, %xmm1
    cmpneqsd    %xmm0, %xmm1            // x == 0 ? 0 : -1ULL

    //take absolute value
    salq        $1, %rax
    shrq        $1, %rax

    //convert to double (interpreting bits as int -- same as multiplying denormal by 2**(1022+52))
    cvtsi2sd    %rax, %xmm0

    //change test result from -1 to 1.0
    psllq       $54, %xmm1              // x == 0 ? 0 : 0xFFC0000000000000
    psrlq       $2, %xmm1               // x == 0 ? 0 : 0x3ff0000000000000

    //move exponent to units position
    movd        %xmm0, %rax
    shrq        $52, %rax

    //subtract out bias
    subq        $(1022+52+1023), %rax

    //convert to double precision
    cvtsi2sd    %eax, %xmm0

    //divide result by x == 0 ? 0 : 1.0 to get correct zero result and set div/0 if x == 0
    divsd       %xmm1, %xmm0

    ADDP        $(16 - FRAME_SIZE), STACKP
    ret

#endif


//-----------------------------------------------------------------------
// logbl -- unbiased binary exponent of an x87 extended value, returned on
//          the x87 stack.
// C-equivalent (presumed): long double logbl( long double x )
// In:    10-byte value at FRAME_SIZE(STACKP) on entry, which is 32(STACKP)
//        after the stack adjustment made by the first instruction
// Out:   x87 %st(0)
//        Inf returns +Inf, zero returns -Inf and raises divide-by-zero.
// Uses:  %eax, x87 stack, flags, 32-FRAME_SIZE bytes of temporary stack
//-----------------------------------------------------------------------
ENTRY( logbl )
    SUBP        $(32-FRAME_SIZE), STACKP

    //load exponent
    xorl        %eax, %eax
    movw        40(STACKP), %ax         //sign + exponent word, at offset 8 of the argument

    //take absolute value
    andl        $0x7fff, %eax

    //add 1 to exponent, pushes NaN/Inf into 2**15 bit
    addl        $1, %eax

    //check for exceptional conditions
    cmpw        $1, %ax                 //16-bit signed compare, so 0x8000 reads as negative

    //deal with exceptional conditions elsewhere
    jle         1f

    //remove the bias+1
    subl        $(16383+1), %eax

    //convert to long double
    movl        %eax, (STACKP)

    fildl       (STACKP)

    ADDP        $(32-FRAME_SIZE), STACKP
    ret

1:  je          2f                      //deal with zero / denorm elsewhere

    //Inf / NaN
    //load value
    fldt        32(STACKP)              // {x}
    fucomi      %st(0), %st(0)          // isnan(x)
    fld         %st(0)                  // {x, x}
    fabs                                // {|x|, x}
    //NOTE(review): comparing a value with itself leaves ZF set for both the
    //equal and the unordered outcome, so this fcmovne does not appear to ever
    //move and |x| is returned for NaN as well -- confirm whether fcmovu was
    //intended. Left unchanged.
    fcmovne     %st(1), %st(0)          // {result, x}
    fstp        %st(1)                  // {result}
    ADDP        $(32-FRAME_SIZE), STACKP
    ret

2:  //denormal/zero
    fildll      32(STACKP)              // load in 0/denormal mantissa as integer. Fails for pseudodenormals, but they "don't happen".
    fstpt       (STACKP)                // write it back out to stack as floating point
    xorl        %eax, %eax
    movw        8(STACKP), %ax          // read the exponent
    subl        $(16382+63+16383), %eax // subtract out the bias
    cmp         $(-16382+-63+-16383), %eax  //check for zero; ZF is consumed by fcmovne below

    //convert the exponent to float
    movl        %eax, (STACKP)          //mov leaves the flags from the cmp intact
    fildl       (STACKP)                // { exp }

    //set div/0 for zero result
    fld1                                // { 1, exp }
    fldz                                // { 0, 1, exp }
    fcmovne     %st(1), %st(0)          // { x != 0 ? 1 : 0, 1, exp }
    fdivrp      %st(0), %st(1)          // { 1 or Inf, exp }  set div/0 if necessary, gives either 1 or Inf
    fmulp                               // { result }

    ADDP        $(32-FRAME_SIZE), STACKP
    ret