this repo has no description
at fixPythonPipStalling 103 lines 5.1 kB view raw
/*
 *	Written by Ian Ollmann.
 *	Copyright 2005 Apple Computer Inc.
 */

#include <machine/asm.h>

#include "abi.h"

// Float/double -> integer conversion entry points: lrint, llrint, lrintf, llrintf.
//
// Every routine below follows the same pattern:
//	1) convert x with a hardware convert instruction (cvtsd2si / cvtss2si / fistpll),
//	2) build an all-ones / all-zeros mask that is set only for positive overflow,
//	3) XOR the mask into the converted value, so that the overflow result
//	   (0x80000000... as noted in the comments below) becomes 0x7fffffff...
//
// ENTRY, STACKP, SUBP, ADDP and FIRST_ARG_OFFSET are macros that come from the
// included headers; their definitions are not visible in this file.
// Presumably STACKP is the stack pointer register and SUBP/ADDP are pointer-sized
// sub/add -- verify against abi.h.

//i386 versions of these functions are in xmm_floor.c
//NOTE(review): the #else branch below also defines lrintf/lrint/llrintf/llrint for the
//non-__LP64__ build, so the line above may be stale -- confirm which copy is actually built.
//On x86_64 we can take advantage of the REX form of cvtsd2si to produce 64-bit values
#if defined( __LP64__ )

// lrint / llrint, 64-bit build.
// Two ENTRY lines with no code between them: both names lead into the same
// instruction sequence (assuming ENTRY only emits a symbol -- see <machine/asm.h>).
// In:  x in %xmm0 (double).   Out: integer result in %rax.
// Scratch: %xmm1, %rdx.
ENTRY( lrint )
ENTRY( llrint )
	movl		$0x43e00000, %eax			//Exponent for 0x1.0p63
	movd		%eax, %xmm1					//copy to low 32-bits of xmm1
	psllq		$32, %xmm1					//move it to the high 32-bits of the low double in xmm1, to make 0x1.0p63
	cmplesd		%xmm0, %xmm1				//compare 0x1.0p63 <= x; xmm1 becomes the mask. Since there are no double precision values between LONG_MAX and 0x1.0p63 we don't need to worry about them
	cvtsd2siq	%xmm0, %rax					//convert x to long
	movd		%xmm1, %rdx					//copy compare result (all 64-bits) to %rdx
	xorq		%rdx, %rax					//flip overflow values to 0x7fffffffffffffff
	ret

// lrintf / llrintf, 64-bit build (both names share the body, as above).
// In:  x in %xmm0 (float).   Out: integer result in %rax.
// Scratch: %xmm1, %rdx.
// Note the register roles are swapped relative to lrint: the converted value
// goes to %rdx and the mask is built in %rax.
ENTRY( lrintf )
ENTRY( llrintf )
	movl		$0x5f000000, %eax			//load 0x1.0p63f
	movd		%eax, %xmm1					//copy to xmm
	cmpless		%xmm0, %xmm1				//compare 0x1.0p63f <= x; the single precision compare writes a 32-bit mask
	cvtss2siq	%xmm0, %rdx					//convert x to long (value lands in %rdx)
	movd		%xmm1, %rax					//copy the low 64 bits of xmm1 (32-bit comparison result in the low half) to %rax
	cdqe									//sign extend the 32-bit mask in %eax to the full 64 bits of %rax
	xorq		%rdx, %rax					//merge value and mask: flip overflow results to 0x7fffffffffffffff
	ret

#else

// lrintf, 32-bit build.
// In:  x is read from the stack at (FIRST_ARG_OFFSET)( STACKP ) (float).
// Out: integer result in %eax.   Scratch: %xmm0, %xmm1, %edx.
ENTRY( lrintf )
	movl	$0x4f000000, %eax							//load 0x1.0p31f
	movss	(FIRST_ARG_OFFSET)( STACKP ), %xmm0			//load x
	movd	%eax, %xmm1									//copy 0x1.0p31f to xmm1
	cmpless	%xmm0, %xmm1								//compare 0x1.0p31f <= x. There are no single precision values between INT_MAX and 0x1.0p31f, so no need to worry here.
	cvtss2si %xmm0, %eax								//convert to int
	movd	%xmm1, %edx									//move the compare result to edx
	xorl	%edx, %eax									//saturate overflow results to 0x7fffffff
	ret

// lrint, 32-bit build.
// In:  x is read from the stack at (FIRST_ARG_OFFSET)( STACKP ) (double).
// Out: integer result in %eax.   Scratch: %xmm0, %xmm1, %ecx, %edx.
// Unlike lrintf above, this does not compare x against a 2**31 threshold. The mask
// is "x is positive", and it is kept only when the conversion actually produced
// 0x80000000. Presumably this is because doubles, unlike floats, can lie between
// INT_MAX and 0x1.0p31 -- TODO confirm the original intent.
ENTRY( lrint )
	movsd	(FIRST_ARG_OFFSET)( STACKP ), %xmm0			// load x
	xorpd	%xmm1, %xmm1								// load 0.0 (double precision zero)
	cmpltsd	%xmm0, %xmm1								// test 0.0 < x; xmm1 becomes the mask
	cvtsd2si %xmm0, %eax								// convert x to int
	movd	%xmm1, %edx									// copy the compare result to %edx
	xorl	%ecx, %ecx									// set %ecx to 0
	cmp		$0x80000000, %eax							// check the result to see if it is 0x80000000 -- the overflow result
	cmovne	%ecx, %edx									// if the result is not 0x80000000, overwrite the earlier compare result with 0
	xorl	%edx, %eax									// saturate overflow results to 0x7fffffff (was 0x80000000)
	ret

// llrintf, 32-bit build (x87 path).
// In:  x is read from the stack (float).
// Out: 64-bit result, low half in %eax and high half in %edx.
// Local frame after SUBP $12:
//	0(STACKP)..7(STACKP)	64-bit integer written by fistpll
//	8(STACKP)				the float constant 0x1.0p63f
// The argument is addressed at FIRST_ARG_OFFSET+12 to account for the 12 bytes
// reserved by SUBP.
// The flags written by fucomi are consumed by setnb only after fistpll and fstp
// have run, so nothing that writes the flags may be placed between them.
ENTRY( llrintf )
	SUBP	$12, STACKP
	movl	$0x5f000000, 8(STACKP)						//0x1.0p63f
	xor		%edx, %edx									// clear %edx so that setnb %dl below yields exactly 0 or 1

	flds	8(STACKP)									//{0x1.0p63 }
	flds	(FIRST_ARG_OFFSET+12)( STACKP )				//{f, 0x1.0p63}
	fucomi	%ST(1), %ST									//{f, 0x1.0p63}		f>=0x1.0p63
	fistpll	(STACKP)									//{0x1.0p63}		store f as a 64-bit integer and pop it
	fstp	%ST(0)										//{}				pop the constant, leaving the x87 stack as it was on entry

	setnb	%dl											// copy f >= 0x1.0p63 to the d register
	negl	%edx										// convert [0,1] to [0,-1]
	movl	(STACKP), %eax								// load low 32-bits of the result
	xorl	%edx, %eax									// saturate to 0xffffffff if overflow
	xorl	4(STACKP), %edx								// load the high 32-bits of the result and saturate to 0x7fffffff if overflow

	ADDP	$12, STACKP
	ret

// llrint, 32-bit build (x87 path).
// Same sequence as llrintf above, except that the argument is loaded as a
// double (fldl) instead of a float (flds).
// In:  x is read from the stack (double).
// Out: 64-bit result, low half in %eax and high half in %edx.
// Local frame after SUBP $12:
//	0(STACKP)..7(STACKP)	64-bit integer written by fistpll
//	8(STACKP)				the float constant 0x1.0p63f
ENTRY( llrint )
	SUBP	$12, STACKP
	movl	$0x5f000000, 8(STACKP)						//0x1.0p63f (the threshold is still loaded as a float)
	xor		%edx, %edx									// clear %edx so that setnb %dl below yields exactly 0 or 1

	flds	8(STACKP)									//{0x1.0p63 }
	fldl	(FIRST_ARG_OFFSET+12)( STACKP )				//{f, 0x1.0p63}
	fucomi	%ST(1), %ST									//{f, 0x1.0p63}		f>=0x1.0p63
	fistpll	(STACKP)									//{0x1.0p63}		store f as a 64-bit integer and pop it
	fstp	%ST(0)										//{}				pop the constant, leaving the x87 stack as it was on entry

	setnb	%dl											// copy f >= 0x1.0p63 to the d register
	negl	%edx										// convert [0,1] to [0,-1]
	movl	(STACKP), %eax								// load low 32-bits of the result
	xorl	%edx, %eax									// saturate to 0xffffffff if overflow
	xorl	4(STACKP), %edx								// load the high 32-bits of the result and saturate to 0x7fffffff if overflow

	ADDP	$12, STACKP
	ret

#endif