/* Source-viewer page residue (not part of the program): this repo has no description;
 * at fixPythonPipStalling, 104 lines, 2.7 kB, view raw. */
/*
 *  double floor( double )
 *
 *  Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 *  tuned for increased performance on in-order machines (but faster on
 *  out-of-order machines as well).
 *
 *  Copyright 2009, Apple Inc.
 *
 *  Syntax:  AT&T (src, dst), passed through the C preprocessor.
 *
 *  Three variants of the same entry point are selected at build time:
 *
 *    __i386__ &&  __SSE3__   x87 code with a truncating-store (fisttp) fast
 *                            path for non-negative x.
 *    __i386__ && !__SSE3__   x87 code only.
 *    otherwise               scalar SSE2 code (the original labels this
 *                            branch "x86_64").
 *
 *  i386 variants
 *    In:      x in the 8 bytes at 4(%esp)  (low word 4(%esp), high 8(%esp))
 *    Out:     floor(x) in %st(0)
 *    Clobber: %ecx, EFLAGS, x87 condition codes.  The argument slot at
 *             4(%esp) is overwritten and used as scratch for the
 *             double <-> int64 round trip.
 *
 *  non-i386 variant
 *    In:      x in %xmm0
 *    Out:     floor(x) in %xmm0
 *    Clobber: %rax, %rcx, %xmm1, EFLAGS
 *
 *  All variants return x itself for NaN, +-0 and any |x| too large to have
 *  a fractional part, so the sign of zero and NaN payloads pass through.
 */

#ifdef __i386__

#ifdef __SSE3__

.text
.align 4
.globl _floor
_floor:
    mov         8(%esp),        %ecx    // ecx = high 32 bits of x
    fldl        4(%esp)                 // st(0) = x
    cmp         $0x43300000,    %ecx    // 0x43300000 is the high word of 0x1.0p52:
    jae         1f                      // if x is negative, x >= 0x1.0p52 or isnan(x), goto 1
                                        // (unsigned compare, so a set sign bit also lands here)

    fisttpll    4(%esp)                 // fast path if +0 <= x < 0x1.0p52:
    fildll      4(%esp)                 // truncating store + reload, return trunc(x)
    ret                                 // (trunc == floor because x is non-negative)

1:  and         $0x7fffffff,    %ecx    // ecx = high word of |x|
    sub         $1,             %ecx    // high word 0 wraps to 0xffffffff
    cmp         $0x43300000,    %ecx    // if the high word of |x| is above that of 0x1.0p52,
    fld         %st(0)                  // or isnan(x), or the high word of |x| is zero,
    jae         4f                      // goto 4.  fld leaves EFLAGS untouched, so the
                                        // cmp result is still live at the jae and at 4:

2:  fistpll     4(%esp)                 // store x as int64 using the current x87 rounding mode
    fildll      4(%esp)                 // st(0) = rint(x), st(1) = x
    fucomi      %st(1),         %st(0)  // compare rint(x) with x
    fstp        %st(1)                  // drop x, keep rint(x); EFLAGS are preserved
    jbe         3f                      // if rint(x) <= x, return rint(x)
    fld1
    fsubrp      %st(0),         %st(1)  // otherwise, return rint(x) - 1.0
3:  ret

4:  jg          5f                      // signed compare: taken for huge |x| or NaN, not for
                                        // the wrapped 0xffffffff (-1) case
    testl       $0xffffffff,    4(%esp) // high word of |x| is zero: if the low word is
    jnz         2b                      // non-zero, x is a tiny negative value; go back
                                        // through the main path, returning -1.0
5:  fstp        %st(1)                  // otherwise, pop the extra copy of x, and
    ret                                 // return x.

#else // i386, no SSE3

.text
.align 4
.globl _floor
_floor:
    mov         8(%esp),        %ecx
    and         $0x7fffffff,    %ecx    // ecx = high word of |x|
    fldl        4(%esp)                 // st(0) = x
    sub         $1,             %ecx    // high word 0 wraps to 0xffffffff
    cmp         $0x432fffff,    %ecx    // if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
    fld         %st(0)                  // or x is a denormal whose high 32 bits are
    jae         2f                      // all zero, goto 2.  (fld leaves EFLAGS untouched.)

0:  fistpll     4(%esp)                 // store x as int64 using the current x87 rounding mode
    fildll      4(%esp)                 // st(0) = rint(x), st(1) = x
    fucomi      %st(1),         %st(0)  // compare rint(x) with x
    fstp        %st(1)                  // drop x, keep rint(x); EFLAGS are preserved
    jbe         1f                      // if rint(x) <= x, return rint(x)
    fld1
    fsubrp      %st(0),         %st(1)  // otherwise, return rint(x) - 1.0
1:  ret

2:  jg          3f                      // if |x| >= 0x1.0p52 or isnan(x), goto 3
    testl       $0xffffffff,    4(%esp) // high word of |x| is zero: if the low word is
    jnz         0b                      // non-zero (denorm, not zero), jump back to the mainline
3:  fstp        %st(1)                  // pop the extra copy of x off the stack
    ret                                 // and return x

#endif // SSE3

#else // x86_64

// NOTE(review): andpd below reads 16 bytes starting at mone and requires that
// address to be 16-byte aligned.  This relies on ".align 4" meaning 2^4 bytes
// (the Mach-O assembler convention) and on mone staying first in this group.
.const
.align 4
mone:   .quad   0xbff0000000000000      // -1.0
absmask:.quad   0x7fffffffffffffff      // clears the sign bit
thresh: .quad   0x432fffffffffffff      // bits of 0x1.0p52, minus one

.text
.align 4
.globl _floor
_floor:
    movd        %xmm0,          %rcx    // rcx = raw bits of x
    andq        absmask(%rip),  %rcx    // |x|
    subq        $1,             %rcx    // zero wraps to all-ones
    cmpq        thresh(%rip),   %rcx    // if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
    jae         1f                      // early out, returning x.

    cvttsd2si   %xmm0,          %rax
    cvtsi2sd    %rax,           %xmm1   // trunc(x)
    cmpltsd     %xmm1,          %xmm0   // all-ones mask if x < trunc(x), else zero
    andpd       mone(%rip),     %xmm0   // x < trunc(x) ? -1.0 : 0.0
    addsd       %xmm1,          %xmm0   // floor(x)
1:  ret

#endif