this repo has no description
at fixPythonPipStalling 120 lines 2.5 kB view raw
/*
 *  double ceil(double x)
 *
 *  Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 *  tuned for increased performance on in-order machines (but faster on
 *  out-of-order machines as well).
 *
 *  Copyright 2009, Apple Inc.
 *
 *  Syntax: AT&T (source, destination), run through the C preprocessor.
 *
 *  Three variants of _ceil are provided, selected at preprocessing time:
 *    - i386 with SSE3    : x87 code, uses fisttp to truncate toward zero.
 *    - i386 without SSE3 : x87 code, uses fistp plus a compare-and-adjust.
 *    - otherwise (x86_64): scalar SSE2 code.
 *
 *  i386 variants
 *    In:    x is read from 4(%esp); its high 32 bits are at 8(%esp).
 *    Out:   result is left in x87 st(0).
 *    Clobb: %ecx, flags, and the 8-byte argument slot at 4(%esp), which is
 *           reused as scratch storage for the 64-bit integer conversion.
 *
 *  x86_64 variant
 *    In:    x in %xmm0.
 *    Out:   result in %xmm0.
 *    Clobb: %rax, %rcx, %xmm1, %xmm2, flags.
 *
 *  In every variant, inputs with |x| >= 0x1.0p52 and NaNs are returned
 *  unchanged, and a zero result takes the sign of x.
 *
 *  The constant 0x43300000 used by the i386 code is the high 32 bits of the
 *  double 0x1.0p52 (0x4330000000000000, the value named "thresh" in the
 *  x86_64 code).
 */

#ifdef __i386__

#ifdef __SSE3__

.text
.align 4
.globl _ceil
_ceil:
        mov         8(%esp),        %ecx    // ecx = high 32 bits of x
        fldl        4(%esp)                 // st: x
        cmp         $0x43300000,    %ecx    // unsigned compare against high word of 0x1.0p52
        jae         2f                      // x is negative, x >= 0x1.0p52, or x is NaN

        // Here 0 <= x < 0x1.0p52.
        fld         %st(0)                  // st: x, x
        fistpll     4(%esp)                 // st: x        scratch = x rounded to a 64-bit integer
        fildll      4(%esp)                 // st: r, x     r = rint(x)
        fucomi      %st(1),         %st(0)  // flags = compare(r, x)
        fstp        %st(1)                  // st: r        (fstp leaves the flags alone)
        jae         1f                      // if rint(x) >= x, return rint(x)
        fld1                                // st: 1.0, r
        faddp       %st(0),         %st(1)  // st: r + 1.0  otherwise, return rint(x) + 1.0
1:      ret

2:      and         $0x7fffffff,    %ecx    // clear the sign bit: high word of |x|
        cmp         $0x43300000,    %ecx    // if |x| >= 0x1.0p52 or x is NaN
        jae         3f                      //   return x, which is already in st(0)

        // Here x is negative (possibly -0.0) and |x| < 0x1.0p52.
        fisttpll    4(%esp)                 // st: (empty)  scratch = x truncated toward zero
        fldz                                // st: 0.0
        fildll      4(%esp)                 // st: t, 0.0   ceil(x), up to sign of zero
        fucomi      %st(1),         %st(0)  // if result == 0
        fstp        %st(1)                  // st: t
        jne         3f                      // nonzero result is already correct
        fchs                                // patch up sign bit: +0.0 -> -0.0
3:      ret

#else // i386, no SSE3

.text
.align 4
.globl _ceil
_ceil:
        mov         8(%esp),        %ecx    // ecx = high 32 bits of x
        fldl        4(%esp)                 // st: x
        cmp         $0x43300000,    %ecx    // unsigned compare against high word of 0x1.0p52
        jae         2f                      // x is negative, x >= 0x1.0p52, or x is NaN

        // Here 0 <= x < 0x1.0p52.
        fld         %st(0)                  // st: x, x
        fistpll     4(%esp)                 // st: x        scratch = x rounded to a 64-bit integer
        fildll      4(%esp)                 // st: r, x     r = rint(x)
        fucomi      %st(1),         %st(0)  // flags = compare(r, x)
        fstp        %st(1)                  // st: r
        jae         1f                      // if rint(x) >= x, return rint(x)
        fld1                                // st: 1.0, r
        faddp       %st(0),         %st(1)  // st: r + 1.0  otherwise, return rint(x) + 1.0
1:      ret

2:      and         $0x7fffffff,    %ecx    // clear the sign bit: high word of |x|
        cmp         $0x43300000,    %ecx    // if |x| >= 0x1.0p52 or x is NaN
        fld         %st(0)                  // st: x, x     (fld leaves the flags alone)
        jae         4f                      //   drop the copy and return x

        // Here x is negative (possibly -0.0) and |x| < 0x1.0p52.
        fistpll     4(%esp)                 // st: x        scratch = x rounded to a 64-bit integer
        fldz                                // st: 0.0, x
        fildll      4(%esp)                 // st: r, 0.0, x    r = rint(x)
        fucomi      %st(2),         %st(0)  // flags = compare(r, x)
        fstp        %st(2)                  // st: 0.0, r
        jae         3f                      // if rint(x) < x, add one
        fld1                                // st: 1.0, 0.0, r
        faddp       %st(0),         %st(2)  // st: 0.0, r + 1.0
3:      fucomi      %st(1),         %st(0)  // flags = compare(0.0, c) where c is the candidate result
        jne         4f                      // if ceil(x) == 0, patch up sign bit
        fchs                                // st: -0.0, c
        fstp        %st(1)                  // st: -0.0
        ret

4:      fstp        %st(0)                  // pop the scratch value, leaving the result in st(0)
        ret


#endif // SSE3

#else // x86_64

.const
.align 4
one:     .quad 0x3ff0000000000000           // 1.0
absmask: .quad 0x7fffffffffffffff           // every bit except the sign bit
thresh:  .quad 0x4330000000000000           // 0x1.0p52

.text
.align 4
.globl _ceil
_ceil:
        movd        %xmm0,          %rcx    // rcx = bit pattern of x
        andq        absmask(%rip),  %rcx    // |x|
        cmpq        thresh(%rip),   %rcx    // if |x| >= 0x1.0p52 or isnan(x)
        movsd       absmask(%rip),  %xmm2   // (loaded before the branch; movsd leaves the flags alone)
        jae         1f                      // early out, returning x.

        cvttsd2si   %xmm0,          %rax    // rax = x truncated toward zero
        andnpd      %xmm0,          %xmm2   // signbit(x)
        cvtsi2sd    %rax,           %xmm1   // trunc(x), except for sign of zero
        cmplesd     %xmm1,          %xmm0   // all-ones mask if x <= trunc(x), else zero
        orpd        %xmm2,          %xmm1   // trunc(x)
        // NOTE(review): the next instruction takes a 16-byte memory operand, so
        // it reads both "one" and "absmask" and presumably relies on the
        // .align above giving 16-byte alignment -- confirm for this assembler.
        andnpd      one(%rip),      %xmm0   // x <= trunc(x) ? 0.0 : 1.0
        orpd        %xmm2,          %xmm0   // x <= trunc(x) ? copysign(0.0, x) : 1.0
        addsd       %xmm1,          %xmm0   // ceil(x)
1:      ret

#endif