this repo has no description
at fixPythonPipStalling 100 lines 2.2 kB view raw
/* double round( double )
 *
 *	Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 *	tuned for increased performance on in-order machines (but faster on
 *	out-of-order machines as well).
 *
 *	Copyright 2009, Apple Inc.
 */

/*
 * Syntax:  AT&T operand order, passed through the C preprocessor.
 * Symbol:  _round (leading underscore, i.e. C name "round").
 *
 * Semantics implemented below: round to the nearest integral value, with
 * halfway cases rounded away from zero.  Inputs whose magnitude is at
 * least 2^52, infinities and NaNs are returned unchanged.  The sign of the
 * input is carried onto the result, so small negative inputs give -0.0.
 *
 * i386:    In:    x at 4(%esp) (8 bytes)
 *          Out:   %st(0)
 *          Clobb: %ecx, %edx (non-SSE3 build only), flags, and the
 *                 argument slot at 4(%esp), which is reused as scratch.
 *
 * x86_64:  In:    %xmm0
 *          Out:   %xmm0
 *          Clobb: %rax, %rcx, %xmm1, %xmm2, %xmm3, flags
 *
 * 0x43300000 is the high 32 bits of the double 2^52 (0x4330000000000000);
 * every finite double of at least that magnitude is already an integer.
 */

#ifdef __i386__

/*
 * TRUNCATE replaces %st(0) with trunc(%st(0)), using the argument slot at
 * 4(%esp) as scratch memory.
 *
 * SSE3 build:     fisttpll stores a truncated 64-bit integer and pops;
 *                 fildll pushes it back as a floating-point value.
 * non-SSE3 build: saves the x87 control word in %dx, sets both
 *                 rounding-control bits (0xc00 = round toward zero), runs
 *                 frndint, then restores the saved control word.
 *
 * No comments are placed inside the macro bodies because they are joined
 * into a single logical line by the backslash continuations.
 */
#ifdef __SSE3__
#define TRUNCATE \
	fisttpll	4(%esp); \
	fildll		4(%esp)
#else
#define TRUNCATE \
	fnstcw		4(%esp); \
	movw		4(%esp),	%dx; \
	orw		$0xc00,		4(%esp); \
	fldcw		4(%esp); \
	frndint; \
	movw		%dx,		4(%esp); \
	fldcw		4(%esp)
#endif

.text
.align 4
.globl _round
_round:
	movl		8(%esp),	%ecx		// high word of x (sign, exponent, top of mantissa)
	cmpl		$0x43300000,	%ecx
	fldl		4(%esp)				// { x }   (fldl leaves the cmpl flags intact)
	jae		2f				// unsigned: taken for x >= 2^52, inf, NaN and every negative x

	// Fast path: 0 <= x < 2^52.
	fld		%st(0)				// { x, x }
	TRUNCATE					// { trunc(x), x }
	fxch						// { x, trunc(x) }
	fsub		%st(1),		%st(0)		// { frac(x), trunc(x) }
	fadd		%st(0),		%st(0)		// { 2*frac(x), trunc(x) }
	fld1						// { 1.0, 2*frac(x), trunc(x) }
	fucomi		%st(1),		%st(0)		// flags <- compare 1.0 with 2*frac(x)
	fstp		%st(1)				// { 1.0, trunc(x) }
	ja		1f				// 1.0 > 2*frac(x), i.e. frac(x) < 0.5: keep trunc(x)
	fadd		%st(0),		%st(1)		// { 1.0, trunc(x) + 1.0 }
1:	fstp		%st(0)				// { round(x) }
	ret

	// Slow path: x is negative, or |x| >= 2^52, or x is inf/NaN.
2:	andl		$0x7fffffff,	%ecx		// drop the sign bit: high word of |x|
	cmpl		$0x43300000,	%ecx
	jae		4f				// |x| >= 2^52, inf or NaN: return x as loaded
							// (bit 31 is clear here, so unsigned and signed compares agree)

	// Negative x with |x| < 2^52: round |x| as above, then restore the sign.
	fabs						// { |x| }
	fld		%st(0)				// { |x|, |x| }
	TRUNCATE					// { trunc|x|, |x| }
	fxch						// { |x|, trunc|x| }
	fsub		%st(1),		%st(0)		// { frac|x|, trunc|x| }
	fadd		%st(0),		%st(0)		// { 2*frac|x|, trunc|x| }
	fld1						// { 1.0, 2*frac|x|, trunc|x| }
	fucomi		%st(1),		%st(0)		// flags <- compare 1.0 with 2*frac|x|
	fstp		%st(1)				// { 1.0, trunc|x| }
	ja		3f				// frac|x| < 0.5: keep trunc|x|
	fadd		%st(0),		%st(1)		// { 1.0, trunc|x| + 1.0 }
3:	fstp		%st(0)				// { round|x| }
	fchs						// { round(x) }; a zero result becomes -0.0
4:	ret

#else //x86_64

/*
 * NOTE(review): "andnpd one(%rip)" below is a 16-byte memory access (it
 * reads both "one" and "absmask"; only the low lane is used) and so needs
 * "one" to be 16-byte aligned.  That relies on ".align 4" meaning 2^4
 * bytes, as it does for Mach-O assemblers -- confirm before porting to an
 * assembler where .align takes a byte count.
 */
.const
.align 4
one:	.quad	0x3ff0000000000000			// 1.0
absmask:.quad	0x7fffffffffffffff			// every bit except the sign bit
half:	.quad	0x3fe0000000000000			// 0.5
thresh:	.quad	0x4330000000000000			// 2^52

.text
.align 4
.globl _round
_round:
	movd		%xmm0,		%rcx		// raw bits of x
	andq		absmask(%rip),	%rcx		// raw bits of |x|
	movsd		absmask(%rip),	%xmm2		// low lane = absmask, high lane = 0
	cmpq		thresh(%rip),	%rcx
	jae		1f				// |x| >= 2^52, inf or NaN: return x unchanged

	cvttsd2si	%xmm0,		%rax		// (int64) trunc(x); fits because |x| < 2^52
	andnpd		%xmm0,		%xmm2		// signbit(x)  (high lane becomes a copy of xmm0 high lane)
	movsd		half(%rip),	%xmm3		// 0.5
	cvtsi2sd	%rax,		%xmm1		// trunc(x)
	subsd		%xmm1,		%xmm0		// frac(x), same sign as x
	orpd		%xmm2,		%xmm1		// trunc(x) carrying the sign of x (keeps -0.0)
	xorpd		%xmm2,		%xmm0		// |frac(x)|  (high lane cancels to 0, so the packed compare is benign there)
	cmpltpd		%xmm3,		%xmm0		// (|frac(x)| < 0.5) ? all-ones : 0
	andnpd		one(%rip),	%xmm0		// (|frac(x)| < 0.5) ? 0.0 : 1.0
	orpd		%xmm2,		%xmm0		// copysign of that adjustment from x
	addsd		%xmm1,		%xmm0		// round(x)
1:	ret

#endif