this repo has no description
1/* double round( double )
2 *
3 * Reimplemented by Steve Canon, based on Ian Ollmann's implementations
4 * tuned for increased performance on in-order machines (but faster on
5 * out-of-order machines as well).
6 *
7 * Copyright 2009, Apple Inc.
8 */
9
10#ifdef __i386__
11
/*
 * TRUNCATE -- replace %st(0) with its value rounded toward zero.
 *
 * x87 stack:  { v, ... }  ->  { trunc(v), ... }
 * Scratch:    memory at 4(%esp) (the incoming argument slot, which the
 *             caller in this file has already loaded from).
 *
 * Without SSE3: the x87 control word is switched to round-toward-zero
 * around an frndint and then put back.  Additionally clobbers %dx.
 *
 * With SSE3: fisttpll converts with truncation regardless of the current
 * rounding mode, so the control word is never touched.  The value makes a
 * round trip through a 64-bit integer, so the sign of a zero result is not
 * kept; the code in this file only applies TRUNCATE to non-negative values.
 */
#ifndef __SSE3__
#define TRUNCATE \
        fnstcw      4(%esp);            /* spill the current control word     */ \
        movw        4(%esp), %dx;       /* remember it for the restore below  */ \
        orw         $0xc00, 4(%esp);    /* rounding control = 11b (truncate)  */ \
        fldcw       4(%esp);            \
        frndint;                        /* st(0) = trunc(st(0))               */ \
        movw        %dx, 4(%esp);       \
        fldcw       4(%esp)             /* reinstate the saved control word   */
#else
#define TRUNCATE \
        fisttpll    4(%esp);            /* store trunc(st(0)) as int64, pop   */ \
        fildll      4(%esp)             /* push the integer back as a float   */
#endif
26
/*
 * double round( double x )                                    [i386, x87]
 *
 * Rounds x to the nearest integral value; halfway cases go away from zero.
 *
 * In:     x on the stack, low word at 4(%esp), high word at 8(%esp).
 * Out:    result in %st(0).
 * Clobb:  %ecx, EFLAGS, x87 registers, the argument slot at 4(%esp)
 *         (used as scratch by TRUNCATE), plus anything else the selected
 *         TRUNCATE variant clobbers.
 *
 * Strategy: classify x from the high word of its bit pattern alone.
 *   - sign clear and below 2^52  -> round the value directly;
 *   - |x| >= 2^52, inf or NaN    -> already integral (or NaN): return x;
 *   - otherwise (negative)       -> round |x| and negate, which also
 *                                   produces -0.0 for x in (-0.5, -0.0].
 */

/*
 * ROUND_NONNEG -- round the value v in %st(0), 0 <= v < 2^52, to the nearest
 * integer with ties rounded up.
 *
 * x87 stack:  { v }  ->  { round(v) }
 *
 * 2*frac(v) is compared against 1.0 (rather than frac(v) against 0.5) so
 * that the 1.0 already on the stack doubles as the increment.  fstp does not
 * modify EFLAGS, so the fucomi result is still valid at the ja.
 */
#define ROUND_NONNEG \
        fld         %st(0);             /* { v, v }                        */ \
        TRUNCATE;                       /* { trunc(v), v }                 */ \
        fxch;                           /* { v, trunc(v) }                 */ \
        fsub        %st(1), %st(0);     /* { frac(v), trunc(v) }           */ \
        fadd        %st(0), %st(0);     /* { 2*frac(v), trunc(v) }         */ \
        fld1;                           /* { 1.0, 2*frac(v), trunc(v) }    */ \
        fucomi      %st(1), %st(0);     /* flags <- compare 1.0, 2*frac(v) */ \
        fstp        %st(1);             /* { 1.0, trunc(v) }               */ \
        ja          9f;                 /* frac(v) < 0.5: keep trunc(v)    */ \
        fadd        %st(0), %st(1);     /* { 1.0, trunc(v) + 1.0 }         */ \
9:      fstp        %st(0)              /* { round(v) }                    */

.text
.align 4
.globl _round
_round:
        movl        8(%esp), %ecx       // ecx = high word of x (sign, exponent, top mantissa bits)
        cmpl        $0x43300000, %ecx   // 0x43300000 = high word of 2^52
        fldl        4(%esp)             // { x }   (does not disturb EFLAGS)
        jae         Lround_neg_or_big   // unsigned: taken when the sign bit is set or x >= 2^52

        ROUND_NONNEG                    // 0 <= x < 2^52
        ret

Lround_neg_or_big:
        andl        $0x7fffffff, %ecx   // drop the sign: ecx = high word of |x|
        cmpl        $0x43300000, %ecx
        jae         Lround_done         // |x| >= 2^52, inf or NaN: return x as loaded

        fabs                            // { |x| }
        ROUND_NONNEG                    // { round(|x|) }
        fchs                            // { -round(|x|) }
Lround_done:
        ret
67
68#else //x86_64
69
/*
 * double round( double x )                                  [x86_64, SSE2]
 *
 * Rounds x to the nearest integral value; halfway cases go away from zero.
 *
 * In:     x in %xmm0.
 * Out:    result in the low lane of %xmm0.
 * Clobb:  %rax, %rcx, %xmm1, %xmm2, %xmm3, EFLAGS.
 *
 * Branch-free once the magnitude check has passed: the sign bit of x is
 * split off, |frac(x)| is compared against 0.5 to build either 0.0 or 1.0,
 * the sign is OR-ed back onto both that increment and trunc(x) (so a zero
 * result keeps the sign of x), and the two are added.
 */

.const
.align 4
// NOTE(review): `one` is also used as a packed (16-byte) memory operand by
// andnpd below, which needs 16-byte alignment.  That relies on `.align 4`
// meaning 2^4 here and on `one` staying first in this table -- confirm
// before reordering or retargeting.
one:        .quad   0x3ff0000000000000      // 1.0
absmask:    .quad   0x7fffffffffffffff      // everything but the sign bit
half:       .quad   0x3fe0000000000000      // 0.5
thresh:     .quad   0x4330000000000000      // 2^52

.text
.align 4
.globl _round
_round:
        movsd       absmask(%rip), %xmm2    // xmm2 = abs mask (upper lane zero)
        movd        %xmm0, %rcx             // rcx  = raw bits of x
        andq        absmask(%rip), %rcx     // rcx  = raw bits of |x|
        cmpq        thresh(%rip), %rcx
        jae         Lround_return           // |x| >= 2^52, inf or NaN: x is returned untouched

        cvttsd2si   %xmm0, %rax             // rax  = x converted with truncation
        andnpd      %xmm0, %xmm2            // xmm2 = x & ~absmask = sign bit of x
        cvtsi2sd    %rax, %xmm1             // xmm1 = trunc(x); a zero here is always +0.0
        movsd       half(%rip), %xmm3       // xmm3 = 0.5
        subsd       %xmm1, %xmm0            // xmm0 = frac(x), signed like x
        xorpd       %xmm2, %xmm0            // xmm0 = |frac(x)| (-0.0 for a negative integer x,
                                            //        which still compares below 0.5)
        orpd        %xmm2, %xmm1            // xmm1 = trunc(x) carrying the sign of x
        cmpltpd     %xmm3, %xmm0            // xmm0 = (|frac(x)| < 0.5) ? all-ones : 0
        andnpd      one(%rip), %xmm0        // xmm0 = (|frac(x)| < 0.5) ? 0.0 : 1.0
        orpd        %xmm2, %xmm0            // give the increment the sign of x
        addsd       %xmm1, %xmm0            // round(x) = trunc(x) + signed increment
Lround_return:
        ret
99
100#endif