/* double floor( double )
 *
 * Reimplemented by Steve Canon, based on Ian Ollmann's implementations
 * tuned for increased performance on in-order machines (but faster on
 * out-of-order machines as well).
 *
 * Copyright 2009, Apple Inc.
 */

#ifdef __i386__

#ifdef __SSE3__

// double floor(double x) -- i386 variant built when SSE3 is available
// (the fast path uses fisttp, which truncates regardless of rounding mode).
//
// In:       x on the stack; low word at 4(%esp), high word at 8(%esp).
// Out:      floor(x) left in st(0).
// Clobbers: %ecx, EFLAGS.  The argument slot at 4(%esp) is reused as an
//           8-byte scratch area for the integer round trip.
//
// Dispatch is done on the high 32 bits of x only:
//   +0 <= x < 0x1.0p52          fast path, trunc(x) == floor(x)
//   other finite non-integers   rint(x), minus 1.0 if that rounded upward
//   +-0, large |x|, inf, NaN    x is returned unchanged
.text
.align 4
.globl _floor
_floor:
    mov     8(%esp),        %ecx    // high word of x (sign, exponent, top mantissa bits)
    fldl    4(%esp)                 // st(0) = x
    cmp     $0x43300000,    %ecx    // unsigned: true if x is negative (sign bit set)
    jae     1f                      // or x >= 0x1.0p52 (incl. +inf, NaN); goto 1

    fisttpll 4(%esp)                // fast path if +0 <= x < 0x1.0p52
    fildll  4(%esp)                 // return trunc(x)
    ret

1:  and     $0x7fffffff,    %ecx    // high word of |x|
    sub     $1,             %ecx    // a zero high word wraps to 0xffffffff
    cmp     $0x43300000,    %ecx    // if the high word of |x| is zero, or is above
    fld     %st(0)                  // that of 0x1.0p52 (large, inf or NaN),
    jae     4f                      // goto 4 (fld does not touch EFLAGS)

2:  fistpll 4(%esp)                 // consumes the duplicate of x
    fildll  4(%esp)                 // st(0) = rint(x), st(1) = x
    fucomi  %st(1),         %st(0)  // compare rint(x) with x
    fstp    %st(1)                  // drop x, keep rint(x); flags are preserved
    jbe     3f                      // if rint(x) <= x, return rint(x)
    fld1
    fsubrp  %st(0),         %st(1)  // otherwise, return rint(x) - 1.0
3:  ret

4:  jg      5f                      // signed view of the same cmp: large |x|, inf
                                    // or NaN -- already integral (or NaN), return x
    testl   $0xffffffff,    4(%esp) // low word nonzero: x is a negative denormal (or
    jnz     2b                      // a large integer); the main path handles both,
                                    // giving -1.0 for the denormal
5:  fstp    %st(1)                  // otherwise, pop the extra copy of x, and
    ret                             // return x.

#else // i386, no SSE3

// double floor(double x) -- i386 variant for targets without SSE3.
//
// In:       x on the stack; low word at 4(%esp), high word at 8(%esp).
// Out:      floor(x) left in st(0).
// Clobbers: %ecx, EFLAGS.  The argument slot at 4(%esp) is reused as an
//           8-byte scratch area for the integer round trip.
//
// Every finite non-integer goes through rint(x) via fistp/fild and is
// corrected downward by 1.0 when the rounding went above x.  Zeros, values
// with |x| >= 0x1.0p52, infinities and NaNs are returned unchanged.
.text
.align 4
.globl _floor
_floor:
    mov     8(%esp),        %ecx    // high word of x
    and     $0x7fffffff,    %ecx    // high word of |x|
    fldl    4(%esp)                 // st(0) = x
    sub     $1,             %ecx    // a zero high word wraps to 0xffffffff
    cmp     $0x432fffff,    %ecx    // if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
    fld     %st(0)                  // or x is a denormal whose high 32 bits are
    jae     2f                      // all zero, goto 2 (fld does not touch EFLAGS)

0:  fistpll 4(%esp)                 // consumes the duplicate of x
    fildll  4(%esp)                 // st(0) = rint(x), st(1) = x
    fucomi  %st(1),         %st(0)  // compare rint(x) with x
    fstp    %st(1)                  // drop x, keep rint(x); flags are preserved
    jbe     1f                      // if rint(x) <= x, return rint(x)
    fld1
    fsubrp  %st(0),         %st(1)  // otherwise, return rint(x) - 1.0
1:  ret

2:  jg      3f                      // signed view of the same cmp: high word of |x|
                                    // above that of 0x1.0p52 (large, inf, NaN), goto 3
    testl   $0xffffffff,    4(%esp) // low word nonzero: x is a denormal (not zero),
    jnz     0b                      // or an integer just above 0x1.0p52 in magnitude;
                                    // jump back to the mainline, which handles both
3:  fstp    %st(1)                  // pop the extra copy of x off the stack
    ret                             // and return x

#endif // SSE3

#else // x86_64

// Constants for the x86_64 floor.  andpd reads a full 16 bytes starting at
// mone, so mone must be 16-byte aligned (on Mach-O assemblers .align takes a
// power-of-two exponent, so ".align 4" is 16 bytes) and must be followed by
// at least 8 more bytes of readable data, which absmask provides.
.const
.align 4
mone:    .quad 0xbff0000000000000   // -1.0
absmask: .quad 0x7fffffffffffffff   // everything except the sign bit
thresh:  .quad 0x432fffffffffffff   // bit pattern of 0x1.0p52, minus one

// double floor(double x) -- x86_64 variant using scalar SSE2.
//
// In:       x in the low double of %xmm0.
// Out:      floor(x) in the low double of %xmm0.
// Clobbers: %rax, %rcx, %xmm1, EFLAGS.
//
// Zeros, values with |x| >= 0x1.0p52, infinities and NaNs are returned
// unchanged.  Everything else is truncated through a 64-bit integer and
// then corrected by -1.0 when x lies below its truncation (negative
// non-integers).
.text
.align 4
.globl _floor
_floor:
    movd    %xmm0,          %rcx    // raw bits of x
    andq    absmask(%rip),  %rcx    // |x|
    subq    $1,             %rcx    // zero wraps to all ones
    cmpq    thresh(%rip),   %rcx    // if |x| >= 0x1.0p52 or isnan(x) or iszero(x)
    jae     1f                      // early out, returning x.

    cvttsd2si %xmm0,        %rax
    cvtsi2sd  %rax,         %xmm1   // trunc(x)
    cmpltsd   %xmm1,        %xmm0   // all ones if x < trunc(x), else zero
    andpd   mone(%rip),     %xmm0   // x < trunc(x) ? -1.0 : 0.0
    addsd   %xmm1,          %xmm0   // floor(x)
1:  ret

#endif