this repo has no description
1
2/*
3 * lroundf.s
4 *
5 * by Ian Ollmann
6 *
7 * Copyright (c) 2007, Apple Inc. All Rights Reserved.
8 *
9 * C99 lroundf and llroundf for __i386__ and __x86_64__
10 *
11 */
12
13#include <machine/asm.h>
14#include "abi.h"
15
16
17#if defined( __i386__ )
18
19
// llroundf( float x ) for __i386__: round x to the nearest integer, halfway cases away
// from zero, returning the 64-bit result in %edx:%eax.
//
// x is read from FRAME_SIZE(STACKP). The code first stores llrint(x) into a scratch slot
// at (STACKP) with fistpll and then takes one of these paths:
//   |x| <  0x1.0p23f and x integral   -> return the stored llrint(x)            (label 2)
//   |x| == 0.5f - 1 ulp               -> return 0                               (label 4)
//   |x| <  0x1.0p23f otherwise        -> (int32_t)( x + copysign( 0.5f, x ) ), sign extended
//   0x1.0p23f <= |x| < 0x1.0p63f      -> return the stored llrint(x)            (labels 1, 2)
//   NaN                               -> return whatever fistpll stored         (labels 1, 3, 2)
//   |x| >= 0x1.0p63f (incl. infinity) -> stored result, bit-inverted when x > 0 (label 3)
//
// NOTE(review): ENTRY, STACKP, FRAME_SIZE, SUBP and ADDP are macros that are not defined
// in this section (presumably machine/asm.h / abi.h) -- confirm their definitions there.
20 ENTRY( llroundf )
21 movl FRAME_SIZE(STACKP), %eax // raw bits of x
22 flds FRAME_SIZE(STACKP) // { x }
23 fld %st(0) // { x, x }
24 SUBP $16-FRAME_SIZE, STACKP // make scratch space at (STACKP); x is now at 16(STACKP)
25
26 fistpll (STACKP) // { x }, llrint(x), set invalid / inexact if necessary
27 movl %eax, %edx // x
28 andl $0x7fffffff, %eax // |x|
29 xorl %eax, %edx // signof( x )
30 cmpl $0x4b000000, %eax // |x| >= 0x1.0p23f or NaN
31 jae 1f
32
33 fildll (STACKP) // { llrint(x), x }
34 fucomip %st(1), %st(0) // { x } x == llrint(x)
35 fstp %st(0) // { }   (the flags set by fucomip are still consumed by the je below)
36 je 2f // return llrint(x)
37
// presumably special-cased because x + copysign( 0.5f, x ) can round to +-1.0f for this one input -- TODO confirm
38 cmpl $0x3effffff, %eax // |x| == 0.5f - 1 ulp
39 je 4f
40
41 // at this point we know that |x| < 0x1.0p23f, so we don't need a full 64-bit conversion, which is good because we really need trunc now.
42 orl $0x3f000000, %edx // copysign( 0.5f, x )
43 movss 16(STACKP), %xmm0 // x
44 movd %edx, %xmm1 // copysign( 0.5f, x )
45 addss %xmm1, %xmm0 // x + copysign( 0.5f, x )
46 cvttss2si %xmm0, %edx // result = (int32_t) ( x + copysign( 0.5f, x ))
47 movl %edx, %eax // result
48 sarl $31, %edx // sign extended result
49 ADDP $16-FRAME_SIZE, STACKP // release the scratch space
50 ret
51
521: // |x| >= 0x1.0p23f or NaN
53 fstp %st(0) // { }
54 cmpl $0x5f000000, %eax // |x| >= 0x1.0p63f
55 jae 3f
56
57 // |x| is non-overflowing integer (NaN ends up here eventually too)
582: movl (STACKP), %eax // low 32 bits
59 movl 4(STACKP), %edx // high 32 bits
60 ADDP $16-FRAME_SIZE, STACKP
61 ret
62
633: // |x| overflows or is NaN
64 cmpl $0x7f800000, %eax // |x| above the infinity bit pattern, i.e. x is NaN
65 ja 2b // NaN: return the fistpll result unchanged
66
67 // |x| overflows
// NOTE(review): this relies on fistpll having stored 0x8000000000000000 for the overflowing
// conversion, so that inverting all bits for x > 0 yields LLONG_MAX -- confirm against the x87 documentation.
68 subl $1, %edx // x < 0 ? 0x7fffffff : -1U
69 sarl $31, %edx // x < 0 ? 0 : -1U
70 movl (STACKP), %eax
71 xorl %edx, %eax // x < 0 ? low result : low result ^ -1U
72 xorl 4(STACKP), %edx // x < 0 ? high result : high result ^ -1U
73 ADDP $16-FRAME_SIZE, STACKP
74 ret
75
764: // |x| == 0.5f - 1 ulp, return 0
77 xorl %eax, %eax
78 xorl %edx, %edx
79 ADDP $16-FRAME_SIZE, STACKP
80 ret
81
// lroundf( float x ) for __i386__, and lroundf / llroundf for __x86_64__ (where the two
// entry points share one body). The per-architecture prologues below leave the raw bits
// of x in %edx and x itself in %xmm0; everything after the #endif is common code written
// with the AX_P / CX_P / DX_P / MOVP / XORP / SUBP macros.
// NOTE(review): those macros, ENTRY, STACKP and FRAME_SIZE are not defined in this section
// (presumably machine/asm.h / abi.h); they look like pointer-width register and
// instruction aliases -- confirm.
//
// LONG_MIN_f is the float bit pattern of 0x1.0p31f (__i386__) or 0x1.0p63f (__x86_64__),
// i.e. judging by the name, the magnitude of LONG_MIN on each architecture.
82#define LONG_MIN_f 0x4f000000
83
84 ENTRY( lroundf )
85 movl FRAME_SIZE(STACKP), %edx // raw bits of x
86 movss FRAME_SIZE(STACKP), %xmm0 // x
87#elif defined( __x86_64__ )
88

89#define LONG_MIN_f 0x5f000000
90
91
92 ENTRY( lroundf )
93 ENTRY( llroundf )
94 xorq %rdx, %rdx // clear %rdx; NOTE(review): the movd into %edx below should already zero the upper half, so this looks redundant -- confirm
95 movd %xmm0, %edx // raw bits of x (sign still present; masked down to |x| after the #endif)
96#endif
97
98 cvttss2si %xmm0, AX_P // (long) x, set invalid / inexact if necessary
99 MOVP DX_P, CX_P // x
100 and $0x7fffffff, DX_P // |x|
101 XORP DX_P, CX_P // signof( x )
102 cmpl $0x4b000000, %edx // |x| >= 0x1.0p23f or NaN
103 jae 2f
104
105 cvtsi2ss AX_P, %xmm1 // trunc(x)
106 ucomiss %xmm0, %xmm1 // x == trunc(x)
107 je 1f // return (long) x
108
109 orl $0x3f000000, %ecx // copysign( 0.5f, x )
110 movd %ecx, %xmm1 // copysign( 0.5f, x )
111 cmpl $0x3effffff, %edx // |x| == 0.5f - 1 ulp
112 je 1f // return (long) x, which is 0 here; skips the add of 0.5f for this one input
113
114 addss %xmm1, %xmm0 // x += copysign( 0.5, x )
115 cvttss2si %xmm0, AX_P // (long) (x + copysign( 0.5, x ) )
116
117 1: ret
118
119 2: // |x| >= 0x1.0p23f or NaN
120 cmpl $0x7f800000, %edx // |x| is NaN
121 ja 3f // NaN: return the cvttss2si result unchanged
122 cmpl $LONG_MIN_f, %edx // |x| below the magnitude of LONG_MIN: (long) x did not overflow
123 jb 1b
124 SUBP $1, CX_P // x < 0 ? 0x7fffffff : -1LL
125 sar $31, CX_P // x < 0 ? 0 : -1LL
126 XORP CX_P, AX_P // flip LONG_MIN to LONG_MAX if needed (i.e. when x > 0)
127 3: ret
128
129