this repo has no description
1
2/*
3 * expl.s
4 *
5 * by Ian Ollmann
6 *
7 * C99 expl for Mac OS X for Intel
8 */
9
10#include "abi.h"
11#include "machine/asm.h"
12
13.literal4
14inf: .long 0x7f800000
15minf: .long 0xff800000
16
17.const
18.align 4
19maxldbl: .quad 0xffffffffffffffff, 0x7ffe
20minldbl: .quad 0x8000000000000000, 0x0001
21ln2_hi: .quad 0xB17217F7D1D00000, 0x3ffe
22ln2_lo: .quad 0x8654361C4C67FC0D, 0xbFCE
23
24
25#if defined( __i386__ )
26 #define RELATIVE_ADDR( _a ) ((_a) - 0b)(%ecx)
27#else
28 #define RELATIVE_ADDR( _a ) _a(%rip)
29#endif
30
31.text
32ENTRY( expl )
33 fldt FRAME_SIZE( STACKP ) // { x }
34 movswl 8+FRAME_SIZE( STACKP ), %eax // sign + exponent
35 movl %eax, %edx // sign + exponent
36 andl $0x7fff, %eax // exponent
37 subl $(16383-64), %eax // subtract out most of the bias -- push expoents less than -64 negative
38 cmpl $(64+14), %eax // if( exponent < -64 || exponent > 14 || isnan(x)
39 ja 2f // goto 2
40
41 fldl2e // { log2(e), x }
42 andl $0x80000000, %edx // signof (x)
43 fmul %st(1), %st(0) // { x * log2(e), x }
44 orl $0x3f000000, %edx // copysign( 0.5f, x )
45 movl %edx, FRAME_SIZE(STACKP)
46 fadds FRAME_SIZE( STACKP ) // { x * log2(e) + copysign( 0.5, x ), x }
47
48 //truncate the top of stack
49#if defined( __SSE3__ )
50 fisttpl FRAME_SIZE(STACKP) // { x }
51 movl FRAME_SIZE(STACKP), %edx
52 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x }
53#else
54 fstpl FRAME_SIZE(STACKP) // { x }
55 cvtsd2si FRAME_SIZE(STACKP), %edx
56 movl %edx, FRAME_SIZE(STACKP)
57 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x }
58#endif
59
60 // PIC
61#if defined( __i386__ )
62 call 0f
630: popl %ecx
64#endif
65
66 // reduce x to the fractional part
67 fldt RELATIVE_ADDR(ln2_hi) // { ln(2) hi part, round(2(x * log2(e)), x }
68 fmul %st(1), %st(0) // { ln(2) hi part * round(2(x * log2(e)), round(2(x * log2(e)), x }
69 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) }
70 fldt RELATIVE_ADDR(ln2_lo) // { ln2
71 fmul %st(1), %st(0) // { round(2(x * log2(e)) * ln(2) lo part, round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) }
72 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), reduced x }
73 fxch // { reduced x, round(2(x * log2(e)) }
74
75 // multiply by log2(e)
76 fldl2e // { log2(e), x - ln(2) hi part * round(2(x * log2(e)) - round(2(x * log2(e)) * ln(2) lo part }
77 fmulp // { log2(e) * reduced x, round(2(x * log2(e)) }
78
79 // calculate 2**(log2(e) * reduced x )
80 f2xm1 // { 2**f - 1, round(2(x * log2(e)) }
81 fld1 // { 1, 2**f - 1, round(2(x * log2(e)) }
82 faddp // { 2**f, round(2(x * log2(e)) }
83
84 // scale results
85 fscale // { result, , round(2(x * log2(e)) }
86 fstp %st(1)
87
88 // The scale may fail to set underflow. Check to see if we need to do that
89 fldt RELATIVE_ADDR( minldbl ) // { 0x1.0p-16382, result }
90 fucomip %st(1), %st(0) // { 0x1.0p-16382, result }
91 ja 1f
92
93 // Nope! We're done
94 ret
95
961: // set underflow
97 fldt RELATIVE_ADDR( minldbl ) // { 0x1.0p-16382, result }
98 fmul %st(0), %st(0)
99 fstp %st(0)
100 ret
101
102//0x1.p-64 > |x| || 0x1.p14 < |x| || isnan(x)
1032: jg 3f
104
105 // 0x1.p-64 > |x|
106 fld1 // { 1, x }
107 faddp // { 1 + x }
108 ret
109
110
111// 0x1.p14 < |x| || isnan(x)
1123:
113#if defined( __i386__ )
114 call 0f
1150: popl %ecx
116#endif
117 cmpl $0, %edx // if ( x < 0 )
118 jl 4f // goto 4
119
120 // 0x1.p14 < x || isnan(x)
121 fldt RELATIVE_ADDR(maxldbl) // { LDBL_MAX, x }
122 fmulp // { x * LDBL_MAX } set overflow, Inf or quiet nan
123 ret
124
125
126 // -0x1.p14 > x || isnan(x)
1274: flds RELATIVE_ADDR( minf ) // { -inf, x } set overflow, Inf or quiet nan
128 fucomip %st(1), %st(0) // { x }
129 je 6f
130
131 // set underflow, return 0
1325: fldt RELATIVE_ADDR( minldbl ) // { LDBL_MIN, x }
133 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x }
134 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x }
135 fchs // { -LDBL_MIN, LDBL_MIN * x }
136 fmulp // { -LDBL_MIN * (LDBL_MIN * x) }
137 ret
138
139
140 // x == -inf or nan
1416: jp 5b
142
143 // x == -inf
144 fstp %st(0)
145 fldz
146 ret
147
148
149
150
151ENTRY( expl$fenv_access_off )
152 fldt FRAME_SIZE( STACKP ) // { x }
153 movswl 8+FRAME_SIZE( STACKP ), %eax // sign + exponent
154 movl %eax, %edx // sign + exponent
155 andl $0x7fff, %eax // exponent
156 subl $(16383-64), %eax // subtract out most of the bias -- push expoents less than -64 negative
157 cmpl $(64+14), %eax // if( exponent < -64 || exponent > 14 || isnan(x)
158 ja 2f // goto 2
159
160 fldl2e // { log2(e), x }
161 andl $0x80000000, %edx // signof (x)
162 fmul %st(1), %st(0) // { x * log2(e), x }
163 orl $0x3f000000, %edx // copysign( 0.5f, x )
164 movl %edx, FRAME_SIZE(STACKP)
165 fadds FRAME_SIZE( STACKP ) // { x * log2(e) + copysign( 0.5, x ), x }
166
167 //truncate the top of stack
168#if defined( __SSE3__ )
169 fisttpl FRAME_SIZE(STACKP) // { x }
170 movl FRAME_SIZE(STACKP), %edx
171 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x }
172#else
173 fstpl FRAME_SIZE(STACKP) // { x }
174 cvtsd2si FRAME_SIZE(STACKP), %edx
175 movl %edx, FRAME_SIZE(STACKP)
176 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x }
177#endif
178
179 // PIC
180#if defined( __i386__ )
181 call 0f
1820: popl %ecx
183#endif
184
185 // reduce x to the fractional part
186 flds RELATIVE_ADDR(ln2_hi) // { ln(2) hi part, round(2(x * log2(e)), x }
187 fmul %st(1), %st(0) // { ln(2) hi part * round(2(x * log2(e)), round(2(x * log2(e)), x }
188 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) }
189 fldt RELATIVE_ADDR(ln2_lo) // { ln2
190 fmul %st(1), %st(0) // { round(2(x * log2(e)) * ln(2) lo part, round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) }
191 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), reduced x }
192 fxch // { reduced x, round(2(x * log2(e)) }
193
194 // multiply by log2(e)
195 fldl2e // { log2(e), x - ln(2) hi part * round(2(x * log2(e)) - round(2(x * log2(e)) * ln(2) lo part }
196 fmulp // { log2(e) * reduced x, round(2(x * log2(e)) }
197
198 // calculate 2**(log2(e) * reduced x )
199 f2xm1 // { 2**f - 1, round(2(x * log2(e)) }
200 fld1 // { 1, 2**f - 1, round(2(x * log2(e)) }
201 faddp // { 2**f, round(2(x * log2(e)) }
202
203 // scale results
204 fscale // { result, , round(2(x * log2(e)) }
205 fstp %st(1)
206 ret
207
208
209//0x1.p-64 > |x| || 0x1.p14 < |x| || isnan(x)
2102: jg 3f
211
212 // 0x1.p-64 > |x|
213 fld1 // { 1, x }
214 faddp // { 1 + x }
215 ret
216
217
218// 0x1.p14 < |x| || isnan(x)
2193:
220#if defined( __i386__ )
221 call 0f
2220: popl %ecx
223#endif
224 cmpl $0, %edx // if ( x < 0 )
225 jl 4f // goto 4
226
227 // 0x1.p14 < x || isnan(x)
228 fldt RELATIVE_ADDR(maxldbl) // { LDBL_MAX, x }
229 fmulp // { x * LDBL_MAX } set overflow, Inf or quiet nan
230 ret
231
232
233 // -0x1.p14 > x || isnan(x)
2344: flds RELATIVE_ADDR( minf ) // { -inf, x } set overflow, Inf or quiet nan
235 fucomip %st(1), %st(0) // { x }
236 je 6f
237
238 // set underflow, return 0
2395: fldt RELATIVE_ADDR( minldbl ) // { LDBL_MIN, x }
240 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x }
241 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x }
242 fchs // { -LDBL_MIN, LDBL_MIN * x }
243 fmulp // { -LDBL_MIN * (LDBL_MIN * x) }
244 ret
245
246
247 // x == -inf or nan
2486: jp 5b
249
250 // x == -inf
251 fstp %st(0)
252 fldz
253 ret
254
255