this repo has no description
1/*
2 * Original public domain code for expl Written by J.T. Conklin <jtc@netbsd.org>.
3 * fixed up to deliver correct edge cases by Ian Ollmann (expl)
4 *
5 * exp2l by Ian Ollmann based on algorithm by J.T. Conklin
6 *
7 * expm1l is new code by Ian Ollmann
8 */
9
10#include <machine/asm.h>
11
12#define LOCAL_STACK_SIZE 24
13#include "abi.h"
14
15
16/* e^x = 2^(x * log2(e)) */
17/*
18ENTRY(expl)
19 SUBP $LOCAL_STACK_SIZE, STACKP
20 movl $0xbe205c61, 20(STACKP) // ln2 hi (double, low 32-bits)
21 movl $0x0ca86c39, 16(STACKP) // ln2 lo (double, low 32-bits)
22 movl $0x3f317218, 8(STACKP) // ln2 hi (float)
23 movl $0x7f800000, 4(STACKP) // inf
24 movl $0x50000000, (STACKP) // limit
25
26 //test for inf or NaN
27 fldt FIRST_ARG_OFFSET(STACKP) // { f }
28 fabs // { |f| }
29 flds 4(STACKP) // { inf, |f| }
30 fucomip %ST(1), %ST(0) // { |f| } if |f| == inf or f is NaN
31 fstp %ST(0) // {}
32 fldt FIRST_ARG_OFFSET(STACKP) // { f }
33 flds (STACKP) // { limit, f }
34 je expl_special
35
36 //clip f to be between limit and -limit.
37 //If we dont do this, when we scale by log2e then it is possible that we will generate spurious overflow
38 fld %ST(0) // { limit, limit, f }
39 fchs // { -limit, limit, f }
40 fucomi %ST(2), %ST(0) // { -limit, limit, f } if( -limit < f )
41 fcmovb %ST(2), %ST(0) // { max( -limit, f ), limit, f }
42 fucomi %ST(1), %ST(0) // { max( -limit, f ), limit, f } if( max( -limit, f ) > limit )
43 fcmovnbe %ST(1), %ST(0) // { clipped, limit, f }
44 fstp %ST(2) // { limit, clipped }
45 fstp %ST(0) // { clipped }
46
47 //extended precision exp:
48 //Calculate the integer part of log2(e) * clipped
49 fldl2e // { log2e, clipped }
50 fmul %st(1), %st(0) // { log2e * clipped, clipped }
51 frndint // { n, clipped }
52
53 //figure out the fractional part. Use more than 64-bit precision so we get the exact fraction.
54 flds 8(STACKP) // { ln2 hi, n, clipped }
55 fmul %st(1), %st(0) // { ln2 hi * n, n, clipped }
56 fsubrp %st(0), %st(2) // { n, clipped - n*ln2hi }
57 fldl 16(STACKP) // { ln2 lo, n, clipped - n*ln2hi }
58 fmul %st(1), %st(0) // { ln2lo*n, n, clipped - n*ln2hi }
59 fsubrp %st(0), %st(2) // { n, fract }
60 fldl2e // { log2e, n, fract }
61 fmulp %st(0), %st(2) // { n, log2e * fract }
62
63 //Calculate e**fract
64 fxch // { log2e * fract, n }
65 f2xm1 // { 2**fract( f*log2e ) - 1, n }
66 fld1 // { 1, 2**fract( f*log2e ) - 1, n }
67 faddp // { 2**fract( f*log2e ), n }
68
69 //scale according to n
70 fscale // {result, n }
71 fstp %st(1)
72 ADDP $LOCAL_STACK_SIZE, STACKP
73 ret
74
75expl_special: //handles +-Inf, QNaN, SNaN
76 fucomip %ST(1), %ST(0) // { f } if( limit > f )
77 fldz // { 0, f }
78 fxch // { f, 0 }
79 fcmovnbe %ST(1), %ST(0) // set the -Inf case to 0
80 fadd %ST(0), %ST(0) // silence NaNs
81 fstp %ST(1)
82 ADDP $LOCAL_STACK_SIZE, STACKP
83 ret
84*/
85
86
87ENTRY(exp2l)
88 SUBP $LOCAL_STACK_SIZE, STACKP
89 movl $0x7f800000, 4(STACKP) // inf
90 movl $0x1e000000, (STACKP) // small (0x1.0p-67f)
91
92 //test for +inf, NaN
93 fldt FIRST_ARG_OFFSET(STACKP) // { f }
94 flds 4(STACKP) // { inf, f }
95 fucomip %ST(1), %ST(0) // { f }
96 je exp2l_exit // return f if Inf or NaN
97
98 //test for -inf
99 flds 4(STACKP) // { inf, f }
100 fchs // {-inf, f }
101 fucomip %ST(1), %ST(0) // { f }
102 jne exp2l_main
103 fldz // { 0, f }
104 fstp %st(1) // { f }
105 jmp exp2l_exit
106
107exp2l_main:
108 flds (STACKP) // { small, f }
109 fld %st(1) // { f, small, f }
110 fabs // { |f|, small, f }
111 fucomip %st(1), %st(0) // { small, f }
112 fstp %st(0) // { f }
113 jb exp2l_small
114 fld %st(0) // { f, f }
115 frndint // { intf, f }
116 fxch %st(1) // { f, intf }
117 fsub %st(1),%st // { fract(f), intf }
118 f2xm1 // { 2**fract(f) - 1, intf }
119 fld1 // { 1, 2**fract(f) - 1, intf }
120 faddp // { 2**fract(f), intf }
121 fscale // { result, intf }
122 fstp %st(1) // { result }
123 //fall through to exit
124
125exp2l_exit:
126 ADDP $LOCAL_STACK_SIZE, STACKP
127 ret
128
129exp2l_small: //avoid setting the underflow flag for denormal inputs
130 fld1 // {1, f }
131 faddp // {result}
132 ADDP $LOCAL_STACK_SIZE, STACKP
133 ret
134
135
136//do expm1
137 // e**x - 1 = 2**(x*log2(e)) - 1
138 //
139 // f = fract( x*log2e ) //fractional part of x*log2e
140 // i = int( x*log2e ) //integral part of x*log2e
141 //
142 // e**x - 1 = 2**( f + i ) - 1
143 // = (2**f)(2**i) - 1
144 // = (2**f)(2**i) - (2**i) + (2**i) - 1
145 // = (2**f - 1)(2**i) + (2**i) - 1
146 // (exact) (needs to be done in the right order, or not at all for i == 0)
147 //
148 // The multiplication is exact. The two adds need to be done least to greatest in order of magnitude.
149 // Since 2**f - 1 is always in the range [ -0.5, 1.0 ], we know ahead of time that (2**f-1)(2**i)
150 // always has less magnitude than (2**i). if i < 0 then add the -1 first. Otherwise, add (2**i) first
151 //
152 // There are a lot of special cases here, which is why this is a bit branchy. I tried it without
153 // branches and wasn't able to satisfy all the constraints called for by various standards
154
155#define EXPM1L_STACK_SIZE 12
156#undef FIRST_ARG_OFFSET
157#define FIRST_ARG_OFFSET (FRAME_SIZE + EXPM1L_STACK_SIZE)
158
159ENTRY(expm1l)
160 SUBP $EXPM1L_STACK_SIZE, STACKP
161 movl $0xc6000000, 8(STACKP) // -8192.0f
162 movl $0x7f800000, 4(STACKP) // inf
163 movl $0x47000000, 0(STACKP) // 32768
164
165 //test for inf or NaN
166 fldt FIRST_ARG_OFFSET(STACKP) // { x }
167 fldz // { 0, x }
168 fucomip %st(1), %st(0) // { x }
169 jnbe expm1l_neg // if 0 > x goto expm1_neg
170
171 // x >= 0...
172 // get x*log2e
173 fldl2e // { log2e, x }
174 fmulp // { x*log2e }
175 flds 4(STACKP) // { inf, x*log2e }
176 fucomip %ST(1), %ST(0) // { x*log2e } if x*log2e == inf or NaN
177 je expm1l_special
178
179 //next convert to integer and fractional parts using round to zero rounding mode
180 //calculate trunc(x*log2e)
181 flds (STACKP) // { 32768.0, x*log2e }
182 fucomi %st(1), %st(0) // { x*log2e }
183 fcmovnbe %st(1), %st(0) // { clipped, x*log2e }
184 fstp %st(1) // { clipped }
185 fld %st(0) // { clipped, clipped }
186
187#if defined( __SSE3__ )
188 fisttpll (STACKP) // { clipped }
189 fildll (STACKP) // { i, clipped }
190#else
191 //set to round to zero
192 fnstcw 4(STACKP)
193 movw 4(STACKP), %ax
194 movw %ax, %dx //save old value for later
195 orw $0x0c00, %ax
196 movw %ax, 4(STACKP)
197 fldcw 4(STACKP)
198 //round to int
199 fistpll (STACKP) // { clipped }
200 fildll (STACKP) // { i, clipped }
201 //restore old rounding mode
202 movw %dx, 4(STACKP)
203 fldcw 4(STACKP)
204#endif
205
206 //test 0 against i
207 fldz // {0, i, x*log2e}
208 fucomip %st(1), %st(0) // {i, x*log2e}
209
210 //get the fractional part of x*log2e with the same sign as x*log2e
211 fsubr %st(0), %st(1) // { i, f }
212
213 //calculate 2**f-1
214 fxch // { f, i }
215 f2xm1 // { 2**f - 1, i }
216
217 //three way switch for 0>i, i==0, i>0
218 je exmpm1l_zero
219
220 //This case is a bit trickier, because 2**i can overflow when the result does not
221 //we do this as:
222 //
223 // e**x - 1 = (2**f - 1)(2**i) + (2**i) - 1
224 // = (2**i)( (2**f-1) - 2**(-i) + 1)
225 //
226 // we clamp 2**(-i) so that it can't go lower than 2**(-8192) to prevent underflow
227 // When this happens the result is always 0x1.fffffffffffffffep-1 * 2**i, so it doesn't
228 // much matter
229 fld %st(1) // { i, 2**f-1, i}
230 fchs // {-i, 2**f-1, i}
231 flds 8(STACKP) // {-8192.0f, -i, 2**f-1, i}
232 fucomi %st(1), %st(0) // {-8192.0f, -i, 2**f-1, i}
233 fcmovb %st(1), %st(0) // { clipped(-i), -i, 2**f-1, i}
234 fstp %st(1) // { clipped(-i), 2**f-1, i}
235 fld1 // {1, clipped(-i), 2**f-1, i}
236 fscale // { 2**-i,clipped(-i), 2**f-1, i}
237 fstp %st(1) // { 2**-i, 2**f-1, i}
238 fsubrp // { 2**f-1 - 2**-i, i}
239 fld1 // { 1, 2**f-1 - 2**-i, i}
240 faddp // { 1+ 2**f-1 - 2**-i, i}
241 fscale
242 fstp %st(1)
243
244 ADDP $EXPM1L_STACK_SIZE, STACKP
245 ret
246
247
248// x < 0 or x is NaN
249expm1l_neg: // {x}
250 flds 4(STACKP) // { inf, x }
251 fchs // { -inf, x }
252 fucomip %ST(1), %ST(0) // { x } if x == -inf
253 je expm1l_special // NaNs and -Inf go to special case handling code
254
255 // clip large negative values to avoid overflow
256 flds 8(STACKP) // { -8192, x }
257 fucomi %st(1), %st(0) // { -8192, x }
258 fcmovb %st(1), %st(0) // { clipped, x }
259 fstp %st(1) // { clipped }
260
261 //scale by log2e
262 fldl2e // { log2e, clipped } //clipped a.k.a x from here on
263 fmulp // { x*log2e }
264
265 //next convert to integer and fractional parts using round to zero rounding mode
266 //calculate trunc(x*log2e)
267 fld %st(0) // { x*log2e, x*log2e }
268#if defined( __SSE3__ )
269 fisttpll (STACKP) // { x*log2e }
270 fildll (STACKP) // { i, x*log2e }
271#else
272 //set to round to zero
273 fnstcw 4(STACKP)
274 movw 4(STACKP), %ax
275 movw %ax, %dx //save old value for later
276 orw $0x0c00, %ax
277 movw %ax, 4(STACKP)
278 fldcw 4(STACKP)
279 //round to int
280 fistpll (STACKP) // { x*log2e }
281 fildll (STACKP) // { i, x*log2e }
282 //restore old rounding mode
283 movw %dx, 4(STACKP)
284 fldcw 4(STACKP)
285#endif
286
287
288 //test 0 against i
289 fldz // {0, i, x*log2e}
290 fucomip %st(1), %st(0) // {i, x*log2e}
291
292 //get the fractional part of x*log2e with the same sign as x*log2e
293 fsubr %st(0), %st(1) // { i, f }
294
295 //calculate 2**f-1
296 fxch // { f, i }
297 f2xm1 // { 2**f - 1, i }
298
299 //jump to complex code for handling the i < 0 case
300 je exmpm1l_zero
301
302
303 // In the i < 0 case, 2**i has a smaller magnitude than -1, so it is added first
304 //
305 // = (2**f - 1)(2**i) + (2**i) - 1
306 //
307 // The 2**i scaling is exact, so it can be factored out:
308 //
309 // = (2**i)(2**f-1+1) -1
310 fld1 // { 1, 2**f - 1, i }
311 faddp // { 2**f-1+1, i }
312 fscale // { (2**f-1+1)*(2**i), i }
313 fstp %st(1) // { (2**f-1+1)*(2**i) }
314 fld1 // { 1, (2**f-1+1)*(2**i) }
315 fsubrp // { result }
316 ADDP $EXPM1L_STACK_SIZE, STACKP
317 ret
318
319exmpm1l_zero: //the i == 0 case
320 fstp %st(1) // { result }
321 ADDP $EXPM1L_STACK_SIZE, STACKP
322 ret
323
324
325expm1l_special: //handles +-Inf, QNaN, SNaN
326 fldz // {0, x}
327 fucomip %ST(1), %ST(0) // { x } if( 0 > x )
328 fadd %ST(0), %ST(0) // silence NaNs
329 fld1 // { 1, x }
330 fchs // { -1, x }
331 fxch // { f, -1 }
332 fcmovnbe %ST(1), %ST(0) // set the -Inf case to -1
333 fstp %ST(1)
334 ADDP $EXPM1L_STACK_SIZE, STACKP
335 ret