this repo has no description
1/*
2 * e_cbrtl.s
3 * LibmV5
4 *
5 * Created by Ian Ollmann on 8/27/05.
6 * Copyright 2005 Apple Computer. All rights reserved.
7 *
8 */
9
10#define ENTRY(name) \
11 .globl _##name; \
12 .align 4; \
13 _##name##:
14
15#if defined( __LP64__ )
16 #define LOCAL_STACK_SIZE 20
17#else
18 #define LOCAL_STACK_SIZE 28
19#endif
20
21#include "abi.h"
22
23.const
24.align 4
25onethird: .long 0xaaaaaaab, 0xaaaaaaaa, 0x00003ffd, 0x00000000 //(long double) 1.0L/3.0L
26
27.align 3
28correction: .double 0.62996052494743658238361, 0.79370052598409973737585, 1.0, 1.2599210498948731647672, 1.5874010519681994747517
29coeffs: .double 1.7830491344381518, -1.5730724799776633, 1.2536000054780357, -0.60460822457398278, 0.15834924310704463, -0.017322841453552703
30
31.align 2
32infinity: .single +Infinity
33
34// Stack:
35// old ebp
36// old ebx
37
38
39.text
40
41#if defined( __x86_64__ )
42 #define RELATIVE_ADDR( _a) (_a)( %rip )
43#elif defined( __i386__ )
44 #define RELATIVE_ADDR( _a) (_a)-rel_addr( BX_P )
45#else
46 #error arch not supported
47#endif
48
49
50#if defined( __i386__ )
51 //a short routine to get the local address
52 local_addr:
53 MOVP (STACKP), BX_P
54 ret
55#endif
56
57
58ENTRY( cbrtl )
59 #if defined( __LP64__ )
60 SUBP $LOCAL_STACK_SIZE, STACKP
61 movl $0x55555556, 16(STACKP) //write out fixed point 1/3, align stack to 16 bytes
62 movq $0, 8(STACKP)
63 movq $0, 0(STACKP)
64 leaq (correction + 16)(%rip), %r8
65 #else
66 pushl BASEP //push ebp
67 movl STACKP, BASEP //copy stack pointer to ebp
68 pushl %ebx //push ebx
69 pushl $0x55555556 //write out fixed point 1/3, align stack to 16 bytes
70 pushl $0
71 pushl $0
72 pushl $0
73 pushl $0
74 CALLP local_addr //load the address of rel_addr into %ebx
75rel_addr:
76 #endif
77
78 //load our argument
79 fldt FIRST_ARG_OFFSET(STACKP) //{x}
80
81 //if( fabs(x) == INF || fabs(x) is NaN )
82 // return x + x
83 fld %ST(0) //{x, x}
84 fabs //{|x|, x}
85 flds RELATIVE_ADDR(infinity) //{inf, |x|, x}
86 fucomip %ST(1), %ST //{|x|, x}
87 jne test_zero
88 fstp %ST(0) //{x}
89 fadd %ST(0) //{x + x}
90 jmp my_cbrtl_exit
91
92test_zero:
93 //if( x == 0.0 )
94 // return x;
95 fldz //{0, |x|, x}
96 fucomip %ST(1), %ST //{|x|, x}
97 jne main_part //{|x|, x}
98 fstp %ST(0) //{x}
99 jmp my_cbrtl_exit //{x}
100
101main_part: //{|x|, x}
102 //extract significand and exponent parts
103 fxtract //{ |significand|, exponent, x }
104
105 //write out the exponent as an integer
106 fxch //{ exponent, |significand|, x }
107 fistpl (STACKP) //{ |significand|, x }
108
109 //apply polynomial to significand, store in s, figure out what the new exponent is
110 fld %ST(0) //{ s, |significand|, x }
111 fmull RELATIVE_ADDR(coeffs+5*8) //{ s*c5, |significand|, x }
112 movl (STACKP), %eax // load the exponent
113 faddl RELATIVE_ADDR(coeffs+4*8) //{ s*c5+c4, |significand|, x }
114 imull 16(STACKP) // divide the exponent by 3 (multiply by 0x55555556 and take high 32 bits, place in %edx)
115 movl (STACKP), %eax // get exponent >> 1
116 sarl $31, %eax
117 fmul %ST(1) //{ (c4+c5*s)s, |significand|, x }
118 faddl RELATIVE_ADDR(coeffs+3*8) //{ c3+(c4+c5*s)s, |significand|, x }
119 subl %eax, %edx // subtract the sign of the exponent (makes our approximation work for neg numbers)
120 movl %edx, %eax // copy exponent/3 to eax
121 fmul %ST(1) //{ (c3+(c4+c5*s)s)s, |significand|, x }
122 faddl RELATIVE_ADDR(coeffs+2*8) //{ c2+(c3+(c4+c5*s)s)s, |significand|, x }
123 imul $3, %edx // exponent/3 *= 3
124 fmul %ST(1) //{ (c2+(c3+(c4+c5*s)s)s)s, |significand|, x }
125 faddl RELATIVE_ADDR(coeffs+1*8) //{ c1+(c2+(c3+(c4+c5*s)s)s)s, |significand|, x }
126 subl (STACKP), %edx // remainder = (exponent/3)*3 - original exponent (edx)
127 fmul %ST(1) //{ (c1+(c2+(c3+(c4+c5*s)s)s)s)s, |significand|, x }
128 faddl RELATIVE_ADDR(coeffs+0*8) //{ c0+(c1+(c2+(c3+(c4+c5*s)s)s)s)s, |significand|, x }
129 neg %eax // exponent = -exponent
130 shld $16, %eax, %eax // exponent <<= 16
131 andl $0xFFFF0000, %eax // mask off the other mantissa bits
132 addl $0x3FFF8000, %eax // bias the exponent, set the top mantissa bit
133 movl %eax, 6(STACKP) // write out exponent/3
134
135
136 //correct for exponent remainder to get our estimate
137#if defined( __LP64__ )
138 movslq %edx, %rdx //sign extend edx
139 fmull ( %r8, %rdx, 8 ) //{ e, |significand|, x}
140#else
141 fmull correction + 16 - rel_addr( %ebx, %edx, 8 ) //{ e, |significand|, x}
142#endif
143
144 //fix up the sign of the estimate
145 fld %ST(0) //{ e, e, |significand|, x}
146 fchs //{-e, e, |significand|, x}
147 fldz //{0, -e, e, |significand|, x}
148 fucomip %ST(4), %ST //{-e, e, |significand|, x} if( 0 < x )
149 fcmovb %ST(1), %ST(0) //{+-e, e, |significand|, x}
150 fstp %ST(1) //{+-e, |significand|, x}
151 fstp %ST(1) //{+-e, x}
152
153 //apply the appropriate exponent
154 fldt (STACKP) //{ new exponent, +-e, x }
155 fmulp //{ +-e with correct exponent, x }
156
157 // e += oneThird * e * (1.0L - x * e * e * e);
158 fldt RELATIVE_ADDR( onethird) //{0.3333, e, x}
159 fld %ST(1) //{ e, 0.3333, e, x }
160 fmul %ST(3) //{ x*e, 0.3333, e, x}
161 fmul %ST(2) //{ e*x*e, 0.3333, e, x}
162 fmul %ST(2) //{ e*e*x*e, 0.3333, e, x}
163 fld1 //{1.0, e*e*x*e, 0.3333, e, x}
164 fsubp //{1.0 - e*e*x*e, 0.3333, e, x}
165 fld %ST(2) //{e, 1.0 - e*e*x*e, 0.3333, e, x}
166 fmul %ST(2) //{0.3333*e, 1.0 - e*e*x*e, 0.3333, e, x}
167 fmulp //{0.3333*e*(1.0 - e*e*x*e), 0.3333, e, x}
168 faddp %ST(0), %ST(2) //{0.3333, e+0.3333*e*(1.0 - e*e*x*e), x }
169
170 // e += oneThird * e * (1.0L - x * e * e * e);
171 fld %ST(1) //{ e, 0.3333, e, x }
172 fmul %ST(3) //{ x*e, 0.3333, e, x}
173 fmul %ST(2) //{ e*x*e, 0.3333, e, x}
174 fmul %ST(2) //{ e*e*x*e, 0.3333, e, x}
175 fld1 //{1.0, e*e*x*e, 0.3333, e, x}
176 fsubp //{1.0 - e*e*x*e, 0.3333, e, x}
177 fld %ST(2) //{e, 1.0 - e*e*x*e, 0.3333, e, x}
178 fmul %ST(2) //{0.3333*e, 1.0 - e*e*x*e, 0.3333, e, x}
179 fmulp //{0.3333*e*(1.0 - e*e*x*e), 0.3333, e, x}
180 faddp %ST(0), %ST(2) //{0.3333, e+0.3333*e*(1.0 - e*e*x*e), x }
181
182
183 // e = (e*x)*e;
184 fld %ST(1) //{e, 0.3333, e, x}
185 fmul %ST(3) //{e*x, 0.3333, e, x}
186 fmulp %ST(0), %ST(2) //{0.3333, (e*x)*e, x}
187
188 // e -= ( e - (x/(e*e)) ) * oneThird;
189 fld %ST(1) //{ e, 0.3333, e, x }
190 fmul %ST(0) //{ e*e, 0.3333, e, x}
191 fdivr %ST(3), %ST(0) //{ x/(e*e), 0.3333, e, x }
192 fsubr %ST(2), %ST(0) //{ e - x/(e*e), 0.3333, e, x }
193 fmulp //{ 0.3333*(e - x/(e*e)), e, x }
194 fsubrp %ST(0), %ST(1) //{ e - 0.3333*(e - x/(e*e)), x }
195 fstp %ST(1)
196
197my_cbrtl_exit:
198 #if defined( __i386__ )
199 movl 20(STACKP), %ebx
200 movl 24(STACKP), BASEP
201 #endif
202 ADDP $LOCAL_STACK_SIZE, STACKP
203 ret