this repo has no description
at fixPythonPipStalling 203 lines 11 kB view raw
1/* 2 * e_cbrtl.s 3 * LibmV5 4 * 5 * Created by Ian Ollmann on 8/27/05. 6 * Copyright 2005 Apple Computer. All rights reserved. 7 * 8 */ 9 10#define ENTRY(name) \ 11 .globl _##name; \ 12 .align 4; \ 13 _##name##: 14 15#if defined( __LP64__ ) 16 #define LOCAL_STACK_SIZE 20 17#else 18 #define LOCAL_STACK_SIZE 28 19#endif 20 21#include "abi.h" 22 23.const 24.align 4 25onethird: .long 0xaaaaaaab, 0xaaaaaaaa, 0x00003ffd, 0x00000000 //(long double) 1.0L/3.0L 26 27.align 3 28correction: .double 0.62996052494743658238361, 0.79370052598409973737585, 1.0, 1.2599210498948731647672, 1.5874010519681994747517 29coeffs: .double 1.7830491344381518, -1.5730724799776633, 1.2536000054780357, -0.60460822457398278, 0.15834924310704463, -0.017322841453552703 30 31.align 2 32infinity: .single +Infinity 33 34// Stack: 35// old ebp 36// old ebx 37 38 39.text 40 41#if defined( __x86_64__ ) 42 #define RELATIVE_ADDR( _a) (_a)( %rip ) 43#elif defined( __i386__ ) 44 #define RELATIVE_ADDR( _a) (_a)-rel_addr( BX_P ) 45#else 46 #error arch not supported 47#endif 48 49 50#if defined( __i386__ ) 51 //a short routine to get the local address 52 local_addr: 53 MOVP (STACKP), BX_P 54 ret 55#endif 56 57 58ENTRY( cbrtl ) 59 #if defined( __LP64__ ) 60 SUBP $LOCAL_STACK_SIZE, STACKP 61 movl $0x55555556, 16(STACKP) //write out fixed point 1/3, align stack to 16 bytes 62 movq $0, 8(STACKP) 63 movq $0, 0(STACKP) 64 leaq (correction + 16)(%rip), %r8 65 #else 66 pushl BASEP //push ebp 67 movl STACKP, BASEP //copy stack pointer to ebp 68 pushl %ebx //push ebx 69 pushl $0x55555556 //write out fixed point 1/3, align stack to 16 bytes 70 pushl $0 71 pushl $0 72 pushl $0 73 pushl $0 74 CALLP local_addr //load the address of rel_addr into %ebx 75rel_addr: 76 #endif 77 78 //load our argument 79 fldt FIRST_ARG_OFFSET(STACKP) //{x} 80 81 //if( fabs(x) == INF || fabs(x) is NaN ) 82 // return x + x 83 fld %ST(0) //{x, x} 84 fabs //{|x|, x} 85 flds RELATIVE_ADDR(infinity) //{inf, |x|, x} 86 fucomip %ST(1), %ST //{|x|, x} 87 jne test_zero 88 fstp %ST(0) //{x} 89 fadd %ST(0) //{x + x} 90 jmp my_cbrtl_exit 91 92test_zero: 93 //if( x == 0.0 ) 94 // return x; 95 fldz //{0, |x|, x} 96 fucomip %ST(1), %ST //{|x|, x} 97 jne main_part //{|x|, x} 98 fstp %ST(0) //{x} 99 jmp my_cbrtl_exit //{x} 100 101main_part: //{|x|, x} 102 //extract significand and exponent parts 103 fxtract //{ |significand|, exponent, x } 104 105 //write out the exponent as an integer 106 fxch //{ exponent, |significand|, x } 107 fistpl (STACKP) //{ |significand|, x } 108 109 //apply polynomial to significand, store in s, figure out what the new exponent is 110 fld %ST(0) //{ s, |significand|, x } 111 fmull RELATIVE_ADDR(coeffs+5*8) //{ s*c5, |significand|, x } 112 movl (STACKP), %eax // load the exponent 113 faddl RELATIVE_ADDR(coeffs+4*8) //{ s*c5+c4, |significand|, x } 114 imull 16(STACKP) // divide the exponent by 3 (multiply by 0x55555556 and take high 32 bits, place in %edx) 115 movl (STACKP), %eax // get exponent >> 1 116 sarl $31, %eax 117 fmul %ST(1) //{ (c4+c5*s)s, |significand|, x } 118 faddl RELATIVE_ADDR(coeffs+3*8) //{ c3+(c4+c5*s)s, |significand|, x } 119 subl %eax, %edx // subtract the sign of the exponent (makes our approximation work for neg numbers) 120 movl %edx, %eax // copy exponent/3 to eax 121 fmul %ST(1) //{ (c3+(c4+c5*s)s)s, |significand|, x } 122 faddl RELATIVE_ADDR(coeffs+2*8) //{ c2+(c3+(c4+c5*s)s)s, |significand|, x } 123 imul $3, %edx // exponent/3 *= 3 124 fmul %ST(1) //{ (c2+(c3+(c4+c5*s)s)s)s, |significand|, x } 125 faddl RELATIVE_ADDR(coeffs+1*8) //{ c1+(c2+(c3+(c4+c5*s)s)s)s, |significand|, x } 126 subl (STACKP), %edx // remainder = (exponent/3)*3 - original exponent (edx) 127 fmul %ST(1) //{ (c1+(c2+(c3+(c4+c5*s)s)s)s)s, |significand|, x } 128 faddl RELATIVE_ADDR(coeffs+0*8) //{ c0+(c1+(c2+(c3+(c4+c5*s)s)s)s)s, |significand|, x } 129 neg %eax // exponent = -exponent 130 shld $16, %eax, %eax // exponent <<= 16 131 andl $0xFFFF0000, %eax // mask off the other mantissa bits 132 addl $0x3FFF8000, %eax // bias the exponent, set the top mantissa bit 133 movl %eax, 6(STACKP) // write out exponent/3 134 135 136 //correct for exponent remainder to get our estimate 137#if defined( __LP64__ ) 138 movslq %edx, %rdx //sign extend edx 139 fmull ( %r8, %rdx, 8 ) //{ e, |significand|, x} 140#else 141 fmull correction + 16 - rel_addr( %ebx, %edx, 8 ) //{ e, |significand|, x} 142#endif 143 144 //fix up the sign of the estimate 145 fld %ST(0) //{ e, e, |significand|, x} 146 fchs //{-e, e, |significand|, x} 147 fldz //{0, -e, e, |significand|, x} 148 fucomip %ST(4), %ST //{-e, e, |significand|, x} if( 0 < x ) 149 fcmovb %ST(1), %ST(0) //{+-e, e, |significand|, x} 150 fstp %ST(1) //{+-e, |significand|, x} 151 fstp %ST(1) //{+-e, x} 152 153 //apply the appropriate exponent 154 fldt (STACKP) //{ new exponent, +-e, x } 155 fmulp //{ +-e with correct exponent, x } 156 157 // e += oneThird * e * (1.0L - x * e * e * e); 158 fldt RELATIVE_ADDR( onethird) //{0.3333, e, x} 159 fld %ST(1) //{ e, 0.3333, e, x } 160 fmul %ST(3) //{ x*e, 0.3333, e, x} 161 fmul %ST(2) //{ e*x*e, 0.3333, e, x} 162 fmul %ST(2) //{ e*e*x*e, 0.3333, e, x} 163 fld1 //{1.0, e*e*x*e, 0.3333, e, x} 164 fsubp //{1.0 - e*e*x*e, 0.3333, e, x} 165 fld %ST(2) //{e, 1.0 - e*e*x*e, 0.3333, e, x} 166 fmul %ST(2) //{0.3333*e, 1.0 - e*e*x*e, 0.3333, e, x} 167 fmulp //{0.3333*e*(1.0 - e*e*x*e), 0.3333, e, x} 168 faddp %ST(0), %ST(2) //{0.3333, e+0.3333*e*(1.0 - e*e*x*e), x } 169 170 // e += oneThird * e * (1.0L - x * e * e * e); 171 fld %ST(1) //{ e, 0.3333, e, x } 172 fmul %ST(3) //{ x*e, 0.3333, e, x} 173 fmul %ST(2) //{ e*x*e, 0.3333, e, x} 174 fmul %ST(2) //{ e*e*x*e, 0.3333, e, x} 175 fld1 //{1.0, e*e*x*e, 0.3333, e, x} 176 fsubp //{1.0 - e*e*x*e, 0.3333, e, x} 177 fld %ST(2) //{e, 1.0 - e*e*x*e, 0.3333, e, x} 178 fmul %ST(2) //{0.3333*e, 1.0 - e*e*x*e, 0.3333, e, x} 179 fmulp //{0.3333*e*(1.0 - e*e*x*e), 0.3333, e, x} 180 faddp %ST(0), %ST(2) //{0.3333, e+0.3333*e*(1.0 - e*e*x*e), x } 181 182 183 // e = (e*x)*e; 184 fld %ST(1) //{e, 0.3333, e, x} 185 fmul %ST(3) //{e*x, 0.3333, e, x} 186 fmulp %ST(0), %ST(2) //{0.3333, (e*x)*e, x} 187 188 // e -= ( e - (x/(e*e)) ) * oneThird; 189 fld %ST(1) //{ e, 0.3333, e, x } 190 fmul %ST(0) //{ e*e, 0.3333, e, x} 191 fdivr %ST(3), %ST(0) //{ x/(e*e), 0.3333, e, x } 192 fsubr %ST(2), %ST(0) //{ e - x/(e*e), 0.3333, e, x } 193 fmulp //{ 0.3333*(e - x/(e*e)), e, x } 194 fsubrp %ST(0), %ST(1) //{ e - 0.3333*(e - x/(e*e)), x } 195 fstp %ST(1) 196 197my_cbrtl_exit: 198 #if defined( __i386__ ) 199 movl 20(STACKP), %ebx 200 movl 24(STACKP), BASEP 201 #endif 202 ADDP $LOCAL_STACK_SIZE, STACKP 203 ret