1/* 2 * xmm_exp.c 3 * xmmLibm 4 * 5 * Created by Ian Ollmann, Ph.D. on 6/30/05. 6 * Copyright © 2005 Apple Computer, Inc. All rights reserved. 7 * 8 * Constants from original typing by Earl Killian of MIPS on March 23rd, 1992. 9 * Converted from pairs of 32-bit hexadecimal words to C99 notation, by Ian Ollmann 10 * 11 * Algorithm from Peter Tang: 12 * 13 * ACM Transactions on Mathematical Software, Vol 15, 2, June 1989, pp 144-157 14 * ACM Transactions on Mathematical Software, Vol 18, 2, June 1992, pp 211-222 15 * 16 */ 17 18 19#include "xmmLibm_prefix.h" 20 21#include "math.h" 22#include "fenv.h" 23 24 25#pragma mark exp 26 27//Functions implemented here 28static inline double _xexp( double x ) ALWAYS_INLINE; 29 30static const hexdouble S_Lead[32] = 31 { { 0x3FF0000000000000ULL }, 32 { 0x3FF059B0D3158540ULL }, 33 { 0x3FF0B5586CF98900ULL }, 34 { 0x3FF11301D0125B40ULL }, 35 { 0x3FF172B83C7D5140ULL }, 36 { 0x3FF1D4873168B980ULL }, 37 { 0x3FF2387A6E756200ULL }, 38 { 0x3FF29E9DF51FDEC0ULL }, 39 { 0x3FF306FE0A31B700ULL }, 40 { 0x3FF371A7373AA9C0ULL }, 41 { 0x3FF3DEA64C123400ULL }, 42 { 0x3FF44E0860618900ULL }, 43 { 0x3FF4BFDAD5362A00ULL }, 44 { 0x3FF5342B569D4F80ULL }, 45 { 0x3FF5AB07DD485400ULL }, 46 { 0x3FF6247EB03A5580ULL }, 47 { 0x3FF6A09E667F3BC0ULL }, 48 { 0x3FF71F75E8EC5F40ULL }, 49 { 0x3FF7A11473EB0180ULL }, 50 { 0x3FF82589994CCE00ULL }, 51 { 0x3FF8ACE5422AA0C0ULL }, 52 { 0x3FF93737B0CDC5C0ULL }, 53 { 0x3FF9C49182A3F080ULL }, 54 { 0x3FFA5503B23E2540ULL }, 55 { 0x3FFAE89F995AD380ULL }, 56 { 0x3FFB7F76F2FB5E40ULL }, 57 { 0x3FFC199BDD855280ULL }, 58 { 0x3FFCB720DCEF9040ULL }, 59 { 0x3FFD5818DCFBA480ULL }, 60 { 0x3FFDFC97337B9B40ULL }, 61 { 0x3FFEA4AFA2A490C0ULL }, 62 { 0x3FFF50765B6E4540ULL } }; 63 64static const hexdouble S_Trail[32] = 65 { { 0x0000000000000000ULL }, 66 { 0x3D0A1D73E2A475B4ULL }, 67 { 0x3CEEC5317256E308ULL }, 68 { 0x3CF0A4EBBF1AED93ULL }, 69 { 0x3D0D6E6FBE462876ULL }, 70 { 0x3D053C02DC0144C8ULL }, 71 { 0x3D0C3360FD6D8E0BULL }, 72 { 
0x3D009612E8AFAD12ULL }, 73 { 0x3CF52DE8D5A46306ULL }, 74 { 0x3CE54E28AA05E8A9ULL }, 75 { 0x3D011ADA0911F09FULL }, 76 { 0x3D068189B7A04EF8ULL }, 77 { 0x3D038EA1CBD7F621ULL }, 78 { 0x3CBDF0A83C49D86AULL }, 79 { 0x3D04AC64980A8C8FULL }, 80 { 0x3CD2C7C3E81BF4B7ULL }, 81 { 0x3CE921165F626CDDULL }, 82 { 0x3D09EE91B8797785ULL }, 83 { 0x3CDB5F54408FDB37ULL }, 84 { 0x3CF28ACF88AFAB35ULL }, 85 { 0x3CFB5BA7C55A192DULL }, 86 { 0x3D027A280E1F92A0ULL }, 87 { 0x3CF01C7C46B071F3ULL }, 88 { 0x3CFC8B424491CAF8ULL }, 89 { 0x3D06AF439A68BB99ULL }, 90 { 0x3CDBAA9EC206AD4FULL }, 91 { 0x3CFC2220CB12A092ULL }, 92 { 0x3D048A81E5E8F4A5ULL }, 93 { 0x3CDC976816BAD9B8ULL }, 94 { 0x3CFEB968CAC39ED3ULL }, 95 { 0x3CF9858F73A18F5EULL }, 96 { 0x3C99D3E12DD8A18BULL } }; 97 98static const double scalars_data[2] = { 0x1.0000000000000p+54, 0x1.0000000000000p-54 }; 99static const int32_t exponents_data[2] = { 0x7920, 0x86a0 }; 100static const double * const scalars = &scalars_data[1]; 101 102static const xDouble xNaN = { NAN, NAN }; 103static const xDouble xInfinity = { INFINITY, INFINITY }; 104static const xDouble minusOneD = { -1.0, -1.0 }; 105static const DoubleConstant_sd Inv_L = GET_CONSTANT_sd(0x1.71547652B82FEp+5); //46.1662413084468283841 106static const DoubleConstant_sd A0 = GET_CONSTANT_sd(0x1.0000000000000p-1); // 0.5 107static const DoubleConstant_sd L1 = GET_CONSTANT_sd(0x1.62E42FEF00000p-6); //0.0216608493901730980724 108static const DoubleConstant_sd L2 = GET_CONSTANT_sd(-0x1.473DE6AF278EDp-39); //-2.32519284687887401481e-12 109static const DoubleConstant_sd A1 = GET_CONSTANT_sd(0x1.5555555548F7Cp-3); //0.166666666665260865265 110static const DoubleConstant_sd A2 = GET_CONSTANT_sd(0x1.5555555545D4Ep-5); //0.0416666666662260792853 111static const DoubleConstant_sd A3 = GET_CONSTANT_sd(0x1.11115B7AA905Ep-7); //0.00833336798434219580556 112static const DoubleConstant_sd A4 = GET_CONSTANT_sd(0x1.6C1728D739765p-10); //0.00138889490863777190395 113static const DoubleConstant_sd oneD = 
GET_CONSTANT_sd( 1.0 ); 114static const double largeD = 0x1.fffffffffffffp1022; 115static const double tinyD = 0x1.0p-1022; 116 117static inline double _xexp( double _x ) 118{ 119 if( _x != _x || _x == __builtin_inf() ) 120 return _x + _x; 121 122 123 static const double X1 = 0x1.62e42fefa39f0p+9; // 7.09782712893384087e+02 124 static const double X2 = -745.5; 125 static const double c0 = 1.5e-154; 126 static const double half = 0.5; 127 static const double infD = INFINITY; 128 xDouble x = DOUBLE_2_XDOUBLE( _x ); 129 xDouble fabsx = _mm_andnot_pd( minusZeroD, x ); 130 131 xDouble xLTx1 = _mm_cmplt_sdm( x, &X1 ); 132 xDouble xGTx2 = _mm_cmpgt_sdm( x, &X2 ); 133 xDouble xIsSmall = _mm_cmplt_sdm( fabsx, &c0 ); 134 xDouble branchtest = _mm_andnot_pd( xIsSmall, _mm_and_pd( xLTx1, xGTx2 ) ); 135 xDouble result; 136 137 // special case handling 138 if( _mm_isfalse_sd( branchtest)) 139 { 140 xDouble tiny = _mm_load_sd( &tinyD ); 141 xDouble one = _mm_load_sd( &oneD ); 142 xDouble minusInf = _mm_or_pd( minusZeroD, _mm_load_sd( &infD) ); 143 144 //if |x| < 1.5e-154, set inexact, return 1.0, unless -Inf, then go straight to zero 145 result = _mm_andnot_pd( _mm_cmpeq_sd( x, minusInf ), _mm_add_sd( x, one ) ); 146 147 //if x >= X1.d, set inexact and overflow, return +Inf 148 xDouble xGEx1 = _mm_cmpge_sdm( x, &X1 ); 149 xDouble multiplier = _mm_sel_pd( one, _mm_load_sd( &largeD ), xGEx1 ); 150 151 //if x <= -745.5, set inexact and underflow, return 0.0 152 xDouble xLEx2 = _mm_cmple_sdm( x, &X2 ); 153 multiplier = _mm_andnot_pd( xLEx2, multiplier ); 154 multiplier = _mm_or_pd( multiplier, tiny ); 155 156 //scale the result appropriately 157 result = _mm_andnot_pd( minusZeroD, result ); //results of exp are always positive 158 result = _mm_mul_sd( result, multiplier ); 159 result = _mm_mul_sd( result, multiplier ); 160 result = _mm_mul_sd( result, multiplier ); 161 162 return XDOUBLE_2_DOUBLE( result ); 163 } 164 165 xDouble mask = _mm_cmpgt_sdm( x, (const double*) &minusZeroD ); 
//if (x > 0 )... was //if( (x > 1.5e-154) && (x < X1)) 166 xDouble sign = _mm_and_pd( x, minusZeroD ); 167 xDouble dN =_mm_mul_sdm( x, &Inv_L ); 168 dN = _mm_add_sd( dN, _mm_or_pd( sign, _mm_load_sd( &half ) ) ); 169 int N = _mm_cvttsd_si32( dN ); 170 int N2 = N & 0x1F; // N2 = N & 0x1f; // N2 = N mod 32 with N2 in [0..31] even for N < 0 171 dN = _mm_cvtsi32_sd( minusZeroD, N ); //double(N) 172 xDouble R1 = _mm_sub_sd( x, _mm_mul_sdm( dN, &L1 ) ); // R1 = x - N * L1.d; /* Leading part of the reduced arg */ 173 xDouble R2 = _mm_mul_sdm( dN, &L2 ); // Trailing part of the reduced arg 174 xDouble R = _mm_add_sd( R1, R2 ); // Reduced arg 175 xDouble P, Q; 176 int index = _mm_cvtsi128_si32((xSInt32)mask); 177 xDouble scalar = _mm_load_sd( &scalars[ index ] ); 178// Q = R*R*(A[0].d + R*(A[1].d + R*( A[2].d + R*( A[3].d + R*A[4].d)))); 179 Q = _mm_add_sdm( _mm_mul_sdm( R, &A4 ), &A3 ); 180 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A2 ); 181 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A1 ); 182 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A0 ); 183 Q = _mm_mul_sd( _mm_mul_sd( Q, R ), R ); 184 185// P = R1 + ( R2 + Q ); 186 P = _mm_add_sd( _mm_add_sd( Q, R2 ), R1 ); 187 188 R1 = _mm_load_sd( &S_Lead[N2].d ); //R1 = S_Lead[N2].d; 189 R2 = _mm_load_sd( &S_Trail[N2].d ); //R2 = S_Trail[N2].d; 190 191// x = SCALB((R1 + (R2 + P*(R1 + R2)))*m54.d, (N - N2)/32 + 54); 192 // t.d = 0.0; 193 // t.i[0] = ((N-N2) + 0x86a0) << 15; 194 N = N - N2; 195 N += exponents_data[ index + 1]; //N = N - N2 + 0x86a0 196 dN = (__m128d) _mm_cvtsi32_si128( N ); 197 dN = (__m128d) _mm_slli_epi64( (__m128i) dN, 15 + 32 ); //N <<= 15 198 // result = ((R1 + (R2 + P*(R1 + R2)))*m54.d)*t.d; 199 // note, must preserve order of operations here to keep precision 200 R = _mm_add_sd( R1, R2 ); 201 result = _mm_mul_sd( P, R ); 202 result = _mm_add_sd( result, R2 ); 203 result = _mm_add_sd( result, R1 ); 204 result = _mm_mul_sd( result, scalar ); 205 result = _mm_mul_sd( result, dN ); 206 207 return XDOUBLE_2_DOUBLE( result ); 
208} 209 210 211static inline double _xexp2( double x ) ALWAYS_INLINE; 212static inline double _xexp2( double x ) 213{ 214 static const double conversion = 6.9314718055994530942E-1; 215 216 //Early out for NaNs to avoid problems with invalid exceptions in next compare 217 if( x != x ) 218 return x + x; 219 220 //avoid underflow errors 221 if( __builtin_fabs(x) < 0x1.0p-1020 ) 222 x = 0x1.0p-1020; 223 224 return _xexp( x * conversion ); 225} 226 227 228#if ! defined( BUILDING_FOR_CARBONCORE_LEGACY ) 229/* 230double exp ( double x ) 231{ 232 return _xexp( x ); 233} 234 */ 235 236/* 237float expf( float x ) 238{//cheesy fallback on double, that probably fails to get the edge cases right. 239 return (float) _xexp( x ); 240} 241*/ 242 243#pragma mark - 244#pragma mark expm1 245 246static const hexdouble Em1_A[9] = 247 { { 0x3FC5555555555549ULL }, 248 { 0x3FA55555555554B6ULL }, 249 { 0x3F8111111111A9F3ULL }, 250 { 0x3F56C16C16CE14C6ULL }, 251 { 0x3F2A01A01159DD2DULL }, 252 { 0x3EFA019F635825C4ULL }, 253 { 0x3EC71E14BFE3DB59ULL }, 254 { 0x3E928295484734EAULL }, 255 { 0x3E5A2836AA646B96ULL } }; 256 257static inline double _xexpm1( double x ) ALWAYS_INLINE; 258 259static inline double _xexpm1( double _x ) 260{ 261 //Get the NaN and +inf cases out of the way 262 if( EXPECT_FALSE( _x != _x ) || _x == __builtin_inf() ) 263 return _x + _x; 264 265 xDouble x = DOUBLE_2_XDOUBLE( _x ); 266 267 //get old fp env, and set the default one 268 static const DoubleConstant_sd Em1_Tiny = GET_CONSTANT_sd(0x1.0000000000000p-54); //5.55111512312578270212e-17 269 static const DoubleConstant_sd Em1_Pos = GET_CONSTANT_sd(0x1.C4474E1726455p+10); //1809.11414126145723458 270 static const DoubleConstant_sd Em1_Neg = GET_CONSTANT_sd(-0x1.2B708872320E1p+5); //-37.4299477502370407933 271 static const DoubleConstant_sd Em1_T1 = GET_CONSTANT_sd(-0x1.269621134DB93p-2); //-0.287682072451780956879, log(1-1/4) 272 static const DoubleConstant_sd Em1_T2 = GET_CONSTANT_sd(0x1.C8FF7C79A9A22p-3); 
//0.223143551314209764858, log(1+1/4) 273 static const DoubleConstant_sd twoTOn7 = GET_CONSTANT_sd(0x1.0000000000000p-7); // 0.0078125, 2**-7 274 static const DoubleConstant_sd half = GET_CONSTANT_sd( 0.5 ); 275 xDouble fabsx = _mm_andnot_pd( minusZeroD, x ); 276 xDouble xGEem1Neg = _mm_cmpge_sdm( x, &Em1_Neg ); 277 xDouble xLEem1Pos = _mm_cmple_sdm( x, &Em1_Pos ); 278 279 // special case handling 280 if( _mm_isfalse_sd( _mm_and_pd( xLEem1Pos, xGEem1Neg ))) 281 { 282 static const xDouble tiny = { 0x1.0p-1022, 0 }; 283 static const xDouble maxFinite = { 0x1.FFFFFFFFFFFFFp1023, 0 }; 284 static const xDouble mOneD = { -1.0, 0 }; 285 xDouble xLTzero = _mm_cmplt_sdm( x, (double*) &minusZeroD ); 286 xDouble xEQmInf = _mm_cmpeq_sdm( fabsx, (double*) &xInfinity ); 287 xDouble v = _mm_sel_pd( maxFinite, mOneD, xLTzero ); 288 xDouble v2 = _mm_sel_pd( maxFinite, _mm_andnot_pd( xEQmInf, tiny), xLTzero ); 289 x = _mm_add_sd( v, v2 ); 290 291 return XDOUBLE_2_DOUBLE( x ); 292 } 293 294 xDouble xGEem1_t2 = _mm_cmpge_sdm( x, &Em1_T2 ); 295 xDouble xLEem1_t1 = _mm_cmple_sdm( x, &Em1_T1 ); 296 if( _mm_istrue_sd( _mm_or_pd( xGEem1_t2, xLEem1_t1 ) ) ) //if ((x >= Em1_T2.d) || (x <= Em1_T1.d)) 297 { 298 //int N = x*Inv_L.d + (x>0 ? 
0.5 : -0.5); // N = rint(x*Inv_L.d) 299 //int N2 = N & 0x1f; // N2 = N mod 32 with N2 in [0..31] even for N < 0 300 //int M = (N - N2) / 32; 301 xDouble dN = _mm_mul_sdm( x, &Inv_L ); // dN = x * Inv_L 302 xDouble signedHalf = _mm_sel_pd( _mm_load_sd( &half ), x, minusZeroD ); 303 dN = _mm_add_sd( dN, signedHalf ); 304 int N = _mm_cvttsd_si32( dN ); // N = (int) rint( dN ), round to nearest mode used 305 int N2 = N & 0x1f; // N2 = N mod 32 with N2 in [0..31] even for N < 0 306 int M = (N - N2) / 32; 307 dN = _mm_cvtsi32_sd( dN, N ); 308 xDouble R1 = _mm_sub_sd( x, _mm_mul_sdm( dN, &L1 )); //x - N * L1; /* leading part of the reduced arg */ 309 xDouble R2 = _mm_mul_sdm( dN, &L2 ); //N * L2 trailing part of the reduced arg 310 xDouble R = _mm_add_sd( R1, R2 ); // R1 + R2, reduced argument 311 312 //Q = R*R*(A[0].d + R*(A[1].d + R*(A[2].d + R*( A[3].d + R * A[4].d)))); 313 xDouble Q = _mm_add_sdm( _mm_mul_sdm( R, &A4 ), &A3 ); 314 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A2 ); 315 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A1 ); 316 Q = _mm_add_sdm( _mm_mul_sd( Q, R ), &A0 ); 317 Q = _mm_mul_sd( Q, _mm_mul_sd( R, R ) ); 318 319 xDouble P = _mm_add_sd( _mm_add_sd( R2, Q ), R1 ); // P = R1 + ( R2 + Q ); 320 xDouble S = _mm_add_sdm( _mm_load_sd( &S_Lead[N2].d), &S_Trail[N2].d ); //S = S_Lead[N2].d + S_Trail[N2].d; 321 xDouble twoM; 322 323 if( M >= 1024 ) //overflow 324 { 325 x = _mm_set_sd (0x1.0p1023); 326 return XDOUBLE_2_DOUBLE( REQUIRED_ADD_sd( x, x ) ); //Note that simpler C syntax here is optimized away resulting in missing overflow flag. 
327 } 328 else if( M >= 53 ) 329 { 330 twoM = twoToTheM( M ); 331 // x = SCALB ( 1.0, M ) * ( S_Lead[N2].d + ( S * P + ( S_Trail[N2].d - SCALB ( 1.0, -M ) ) ) ); 332 R = _mm_sub_sdm( twoToTheM( -M ), &S_Trail[N2].d ); //SCALB ( 1.0, -M ) - S_Trail[N2].d 333 R = _mm_sub_sd( _mm_mul_sd( S, P ), R ); // S*P - (SCALB ( 1.0, -M ) - S_Trail[N2].d) 334 R = _mm_add_sdm( R, &S_Lead[N2].d ); // (S*P - (SCALB ( 1.0, -M ) - S_Trail[N2].d)) + S_Lead[N2] 335 x = _mm_mul_sd( R, twoM ); 336 } 337 else if( M <= -8 ) 338 { 339 if( M >= -1023 ) 340 twoM = twoToTheM( M ); 341 else 342 { 343 double zzz = scalbn( 1.0, M ); 344 twoM = DOUBLE_2_XDOUBLE( zzz ); 345 } 346 347 // x = SCALB ( 1.0, M ) * ( S_Lead[N2].d + ( S * P + S_Trail[N2].d ) ) - 1.0; 348 R = _mm_add_sdm( _mm_mul_sd( S, P ), &S_Trail[N2].d ); //S * P + S_Trail[N2].d 349 R = _mm_add_sdm( R, &S_Lead[N2].d); //S * P + S_Trail[N2].d + S_Lead[N2].d 350 R = _mm_mul_sd( R, twoM ); 351 x = _mm_sub_sdm( R, &oneD ); 352 } 353 else 354 { 355 twoM = twoToTheM( M ); 356 // x = SCALB ( 1.0, M ) * ( ( S_Lead[N2].d - SCALB ( 1.0, -M ) ) + ( S_Lead[N2].d * P + S_Trail[N2].d * ( 1.0 + P ) ) ); 357 R = _mm_add_sdm( P, &oneD ); 358 R = _mm_mul_sdm( R, &S_Trail[N2].d ); 359 R = _mm_add_sd( R, _mm_mul_sdm( P, &S_Lead[N2].d ) ); //( S_Lead[N2].d * P + S_Trail[N2].d * ( 1.0 + P ) ) 360 R = _mm_sub_sd( R, _mm_sub_sdm( twoToTheM( -M ), &S_Lead[N2].d ) );//( S_Lead[N2].d * P + S_Trail[N2].d * ( 1.0 + P ) ) - (SCALB ( 1.0, -M ) - S_Lead[N2].d) 361 x = _mm_mul_sd( R, twoM ); 362 } 363 364 // Merge new exceptions into old environment 365 return XDOUBLE_2_DOUBLE( x ); 366 } 367 368 xDouble xIsTiny = _mm_cmplt_sdm( fabsx, &Em1_Tiny ); 369 if( _mm_istrue_sd( xIsTiny ) ) 370 { 371 static const double scale2 = 0x1.0000000000001p-1022; 372 static const double half = 0.5; 373 374 xDouble xEQzero = _mm_cmpeq_sd( x, minusZeroD ); 375 xDouble oneHalf = _mm_load_sd( &half ); 376 xDouble isDenorm = _mm_cmplt_sdm( fabsx, &tinyD ); 377 378 //scale out of denormal 
range (if not already) 379 xDouble result = (xDouble) _mm_add_epi64( (xSInt64) x, (xSInt64) oneHalf ); 380 oneHalf = _mm_and_pd( oneHalf, isDenorm ); 381 oneHalf = _mm_sel_pd( oneHalf, x, minusZeroD); 382 result = _mm_sub_sd( result, oneHalf ); 383 384 //scale back (should set underflow) 385 result = _mm_mul_sdm( result, &scale2 ); 386 387 result = _mm_sel_pd( result, x, xEQzero ); 388 389 return XDOUBLE_2_DOUBLE( x ); 390 } 391 else 392 { 393 xDouble U = _mm_cvtss_sd( x, _mm_cvtsd_ss( (xFloat)x, x ) ); //U = ( double ) ( ( float ) x ); 394 xDouble V = _mm_sub_sd( x, U ); //V = x - U 395 xDouble Y = _mm_mul_sdm( _mm_mul_sd( U, U ), &A0 ); //Y = U * U * 0.5 396 xDouble Z = _mm_mul_sdm( _mm_mul_sd(V, _mm_add_sd( x, U)), &A0 ); //Z = V * ( x + U ) * 0.5; /* Y + Z = x*x/2 to 24 extra bits */ 397 xDouble Q = _mm_add_sdm( _mm_mul_sdm( x, &Em1_A[8] ), &Em1_A[7] ); //Q = Em1_A[7].d + x * Em1_A[8].d; 398 //for ( i = 0; i < 7; i++ ) 399 // Q = Em1_A[6-i].d + x * Q; 400 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[6] ); 401 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[5] ); 402 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[4] ); 403 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[3] ); 404 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[2] ); 405 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[1] ); 406 Q = _mm_add_sdm( _mm_mul_sd( Q, x ), &Em1_A[0] ); 407 408 Q = _mm_mul_sd( _mm_mul_sd( _mm_mul_sd( Q, x ), x), x ); //Q = ( x * ( x * ( x * Q ) ) ); 409 410 //if ( Y >= twoTOn7.d ) 411 // x = ( U + Y ) + ( Q + ( V + Z ) ); /* an exact operation */ 412 //else 413 // x += ( Y + ( Q + Z ) ); 414 415 //zero V and U if Y >= twoTOn7, otherwise zero x 416 xDouble test = _mm_cmpge_sdm( Y, &twoTOn7 ); 417 U = _mm_and_pd( U, test ); 418 V = _mm_and_pd( V, test ); 419 x = _mm_andnot_pd( test, x ); 420 421 V = _mm_add_sd( V, Z ); 422 U = _mm_add_sd( U, Y ); 423 Q = _mm_add_sd( Q, V ); 424 U = _mm_add_sd( U, Q ); 425 x = _mm_add_sd( x, U ); 426 427 // Merge new exceptions into old environment 428 return 
XDOUBLE_2_DOUBLE( x ); 429 } 430 431} 432 433/* 434double expm1( double x ) 435{ 436 return _xexpm1( x ); 437} 438*/ 439 440/* 441float expm1f( float x ) 442{//cheesy fallback on double, that probably fails to get the edge cases right. 443 return (float) _xexpm1( x ); 444} 445*/ 446 447#pragma mark - 448#pragma mark exp2 449 450/* 451float exp2f( float x ) 452{ 453 return (float) _xexp2( (double) x ); 454} 455*/ 456 457#else /*carbon core legacy */ 458 459/* 460double exp2( double x ) 461{ 462 return _xexp2( x ); 463} 464 */ 465 466 467#endif /*CARBONCORE LEGACY */ 468