this repo has no description
at fixPythonPipStalling 581 lines 25 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22/******************************************************************************* 23* * 24* File asinacos.c, * 25* Implementation of asin and acos for the PowerPC. * 26* * 27* Copyright � 1991-2001 Apple Computer, Inc. All rights reserved. * 28* * 29* Written and ported by Robert A. Murley (ram) for Mac OS X. * 30* Modified by A. Sazegari, September 2001. * 31* * 32* A MathLib v4 file. * 33* * 34* Based on the trigonometric functions from IBM/Taligent. * 35* * 36* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. * 37* replaced DblInHex typedef with hexdouble. * 38* September 07 2001: added #ifdef __ppc__. * 39* September 09 2001: added more comments. * 40* September 10 2001: added macros to detect PowerPC and correct compiler. * 41* September 21 2001: added <CoreServices/CoreServices.h> to get to <fp.h> * 42* and <fenv.h>. * 43* September 24 2001: used standard symbols for environment flags. * 44* September 25 2001: added FESETENVD(0.0) at start of acos. * 45* October 08 2001: removed <CoreServices/CoreServices.h>. * 46* changed compiler errors to warnings. * 47* November 06 2001: commented out warning about Intel architectures. * 48* * 49* W A R N I N G: * 50* These routines require a 64-bit double precision IEEE-754 model. * 51* They are written for PowerPC only and are expecting the compiler * 52* to generate the correct sequence of multiply-add fused instructions. * 53* * 54* These routines are not intended for 32-bit Intel architectures. * 55* * 56* A version of gcc higher than 932 is required. * 57* * 58* GCC compiler options: * 59* optimization level 3 (-O3) * 60* -fschedule-insns -finline-functions -funroll-all-loops * 61* * 62*******************************************************************************/ 63 64#include "fp_private.h" 65#include "fenv_private.h" 66#include "math.h" 67 68extern const unsigned short SqrtTable[]; 69 70// Floating-point constants 71static const double kPiBy2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54 72static const hexdouble kPiu = HEXDOUBLE(0x400921fb, 0x54442d18); 73 74#define INVERSE_TRIGONOMETRIC_NAN "34" 75 76/**************************************************************************** 77 78FUNCTION: double asin (double x) 79 80DESCRIPTION: returns the inverse sine of its argument in the range of 81 -pi/2 to pi/2 (radians). 82 83CALLS: __FABS 84 85EXCEPTIONS raised: INEXACT, UNDERFLOW/INEXACT or INVALID. 86 87ACCURACY: Error is less than an ulp and usually less than half an ulp. 88 Caller's rounding direction is honored. 89 90****************************************************************************/ 91static struct kBlock { 92 const double k2ToM26 /* = 1.490116119384765625e-8 */ ; // 0x1.0p-26 */ ; 93 const double kPiBy2 /* = 1.570796326794896619231322 */ ; // 0x1.921fb54442d18p0 94 const double kPiBy2Tail /* = 6.1232339957367660e-17 */ ; // 0x1.1a62633145c07p-54 95 const double kMinNormal /* = 2.2250738585072014e-308 */ ; // 0x1.0p-1022 96 97 const double a14 /* = 0.03085091303188211304259 */ ; 98 const double a13 /* =-0.02425125216179617744614 */ ; 99 const double a12 /* = 0.02273334623573271023373 */ ; 100 const double a11 /* = 0.0002983797813053653120360 */ ; 101 const double a10 /* = 0.008819738782817955152102 */ ; 102 const double a9 /* = 0.008130738583790024803650 */ ; 103 const double a8 /* = 0.009793486386035222577675 */ ; 104 const double a7 /* = 0.01154901189590083099584 */ ; 105 const double a6 /* = 0.01396501522140471126730 */ ; 106 const double a5 /* = 0.01735275722383019335276 */ ; 107 const double a4 /* = 0.02237215928757573539490 */ ; 108 const double a3 /* = 0.03038194444121688698408 */ ; 109 const double a2 /* = 0.04464285714288482921087 */ ; 110 const double a1 /* = 0.07499999999999990640378 */ ; 111 const double a0 /* = 0.1666666666666666667191 */ ; 112 const double aa11 /* = 0.0002983797813053653120360 */ ; 113 const double aa10 /* = 0.008819738782817955152102 */ ; 114 const double aa9 /* = 0.008130738583790024803650 */ ; 115 const double aa8 /* = 0.009793486386035222577675 */ ; 116 const double aa7 /* = 0.01154901189590083099584 */ ; 117 const double aa6 /* = 0.01396501522140471126730 */ ; 118 const double aa5 /* = 0.01735275722383019335276 */ ; 119 const double aa4 /* = 0.02237215928757573539490 */ ; 120 const double aa3 /* = 0.03038194444121688698408 */ ; 121 const double aa2 /* = 0.04464285714288482921087 */ ; 122 const double aa1 /* = 0.07499999999999990640378 */ ; 123 const double aa0 /* = 0.1666666666666666667191 */ ; 124} kBlock = 125{ 126/* static const double k2ToM26 = */ 0x1.0p-26, // 1.490116119384765625e-8 127/* static const double kPiBy2 = */ 1.570796326794896619231322, // 0x1.921fb54442d18p0 128/* static const double kPiBy2Tail = */ 6.1232339957367660e-17, // 0x1.1a62633145c07p-54 129/* static const double kMinNormal = */ 0x1.0p-1022, // 2.2250738585072014e-308 130 131/* const static double a14 = */ 0.03085091303188211304259, 132/* const static double a13 = */-0.02425125216179617744614, 133/* const static double a12 = */ 0.02273334623573271023373, 134/* const static double a11 = */ 0.0002983797813053653120360, 135/* const static double a10 = */ 0.008819738782817955152102, 136/* const static double a9 = */ 0.008130738583790024803650, 137/* const static double a8 = */ 0.009793486386035222577675, 138/* const static double a7 = */ 0.01154901189590083099584, 139/* const static double a6 = */ 0.01396501522140471126730, 140/* const static double a5 = */ 0.01735275722383019335276, 141/* const static double a4 = */ 0.02237215928757573539490, 142/* const static double a3 = */ 0.03038194444121688698408, 143/* const static double a2 = */ 0.04464285714288482921087, 144/* const static double a1 = */ 0.07499999999999990640378, 145/* const static double a0 = */ 0.1666666666666666667191, 146/* const static double aa11 = */ 0.0002983797813053653120360, 147/* const static double aa10 = */ 0.008819738782817955152102, 148/* const static double aa9 = */ 0.008130738583790024803650, 149/* const static double aa8 = */ 0.009793486386035222577675, 150/* const static double aa7 = */ 0.01154901189590083099584, 151/* const static double aa6 = */ 0.01396501522140471126730, 152/* const static double aa5 = */ 0.01735275722383019335276, 153/* const static double aa4 = */ 0.02237215928757573539490, 154/* const static double aa3 = */ 0.03038194444121688698408, 155/* const static double aa2 = */ 0.04464285714288482921087, 156/* const static double aa1 = */ 0.07499999999999990640378, 157/* const static double aa0 = */ 0.1666666666666666667191 158}; 159 160double asin (double x) 161{ 162 hexdouble cD, yD ,gD , fpenv; 163 double absx, x2, x3, x4, temp1, temp2, u, v; 164 double g, y, d, y2, e; 165 register int index; 166 register uint32_t ghead, yhead, rnddir; 167 168 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_yD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail; 169 register struct kBlock *pK; 170 171 pK = &kBlock; 172 173 FPR_z = 0.0; FPR_half = 0.5; 174 FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal; 175 FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0; 176 177 if (unlikely(x != x)) // NaN argument 178 return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); 179 180 absx = __FABS( x ); 181 182 FEGETENVD ( FPR_env ); // save env, set default 183 __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads 184 FPR_cD = __FNMSUB( FPR_half, absx, FPR_half ); 185 FESETENVD ( FPR_z ); 186 187 x2 = __FMUL( x, x ); // under/overflows are masked 188 cD.d = FPR_cD; 189 190 if ( absx <= FPR_half ) 191 { // |x| <= 0.5 192 if (unlikely( absx < FPR_k2ToM26 )) 193 { // |x| < 2^(-26) 194 if ( absx == FPR_z ) 195 { // x == 0.0 case is exact 196 FESETENVD ( FPR_env ); 197 return ( x ); 198 } 199 200 fpenv.d = FPR_env; 201 __NOOP; 202 __NOOP; 203 __NOOP; 204 rnddir = fpenv.i.lo & FE_ALL_RND; // caller's rounding direction 205 FPR_yD = x; 206 207 // Rounding away from zero: return NextDouble(x,x+x) 208 if (((rnddir == FE_DOWNWARD) && (x < FPR_z)) || ((rnddir == FE_UPWARD) && (x > FPR_z))) 209 { 210 yD.d = FPR_yD; 211 __NOOP; 212 __NOOP; 213 __NOOP; 214 yD.i.lo += 1u; 215 if (yD.i.lo == 0u) 216 yD.i.hi += 1u; 217 __NOOP; 218 __NOOP; 219 __NOOP; 220 FPR_yD = yD.d; 221 absx = __FABS( FPR_yD ); 222 } 223 224 FESETENVD ( FPR_env ); // restore caller environment 225 __PROG_INEXACT( FPR_kPiBy2 ); 226 227 // Set UNDERFLOW if result is subnormal 228 if (absx < FPR_kMinNormal) // subnormal case 229 __PROG_UF_INEXACT( FPR_kMinNormal ); // set UNDERFLOW 230 231 return ( FPR_yD ); // return small result 232 } // [|x| < 2^(-26)] 233 234 { 235 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; 236 register double r10, r11, r12, r13, r14; 237 238 r14 = pK->a14; r13 = pK->a13; 239 temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 ); 240 241 x3 = __FMUL( x, x2 ); __NOOP; 242 r11 = pK->a11; r12 = pK->a12; 243 244 r9 = pK->a9; r10 = pK->a10; 245 temp1 = __FMADD( temp1, x4, r11 ); __NOOP; 246 247 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 ); 248 r7 = pK->a7; r8 = pK->a8; 249 250 r5 = pK->a5; r6 = pK->a6; 251 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); 252 253 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); 254 r3 = pK->a3; r4 = pK->a4; 255 256 r1 = pK->a1; r2 = pK->a2; 257 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); 258 259 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); 260 r0 = pK->a0; __NOOP; 261 262 temp2 = __FMADD( temp2, x2, temp1 ); 263 temp1 = __FMADD( x2, temp2, r0 ); 264 } 265 266 FESETENVD ( FPR_env ); // restore caller's mode 267 __PROG_INEXACT( FPR_kPiBy2 ); 268 269 return (__FMADD( x3, temp1, x )); 270 } // [|x| <= 0.5] 271 272/**************************************************************************** 273 For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate 274 the power series resulting from the identity: 275 ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). 276 The square root is evaluated in-line and concurrently with the 277 evaluation of the ArcSin. 278 ***************************************************************************/ 279 280 if (absx < FPR_one) 281 { // |x| < 1.0 282 register double FPR_gD, FPR_two; 283 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; 284 register double r10, r11, r12, r13, r14; 285 register uint32_t GPR_p, GPR_q, GPR_r, GPR_s, GPR_t; 286 287 x2 = FPR_cD; /* fmr */ x4 = __FMUL( FPR_cD, FPR_cD); // Was x2*x2; 288 GPR_t = cD.i.hi; r0 = pK->aa0; 289 290 GPR_s = GPR_t + 0x3ff00000u; __NOOP; 291 r14 = pK->a14; r13 = pK->a13; 292 293 r12 = pK->a12; r11 = pK->a11; 294 gD.i.lo = 0u; 295 296 index = (GPR_t >> 13) & 0xffu; 297 GPR_p = SqrtTable[index]; // 3 instrs issue 298 299 temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 ); 300 r10 = pK->a10; r9 = pK->a9; 301 302 GPR_r = ((0xff00u & GPR_p) << 4); 303 ghead = (GPR_s >> 1) & 0x7ff00000u; 304 gD.i.hi = ghead + GPR_r; 305 306 r8 = pK->a8; r7 = pK->aa7; 307 yhead = 0x7fc00000u - ghead; 308 309 yD.i.lo = 0u; GPR_q = ((0xffu & GPR_p) << 12); 310 yD.i.hi = yhead + GPR_q; 311 312 r6 = pK->aa6; r5 = pK->aa5; 313 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 ); 314 315 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); 316 r4 = pK->aa4; r3 = pK->aa3; 317 318 r2 = pK->aa2; r1 = pK->aa1; 319 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); 320 321 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); 322 FPR_two = 2.0; 323 324 FPR_gD = gD.d; y = yD.d; 325 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); 326 327 d = __FNMSUB( FPR_gD, FPR_gD, FPR_cD ); FPR_kPiBy2Tail = pK->kPiBy2Tail; 328 g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g 329 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD ); 330 y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 ); // 16-bit y 331 g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g 332 333 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD ); 334 y = __FMADD( e, y2, y ); temp1 = __FMADD( x2, temp1, r0 ); // 32-bit y 335 g = __FMADD( y, d, g); // 64-bit Sqrt 336 d = __FNMSUB( g, g, FPR_cD ); u = __FNMSUB( FPR_two, g, FPR_kPiBy2 ); // pi/2 - 2.0*Sqrt (head) 337 d = __FMUL( d, y ); v = __FNMSUB( FPR_two, g, (FPR_kPiBy2 - u) );// tail of 0.5*Sqrt (32 bits) 338 g = __FMUL( g, x2 ); d = __FNMSUB( FPR_half, FPR_kPiBy2Tail, d ); 339 340 temp2 = __FNMSUB( FPR_half, v, d ); 341 temp2 = __FMADD( g, temp1, temp2 ); 342 343 FESETENVD ( FPR_env ); // restore caller's mode 344 __PROG_INEXACT( FPR_kPiBy2 ); 345 346 if (x > FPR_z) 347 { // take care of sign of result 348 return __FNMSUB( FPR_two, temp2, u ); 349 } 350 else 351 return __FMSUB( FPR_two, temp2, u ); 352 } // [|x| < 1.0] 353 354 355 // If |x| == 1.0, return properly signed pi/2. 356 if (absx == FPR_one) 357 { 358 FPR_kPiBy2Tail = pK->kPiBy2Tail; 359 360 FESETENVD (FPR_env); // restore caller's mode 361 if (x > FPR_z) 362 return __FMADD( x, FPR_kPiBy2Tail, FPR_kPiBy2 ); 363 else 364 return __FMSUB( x, FPR_kPiBy2Tail, FPR_kPiBy2 ); 365 } 366 367 // |x| > 1.0 signals INVALID and returns a NaN 368 fpenv.d = FPR_env; 369 __NOOP; 370 __NOOP; 371 __NOOP; 372 fpenv.i.lo |= SET_INVALID; 373 FESETENVD_GRP (fpenv.d); 374 return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); 375} // ArcSin(x) 376 377/**************************************************************************** 378 379FUNCTION: double ArcCos(double x) 380 381DESCRIPTION: returns the inverse cosine of its argument in the range of 382 0 to pi (radians). 383 384CALLS: __FABS 385 386EXCEPTIONS raised: INEXACT or INVALID. 387 388ACCURACY: Error is less than an ulp and usually less than half an ulp. 389 Caller's rounding direction is honored. 390 391****************************************************************************/ 392 393double acos(double x) 394{ 395 hexdouble cD, yD, gD, fpenv; 396 double absx, x2, x3, x4, temp1, temp2, s, u, v, w; 397 double g, y, d, y2, e; 398 int index; 399 uint32_t ghead, yhead; 400 401 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail; 402 register struct kBlock *pK; 403 404 pK = &kBlock; 405 406 FPR_z = 0.0; FPR_half = 0.5; 407 FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal; 408 FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0; 409 410 if (unlikely(x != x)) // NaN 411 return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) ); 412 413 absx = __FABS(x); 414 415 FEGETENVD ( FPR_env ); // save env, set default 416 __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads 417 FPR_cD = __FNMSUB( FPR_half, absx, FPR_half ); 418 FESETENVD ( FPR_z ); 419 420 x2 = __FMUL( x, x ); // under/overflows are masked 421 cD.d = FPR_cD; 422 423 if ( absx <= FPR_half ) 424 { // |x| <= 0.5 425 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; 426 register double r10, r11, r12, r13, r14; 427 428 r14 = pK->a14; r13 = pK->a13; 429 temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 ); 430 431 x3 = __FMUL( x, x2 ); u = FPR_kPiBy2 - x; // pi/2 - x (high-order term) 432 r11 = pK->a11; r12 = pK->a12; 433 434 r9 = pK->a9; r10 = pK->a10; 435 temp1 = __FMADD( temp1, x4, r11 ); __NOOP; 436 437 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 ); 438 r7 = pK->a7; r8 = pK->a8; 439 440 r5 = pK->a5; r6 = pK->a6; 441 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); 442 443 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); 444 r3 = pK->a3; r4 = pK->a4; 445 446 r1 = pK->a1; r2 = pK->a2; 447 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); 448 449 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); 450 r0 = pK->a0; FPR_kPiBy2Tail = pK->kPiBy2Tail; 451 452 // finish up ArcCos(x) = pi/2 - ArcSin(x) 453 454 temp1 = __FMADD( x2, temp2, temp1 ); 455 v = FPR_kPiBy2 - u - x; // pi/2 - x (tail) 456 temp1 = __FMADD( x2, temp1, pK->a0 ); 457 w = __FNMSUB( x3, temp1, (v + FPR_kPiBy2Tail) );// combine low order terms 458 459 FESETENVD ( FPR_env ); // restore caller's mode 460 __PROG_INEXACT( FPR_kPiBy2 ); 461 462 return ( u + w ); 463 } // [|x| <= 0.5] 464 465/**************************************************************************** 466 For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate 467 the power series resulting from the identity: 468 ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))). 469 The square root is evaluated in-line and concurrently with the 470 evaluation of the ArcSin. 471 ****************************************************************************/ 472 if ( absx < FPR_one ) 473 { // 0.5 < |x| < 1.0 474 register double FPR_gD, FPR_two; 475 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9; 476 register double r10, r11, r12, r13, r14; 477 register uint32_t GPR_p, GPR_q, GPR_r, GPR_s, GPR_t; 478 479 x2 = FPR_cD; /* fmr */ x4 = __FMUL( x2, x2); 480 GPR_t = cD.i.hi; r0 = pK->aa0; 481 482 GPR_s = GPR_t + 0x3ff00000u; __NOOP; 483 r14 = pK->a14; r13 = pK->a13; 484 485 r12 = pK->a12; r11 = pK->aa11; 486 gD.i.lo = 0u; 487 488 index = (GPR_t >> 13) & 0xffu; 489 GPR_p = SqrtTable[index]; // 3 instrs issue 490 491 temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 ); 492 r10 = pK->aa10; r9 = pK->aa9; 493 494 GPR_r = ((0xff00u & GPR_p) << 4); 495 ghead = (GPR_s >> 1) & 0x7ff00000u; 496 gD.i.hi = ghead + GPR_r; 497 498 r8 = pK->aa8; r7 = pK->aa7; 499 yhead = 0x7fc00000u - ghead; 500 501 yD.i.lo = 0u; GPR_q = ((0xffu & GPR_p) << 12); 502 yD.i.hi = yhead + GPR_q; 503 504 r6 = pK->aa6; r5 = pK->aa5; 505 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 ); 506 507 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 ); 508 r4 = pK->aa4; r3 = pK->aa3; 509 510 r2 = pK->aa2; r1 = pK->aa1; 511 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 ); 512 513 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 ); 514 FPR_two = 2.0; 515 516 FPR_gD = gD.d; y = yD.d; 517 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 ); 518 519 d = __FNMSUB( FPR_gD, FPR_gD, x2 ); 520 g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g 521 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 ); 522 y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 );// 16-bit y 523 g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g 524 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 ); 525 y = __FMADD( e, y2, y ); // 32-bit y 526 g = __FMADD( y, d, g); // 64-bit Sqrt 527 d = __FNMSUB( g, g, x2 ); // tail of 0.5*Sqrt (32 bits) 528 y2 = g + g; // 2*Sqrt 529 530 if (x > FPR_z) 531 { // positive x 532 s = __FMADD( __FMUL( g, x2 ), __FMADD( x2, temp1, pK->aa0), d );// low order terms 533 534 FESETENVD ( FPR_env ); // restore caller's mode 535 __PROG_INEXACT( FPR_kPiBy2 ); 536 537 return __FMADD( FPR_two, s, y2); // combine terms 538 } // [positive x] 539 else 540 { // negative x 541 register double q,r,ss,t; 542 543 u = __FMSUB( FPR_two, FPR_kPiBy2, y2 ); // high-order term 544 r = d - kPiBy2Tail; 545 ss = __FMADD( x2, temp1, pK->aa0 ); 546 t = __FMUL( g, x2 ); 547 v = __FMSUB( FPR_two, FPR_kPiBy2, u ) - y2; // tail correction 548 q = __FNMSUB( FPR_half, v, r ); 549 s = __FMADD( ss, t, q ); // collect low-order terms 550 551 FESETENVD ( FPR_env ); // restore caller's mode 552 __PROG_INEXACT( FPR_kPiBy2 ); 553 554 return __FNMSUB( FPR_two, s, u); // combine terms 555 } // [negative x] 556 } // [0.5 < |x| < 1.0] 557 558 559 // |x| == 1.0: return exact +0.0 or inexact pi 560 if ( x == FPR_one ) 561 { 562 FESETENVD ( FPR_env ); // restore caller's mode 563 return ( FPR_z ); 564 } 565 566 if ( x == -1.0 ) 567 { 568 FESETENVD ( FPR_env ); // restore caller's mode 569 __PROG_INEXACT( FPR_kPiBy2 ); 570 return (kPiu.d + 2.0*kPiBy2Tail); 571 } 572 573 // |x| > 1.0 signals INVALID and returns a NaN 574 fpenv.d = FPR_env; 575 __NOOP; 576 __NOOP; 577 __NOOP; 578 fpenv.i.lo |= SET_INVALID; 579 FESETENVD_GRP ( fpenv.d ); 580 return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) ); 581} // ArcCos(x)