this repo has no description
at fixPythonPipStalling 550 lines 27 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22/******************************************************************************* 23* * 24* File atan.c, * 25* Function atan. * 26* * 27* Implementation of arctg based on the approximation provided by * 28* Taligent, Inc. who based it on sources from IBM. * 29* * 30* Copyright � 1996-2001 Apple Computer, Inc. All rights reserved. * 31* * 32* Written by A. Sazegari, started on June 1996. * 33* Modified and ported by Robert A. Murley (ram) for Mac OS X. * 34* * 35* A MathLib v4 file. * 36* * 37* The principal values of the inverse tangent function are: * 38* * 39* -�/2 � atan(x) � �/2, -� � x � +� * 40* * 41* August 26 1996: produced a PowerMathLib� version. this routine * 42* does not handle edge cases and does not set the * 43* ieee flags. its computation is suseptible to the * 44* inherited rounding direction. it is assumed that * 45* the rounding direction is set to the ieee default. * 46* October 16 1996: fixed a grave mistake by replacing x2 with xSquared. * 47* April 14 1997: added the environmental controls for mathlib v3. * 48* unlike mathlib v2, v3 will honor the inherited * 49* rounding mode since it is easier to set the flags * 50* computationally. * 51* July 01 1997: this function no longer honors the rounding mode. * 52* corrected an error for nan arguments. * 53* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. * 54* replaced DblInHex typedef with hexdouble. * 55* September 07 2001: added #ifdef __ppc__. * 56* September 09 2001: added more comments. * 57* September 10 2001: added macros to detect PowerPC and correct compiler. * 58* September 18 2001: added <CoreServices/CoreServices.h> to get to <fp.h> * 59* and <fenv.h>. * 60* October 08 2001: removed <CoreServices/CoreServices.h>. * 61* changed compiler errors to warnings. * 62* November 06 2001: commented out warning about Intel architectures. * 63* * 64* W A R N I N G: * 65* These routines require a 64-bit double precision IEEE-754 model. * 66* They are written for PowerPC only and are expecting the compiler * 67* to generate the correct sequence of multiply-add fused instructions. * 68* * 69* These routines are not intended for 32-bit Intel architectures. * 70* * 71* A version of gcc higher than 932 is required. * 72* * 73* GCC compiler options: * 74* optimization level 3 (-O3) * 75* -fschedule-insns -finline-functions -funroll-all-loops * 76* * 77*******************************************************************************/ 78 79#include "fenv_private.h" 80#include "fp_private.h" 81#include "math.h" 82 83extern const unsigned short SqrtTable[]; 84 85static const double c13 = 7.6018324085327799e-02; /* 0x1.375efd7d7dcbep-4 */ 86static const double c11 = -9.0904243427904791e-02; /* -0x1.745802097294ep-4 */ 87static const double c9 = 1.1111109821671356e-01; /* 0x1.c71c6e5103cddp-4 */ 88static const double c7 = -1.4285714283952728e-01; /* -0x1.24924923f7566p-3 */ 89static const double c5 = 1.9999999999998854e-01; /* 0x1.99999999997fdp-3 */ 90static const double c3 = -3.3333333333333330e-01; /* -0x1.5555555555555p-2 */ 91 92static const double Rint = 6.755399441055744e15; /* 0x1.8p52 */ 93static const double PiOver2 = 1.570796326794896619231322; /* head of �/2 */ 94static const double PiOver2Tail = 6.1232339957367660e-17; /* tail of �/2 */ 95static const hexdouble Pi = HEXDOUBLE(0x400921fb, 0x54442d18); 96static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308; 97 98/******************************************************************************* 99******************************************************************************** 100* A T A N * 101******************************************************************************** 102*******************************************************************************/ 103 104struct tableEntry /* tanatantable entry structure */ 105 { 106 double p; 107 double f5; 108 double f4; 109 double f3; 110 double f2; 111 double f1; 112 double f0; 113 }; 114 115extern const uint32_t tanatantable[]; 116 117static const hexdouble Big = HEXDOUBLE(0x434d0297, 0x00000000); 118 119double atan ( double x ) 120{ 121 hexdouble reducedX; 122 register double fabsOfx, xSquared, xFourth, xThird, temp1, temp2, result, y, z; 123 struct tableEntry *tablePointer; 124 125 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_256, FPR_kMinNormal; 126 register double FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRint; 127 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_t; 128 register struct tableEntry *pT; 129 130 fabsOfx = __FABS ( x ); 131 132 if (unlikely(x != x)) 133 return x; 134 135 FPR_z = 0.0; FPR_half = 0.5; 136 137 FPR_one = 1.0; FPR_256 = 256.0; 138 139 FPR_PiDiv2 = PiOver2; FPR_PiDiv2Tail = PiOver2Tail; 140 141 FPR_kRint = Rint; FPR_t = Big.d; 142 143/******************************************************************************* 144* initialization of table pointer. * 145*******************************************************************************/ 146 147 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) ); 148 149/******************************************************************************* 150* |x| > 1.0 or NaN. * 151*******************************************************************************/ 152 153 FEGETENVD( FPR_env ); 154 __ENSURE( FPR_z, FPR_kRint, FPR_256 ); __ENSURE( FPR_half, FPR_one, FPR_PiDiv2 ); 155 156 if ( fabsOfx > FPR_one ) 157 { 158 __NOOP; 159 y = FPR_one / fabsOfx; // Executes in issue slot 1 hence FPU 1 160 __NOOP; 161 162/******************************************************************************* 163* |x| is nontrivial. * 164*******************************************************************************/ 165 166 if (likely( fabsOfx < FPR_t )) 167 { 168 register double yTail, z, resultHead, resultTail; 169 170 xSquared = __FMUL( y, y ); 171 FPR_t = 16.0; 172 FPR_r = __FMADD( FPR_256, y, FPR_kRint ); 173 174 __NOOP; 175 __NOOP; 176 __NOOP; 177 reducedX.d = FPR_r; 178 179/******************************************************************************* 180* |x| > 16.0 * 181*******************************************************************************/ 182 if ( fabsOfx > FPR_t ) 183 { 184 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, y ); 185 FPR_c13 = c13; 186 187 FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); resultHead = FPR_PiDiv2 - y; 188 FPR_c11 = c11; 189 190 FPR_c9 = c9; 191 yTail = __FMUL( y, FPR_t ); resultTail = ( resultHead - FPR_PiDiv2 ) + y; 192 193 FPR_c7 = c7; 194 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 ); 195 196 FPR_c5 = c5; FPR_c3 = c3; 197 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 ); 198 199 temp1 = __FMADD( xSquared, temp1, temp2 ); 200 201 temp1 = __FNMSUB( xThird, temp1, FPR_PiDiv2Tail ); /* correction for �/2 */ 202 203 if ( x > FPR_z ) /* adjust sign of result */ 204 result = ( ( ( temp1 - yTail ) - resultTail ) + resultHead ); 205 else 206 result = ( ( ( yTail - temp1 ) + resultTail ) - resultHead ); 207 208 FESETENVD( FPR_env ); /* restore the environment */ 209 __PROG_INEXACT( FPR_PiDiv2 ); 210 211 return result; 212 } 213 214/******************************************************************************* 215* 1 <= |x| <= 16 use table-driven approximation for arctg(y = 1/|x|). * 216*******************************************************************************/ 217 218 pT = &(tablePointer[reducedX.i.lo]); FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); 219 220 FPR_c13 = pT->p; FPR_c11 = pT->f5; 221 yTail = __FMUL( y, FPR_t ); z = y - FPR_c13 + yTail; /* x delta */ 222 223 FPR_c9 = pT->f4; FPR_c7 = pT->f3; 224 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP; 225 226 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP; 227 FPR_c5 = pT->f2; FPR_c3 = pT->f1; 228 229 FPR_t = pT->f0; 230 temp1 = __FMADD( temp1, z, FPR_c5 ); 231 temp1 = __FMADD( temp1, z, FPR_c3 ); 232 233 resultHead = FPR_PiDiv2 - FPR_t; /* zeroth order term */ 234 235 if ( x > FPR_z ) /* adjust for sign of x */ 236 result = ( __FNMSUB( z, temp1, FPR_PiDiv2Tail ) + resultHead ); 237 else 238 result = ( __FMSUB( z, temp1, FPR_PiDiv2Tail ) - resultHead ); 239 240 FESETENVD( FPR_env ); /* restore the environment */ 241 __PROG_INEXACT( FPR_PiDiv2 ); 242 243 return result; 244 } 245 246/******************************************************************************* 247* |x| is huge, or INF. * 248* For x = INF, then the expression �/2 � �tail would return the round up * 249* or down version of atan if rounding is taken into consideration. * 250* otherwise, just ��/2 would be delivered. * 251*******************************************************************************/ 252 else 253 { /* |x| is huge, or INF */ 254 FESETENVD( FPR_env ); /* restore the environment */ 255 FPR_t = Pi.d; 256 if ( x > FPR_z ) /* positive x returns �/2 */ 257 result = __FMADD( FPR_t, FPR_half, FPR_PiDiv2Tail ); 258 else /* negative x returns -�/2 */ 259 result = ( - FPR_t * FPR_half - FPR_PiDiv2Tail ); 260 261 __PROG_INEXACT( FPR_PiDiv2 ); 262 return result; 263 } 264 } 265 266 267/******************************************************************************* 268* |x| <= 1.0. * 269*******************************************************************************/ 270 271 FPR_t = .0625; 272 reducedX.d = __FMADD( FPR_256, fabsOfx, FPR_kRint ); 273 xSquared = __FMUL( x, x ); 274 275/******************************************************************************* 276* 1.0/16 < |x| < 1 use table-driven approximation for arctg(x). * 277*******************************************************************************/ 278 if ( fabsOfx > FPR_t ) 279 { 280 pT = &(tablePointer[reducedX.i.lo]); __NOOP; 281 282 FPR_c13 = pT->p; FPR_c11 = pT->f5; 283 z = fabsOfx - FPR_c13; __NOOP; /* x delta */ 284 285 FPR_c9 = pT->f4; FPR_c7 = pT->f3; 286 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP; 287 288 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP; 289 FPR_c5 = pT->f2; FPR_c3 = pT->f1; 290 291 FPR_t = pT->f0; 292 temp1 = __FMADD( temp1, z, FPR_c5 ); 293 temp1 = __FMADD( temp1, z, FPR_c3 ); 294 295 if ( x > FPR_z ) /* adjust for sign of x */ 296 result = __FMADD( z, temp1, FPR_t ); 297 else 298 result = - z * temp1 - FPR_t; 299 300 FESETENVD( FPR_env ); /* restore the environment */ 301 __PROG_INEXACT( FPR_PiDiv2 ); 302 303 return result; 304 } 305 306/******************************************************************************* 307* |x| <= 1.0/16 fast, simple polynomial approximation. * 308*******************************************************************************/ 309 310 if (unlikely( fabsOfx == FPR_z )) 311 { 312 FESETENVD( FPR_env ); /* restore the environment */ 313 return x; /* +0 or -0 preserved */ 314 } 315 316 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, x ); 317 FPR_c13 = c13; FPR_c11 = c11; 318 319 FPR_c9 = c9; FPR_c7 = c7; 320 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 ); 321 322 FPR_c5 = c5; FPR_c3 = c3; 323 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 ); 324 325 temp1 = __FMADD( xSquared, temp1, temp2 ); 326 FPR_kMinNormal = kMinNormal; 327 328 result = __FMADD( xThird, temp1, x ); 329 330 FESETENVD( FPR_env ); /* restore the environment */ 331 if (likely( __FABS( result ) >= FPR_kMinNormal )) 332 { 333 __PROG_INEXACT( FPR_PiDiv2 ); 334 return result; 335 } 336 else 337 { 338 __PROG_UF_INEXACT( FPR_kMinNormal ); 339 return result; 340 } 341} 342 343// 344// For atan2(). Mean and lean. 345// 346double atanCore ( double fabsOfx ) // absolute value is passed by caller! 347{ 348 hexdouble reducedX; 349 register double xSquared, xFourth, xThird, temp1, temp2, result, z; 350 struct tableEntry *tablePointer; 351 352 register double FPR_z, FPR_256, FPR_kRint; 353 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_s, FPR_t; 354 register struct tableEntry *pT; 355 356 357 FPR_256 = 256.0; FPR_kRint = Rint; 358 359 FPR_r = __FMADD( FPR_256, fabsOfx, FPR_kRint ); 360 reducedX.d = FPR_r; 361 362 FPR_s = .0625; FPR_z = 0.0; 363 364/******************************************************************************* 365* initialization of table pointer. * 366*******************************************************************************/ 367 368 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) ); 369 xSquared = __FMUL( fabsOfx, fabsOfx ); __ENSURE( FPR_z, FPR_z, FPR_s ); 370 371/******************************************************************************* 372* |x| <= 1.0. * 373*******************************************************************************/ 374 375/******************************************************************************* 376* 1.0/16 < |x| < 1 use table-driven approximation for arctg(x). * 377*******************************************************************************/ 378 379 if ( fabsOfx > FPR_s ) 380 { 381 pT = &(tablePointer[reducedX.i.lo]); 382 383 FPR_c13 = pT->p; FPR_c11 = pT->f5; 384 z = fabsOfx - FPR_c13; __NOOP; /* x delta */ 385 386 FPR_c9 = pT->f4; FPR_c7 = pT->f3; 387 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP; 388 389 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP; 390 FPR_c5 = pT->f2; FPR_c3 = pT->f1; 391 392 FPR_t = pT->f0; 393 temp1 = __FMADD( temp1, z, FPR_c5 ); 394 temp1 = __FMADD( temp1, z, FPR_c3 ); 395 396 result = __FMADD( z, temp1, FPR_t ); 397 398 return result; 399 } 400 401/******************************************************************************* 402* |x| <= 1.0/16 fast, simple polynomial approximation. * 403*******************************************************************************/ 404 405 if (unlikely( fabsOfx == FPR_z )) 406 return fabsOfx; 407 408 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, fabsOfx ); 409 FPR_c13 = c13; FPR_c11 = c11; 410 411 FPR_c9 = c9; FPR_c7 = c7; 412 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 ); 413 414 FPR_c5 = c5; FPR_c3 = c3; 415 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 ); 416 417 temp1 = __FMADD( xSquared, temp1, temp2 ); 418 419 result = __FMADD( xThird, temp1, fabsOfx ); 420 421 return result; 422} 423 424double atanCoreInv ( double fabsOfx ) // absolute value is passed by caller! 425{ 426 hexdouble reducedX; 427 register double xSquared, xFourth, xThird, temp1, temp2, result, y; 428 struct tableEntry *tablePointer; 429 430 register double FPR_z, FPR_half, FPR_one, FPR_256; 431 register double FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRint; 432 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_s, FPR_t; 433 register struct tableEntry *pT; 434 435 FPR_s = Big.d; y = 1.0 / fabsOfx; // slot 1 hence fpu1 436 437 FPR_z = 0.0; FPR_half = 0.5; 438 439 FPR_one = 1.0; FPR_256 = 256.0; 440 441 FPR_PiDiv2 = PiOver2; FPR_PiDiv2Tail = PiOver2Tail; 442 443 __ENSURE( FPR_half, FPR_one, FPR_PiDiv2 ); //slot 0 hence fpu0 444 FPR_kRint = Rint; 445 __ENSURE( FPR_z, FPR_kRint, FPR_256 ); //slot 3 hence fpu0 446 447 448/******************************************************************************* 449* initialization of table pointer. * 450*******************************************************************************/ 451 452 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) ); 453 454/******************************************************************************* 455* |x| > 1.0 or NaN. * 456*******************************************************************************/ 457 { 458 459/******************************************************************************* 460* |x| is nontrivial. * 461*******************************************************************************/ 462 463 if (likely( fabsOfx < FPR_s )) 464 { 465 register double yTail, z, resultHead, resultTail; 466 467 FPR_r = __FMADD( FPR_256, y, FPR_kRint ); 468 FPR_s = 16.0; 469 xSquared = __FMUL( y, y ); 470 471 __NOOP; 472 __NOOP; 473 __NOOP; 474 reducedX.d = FPR_r; 475 476/******************************************************************************* 477* |x| > 16.0 * 478*******************************************************************************/ 479 if ( fabsOfx > FPR_s ) 480 { 481 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, y ); 482 FPR_c13 = c13; 483 484 FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); resultHead = FPR_PiDiv2 - y; 485 FPR_c11 = c11; 486 487 FPR_c9 = c9; 488 yTail = __FMUL( y, FPR_t ); resultTail = ( resultHead - FPR_PiDiv2 ) + y; 489 490 FPR_c7 = c7; 491 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 ); 492 493 FPR_c5 = c5; FPR_c3 = c3; 494 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 ); 495 496 temp1 = __FMADD( xSquared, temp1, temp2 ); 497 498 temp1 = __FNMSUB( xThird, temp1, FPR_PiDiv2Tail ); /* correction for �/2 */ 499 500 result = ( ( ( temp1 - yTail ) - resultTail ) + resultHead ); 501 502 return result; 503 } 504 505/******************************************************************************* 506* 1 <= |x| <= 16 use table-driven approximation for arctg(y = 1/|x|). * 507*******************************************************************************/ 508 509 pT = &(tablePointer[reducedX.i.lo]); FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); 510 511 FPR_c13 = pT->p; FPR_c11 = pT->f5; 512 yTail = __FMUL( y, FPR_t ); z = y - FPR_c13 + yTail; /* x delta */ 513 514 FPR_c9 = pT->f4; FPR_c7 = pT->f3; 515 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP; 516 517 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP; 518 FPR_c5 = pT->f2; FPR_c3 = pT->f1; 519 520 FPR_t = pT->f0; 521 temp1 = __FMADD( temp1, z, FPR_c5 ); 522 temp1 = __FMADD( temp1, z, FPR_c3 ); 523 524 resultHead = FPR_PiDiv2 - FPR_t; /* zeroth order term */ 525 526 result = ( __FNMSUB( z, temp1, FPR_PiDiv2Tail ) + resultHead ); 527 528 return result; 529 } 530 531/******************************************************************************* 532* |x| is huge, INF, or NaN. * 533* For x = INF, then the expression �/2 � �tail would return the round up * 534* or down version of atan if rounding is taken into consideration. * 535* otherwise, just ��/2 would be delivered. * 536*******************************************************************************/ 537 else 538 { /* |x| is huge, INF, or NaN */ 539 if ( fabsOfx != fabsOfx ) /* NaN argument is returned */ 540 result = fabsOfx; 541 else 542 { 543 /* positive x returns �/2 */ 544 FPR_t = Pi.d; 545 result = __FMADD( FPR_t, FPR_half, FPR_PiDiv2Tail ); 546 } 547 return result; 548 } 549 } 550}