this repo has no description
at fixPythonPipStalling 511 lines 24 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22/******************************************************************************* 23* * 24* FastSinCos.c * 25* * 26* Double precision Sine and Cosine. * 27* * 28* Copyright � 1997-2001 by Apple Computer, Inc. All rights reserved. * 29* * 30* Written by A. Sazegari, started on June 1997. * 31* Modified and ported by Robert A. Murley (ram) for Mac OS X. * 32* * 33* A MathLib v4 file. * 34* * 35* Based on the trigonometric functions from IBM/Taligent. * 36* * 37* November 06 2001: commented out warning about Intel architectures. * 38* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. * 39* replaced DblInHex typedef with hexdouble. * 40* September 07 2001: added #ifdef __ppc__. * 41* September 09 2001: added more comments. * 42* September 10 2001: added macros to detect PowerPC and correct compiler. * 43* September 18 2001: added <CoreServices/CoreServices.h> to get to <fp.h> * 44* and <fenv.h>, removed denormal comments. * 45* October 08 2001: removed <CoreServices/CoreServices.h>. * 46* changed compiler errors to warnings. * 47* * 48* These routines have a long double (107-bits) argument reduction to * 49* better match their long double counterpart. * 50* * 51* W A R N I N G: * 52* These routines require a 64-bit double precision IEEE-754 model. * 53* They are written for PowerPC only and are expecting the compiler * 54* to generate the correct sequence of multiply-add fused instructions. * 55* * 56* These routines are not intended for 32-bit Intel architectures. * 57* * 58* A version of gcc higher than 932 is required. * 59* * 60* GCC compiler options: * 61* optimization level 3 (-O3) * 62* -fschedule-insns -finline-functions -funroll-all-loops * 63* * 64*******************************************************************************/ 65 66#include "fenv_private.h" 67#include "fp_private.h" 68#include "math.h" 69 70#define TRIG_NAN "33" 71 72/******************************************************************************* 73* Floating-point constants. * 74*******************************************************************************/ 75 76static const double kPiScale42 = 1.38168706094305449e13; // 0x1.921fb54442d17p43 77static const double kPiScale53 = 2.829695100811376e16; // 0x1.921fb54442d18p54 78static const double piOver4 = 0.785398163397448390; // 0x1.921fb54442d19p-1 79static const double piOver2 = 1.570796326794896619231322; // 0x1.921fb54442d18p0 80static const double piOver2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54 81static const double twoOverPi = 0.636619772367581382; // 0x1.45f306dc9c883p-1 82//static const double k2ToM26 = 1.490116119384765625e-8; // 0x1.0p-26; 83static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308; 84static const double kRintBig = 2.7021597764222976e16; // 0x1.8p54 85static const double kRint = 6.755399441055744e15; // 0x1.8p52 86static const hexdouble infinity = HEXDOUBLE(0x7ff00000, 0x00000000); 87 88/******************************************************************************* 89* Approximation coefficients. * 90*******************************************************************************/ 91 92static const double s13 = 1.5868926979889205164e-10; // 1.0/13! 93static const double s11 = -2.5050225177523807003e-8; // -1.0/11! 94static const double s9 = 2.7557309793219876880e-6; // 1.0/9! 95static const double s7 = -1.9841269816180999116e-4; // -1.0/7! 96static const double s5 = 8.3333333332992771264e-3; // 1.0/5! 97static const double s3 = -0.16666666666666463126; // 1.0/3! 98static const double c14 = -1.138218794258068723867e-11; // -1.0/14! 99static const double c12 = 2.087614008917893178252e-9; // 1.0/12! 100static const double c10 = -2.755731724204127572108e-7; // -1.0/10! 101static const double c8 = 2.480158729870839541888e-5; // 1.0/8! 102static const double c6 = -1.388888888888735934799e-3; // -1.0/6! 103static const double c4 = 4.166666666666666534980e-2; // 1.0/4! 104static const double c2 = -.5; // -1.0/2! 105 106double sin ( double x ) 107{ 108 register double absOfX, intquo, arg, argtail, xSquared, xThird, xFourth, temp1, temp2, result; 109 register uint32_t ltable; 110 hexdouble z, OldEnvironment; 111 112 register double FPR_env, FPR_z, FPR_Min, FPR_pi4, FPR_piScale; 113 register double FPR_t, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint; 114 register uint32_t GPR_f; 115 116 FEGETENVD( FPR_env ); // save env, set default 117 FPR_z = 0.0; 118 119 FPR_pi4 = piOver4; 120 absOfX = __FABS ( x ); __ENSURE( FPR_z, FPR_z, FPR_pi4 ); // ensure z and pi4 are in registers 121 122 FESETENVD( FPR_z ); 123 124 if ( absOfX < FPR_pi4 ) // |x| < �/4 125 { 126 if (likely( absOfX != FPR_z )) 127 { 128 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13; 129 130/******************************************************************************* 131* at this point, x is normal with magnitude between 0 and �/4. * 132*******************************************************************************/ 133 // sin polynomial approximation 134 135 xSquared = __FMUL( x, x ); 136 FPR_Min = kMinNormal; 137 __ENSURE( FPR_z, FPR_z, FPR_Min ); // ensure FPR_Min is loaded into register 138 139 FPR_s9 = s9; FPR_s13 = s13; 140 141 FPR_s7 = s7; FPR_s11 = s11; 142 143 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( x, xSquared ); 144 FPR_s5 = s5; 145 146 FPR_s3 = s3; 147 temp1 = __FMADD( s13, xFourth, s9 ); temp2 = __FMADD( s11, xFourth, s7 ); 148 149 temp1 = __FMADD( temp1, xFourth, s5 ); temp2 = __FMADD( temp2, xFourth, s3 ); 150 151 temp1 = __FMADD( temp1, xSquared, temp2 ); result = __FMADD( temp1, xThird, x ); 152 153 FESETENVD( FPR_env ); // restore caller's mode 154 155 if (unlikely( __FABS( result ) < FPR_Min )) 156 __PROG_UF_INEXACT( FPR_Min ); 157 else 158 __PROG_INEXACT( FPR_pi4 ); 159 160 return ( result ); 161 } 162 else 163 { 164 FESETENVD( FPR_env ); // restore caller's mode 165 return x; // +0 -0 preserved 166 } 167 } 168 169 170 if (unlikely( x != x )) // x is a NaN 171 { 172 FESETENVD( FPR_env ); // restore caller's mode 173 return ( x ); 174 } 175 176/******************************************************************************* 177* x has magnitude > �/4. * 178*******************************************************************************/ 179 FPR_piScale = kPiScale42; 180 FPR_piScale = __FMADD( FPR_z, FPR_z, FPR_piScale ); 181 FPR_inf = infinity.d; FPR_pi53 = kPiScale53; 182 FPR_2divPi = twoOverPi; FPR_PiDiv2 = piOver2; 183 FPR_PiDiv2Tail = piOver2Tail; FPR_kRintBig = kRintBig; 184 FPR_kRint = kRint; 185 186 if (unlikely( absOfX > FPR_piScale )) 187 { 188/******************************************************************************* 189* |x| is huge or infinite. * 190*******************************************************************************/ 191 if ( absOfX == FPR_inf ) 192 { // infinite case is invalid 193 OldEnvironment.d = FPR_env; 194 __NOOP; 195 __NOOP; 196 __NOOP; 197 OldEnvironment.i.lo |= SET_INVALID; 198 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode 199 return ( nan ( TRIG_NAN ) ); // return NaN 200 } 201 202 while ( absOfX > FPR_pi53 ) 203 { // loop to reduce x below 204 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude 205 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 206 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 207 absOfX = __FABS ( x ) ; 208 } 209 210/******************************************************************************* 211* final reduction of x to magnitude between 0 and 2*�. * 212*******************************************************************************/ 213 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig ); 214 intquo = FPR_t - FPR_kRintBig; 215 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 216 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 217 absOfX = __FABS( x ); 218 } 219 220/******************************************************************************* 221* |x| < pi*2^42: further reduction is probably necessary. A double-double * 222* reduced argument is determined ( arg:argtail ) . It is possible that x * 223* has been reduced below pi/4 in magnitude, but x is definitely nonzero * 224* and safely in the normal range. * 225*******************************************************************************/ 226 // find integer quotient of x/(�/2) 227 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint ); intquo = FPR_t - FPR_kRint; 228 z.d = FPR_t; 229 230 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 231 xSquared = __FMUL( arg, arg ); FPR_t -= arg; 232 233 GPR_f = z.i.lo; 234 xFourth = __FMUL( xSquared, xSquared ); argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 235 236/******************************************************************************* 237* multiple of �/2 ( mod 4) determines approx used and sign of result. * 238*******************************************************************************/ 239 240 ltable = GPR_f & FE_ALL_RND; 241 242 if ( ltable & 0x1u ) 243 { // argument closest to ��/2 244/******************************************************************************* 245* use cosine approximation. * 246*******************************************************************************/ 247 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14; 248 249 FPR_c10 = c10; FPR_c14 = c14; 250 FPR_c8 = c8; FPR_c12 = c12; 251 252 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8); 253 FPR_c6 = c6; 254 255 FPR_c4 = c4; 256 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4); 257 258 FPR_c2 = c2; 259 temp1 = __FMADD( temp1, xFourth, FPR_c2 ); 260 261 temp1 = __FMADD( xSquared, temp2, temp1 ); 262 temp1 = __FMSUB( arg, temp1, argtail ); // second-order correction 263 264 if ( ltable < 2 ) // adjust sign of result 265 result = __FMADD( arg, temp1, 1.0 ); // positive 266 else 267 { 268 arg = - arg; 269 result =__FMSUB( arg, temp1, 1.0 ); // negative 270 } 271 FESETENVD( FPR_env ); // restore caller's mode 272 __PROG_INEXACT( FPR_PiDiv2 ); 273 } 274 else 275 { 276/******************************************************************************* 277* use sine approximation. * 278*******************************************************************************/ 279 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13; 280 281 FPR_s9 = s9; FPR_s13 = s13; 282 FPR_s7 = s7; FPR_s11 = s11; 283 284 temp1 = __FMADD( FPR_s13, xFourth, FPR_s9 );temp2 = __FMADD( FPR_s11, xFourth, FPR_s7 ); 285 FPR_s5 = s5; 286 287 FPR_s3 = s3; 288 temp1 = __FMADD( temp1, xFourth, FPR_s5 ); temp2 = __FMADD( temp2, xFourth, FPR_s3 ); 289 290 temp1 = __FMADD( temp1, xSquared, temp2 ); xThird = __FMUL( arg, xSquared ); 291 292 temp1 = __FMADD( temp1, xThird, argtail ); // second-order correction 293 294 if ( ltable < 2 ) // adjust sign of final result 295 result = arg + temp1 ; // positive 296 else 297 { 298 arg = - arg; 299 result = arg - temp1; // negative 300 } 301 302 FESETENVD( FPR_env ); // restore caller's mode 303 __PROG_INEXACT( FPR_PiDiv2 ); 304 } 305 306 return ( result ) ; 307} 308 309/******************************************************************************* 310* Cosine section. * 311*******************************************************************************/ 312double cos ( double x ) 313{ 314 register double absOfX, intquo, arg, argtail, xSquared, xThird, xFourth, 315 temp1, temp2, result; 316 register uint32_t iquad; 317 hexdouble z, OldEnvironment; 318 319 register double FPR_env, FPR_z, FPR_pi4, FPR_piScale; 320 register double FPR_t, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint; 321 register uint32_t GPR_f; 322 323 FEGETENVD( FPR_env ); // save env, set default 324 FPR_z = 0.0; 325 326 FPR_pi4 = piOver4; 327 absOfX = __FABS ( x ); __ENSURE( FPR_z, FPR_z, FPR_pi4 ); // ensure z and pi4 are in registers 328 329 FESETENVD( FPR_z ); 330 331 if ( absOfX < FPR_pi4 ) // |x| < �/4 332 { 333 if (likely( absOfX != FPR_z )) 334 { 335 // cos polynomial approximation 336 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14, FPR_One; 337 338 xSquared = __FMUL( x, x ); 339 FPR_One = 1.0; 340 __ENSURE( FPR_z, FPR_z, FPR_One ); 341 342 FPR_c10 = c10; FPR_c14 = c14; 343 344 FPR_c8 = c8; FPR_c12 = c12; 345 346 FPR_c6 = c6; FPR_c4 = c4; 347 348 xFourth = __FMUL( xSquared, xSquared ); 349 __NOOP; 350 FPR_c2 = c2; 351 352 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8); 353 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4); 354 355 temp1 = __FMADD( temp1, xFourth, FPR_c2 ); temp1 = __FMADD( xSquared, temp2, temp1 ); 356 result = __FMADD( xSquared, temp1, FPR_One ); 357 358 FESETENVD( FPR_env ); // restore caller's mode 359 __PROG_INEXACT( FPR_pi4 ); 360 361 return ( result ); 362 } 363 else 364 { 365 FESETENVD( FPR_env ); // restore caller's mode 366 return 1.0; 367 } 368 } 369 370 if (unlikely( x != x )) // x is a NaN 371 { 372 FESETENVD( FPR_env ); // restore caller's mode 373 return ( x ); 374 } 375 376/******************************************************************************* 377* x has magnitude > �/4. * 378*******************************************************************************/ 379 FPR_piScale = kPiScale42; 380 FPR_piScale = __FMADD( FPR_z, FPR_z, FPR_piScale ); 381 FPR_inf = infinity.d; FPR_pi53 = kPiScale53; 382 FPR_2divPi = twoOverPi; FPR_PiDiv2 = piOver2; 383 FPR_PiDiv2Tail = piOver2Tail; FPR_kRintBig = kRintBig; 384 FPR_kRint = kRint; 385 386 if (unlikely( absOfX > FPR_piScale )) 387 388/******************************************************************************* 389* |x| is huge or infinite. * 390*******************************************************************************/ 391 { 392 if ( absOfX == infinity.d ) 393 { // infinite case is invalid 394 OldEnvironment.d = FPR_env; 395 __NOOP; 396 __NOOP; 397 __NOOP; 398 OldEnvironment.i.lo |= SET_INVALID; 399 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode 400 return ( nan ( TRIG_NAN ) ); // return NaN 401 } 402 403 while ( absOfX > FPR_pi53 ) 404 { // loop to reduce x below 405 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude 406 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 407 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 408 absOfX = __FABS ( x ) ; 409 } 410 411/******************************************************************************* 412* final reduction of x to magnitude between 0 and 2*�. * 413*******************************************************************************/ 414 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig ); 415 intquo = FPR_t - FPR_kRintBig; 416 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 417 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 418 absOfX = __FABS( x ); 419 } 420 421/******************************************************************************* 422* |x| < pi*2^42: further reduction is probably necessary. A double-double * 423* reduced argument is determined ( arg:argtail ) . It is possible that x * 424* has been reduced below pi/4 in magnitude, but x is definitely nonzero * 425* and safely in the normal range. * 426*******************************************************************************/ 427 428 // find integer quotient of x/(�/2) 429 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint ); 430 431 intquo = FPR_t - FPR_kRint; 432 z.d = FPR_t; 433 434 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 435 436 arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 437 438 xSquared = __FMUL( arg, arg ); FPR_t -= arg; 439 GPR_f = z.i.lo; 440 441 xFourth = __FMUL( xSquared, xSquared ); argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 442 443 iquad = (GPR_f + 1 ) & FE_ALL_RND; // iquad = int multiple mod 4 444 445/******************************************************************************* 446* multiple of �/2 ( mod 4) determines approx used and sign of result. * 447*******************************************************************************/ 448 449 if ( iquad & 0x1u) 450 { // arg closest to 0 or � 451/******************************************************************************* 452* use cosine approximation. * 453*******************************************************************************/ 454 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14; 455 456 FPR_c10 = c10; FPR_c14 = c14; 457 FPR_c8 = c8; FPR_c12 = c12; 458 459 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8); 460 FPR_c6 = c6; 461 462 FPR_c4 = c4; 463 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4); 464 465 FPR_c2 = c2; 466 temp1 = __FMADD( temp1, xFourth, FPR_c2 ); temp1 = __FMADD( xSquared, temp2, temp1 ); 467 468 temp1 = __FMSUB( arg, temp1, argtail ); // second-order correction 469 if ( iquad < 2 ) // adjust sign of result 470 result = __FMADD( arg, temp1, 1.0 ); // positive 471 else 472 { 473 arg = - arg; 474 result =__FMSUB( arg, temp1, 1.0 ); // negative 475 } 476 } 477 else 478 { 479/******************************************************************************* 480* use sine approximation. * 481*******************************************************************************/ 482 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13; 483 484 FPR_s9 = s9; FPR_s13 = s13; 485 FPR_s7 = s7; FPR_s11 = s11; 486 487 temp1 = __FMADD( FPR_s13, xFourth, FPR_s9 ); temp2 = __FMADD( FPR_s11, xFourth, FPR_s7 ); 488 FPR_s5 = s5; 489 490 FPR_s3 = s3; 491 temp1 = __FMADD( temp1, xFourth, FPR_s5 ); temp2 = __FMADD( temp2, xFourth, FPR_s3 ); 492 493 temp1 = __FMADD( temp1, xSquared, temp2 ); xThird = __FMUL( arg, xSquared ); 494 495 temp1 = __FMADD( temp1, xThird, argtail ); // second-order correction 496 497 if ( iquad < 2 ) // adjust sign of result 498 result = temp1 + arg; 499 else 500 { 501 arg = - arg; 502 result = arg - temp1; 503 } 504 } 505 506 FESETENVD( FPR_env ); // restore caller's mode 507 __PROG_INEXACT( FPR_PiDiv2 ); 508 509 return ( result ) ; 510} 511