this repo has no description
at fixPythonPipStalling 520 lines 23 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22/******************************************************************************* 23* * 24* tg.c * 25* * 26* Double precision Tangent * 27* * 28* Copyright: � 1993-1997 by Apple Computer, Inc., all rights reserved * 29* * 30* Written and ported by A. Sazegari, started on June 1997. * 31* Modified by Robert Murley, 2001 * 32* * 33* A MathLib v4 file. * 34* * 35* Based on the trigonometric functions from IBM/Taligent. * 36* * 37* Modification history: * 38* November 06 2001: commented out warning about Intel architectures. * 39* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. * 40* replaced DblInHex typedef with hexdouble. * 41* September 07 2001: added #ifdef __ppc__. * 42* September 19 2001: added macros to detect PowerPC and correct compiler. * 43* September 19 2001: added <CoreServices/CoreServices.h> to get to <fp.h> * 44* and <fenv.h>, removed denormal comments. * 45* September 25 2001: removed more denormal comments. * 46* October 08 2001: removed <CoreServices/CoreServices.h>. * 47* changed compiler errors to warnings. * 48* * 49* W A R N I N G: * 50* This routine requires a 64-bit double precision IEEE-754 model. * 51* They are written for PowerPC only and are expecting the compiler * 52* to generate the correct sequence of multiply-add fused instructions. * 53* * 54* This routine is not intended for 32-bit Intel architectures. * 55* * 56* 08 Oct 01 ram changed compiler errors to warnings. * 57* * 58* GCC compiler options: * 59* optimization level 3 (-O3) * 60* -fschedule-insns -finline-functions -funroll-all-loops * 61* * 62*******************************************************************************/ 63 64#include "math.h" 65#include "fenv_private.h" 66#include "fp_private.h" 67 68#define TRIG_NAN "33" 69 70struct tableEntry /* tanatantable entry structure */ 71 { 72 double p; 73 double f5; 74 double f4; 75 double f3; 76 double f2; 77 double f1; 78 double f0; 79 }; 80 81extern const uint32_t tanatantable[]; 82 83static const double kPiBy2 = 1.570796326794896619231322; // 0x1.921fb54442d18p0 84static const double kPiBy2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54 85static const double k2ByPi = 0.636619772367581382; // 0x1.45f306dc9c883p-1 86static const double kRint = 6.755399441055744e15; // 0x1.8p52 87static const double kRintBig = 2.7021597764222976e16; // 0x1.8p54 88static const double kPiScale42 = 1.38168706094305449e13; // 0x1.921fb54442d17p43 89static const double kPiScale53 = 2.829695100811376e16; // 0x1.921fb54442d18p54 90static const hexdouble infinity = HEXDOUBLE(0x7ff00000, 0x00000000); 91 92/******************************************************************************* 93******************************************************************************** 94* T A N * 95******************************************************************************** 96*******************************************************************************/ 97 98static const double ts11 = 8.898406739539066157565e-3, 99 ts9 = 2.186936821731655951177e-2, 100 ts7 = 5.396825413618260185395e-2, 101 ts5 = 0.1333333333332513155016, 102 ts3 = 0.3333333333333333333333; 103static const double tt7 = 0.05396875452473400572649, 104 tt5 = 0.1333333333304691192925, 105 tt3 = 0.3333333333333333357614; 106static const double k2ToM1023 = 0x1.0p-1023; // 1.112536929253600692e-308 107static const uint32_t indexTable[] = 108 { 109 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 110 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 111 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 112 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 113 61, 62, 63, 64, 65, 66, 68, 69, 70, 71, 72, 114 73, 74, 75, 76, 77, 78, 79, 81, 82, 83, 84, 115 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 116 97, 98, 100, 101, 102, 103, 104, 105, 107, 108, 109, 117 110, 111, 113, 114, 115, 116, 117, 119, 120, 121, 122, 118 123, 125, 126, 127, 128, 130, 131, 132, 133, 135, 136, 119 137, 139, 140, 141, 142, 144, 145, 146, 148, 149, 150, 120 152, 153, 154, 156, 157, 159, 160, 161, 163, 164, 166, 121 167, 168, 170, 171, 173, 174, 176, 177, 179, 180, 182, 122 183, 185, 186, 188, 189, 191, 192, 194, 196, 197, 199, 123 200, 202, 204, 205, 207, 209, 210, 212, 214, 215, 217, 124 219, 220, 222, 224, 226, 228, 229, 231, 233, 235, 237, 125 238, 240, 242, 244, 246, 248, 250, 252, 254, 256, 256 126 }; 127 128static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308; 129static const double kPiDiv4 = 0.7853980064392090732; 130 131double tan ( double x ) 132{ 133 register double absOfX, intquo ,arg, argtail, aSquared, aThird, aFourth, 134 temp1, temp2, s, u, v, w, y, result; 135 int tableIndex; 136 uint32_t iz; 137 hexdouble zD, aD, OldEnvironment; 138 struct tableEntry *tablePointer; 139 140 register double FPR_env, FPR_z, FPR_one, FPR_PiDiv4, FPR_256, FPR_piScale; 141 register double FPR_t, FPR_u, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint; 142 register uint32_t GPR_f; 143 register struct tableEntry *pT; 144 145 FEGETENVD( FPR_env ); // save env, set default 146 147 FPR_z = 0.0; tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) ); 148 absOfX = __FABS( x ); 149 FPR_PiDiv4 = kPiDiv4; FPR_256 = 256.0; 150 FPR_kRint = kRint; FPR_one = 1.0; 151 FPR_PiDiv2 = kPiBy2; 152 153 __ENSURE( FPR_PiDiv2, FPR_PiDiv4, FPR_256); 154 __ENSURE( FPR_z, FPR_kRint, FPR_one); 155 156 FESETENVD( FPR_z ); 157 158/******************************************************************************* 159* |x| < 0.7853980064392090732 * 160*******************************************************************************/ 161 162 if ( absOfX < FPR_PiDiv4 ) 163 { 164 165 FPR_t = __FMADD( FPR_256, absOfX, FPR_kRint ); 166 aD.d = FPR_t; 167 168 if (unlikely( absOfX == FPR_z )) 169 { 170 FESETENVD( FPR_env ); // restore caller's mode 171 return x; // +0 -0 preserved 172 } 173 174 175/******************************************************************************* 176* |x| < 0.0625 * 177*******************************************************************************/ 178 179 if ( aD.i.lo < 16u ) // No Store/Load hazard thanks to the imediately preceding branch 180 { 181 register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11, FPR_Min; 182 183 aSquared = __FMUL( absOfX, absOfX ); FPR_Min = kMinNormal; 184 __NOOP; 185 186 FPR_ts11 = ts11; FPR_ts9 = ts9; 187 188 FPR_ts7 = ts7; FPR_ts5 = ts5; 189 190 FPR_ts3 = ts3; aThird = __FMUL( absOfX, aSquared ); 191 192 temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 ); 193 194 temp1 = __FMADD( temp1, aSquared, FPR_ts7 ); 195 196 temp1 = __FMADD( temp1, aSquared, FPR_ts5 ); 197 198 temp1 = __FMADD( temp1, aSquared, FPR_ts3 ); 199 200 if ( x > FPR_z ) 201 result = __FMADD( temp1, aThird, absOfX ); 202 else 203 { 204 aThird = -aThird; 205 result = __FMSUB( temp1, aThird, absOfX ); 206 } 207 208 FESETENVD( FPR_env ); // restore caller's mode 209 210 if ( __FABS ( result ) < FPR_Min ) 211 __PROG_UF_INEXACT( FPR_Min ); 212 else 213 __PROG_INEXACT( FPR_PiDiv2 ); 214 215 return result; 216 } 217 218/******************************************************************************* 219* .0625 <= x < .7853980064392090732. * 220*******************************************************************************/ 221 else 222 { 223 register double FPR_tt3, FPR_tt5, FPR_tt7, FPR_sqr, FPR_f0; 224 225 tableIndex = indexTable[aD.i.lo - 16]; 226 pT = &(tablePointer[tableIndex]); 227 FPR_f0 = pT->f0; 228 229 w = absOfX - FPR_f0; // w = deltax 230 y = pT->p; // y = Tan from table 231 FPR_tt3 = tt3; 232 233 FPR_tt7 = tt7; FPR_tt5 = tt5; 234 235 FPR_sqr = __FMUL( w, w ); // calculate delta Tan 236 237 temp1 = __FMADD( FPR_tt7, FPR_sqr, FPR_tt5 ); 238 239 temp1 = __FMADD( temp1, FPR_sqr, FPR_tt3 ); 240 241 temp1 = __FMADD( temp1, __FMUL( FPR_sqr, w ), w ); 242 243 v = __FMUL(y, temp1 ); 244 245 aSquared = __FMUL( v, v ); aThird = FPR_one + v; 246 247 aFourth = __FMUL( aSquared,aSquared );w = __FMADD( aThird, aSquared, aThird ); 248 249 aThird = __FMADD( w, aFourth, w ); temp1 = __FMADD( v, y, temp1 ); 250 251 if ( x > FPR_z ) // fix final sign 252 result = __FMADD( temp1, aThird, y ); 253 else 254 { 255 aThird = -aThird; 256 result = __FMSUB( temp1, aThird, y ); 257 } 258 259 FESETENVD( FPR_env ); // restore caller's mode 260 __PROG_INEXACT( FPR_PiDiv2 ); 261 262 return result; 263 } 264 } 265 266 if (unlikely( x != x )) // x is a NaN 267 { 268 FESETENVD( FPR_env ); // restore caller's mode 269 return ( x ); 270 } 271 272/******************************************************************************* 273* |x| > �/4 and perhaps |x| > kPiScale42. * 274*******************************************************************************/ 275 276 FPR_piScale = kPiScale42; FPR_PiDiv2Tail = kPiBy2Tail; 277 FPR_2divPi = k2ByPi; FPR_pi53 = kPiScale53; 278 FPR_inf = infinity.d; FPR_kRintBig = kRintBig; 279 __ENSURE( FPR_2divPi, FPR_PiDiv2, FPR_piScale ); 280 281 if (unlikely( absOfX > FPR_piScale )) 282 { // |x| is huge or infinite 283 if ( absOfX == FPR_inf ) 284 { // infinite case is invalid 285 OldEnvironment.d = FPR_env; 286 __NOOP; 287 __NOOP; 288 __NOOP; 289 290 OldEnvironment.i.lo |= SET_INVALID; 291 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode 292 return ( nan ( TRIG_NAN ) ); // return NaN 293 } 294 295 while ( absOfX > FPR_pi53 ) 296 { // loop to reduce x below 297 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude 298 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 299 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 300 absOfX = __FABS ( x ) ; 301 } 302 303/******************************************************************************* 304* final reduction of x to magnitude between 0 and 2*�. * 305*******************************************************************************/ 306 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig ); 307 intquo = FPR_t - FPR_kRintBig; 308 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 309 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 310 absOfX = __FABS( x ); 311 } 312 313/******************************************************************************* 314* �/4 < x < �*2^42 * 315*******************************************************************************/ 316 317/******************************************************************************* 318* Further argument reduction is probably necessary. A double-double * 319* reduced argument is determined ( arg|argtail ). * 320*******************************************************************************/ 321 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint ); 322 intquo = FPR_t - FPR_kRint; 323 zD.d = FPR_t; 324 325 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); 326 arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 327 328 absOfX = __FABS( arg ); FPR_t -= arg; 329 GPR_f = zD.i.lo; 330 331 FPR_u = __FMADD( FPR_256, absOfX, FPR_kRint ); 332 aD.d = FPR_u; 333 __NOOP; 334 __NOOP; 335 336 argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t ); 337 338/******************************************************************************* 339* |arg| < .0625. * 340*******************************************************************************/ 341 342 if ( aD.i.lo < 16u ) // No Store/Load hazard, NOOPs above push this issue to next cycle. 343 { 344 register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11; 345 346 aSquared = __FMUL( absOfX, absOfX ); 347 u = absOfX; 348 v = argtail; 349 350 FPR_ts11 = ts11; FPR_ts9 = ts9; 351 352 FPR_ts7 = ts7; FPR_ts5 = ts5; 353 354 FPR_ts3 = ts3; iz = GPR_f & 1u; // quo modulo 2 355 356 temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 ); 357 358 temp1 = __FMADD( temp1, aSquared, FPR_ts7 ); 359 360 temp1 = __FMADD( temp1, aSquared, FPR_ts5 ); 361 362 temp1 = __FMADD( temp1, aSquared, FPR_ts3 ); 363 364 aThird = __FMUL( aSquared, u ); 365 366/******************************************************************************* 367* tangent approximation starts here. * 368*******************************************************************************/ 369 370 if ( iz == 0 ) 371 { 372 if ( arg > FPR_z ) 373 { // positive arg 374 w = __FMADD( temp1, aThird, v ); 375 result = ( w + u ); 376 } 377 else 378 { // negative arg 379 w = __FNMSUB( temp1, aThird, v ); 380 result = ( w - u ); 381 } 382 383 FESETENVD( FPR_env ); // restore caller's mode 384 __PROG_INEXACT( FPR_PiDiv2 ); 385 386 return result; 387 } 388 389/******************************************************************************* 390* cotangent approximation starts here. * 391*******************************************************************************/ 392 else 393 { 394 register double FPR_k2ToM1023; 395 396 FPR_k2ToM1023 = k2ToM1023; 397 398 if ( arg > FPR_z ) 399 { // positive argument 400 s = __FMADD( temp1, aThird, v ); 401 temp2 = s + u; 402 } 403 else 404 { // negative argument 405 s = __FMSUB( temp1, aThird, v ); 406 temp2 = s + u; 407 } 408 409 aFourth = ( u - temp2 ) + s; 410 411 if ( __FABS ( temp2 ) < k2ToM1023 ) 412 { // huge result 413 if ( arg > FPR_z ) 414 result = ( FPR_one / temp2 ); 415 else 416 result = ( FPR_one / ( -temp2 ) ); 417 418 FESETENVD( FPR_env ); // restore caller's mode 419 __PROG_INEXACT( FPR_PiDiv2 ); 420 421 return result; 422 } 423 424 u = FPR_one / temp2; 425 v = __FNMSUB( u, temp2, FPR_one ); 426 temp1 = __FMSUB( aFourth, u, v ); 427 428 if ( arg > FPR_z ) 429 result = __FMSUB( temp1, u, u ); 430 else 431 result = __FNMSUB( temp1, u, u ); 432 433 FESETENVD( FPR_env ); // restore caller's mode 434 __PROG_INEXACT( FPR_PiDiv2 ); 435 436 return result; 437 } 438 } 439 440/******************************************************************************* 441* The following code covers the case where the reduced argument has * 442* magnitude greater than 0.0625 but less than �/4. * 443*******************************************************************************/ 444 445 tableIndex = indexTable [ aD.i.lo - 16 ]; 446 447 if ( arg > FPR_z ) 448 { // positive argument 449 w = absOfX - tablePointer [tableIndex].f0 + argtail; // argument delta 450 aSquared = __FMUL( w, w ); 451 v = absOfX - tablePointer [tableIndex].f0 - w + argtail; // tail of argument delta 452 } 453 else 454 { // negative argument 455 w = absOfX - tablePointer [tableIndex].f0 - argtail; // argument delta 456 aSquared = __FMUL( w, w ); 457 v = absOfX - tablePointer [tableIndex].f0 - w - argtail; // tail of argument delta 458 } 459 { 460 register double FPR_tt3, FPR_tt5, FPR_tt7; 461 462 FPR_t = v + w; FPR_u = __FMUL( aSquared, w ); 463 y = tablePointer[tableIndex].p; FPR_tt3 = tt3; 464 465 FPR_tt7 = tt7; FPR_tt5 = tt5; 466 467 temp1 = __FMADD( FPR_tt7, aSquared, FPR_tt5 ); 468 469 temp1 = __FMADD( temp1, aSquared, FPR_tt3 ); 470 471 temp1 = __FMADD( temp1, FPR_u, FPR_t ); 472 473 v = __FMUL( y, temp1 ); // Tan( delta ) 474 } 475 476/******************************************************************************* 477* polynomial approx of 1/(1-v) * 478*******************************************************************************/ 479 480 aSquared = __FMUL( v, v ); iz = GPR_f & 1u; // quo modulo 2 481 aThird = FPR_one + v; 482 aFourth = __FMUL( aSquared, aSquared ); 483 w = __FMADD( aThird, aSquared, aThird ); 484 aThird = __FMADD( w, aFourth, w ); // aThird = 1/(1-v) 485 s = __FMUL( __FMADD( y, y, FPR_one ), aThird ); 486 487 if ( iz == 0 ) 488 { // tangent approximation 489 if ( arg > FPR_z ) 490 result = __FMADD( temp1, s, y ); 491 else 492 { 493 y = -y; 494 result = __FNMSUB( temp1, s, y ); 495 } 496 497 FESETENVD( FPR_env ); // restore caller's mode 498 __PROG_INEXACT( FPR_PiDiv2 ); 499 500 return result; 501 } 502 else 503 { // cotangent approx 504 w = __FMADD( temp1, s, y ); 505 aFourth = __FMADD( temp1, s, ( y - w ) ); 506 u = FPR_one / w; 507 v = __FNMSUB( u, w, FPR_one); 508 w = __FMSUB( u, aFourth, v ); 509 510 if ( arg > FPR_z ) 511 result = __FMSUB( u, w, u ); 512 else 513 result = __FNMSUB( u, w, u ); 514 515 FESETENVD( FPR_env ); // restore caller's mode 516 __PROG_INEXACT( FPR_PiDiv2 ); 517 518 return result; 519 } 520}