this repo has no description
at fixPythonPipStalling 670 lines 28 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22// 23// HyperbolicDD.c 24// 25// Double-double Function Library 26// Copyright: � 1995-96 by Apple Computer, Inc., all rights reserved 27// 28// long double coshl(long double x); 29// long double sinhl(long double x); 30// long double tanhl(long double x); 31// 32// Change history: 33// 9/1/96 - PAF removed unnecessary tests (uncovered by MrC warning) 34// 35 36#include "math.h" 37#include "fp_private.h" 38#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L) 39#include "DD.h" 40 41static const double recip = 0.1957294106339126123084757437350e-19; 42static const double kBig = 4.504699138998272e+15 ; // 2.0**52 + 2.0**40 = 0x43300100 00000000 43 44uint32_t TanhTbl[] = { 45 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 46 0x3FB00000, 0x00030807, 0x3FAFF559, 0x97E63AD9, 0xBB6CF9C0, 0x2437818A, 47 0x3FC00000, 0x0000284A, 0x3FBFD599, 0x2BC5078A, 0xBB77F9D4, 0x704D912A, 48 0x3FC80000, 0x00005880, 0x3FC7B8FF, 0x903C4CEC, 0x3B47716F, 0xB54B60B5, 49 0x3FD00000, 0x00000CDC, 0x3FCF597E, 0xA69A34B3, 0xBB794C01, 0xC101435A, 50 0x3FD40000, 0x0000313C, 0x3FD35F98, 0xA0EA91C7, 0xBB904F63, 0x26BD63B6, 51 0x3FD80000, 0x000021EA, 0x3FD6EF53, 0xDE8CAD3F, 0xBB91172E, 0x8FAFB7A2, 52 0x3FDC0000, 0x00005214, 0x3FDA5729, 0xEE48C464, 0x3B9543D8, 0xC4943292, 53 0x3FE00000, 0x00001E86, 0x3FDD9353, 0xD756BAF6, 0xBB8F03A0, 0x34F7B469, 54 0x3FE20000, 0x00007115, 0x3FE05086, 0xF2F72867, 0xBB95D918, 0x3E3269EE, 55 0x3FE40000, 0x00005720, 0x3FE1BF47, 0xEABBCBE9, 0x3B918CCE, 0xA6F1FA3A, 56 0x3FE60000, 0x000021CF, 0x3FE3157D, 0xFE9F8724, 0xBBA1AC2C, 0x2B395168, 57 0x3FE80000, 0x00016908, 0x3FE45323, 0xE553C98B, 0xBBA14AF9, 0xB571AFA7, 58 0x3FEA0000, 0x00005202, 0x3FE5788F, 0xF10D56AF, 0xBBA3C229, 0xC063AD80, 59 0x3FEC0000, 0x00005742, 0x3FE68665, 0x0B8C4C1B, 0x3B8BE9FC, 0xF1B4428B, 60 0x3FEE0000, 0x00008582, 0x3FE77D83, 0x8E34430D, 0x3B829FA7, 0xF1623176, 61 0x3FF00000, 0x00005F76, 0x3FE85EFA, 0xB51543C3, 0xBB4B3592, 0x2498C785, 62 0x3FF10000, 0x00001F8D, 0x3FE92BFB, 0x370DB380, 0x3BA29188, 0x377121C3, 63 0x3FF20000, 0x0000130F, 0x3FE9E5CB, 0x5BA45A90, 0x3B7DFB4E, 0xF67C44E2, 64 0x3FF30000, 0x00004F18, 0x3FEA8DBC, 0xBC31BABE, 0x3B99C45D, 0xAE5D7827, 65 0x3FF40000, 0x00005131, 0x3FEB2523, 0xBB6B5B77, 0x3B85A025, 0x161129AB, 66 0x3FF50000, 0x000029D4, 0x3FEBAD50, 0xA4A6A0D5, 0xBB983007, 0x72D2D609, 67 0x3FF60000, 0x000007E1, 0x3FEC278A, 0x52A4E807, 0x3BA451F7, 0x31DE764B, 68 0x3FF70000, 0x0000185B, 0x3FEC950A, 0x3340D299, 0x3BAABF14, 0xE874463C, 69 0x3FF80000, 0x000024C4, 0x3FECF6F9, 0x786E02C1, 0x3B6B3F8E, 0x2446EA6B, 70 0x3FF90000, 0x0000BF0C, 0x3FED4E6F, 0x4642C44F, 0x3BA657E4, 0xDE96E741, 71 0x3FFA0000, 0x00006986, 0x3FED9C6F, 0xAFE63ACE, 0x3BA71F1B, 0xC2E3ED65, 72 0x3FFB0000, 0x00003690, 0x3FEDE1EB, 0x59375F86, 0x3B94D798, 0x08425B54, 73 0x3FFC0000, 0x000068CA, 0x3FEE1FBF, 0x97E34D01, 0x3BAD07C7, 0xA6BF9B47, 74 0x3FFD0000, 0x00008A24, 0x3FEE56B6, 0xF3EFC7EE, 0xBBA26658, 0xA2EB55F0, 75 0x3FFE0000, 0x0003C04A, 0x3FEE8789, 0xECECBA51, 0xBBA8B27F, 0x9026BBD4, 76 0x3FFF0000, 0x00000D68, 0x3FEEB2DF, 0xEDD5EEB6, 0x3B9EE74B, 0x12149464, 77 0x40000000, 0x00004DFD, 0x3FEED950, 0x5E1BD9DE, 0x3B971596, 0x795B2F55 78}; 79 80uint32_t TanhCoeff[] = { 81 0x3FF00000, 0x00000000, 0x00000000, 0x00000000, 82 0xBFD55555, 0x55555555, 0xBC755555, 0x55555554, 83 0x3FC11111, 0x11111111, 0x3C411111, 0x10FF0F6D, 84 0xBFABA1BA, 0x1BA1BA1C, 0x3C47917A, 0xA287E6B6, 85 0x3F9664F4, 0x882C10FA, 0xBC0A8F5F, 0x684BD9FF, 86 0xBF8226E3, 0x55E6C238, 0x3C01097B, 0x425ED098, 87 0x3F6D6D3D, 0x0E154787, 0xBC0BA781, 0x948B0FCF, 88 0xBF57DA36, 0x446C8BDA, 0xBBD2F684, 0xBDA12F6B, 89 0x3F435580, 0xBCDA12F7, 0xBBEED097, 0xB425ED0A, 90 0xBF2F545A, 0x781948B1, 0x3B7948B0, 0xFCD6E991, 91 0x3F17B291, 0x61F9ADD4, 0xBBAF9ADD, 0x3C0CA458 92}; 93 94struct TanhTblEntry { 95 double One; 96 double Two; 97 double Three; 98} TanhTblEntry; 99 100struct TanhCoeffEntry { 101 double One; 102 double Two; 103} TanhCoeffEntry; 104 105 106extern long double _ExpInnerLD(double alpha, double beta, double gamma, 107 double *pextra, int normflag); 108 109long double sinhl(long double x) 110{ 111 double xHead, xTail, xHeadx, xTailx; 112 double p, q, r1, r2, r3, r4, r5, t1, t2; 113 double residual, residual1, residual2, residual3, residual4, residual5, residual6; 114 double res, resnew, reshi, reslow, restiny, resbot, resmid; 115 double remainder, partial, extraword; 116 double argsq, argsq2; 117 double prod, prodlow, sum, suml, temp, temp2, templow; 118 double bottom; 119 uint32_t hibits; 120 int i; 121 double fpenv; 122 DblDbl ddx; 123 double coeff[16] = { 124 0.62132974171578525315635255289e-14, 0.577836659795680285435407874e-11, 125 0.469203367754092391773551193841e-8, 0.329380764163372859025032938e-5, 126 0.1976284584980237154150197628458498e-2, 1.0, 420.0, 143640.0, 39070080.0, 127 8204716800.0, 1279935820800.0, 140792940288000.0, 10137091700736000.0, 128 425757851430912000.0, 8515157028618240000.0, 51090942171709440000.0 129 }; 130 131 ddx.f = x; 132 xHead = ddx.d[0]; 133 xTail = ddx.d[1]; 134 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t 135 136 // NaNs and infinities 137 if (hibits >= 0x7ff00000u) // special cases: NaN and infinity 138 return x; 139 140 // x = zero hence xHead is zero 141 if ((hibits | ddx.i[1]) == 0) return x; 142 143 144 // x is not zero, infinity, or NaN 145 146 // finite non-zero x 147 FEGETENVD(fpenv); 148 FESETENVD(0.0); 149 150 151 // for small x, |x| <= 0.693147 use power series 152 153 if (fabs(xHead) <= 0.693147 ) { 154 temp=2.0*xHead*xTail; // small argument 155 argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp; // use power series 156 argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail; 157 sum=coeff[0]; 158 suml=0.0; 159 sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum; 160 sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum; 161 sum=__FMADD( argsq, sum, coeff[3] ); // coeff[3]+argsq*sum; 162 for ( i=4; i<15; i++) { 163 templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; 164 temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow; 165 bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow; 166 sum=__FADD( coeff[i], temp ); 167 residual=coeff[i]-sum+temp; 168 suml=bottom+residual; 169 } 170 prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2 171 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow; 172 prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow; 173 temp2=__FMADD( prodlow, xHead, prod*xTail ); // prodlow*xHead+prod*xTail; // mult. by xHead 174 temp=__FMADD( prod, xHead, temp2 ); // prod*xHead+temp2; 175 temp2=__FMSUB(prod, xHead, temp ) + temp2; // prod*xHead-temp+temp2; 176 sum=temp*recip; // sum of series --- 177 remainder=__FNMSUB( sum, coeff[15], temp ); // (temp-sum*coeff[15]); 178 partial=__FADD( remainder, temp2 ); 179 residual=remainder-partial+temp2; 180 suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip); 181 res=__FADD( xHead, sum ); // except for argument 182 reslow=(xHead-res+sum); // exact 183 resmid=__FADD( xTail, suml ); 184 restiny=xTail-resmid+suml; 185 p=__FADD( reslow, resmid ); // sum of middle terms 186 q=reslow-p+resmid; // possible residual 187 reshi=__FADD( res, p ); 188 resbot=(res-reshi+p)+(q+restiny); 189 ddx.d[0] = __FADD( reshi, resbot ); // build a cannonical result 190 ddx.d[1] = (reshi - ddx.d[0]) + resbot; 191 FESETENVD(fpenv); 192 return (ddx.f); 193 } 194 195 // |x| > 0.693147 196 // else if (fabs(xHead) > 0.693147 ) { <-- Unnecessary test 197 else { 198 if (xHead < 0) { 199 xHeadx = -xHead; 200 xTailx = -xTail; 201 } 202 else { 203 xHeadx = xHead; 204 xTailx = xTail; 205 } 206 207 ddx.f = _ExpInnerLD(xHeadx, xTailx, 0.0 , &extraword , 1); //get .5*e^x 208 209 if (fabs(xHead) > 39.0) { 210 211 if (xHead < 0) ddx.f = - ddx.f; 212 213 reshi = ddx.d[0]; 214 ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] ); 215 if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { 216 // special cases : NaN and infinity 217 ddx.d[1] = 0.0; 218 } 219 else { 220 ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1]; 221 } 222 FESETENVD(fpenv); 223 return (ddx.f); 224 } 225 else { 226 t1 = ddx.d[0]; // zres = .5*e^x 227 t2 = ddx.d[1]; // Def. of sinh: (e^x - e^-x)/2 228 r1=0.25/t1; 229 residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1; 230 r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x 231 residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1; 232 r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign) 233 r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign) 234 reshi=__FSUB( t1, r1 ); 235 reslow=__FSUB( t2, r2 ); 236 residual=(t1-reshi)-r1; // exact 237 if (fabs(t2) > fabs(r2)) 238 residual2=(t2-reslow)-r2; // exact 239 else 240 residual2=t2-(reslow+r2); // exact 241 r5=(r3+r4-residual1)*(4.0*r1); 242 resnew=__FADD( reshi, reslow ); 243 residual3=reshi-resnew+reslow; // exact 244 residual4=__FADD( residual, residual3 ); 245 reshi=__FADD( resnew, residual4 ); 246 residual5=residual-residual4+residual3; 247 residual6=resnew -reshi+residual4; 248 reslow=(residual2+(extraword+r5))+residual5+residual6; 249 if (xHead < 0.0) { // Reverse signs for 250 reshi = -reshi; // negative arguments 251 reslow = -reslow; 252 } 253 ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result 254 ddx.d[1] = (reshi - ddx.d[0]) + reslow; 255 FESETENVD(fpenv); 256 return (ddx.f); 257 } 258 } // end of abs(xHead) > 0.693147 259 260} 261 262 263long double coshl(long double x) 264{ 265 double xHead, xTail, xHeadx, xTailx; 266 double r1, r2, r3, r4, r5, t1, t2; 267 double residual, residual1, residual2, residual3, residual4, residual5, residual6; 268 double res, resnew, reshi, reslow; 269 double remainder, partial, extraword; 270 double argsq, argsq2; 271 double prod, prodlow, sum, suml, temp, templow; 272 double bottom; 273 uint32_t hibits, signx; 274 int i; 275 double fpenv; 276 DblDbl ddx; 277 double coeff[9] = { 278 306.0, 73440.0, 13366080.0, 1764322560.0, 158789030400.0, 8892185702400.0, 279 266765571072000.0, 3201186852864000.0, 6402373705728000.0 280 }; 281 double recip = 1.561920696858622646221636435005e-16; 282 283 ddx.f = x; 284 xHead = ddx.d[0]; 285 xTail = ddx.d[1]; 286 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t 287 signx = (ddx.i[0] >> 31) & 1u; // get sign of head 288 289 // NaNs and infinities 290 291 if (hibits >= 0x7ff00000u){ // special cases: NaN and infinity 292 if (xHead != xHead) { // x is a NaN 293 return x; 294 } 295 else { // For +/- infinity return +infinity 296 if (signx) 297 return (-x); 298 else 299 return x; 300 } 301 } 302 303 304 // x = zero hence xHead is zero 305 306 if ((hibits | ddx.i[1]) == 0) 307 return (long double) 1.0; 308 309 // x is not zero, infinity nor a NaN 310 311 FEGETENVD(fpenv); 312 FESETENVD(0.0); 313 314 //for small x, |x| <= 0.125 use power series 315 if (hibits <= 0x3fc00000u) { 316 temp=2.0*xHead*xTail; 317 argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp; 318 argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail; 319 sum=1.0; 320 suml=0.0; 321 sum=__FMADD( argsq, sum, coeff[0] ); // coeff[0]+argsq*sum; 322 sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum; 323 sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum; 324 for (i=3; i< 8; i++) { 325 templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; 326 temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow; 327 bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow; 328 sum=__FADD( coeff[i], temp ); 329 residual=coeff[i]-sum+temp; 330 suml=bottom+residual; 331 } 332 prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2 333 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow; 334 prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow; 335 sum=prod*recip; // sum of series --- 336 remainder=__FNMSUB( sum, coeff[8], prod ); // (prod-sum*coeff[8]); 337 partial=__FADD( remainder, prodlow ); 338 residual=remainder-partial+prodlow; 339 suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip); 340 res=__FADD(1.0, sum ); // except for 1.0 term. 341 reslow=1.0 - res+sum+suml; 342 ddx.d[0] = __FADD( res, reslow ); // build a cannonical result 343 ddx.d[1] = (res - ddx.d[0]) + reslow; 344 FESETENVD(fpenv); 345 return (ddx.f); 346 } 347 348 // |x| > 0.125 349 // else if (hibits > 0x3fc00000u) { // abs(xHead) > 0.125 <-- Unnecessary test 350 else { // abs(xHead) > 0.125 351 if (signx) { 352 xHeadx = -xHead; 353 xTailx = -xTail; 354 } 355 else{ 356 xHeadx = xHead; 357 xTailx = xTail; 358 } 359 ddx.f = _ExpInnerLD (xHeadx, xTailx, 0.0 , &extraword , 1); // get .5*e^x 360 361 // |x| > 40.0 362 if (hibits > 0x40440000u) { // abs(xHead) > 40.0 363 364 reshi = ddx.d[0]; 365 ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] ); 366 if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { // special cases : NaN and infinity 367 ddx.d[1] = 0.0; 368 } 369 else { 370 ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1]; 371 } 372 FESETENVD(fpenv); 373 return (ddx.f); 374 } 375 // 0.125 <= |x| < 40.0 376 else{ 377 t1 = ddx.d[0]; // zres=.5*e^x 378 t2 = ddx.d[1]; // def. of cosh: (e^x + e^-x)/2 379 r1=0.25/t1; 380 residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1; 381 r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x + errors< 4 ulps 382 residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1; 383 // rest of rem. from HO divisor only 384 r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign)...LO quotient 385 r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign)...LO quotient 386 reshi=__FADD( t1, r1 ); 387 reslow=__FADD( t2, r2 ); 388 residual=(t1-reshi)+r1; // exact 389 if (fabs(t2) > fabs(r2)) 390 residual2=(t2-reslow)+r2; // exact 391 else 392 residual2=r2-reslow+t2; // exact 393 r5=(r3+r4-residual1)*(4.0*r1); // reversed sign 394 resnew=__FADD( reshi, reslow ); 395 residual3=reshi-resnew+reslow; // exact 396 residual4=__FADD( residual, residual3 ); 397 reshi=__FADD( resnew, residual4 ); 398 residual5=residual-residual4+residual3; 399 residual6=resnew-reshi+residual4; 400 reslow=(residual2+(extraword-r5))+residual5+residual6; 401 ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result 402 ddx.d[1] = (reshi - ddx.d[0]) + reslow; 403 FESETENVD(fpenv); 404 return (ddx.f); 405 } 406 } // end of abs(xHead) > 0.125 407} 408 409 410long double tanhl(long double x) 411{ 412 413 double r, res, res2, res3, res4,res5, resfin, reslow; 414 double rem, rem1, rem2; 415 uint32_t signx, hibits, ndx ; 416 double q, qq, qa, qb, qc, q1, quot, quot2, quot3; 417 double arg, arg1, arg2, argsq, argsq2; 418 double sum, sum2, suml, suml2; 419 double frac, frac2, fac, fac2; 420 double ra, rb, rc, rd, re, rf; 421 double ans, ansx, ansl, ansmid, anstiny, anslow, almost; 422 double prod, prodlow, prodx, prodlowx, proderr, pl; 423 double top, top2, den, den1, denf, denf2; 424 double rg, t2, t3, tsq, tsql; 425 double az, bz, temp, temp1, templow, carry, carry1, extra; 426 double bump, residual, tiny, recip; 427 Double DeN; 428 double fpenv; 429 double xHead, xTail; 430 DblDbl ddx; 431 int i; 432 433 struct TanhTblEntry *TanhTblPtr = (struct TanhTblEntry *)TanhTbl; 434 struct TanhCoeffEntry *TanhCoeffPtr = (struct TanhCoeffEntry *) TanhCoeff; 435 436 ddx.f = x; 437 xHead = ddx.d[0]; 438 xTail = ddx.d[1]; 439 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t 440 signx = (ddx.i[0] >> 31) & 1u; // get sign of head 441 442 // NaN or zero 443 444 if ((xHead != xHead)||((hibits | ddx.i[1]) == 0)) 445 return x; 446 447 FEGETENVD(fpenv); 448 FESETENVD(0.0); 449 450 //for infinity or large x , |xHead| > 40.0 451 452 if (hibits > 0x40440000u) { // results = 1, for big args. 453 FESETENVD(fpenv); 454 if (signx) 455 return (long double) (-1.0); // sign of result= sign of arg 456 else 457 return (long double) (1.0); 458 } 459 460 // x is not zero, infinity nor a NaN 461 462 if (signx) { 463 xHead = -xHead; 464 xTail = -xTail; 465 } 466 467 468 // values of |xHead| with 2.0<= |xHead| <= 40.0 469 470 if (hibits > 0x40000000u) { 471 //**************************************************************************** 472 // * 473 // 1.0 * 474 // tanh(x)= 1 - ------------- |x| > 2.0 * 475 // .5 +.5*e^(2x) * 476 // * 477 //**************************************************************************** 478 xHead=2.0*xHead; // double arg 479 xTail=2.0*xTail; 480 481 ddx.f = _ExpInnerLD (xHead, xTail, 0.0 , &extra, 1 ); // compute e^(2*arg) 482 az = ddx.d[0]; 483 bz = ddx.d[1]; 484 temp=__FADD( az, 0.5 ); // az >= 0.5 485 carry=az-temp+0.5; // exact 486 temp1=__FADD( carry, bz ); 487 carry1=carry-temp1+bz; 488 den=__FADD( temp, temp1 ); 489 den1=temp-den+temp1+carry1; 490 r=1.0/den; // guess recip. of denominator 491 rem=__FNMSUB( den, r, 1.0 ); // 1.0-den*r; // first remainder 492 rem1=__FNMSUB( r, den1, rem ); // rem-r*den1; // (rem1,rem2) is better rem. 493 if (fabs(rem) > fabs(r*den1)) 494 rem2=__FNMSUB( r, den1, rem-rem1 ); // rem-rem1-r*den1; 495 else { 496 rem1=__FNMSUB( r, den1, rem ); // rem-(r*den1); 497 /* rem2=-(r*den1)-rem1+rem -(r*den1-(r*den1)); */ 498 rem2 = rem - __FMADD( r, den1, rem1 ) - __FMSUB( r, den1, (r*den1) ); 499 } 500 qq=__FMADD( r, rem1, r ); // r+r*rem1; // going for the full quotient 501 qa=__FMADD( r, rem1, r-qq ); // r-qq+r*rem1; 502 qb=__FMADD( r, rem2, qa ); // qa+r*rem2; 503 q=__FADD( qq, qb ); 504 // qc=qa-qb+r*rem2-(extra*r)*r; 505 qc = __FNMSUB( extra*r, r, __FMADD( r, rem2, qa - qb ) ); 506 q1=qq-q+qb+qc; 507 res=__FSUB( 1.0, q ); // high order result 508 res2=1.0-res-q; // exact low order 509 res3=__FSUB( res2, q1 ); 510 resfin=__FADD( res, res3 ); 511 res4=res2-res3-q1; 512 res5=res-resfin+res3; 513 reslow=res4+res5; 514 if (signx) { 515 resfin=-resfin; 516 reslow=-reslow; 517 } 518 ddx.d[0] = __FADD( resfin, reslow ); // build a cannonical result 519 ddx.d[1] = (resfin - ddx.d[0]) + reslow; 520 FESETENVD(fpenv); 521 return (ddx.f); 522 } 523 524 // else if (hibits <= 0x40000000u) { // |xHead| < 2.0 <-- Unnecessary test 525 else { // |xHead| < 2.0 526 527 //*************************************************************************** 528 // * 529 // Accurate table is used to reduce small arguments such that the * 530 // range of the |reduced argument| < 1/32. The tanh addition formula * 531 // is used to piece together three tanhs * 532 // * 533 //*************************************************************************** 534 535 DeN.f = __FMADD( xHead, 16.0, kBig ); // kBig+xHead*16.0; // compute table lookup index 536 ndx = DeN.i[1]; 537 if (hibits < 0x3fb00000u) ndx=0; // |xHead| < 0.0625 538 arg1=xHead-TanhTblPtr[(int32_t)ndx].One; // reduce argument by table value 539 // which is close to ndx/16 (exact) 540 arg=__FADD( arg1, xTail ); // full 53 bit argument 541 arg2=arg1-arg+xTail; // low order argument 542 //************************************************************************************ 543 // The argument has been broken up as follows: * 544 // Zarg=TanhTblPtr[ndx].One +arg+arg2 * 545 // * 546 // tanh(TanhTblPtr[ndx].One) is read from tantbl(2,ndx),tantbl(3,ndx) * 547 // with at least 122 bits of precision * 548 // tanh(arg2)=arg2+o(2^-172) * 549 // * 550 // Compute tanh(arg) by economized power series. * 551 // the task is to piece together the three parts * 552 // * 553 // (tanh(xTail)+tanh(c))(1-tanh^2(xHead)) * 554 // tanh(xHead+xTail+c)=tanh(xHead) + ------------------------------------------ * 555 // 1+tanh(xHead) tanh(xTail)+tanh(c)(tanh(xHead)+tanh(xTail))* 556 // * 557 //************************************************************************************ 558 sum=TanhCoeffPtr[10].One; 559 suml=TanhCoeffPtr[10].Two; 560 argsq=arg*arg; 561 argsq2=__FMSUB( arg, arg, argsq ); // arg*arg-argsq; // arg^2 for series 562 for (i = 9; i > 0; i--) { 563 pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; 564 prod=__FMADD( sum, argsq, pl ); // sum*argsq+pl; 565 prodlow=__FMSUB( sum, argsq, prod ) + pl; // sum*argsq-prod+pl; 566 sum=__FADD( TanhCoeffPtr[i].One, prod ); // add in the next coefficient 567 sum2=TanhCoeffPtr[i].Two+prodlow; 568 suml=TanhCoeffPtr[i].One-sum+prod+sum2; 569 } 570 pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; 571 temp=__FMADD( sum, argsq, pl ); // sum*argsq+pl; 572 templow=__FMSUB( sum, argsq, temp ) + pl; // sum*argsq-temp+pl; 573 pl=templow*arg; // last multiplication by arg 574 prodx=__FMADD( temp, arg, pl ); // temp*arg+pl; 575 prodlowx=__FMSUB( temp, arg, prodx ) + pl; // temp*arg-prodx+pl; // tanh(arg)-1.0 is done. 576 prod=__FADD( arg, prodx ); 577 prodlow=arg-prod+prodx+prodlowx; // tanh(arg) is done. 578 579 if (hibits < 0x3fb00000u) { // |xHead| < 0.0625, trivial: tanh(xHead)=0 in formula 580 proderr=(arg-prod+prodx)-prodlow+prodlowx; 581 bump=__FNMSUB( prod, prod*arg2, arg2 ); // arg2-prod*prod*arg2; 582 reslow=__FADD( prodlow, bump ); 583 residual=prodlow-reslow+bump+proderr; 584 res=__FADD( reslow, prod ); 585 reslow=prod-res+reslow+residual; 586 if (signx) { 587 res=-res; 588 reslow=-reslow; 589 } 590 591 ddx.d[0] = __FADD( res, reslow ); // build a cannonical result 592 ddx.d[1] = (res - ddx.d[0]) + reslow; 593 FESETENVD(fpenv); 594 return (ddx.f); 595 } 596 597 tiny=arg2*(TanhTblPtr[(int32_t)ndx].Two+prod); // The last addend in denominator 598 599 // pl=TanhTblPtr[(int32_t)ndx].Two*prodlow+TanhTblPtr[(int32_t)ndx].Three*prod; // tanh(xHead)*tanh(xTail) 600 pl = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prodlow, TanhTblPtr[(int32_t)ndx].Three*prod ); 601 602 // den=TanhTblPtr[(int32_t)ndx].Two*prod+pl; // denominator completed except 603 den = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prod, pl ); 604 605 // den1=TanhTblPtr[(int32_t)ndx].Two*prod-den+pl+tiny; // for the +1 term. 606 den1 = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, prod, den ) + pl + tiny; 607 608 suml2=prodlow+arg2; // start on numerator 609 t2=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three; // table look up value 610 611 // tsq=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two+t2; // squared. 612 tsq = __FMADD( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, t2 ); 613 614 // tsql=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two-tsq+t2; 615 tsql = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, tsq ) + t2; 616 617 /* t3=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three-t2 + 618 TanhTblPtr[(int32_t)ndx].Three*TanhTblPtr[(int32_t)ndx].Three; */ 619 t3 = __FMADD( TanhTblPtr[(int32_t)ndx].Three, TanhTblPtr[(int32_t)ndx].Three, 620 __FMSUB( 2.0*TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Three, t2 ) ); 621 622 denf=__FADD( 1.0, den ); // denominator is really done now 623 denf2=1.0-denf+den+den1; 624 fac=__FSUB( 1.0, tsq ); // compute 1-tanhtble(ndx)^2 becomes 625 residual=1.0-fac-tsq; // (fac, fac2) 626 fac2=residual-tsql-t3; 627 pl=__FMADD( fac, suml2, fac2*prod ); // fac*suml2+fac2*prod; // (xTail+c)(1-xHead*xHead) ... above formula 628 top=__FMADD( fac, prod, pl ); // fac*prod+pl; 629 top2=__FMSUB( fac, prod, top ) + pl; // fac*prod-top+pl; // doing division 630 recip=1.0/denf; 631 quot=top*recip; 632 ra=__FNMSUB( quot, denf, top ); // top-quot*denf; // 3 part remainder: 633 rb=-(quot*denf2); // rem=ra+rb+top2 634 rc= - __FMADD(quot, denf2, rb ); // -(quot*denf2+rb); // 3rd order remainder 635 rd=__FADD( top2, rb ); // sum 2nd order rems. into rf 636 if (fabs(top2) > fabs(rb)) 637 re=top2-rd+rb; 638 else 639 re=rb-rd+top2; 640 rf=__FADD( ra, rd ); 641 if (fabs(ra) > fabs(rd)) 642 rg=ra-rf+rd; // more 3rd order rems. 643 else // rc+re+rg 644 rg=rd-rf+ra; 645 quot2=rf*recip; 646 quot3=__FMSUB( rf, recip, quot2 ); // rf*recip-quot2; 647 frac=__FADD( quot, quot2 ); 648 // frac2=quot-frac+quot2+(quot3+recip *(rc+re+rg)); 649 frac2 = quot-frac+quot2 + __FMADD( recip, (rc+re+rg), quot3 ); 650 651 652 ansx=__FADD( TanhTblPtr[(int32_t)ndx].Two, frac ); // paste together the result 653 ansl=__FADD( TanhTblPtr[(int32_t)ndx].Three, frac2 ); 654 ansmid=TanhTblPtr[(int32_t)ndx].Two-ansx+frac; 655 anstiny=TanhTblPtr[(int32_t)ndx].Three-ansl+frac2; 656 almost=__FADD( ansmid, ansl ); 657 ans=__FADD( ansx, almost ); 658 anslow=(ansx-ans+almost)+((ansmid-almost+ansl) +anstiny); 659 if (signx) { 660 ans=-ans; 661 anslow=-anslow; 662 } 663 664 ddx.d[0] = __FADD( ans, anslow ); // build a cannonical result 665 ddx.d[1] = (ans - ddx.d[0]) + anslow; 666 FESETENVD(fpenv); 667 return (ddx.f); 668 } 669} 670#endif