/* * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ // // HyperbolicDD.c // // Double-double Function Library // Copyright: © 1995-96 by Apple Computer, Inc., all rights reserved // // long double coshl(long double x); // long double sinhl(long double x); // long double tanhl(long double x); // // Change history: // 9/1/96 - PAF removed unnecessary tests (uncovered by MrC warning) // #include "math.h" #include "fp_private.h" #if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L) #include "DD.h" static const double recip = 0.1957294106339126123084757437350e-19; static const double kBig = 4.504699138998272e+15 ; // 2.0**52 + 2.0**40 = 0x43300100 00000000 uint32_t TanhTbl[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x3FB00000, 0x00030807, 0x3FAFF559, 0x97E63AD9, 0xBB6CF9C0, 0x2437818A, 0x3FC00000, 0x0000284A, 0x3FBFD599, 0x2BC5078A, 0xBB77F9D4, 0x704D912A, 0x3FC80000, 0x00005880, 0x3FC7B8FF, 0x903C4CEC, 0x3B47716F, 0xB54B60B5, 0x3FD00000, 0x00000CDC, 0x3FCF597E, 0xA69A34B3, 0xBB794C01, 0xC101435A, 0x3FD40000, 0x0000313C, 0x3FD35F98, 0xA0EA91C7, 0xBB904F63, 0x26BD63B6, 0x3FD80000, 0x000021EA, 0x3FD6EF53, 0xDE8CAD3F, 0xBB91172E, 0x8FAFB7A2, 0x3FDC0000, 0x00005214, 0x3FDA5729, 0xEE48C464, 0x3B9543D8, 0xC4943292, 0x3FE00000, 0x00001E86, 0x3FDD9353, 0xD756BAF6, 0xBB8F03A0, 0x34F7B469, 0x3FE20000, 0x00007115, 0x3FE05086, 0xF2F72867, 0xBB95D918, 0x3E3269EE, 0x3FE40000, 0x00005720, 0x3FE1BF47, 0xEABBCBE9, 0x3B918CCE, 0xA6F1FA3A, 0x3FE60000, 0x000021CF, 0x3FE3157D, 0xFE9F8724, 0xBBA1AC2C, 0x2B395168, 0x3FE80000, 0x00016908, 0x3FE45323, 0xE553C98B, 0xBBA14AF9, 0xB571AFA7, 0x3FEA0000, 0x00005202, 0x3FE5788F, 0xF10D56AF, 0xBBA3C229, 0xC063AD80, 0x3FEC0000, 0x00005742, 0x3FE68665, 0x0B8C4C1B, 0x3B8BE9FC, 0xF1B4428B, 0x3FEE0000, 0x00008582, 0x3FE77D83, 0x8E34430D, 0x3B829FA7, 0xF1623176, 0x3FF00000, 0x00005F76, 0x3FE85EFA, 0xB51543C3, 0xBB4B3592, 0x2498C785, 0x3FF10000, 0x00001F8D, 0x3FE92BFB, 0x370DB380, 0x3BA29188, 0x377121C3, 0x3FF20000, 0x0000130F, 0x3FE9E5CB, 0x5BA45A90, 0x3B7DFB4E, 0xF67C44E2, 0x3FF30000, 0x00004F18, 0x3FEA8DBC, 0xBC31BABE, 0x3B99C45D, 0xAE5D7827, 0x3FF40000, 0x00005131, 0x3FEB2523, 0xBB6B5B77, 0x3B85A025, 0x161129AB, 0x3FF50000, 0x000029D4, 0x3FEBAD50, 0xA4A6A0D5, 0xBB983007, 0x72D2D609, 0x3FF60000, 0x000007E1, 0x3FEC278A, 0x52A4E807, 0x3BA451F7, 0x31DE764B, 0x3FF70000, 0x0000185B, 0x3FEC950A, 0x3340D299, 0x3BAABF14, 0xE874463C, 0x3FF80000, 0x000024C4, 0x3FECF6F9, 0x786E02C1, 0x3B6B3F8E, 0x2446EA6B, 0x3FF90000, 0x0000BF0C, 0x3FED4E6F, 0x4642C44F, 0x3BA657E4, 0xDE96E741, 0x3FFA0000, 0x00006986, 0x3FED9C6F, 0xAFE63ACE, 0x3BA71F1B, 0xC2E3ED65, 0x3FFB0000, 0x00003690, 0x3FEDE1EB, 0x59375F86, 0x3B94D798, 0x08425B54, 0x3FFC0000, 0x000068CA, 0x3FEE1FBF, 0x97E34D01, 0x3BAD07C7, 0xA6BF9B47, 0x3FFD0000, 0x00008A24, 0x3FEE56B6, 0xF3EFC7EE, 0xBBA26658, 0xA2EB55F0, 0x3FFE0000, 0x0003C04A, 0x3FEE8789, 0xECECBA51, 0xBBA8B27F, 0x9026BBD4, 0x3FFF0000, 0x00000D68, 0x3FEEB2DF, 0xEDD5EEB6, 0x3B9EE74B, 0x12149464, 0x40000000, 0x00004DFD, 0x3FEED950, 0x5E1BD9DE, 0x3B971596, 0x795B2F55 }; uint32_t TanhCoeff[] = { 0x3FF00000, 0x00000000, 0x00000000, 0x00000000, 0xBFD55555, 0x55555555, 0xBC755555, 0x55555554, 0x3FC11111, 0x11111111, 0x3C411111, 0x10FF0F6D, 0xBFABA1BA, 0x1BA1BA1C, 0x3C47917A, 0xA287E6B6, 0x3F9664F4, 0x882C10FA, 0xBC0A8F5F, 0x684BD9FF, 0xBF8226E3, 0x55E6C238, 0x3C01097B, 0x425ED098, 0x3F6D6D3D, 0x0E154787, 0xBC0BA781, 0x948B0FCF, 0xBF57DA36, 0x446C8BDA, 0xBBD2F684, 0xBDA12F6B, 0x3F435580, 0xBCDA12F7, 0xBBEED097, 0xB425ED0A, 0xBF2F545A, 0x781948B1, 0x3B7948B0, 0xFCD6E991, 0x3F17B291, 0x61F9ADD4, 0xBBAF9ADD, 0x3C0CA458 }; struct TanhTblEntry { double One; double Two; double Three; } TanhTblEntry; struct TanhCoeffEntry { double One; double Two; } TanhCoeffEntry; extern long double _ExpInnerLD(double alpha, double beta, double gamma, double *pextra, int normflag); long double sinhl(long double x) { double xHead, xTail, xHeadx, xTailx; double p, q, r1, r2, r3, r4, r5, t1, t2; double residual, residual1, residual2, residual3, residual4, residual5, residual6; double res, resnew, reshi, reslow, restiny, resbot, resmid; double remainder, partial, extraword; double argsq, argsq2; double prod, prodlow, sum, suml, temp, temp2, templow; double bottom; uint32_t hibits; int i; double fpenv; DblDbl ddx; double coeff[16] = { 0.62132974171578525315635255289e-14, 0.577836659795680285435407874e-11, 0.469203367754092391773551193841e-8, 0.329380764163372859025032938e-5, 0.1976284584980237154150197628458498e-2, 1.0, 420.0, 143640.0, 39070080.0, 8204716800.0, 1279935820800.0, 140792940288000.0, 10137091700736000.0, 425757851430912000.0, 8515157028618240000.0, 51090942171709440000.0 }; ddx.f = x; xHead = ddx.d[0]; xTail = ddx.d[1]; hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t // NaNs and infinities if (hibits >= 0x7ff00000u) // special cases: NaN and infinity return x; // x = zero hence xHead is zero if ((hibits | ddx.i[1]) == 0) return x; // x is not zero, infinity, or NaN // finite non-zero x FEGETENVD(fpenv); FESETENVD(0.0); // for small x, |x| <= 0.693147 use power series if (fabs(xHead) <= 0.693147 ) { temp=2.0*xHead*xTail; // small argument argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp; // use power series argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail; sum=coeff[0]; suml=0.0; sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum; sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum; sum=__FMADD( argsq, sum, coeff[3] ); // coeff[3]+argsq*sum; for ( i=4; i<15; i++) { templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow; bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow; sum=__FADD( coeff[i], temp ); residual=coeff[i]-sum+temp; suml=bottom+residual; } prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow; prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow; temp2=__FMADD( prodlow, xHead, prod*xTail ); // prodlow*xHead+prod*xTail; // mult. by xHead temp=__FMADD( prod, xHead, temp2 ); // prod*xHead+temp2; temp2=__FMSUB(prod, xHead, temp ) + temp2; // prod*xHead-temp+temp2; sum=temp*recip; // sum of series --- remainder=__FNMSUB( sum, coeff[15], temp ); // (temp-sum*coeff[15]); partial=__FADD( remainder, temp2 ); residual=remainder-partial+temp2; suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip); res=__FADD( xHead, sum ); // except for argument reslow=(xHead-res+sum); // exact resmid=__FADD( xTail, suml ); restiny=xTail-resmid+suml; p=__FADD( reslow, resmid ); // sum of middle terms q=reslow-p+resmid; // possible residual reshi=__FADD( res, p ); resbot=(res-reshi+p)+(q+restiny); ddx.d[0] = __FADD( reshi, resbot ); // build a cannonical result ddx.d[1] = (reshi - ddx.d[0]) + resbot; FESETENVD(fpenv); return (ddx.f); } // |x| > 0.693147 // else if (fabs(xHead) > 0.693147 ) { <-- Unnecessary test else { if (xHead < 0) { xHeadx = -xHead; xTailx = -xTail; } else { xHeadx = xHead; xTailx = xTail; } ddx.f = _ExpInnerLD(xHeadx, xTailx, 0.0 , &extraword , 1); //get .5*e^x if (fabs(xHead) > 39.0) { if (xHead < 0) ddx.f = - ddx.f; reshi = ddx.d[0]; ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] ); if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { // special cases : NaN and infinity ddx.d[1] = 0.0; } else { ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1]; } FESETENVD(fpenv); return (ddx.f); } else { t1 = ddx.d[0]; // zres = .5*e^x t2 = ddx.d[1]; // Def. of sinh: (e^x - e^-x)/2 r1=0.25/t1; residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1; r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1; r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign) r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign) reshi=__FSUB( t1, r1 ); reslow=__FSUB( t2, r2 ); residual=(t1-reshi)-r1; // exact if (fabs(t2) > fabs(r2)) residual2=(t2-reslow)-r2; // exact else residual2=t2-(reslow+r2); // exact r5=(r3+r4-residual1)*(4.0*r1); resnew=__FADD( reshi, reslow ); residual3=reshi-resnew+reslow; // exact residual4=__FADD( residual, residual3 ); reshi=__FADD( resnew, residual4 ); residual5=residual-residual4+residual3; residual6=resnew -reshi+residual4; reslow=(residual2+(extraword+r5))+residual5+residual6; if (xHead < 0.0) { // Reverse signs for reshi = -reshi; // negative arguments reslow = -reslow; } ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result ddx.d[1] = (reshi - ddx.d[0]) + reslow; FESETENVD(fpenv); return (ddx.f); } } // end of abs(xHead) > 0.693147 } long double coshl(long double x) { double xHead, xTail, xHeadx, xTailx; double r1, r2, r3, r4, r5, t1, t2; double residual, residual1, residual2, residual3, residual4, residual5, residual6; double res, resnew, reshi, reslow; double remainder, partial, extraword; double argsq, argsq2; double prod, prodlow, sum, suml, temp, templow; double bottom; uint32_t hibits, signx; int i; double fpenv; DblDbl ddx; double coeff[9] = { 306.0, 73440.0, 13366080.0, 1764322560.0, 158789030400.0, 8892185702400.0, 266765571072000.0, 3201186852864000.0, 6402373705728000.0 }; double recip = 1.561920696858622646221636435005e-16; ddx.f = x; xHead = ddx.d[0]; xTail = ddx.d[1]; hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t signx = (ddx.i[0] >> 31) & 1u; // get sign of head // NaNs and infinities if (hibits >= 0x7ff00000u){ // special cases: NaN and infinity if (xHead != xHead) { // x is a NaN return x; } else { // For +/- infinity return +infinity if (signx) return (-x); else return x; } } // x = zero hence xHead is zero if ((hibits | ddx.i[1]) == 0) return (long double) 1.0; // x is not zero, infinity nor a NaN FEGETENVD(fpenv); FESETENVD(0.0); //for small x, |x| <= 0.125 use power series if (hibits <= 0x3fc00000u) { temp=2.0*xHead*xTail; argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp; argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail; sum=1.0; suml=0.0; sum=__FMADD( argsq, sum, coeff[0] ); // coeff[0]+argsq*sum; sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum; sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum; for (i=3; i< 8; i++) { templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow; bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow; sum=__FADD( coeff[i], temp ); residual=coeff[i]-sum+temp; suml=bottom+residual; } prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow; prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow; sum=prod*recip; // sum of series --- remainder=__FNMSUB( sum, coeff[8], prod ); // (prod-sum*coeff[8]); partial=__FADD( remainder, prodlow ); residual=remainder-partial+prodlow; suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip); res=__FADD(1.0, sum ); // except for 1.0 term. reslow=1.0 - res+sum+suml; ddx.d[0] = __FADD( res, reslow ); // build a cannonical result ddx.d[1] = (res - ddx.d[0]) + reslow; FESETENVD(fpenv); return (ddx.f); } // |x| > 0.125 // else if (hibits > 0x3fc00000u) { // abs(xHead) > 0.125 <-- Unnecessary test else { // abs(xHead) > 0.125 if (signx) { xHeadx = -xHead; xTailx = -xTail; } else{ xHeadx = xHead; xTailx = xTail; } ddx.f = _ExpInnerLD (xHeadx, xTailx, 0.0 , &extraword , 1); // get .5*e^x // |x| > 40.0 if (hibits > 0x40440000u) { // abs(xHead) > 40.0 reshi = ddx.d[0]; ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] ); if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { // special cases : NaN and infinity ddx.d[1] = 0.0; } else { ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1]; } FESETENVD(fpenv); return (ddx.f); } // 0.125 <= |x| < 40.0 else{ t1 = ddx.d[0]; // zres=.5*e^x t2 = ddx.d[1]; // def. of cosh: (e^x + e^-x)/2 r1=0.25/t1; residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1; r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x + errors< 4 ulps residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1; // rest of rem. from HO divisor only r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign)...LO quotient r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign)...LO quotient reshi=__FADD( t1, r1 ); reslow=__FADD( t2, r2 ); residual=(t1-reshi)+r1; // exact if (fabs(t2) > fabs(r2)) residual2=(t2-reslow)+r2; // exact else residual2=r2-reslow+t2; // exact r5=(r3+r4-residual1)*(4.0*r1); // reversed sign resnew=__FADD( reshi, reslow ); residual3=reshi-resnew+reslow; // exact residual4=__FADD( residual, residual3 ); reshi=__FADD( resnew, residual4 ); residual5=residual-residual4+residual3; residual6=resnew-reshi+residual4; reslow=(residual2+(extraword-r5))+residual5+residual6; ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result ddx.d[1] = (reshi - ddx.d[0]) + reslow; FESETENVD(fpenv); return (ddx.f); } } // end of abs(xHead) > 0.125 } long double tanhl(long double x) { double r, res, res2, res3, res4,res5, resfin, reslow; double rem, rem1, rem2; uint32_t signx, hibits, ndx ; double q, qq, qa, qb, qc, q1, quot, quot2, quot3; double arg, arg1, arg2, argsq, argsq2; double sum, sum2, suml, suml2; double frac, frac2, fac, fac2; double ra, rb, rc, rd, re, rf; double ans, ansx, ansl, ansmid, anstiny, anslow, almost; double prod, prodlow, prodx, prodlowx, proderr, pl; double top, top2, den, den1, denf, denf2; double rg, t2, t3, tsq, tsql; double az, bz, temp, temp1, templow, carry, carry1, extra; double bump, residual, tiny, recip; Double DeN; double fpenv; double xHead, xTail; DblDbl ddx; int i; struct TanhTblEntry *TanhTblPtr = (struct TanhTblEntry *)TanhTbl; struct TanhCoeffEntry *TanhCoeffPtr = (struct TanhCoeffEntry *) TanhCoeff; ddx.f = x; xHead = ddx.d[0]; xTail = ddx.d[1]; hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t signx = (ddx.i[0] >> 31) & 1u; // get sign of head // NaN or zero if ((xHead != xHead)||((hibits | ddx.i[1]) == 0)) return x; FEGETENVD(fpenv); FESETENVD(0.0); //for infinity or large x , |xHead| > 40.0 if (hibits > 0x40440000u) { // results = 1, for big args. FESETENVD(fpenv); if (signx) return (long double) (-1.0); // sign of result= sign of arg else return (long double) (1.0); } // x is not zero, infinity nor a NaN if (signx) { xHead = -xHead; xTail = -xTail; } // values of |xHead| with 2.0<= |xHead| <= 40.0 if (hibits > 0x40000000u) { //**************************************************************************** // * // 1.0 * // tanh(x)= 1 - ------------- |x| > 2.0 * // .5 +.5*e^(2x) * // * //**************************************************************************** xHead=2.0*xHead; // double arg xTail=2.0*xTail; ddx.f = _ExpInnerLD (xHead, xTail, 0.0 , &extra, 1 ); // compute e^(2*arg) az = ddx.d[0]; bz = ddx.d[1]; temp=__FADD( az, 0.5 ); // az >= 0.5 carry=az-temp+0.5; // exact temp1=__FADD( carry, bz ); carry1=carry-temp1+bz; den=__FADD( temp, temp1 ); den1=temp-den+temp1+carry1; r=1.0/den; // guess recip. of denominator rem=__FNMSUB( den, r, 1.0 ); // 1.0-den*r; // first remainder rem1=__FNMSUB( r, den1, rem ); // rem-r*den1; // (rem1,rem2) is better rem. if (fabs(rem) > fabs(r*den1)) rem2=__FNMSUB( r, den1, rem-rem1 ); // rem-rem1-r*den1; else { rem1=__FNMSUB( r, den1, rem ); // rem-(r*den1); /* rem2=-(r*den1)-rem1+rem -(r*den1-(r*den1)); */ rem2 = rem - __FMADD( r, den1, rem1 ) - __FMSUB( r, den1, (r*den1) ); } qq=__FMADD( r, rem1, r ); // r+r*rem1; // going for the full quotient qa=__FMADD( r, rem1, r-qq ); // r-qq+r*rem1; qb=__FMADD( r, rem2, qa ); // qa+r*rem2; q=__FADD( qq, qb ); // qc=qa-qb+r*rem2-(extra*r)*r; qc = __FNMSUB( extra*r, r, __FMADD( r, rem2, qa - qb ) ); q1=qq-q+qb+qc; res=__FSUB( 1.0, q ); // high order result res2=1.0-res-q; // exact low order res3=__FSUB( res2, q1 ); resfin=__FADD( res, res3 ); res4=res2-res3-q1; res5=res-resfin+res3; reslow=res4+res5; if (signx) { resfin=-resfin; reslow=-reslow; } ddx.d[0] = __FADD( resfin, reslow ); // build a cannonical result ddx.d[1] = (resfin - ddx.d[0]) + reslow; FESETENVD(fpenv); return (ddx.f); } // else if (hibits <= 0x40000000u) { // |xHead| < 2.0 <-- Unnecessary test else { // |xHead| < 2.0 //*************************************************************************** // * // Accurate table is used to reduce small arguments such that the * // range of the |reduced argument| < 1/32. The tanh addition formula * // is used to piece together three tanhs * // * //*************************************************************************** DeN.f = __FMADD( xHead, 16.0, kBig ); // kBig+xHead*16.0; // compute table lookup index ndx = DeN.i[1]; if (hibits < 0x3fb00000u) ndx=0; // |xHead| < 0.0625 arg1=xHead-TanhTblPtr[(int32_t)ndx].One; // reduce argument by table value // which is close to ndx/16 (exact) arg=__FADD( arg1, xTail ); // full 53 bit argument arg2=arg1-arg+xTail; // low order argument //************************************************************************************ // The argument has been broken up as follows: * // Zarg=TanhTblPtr[ndx].One +arg+arg2 * // * // tanh(TanhTblPtr[ndx].One) is read from tantbl(2,ndx),tantbl(3,ndx) * // with at least 122 bits of precision * // tanh(arg2)=arg2+o(2^-172) * // * // Compute tanh(arg) by economized power series. * // the task is to piece together the three parts * // * // (tanh(xTail)+tanh(c))(1-tanh^2(xHead)) * // tanh(xHead+xTail+c)=tanh(xHead) + ------------------------------------------ * // 1+tanh(xHead) tanh(xTail)+tanh(c)(tanh(xHead)+tanh(xTail))* // * //************************************************************************************ sum=TanhCoeffPtr[10].One; suml=TanhCoeffPtr[10].Two; argsq=arg*arg; argsq2=__FMSUB( arg, arg, argsq ); // arg*arg-argsq; // arg^2 for series for (i = 9; i > 0; i--) { pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; prod=__FMADD( sum, argsq, pl ); // sum*argsq+pl; prodlow=__FMSUB( sum, argsq, prod ) + pl; // sum*argsq-prod+pl; sum=__FADD( TanhCoeffPtr[i].One, prod ); // add in the next coefficient sum2=TanhCoeffPtr[i].Two+prodlow; suml=TanhCoeffPtr[i].One-sum+prod+sum2; } pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; temp=__FMADD( sum, argsq, pl ); // sum*argsq+pl; templow=__FMSUB( sum, argsq, temp ) + pl; // sum*argsq-temp+pl; pl=templow*arg; // last multiplication by arg prodx=__FMADD( temp, arg, pl ); // temp*arg+pl; prodlowx=__FMSUB( temp, arg, prodx ) + pl; // temp*arg-prodx+pl; // tanh(arg)-1.0 is done. prod=__FADD( arg, prodx ); prodlow=arg-prod+prodx+prodlowx; // tanh(arg) is done. if (hibits < 0x3fb00000u) { // |xHead| < 0.0625, trivial: tanh(xHead)=0 in formula proderr=(arg-prod+prodx)-prodlow+prodlowx; bump=__FNMSUB( prod, prod*arg2, arg2 ); // arg2-prod*prod*arg2; reslow=__FADD( prodlow, bump ); residual=prodlow-reslow+bump+proderr; res=__FADD( reslow, prod ); reslow=prod-res+reslow+residual; if (signx) { res=-res; reslow=-reslow; } ddx.d[0] = __FADD( res, reslow ); // build a cannonical result ddx.d[1] = (res - ddx.d[0]) + reslow; FESETENVD(fpenv); return (ddx.f); } tiny=arg2*(TanhTblPtr[(int32_t)ndx].Two+prod); // The last addend in denominator // pl=TanhTblPtr[(int32_t)ndx].Two*prodlow+TanhTblPtr[(int32_t)ndx].Three*prod; // tanh(xHead)*tanh(xTail) pl = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prodlow, TanhTblPtr[(int32_t)ndx].Three*prod ); // den=TanhTblPtr[(int32_t)ndx].Two*prod+pl; // denominator completed except den = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prod, pl ); // den1=TanhTblPtr[(int32_t)ndx].Two*prod-den+pl+tiny; // for the +1 term. den1 = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, prod, den ) + pl + tiny; suml2=prodlow+arg2; // start on numerator t2=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three; // table look up value // tsq=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two+t2; // squared. tsq = __FMADD( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, t2 ); // tsql=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two-tsq+t2; tsql = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, tsq ) + t2; /* t3=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three-t2 + TanhTblPtr[(int32_t)ndx].Three*TanhTblPtr[(int32_t)ndx].Three; */ t3 = __FMADD( TanhTblPtr[(int32_t)ndx].Three, TanhTblPtr[(int32_t)ndx].Three, __FMSUB( 2.0*TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Three, t2 ) ); denf=__FADD( 1.0, den ); // denominator is really done now denf2=1.0-denf+den+den1; fac=__FSUB( 1.0, tsq ); // compute 1-tanhtble(ndx)^2 becomes residual=1.0-fac-tsq; // (fac, fac2) fac2=residual-tsql-t3; pl=__FMADD( fac, suml2, fac2*prod ); // fac*suml2+fac2*prod; // (xTail+c)(1-xHead*xHead) ... above formula top=__FMADD( fac, prod, pl ); // fac*prod+pl; top2=__FMSUB( fac, prod, top ) + pl; // fac*prod-top+pl; // doing division recip=1.0/denf; quot=top*recip; ra=__FNMSUB( quot, denf, top ); // top-quot*denf; // 3 part remainder: rb=-(quot*denf2); // rem=ra+rb+top2 rc= - __FMADD(quot, denf2, rb ); // -(quot*denf2+rb); // 3rd order remainder rd=__FADD( top2, rb ); // sum 2nd order rems. into rf if (fabs(top2) > fabs(rb)) re=top2-rd+rb; else re=rb-rd+top2; rf=__FADD( ra, rd ); if (fabs(ra) > fabs(rd)) rg=ra-rf+rd; // more 3rd order rems. else // rc+re+rg rg=rd-rf+ra; quot2=rf*recip; quot3=__FMSUB( rf, recip, quot2 ); // rf*recip-quot2; frac=__FADD( quot, quot2 ); // frac2=quot-frac+quot2+(quot3+recip *(rc+re+rg)); frac2 = quot-frac+quot2 + __FMADD( recip, (rc+re+rg), quot3 ); ansx=__FADD( TanhTblPtr[(int32_t)ndx].Two, frac ); // paste together the result ansl=__FADD( TanhTblPtr[(int32_t)ndx].Three, frac2 ); ansmid=TanhTblPtr[(int32_t)ndx].Two-ansx+frac; anstiny=TanhTblPtr[(int32_t)ndx].Three-ansl+frac2; almost=__FADD( ansmid, ansl ); ans=__FADD( ansx, almost ); anslow=(ansx-ans+almost)+((ansmid-almost+ansl) +anstiny); if (signx) { ans=-ans; anslow=-anslow; } ddx.d[0] = __FADD( ans, anslow ); // build a cannonical result ddx.d[1] = (ans - ddx.d[0]) + anslow; FESETENVD(fpenv); return (ddx.f); } } #endif