this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22//
23// HyperbolicDD.c
24//
25// Double-double Function Library
26// Copyright: � 1995-96 by Apple Computer, Inc., all rights reserved
27//
28// long double coshl(long double x);
29// long double sinhl(long double x);
30// long double tanhl(long double x);
31//
32// Change history:
33// 9/1/96 - PAF removed unnecessary tests (uncovered by MrC warning)
34//
35
36#include "math.h"
37#include "fp_private.h"
38#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L)
39#include "DD.h"
40
41static const double recip = 0.1957294106339126123084757437350e-19;
42static const double kBig = 4.504699138998272e+15 ; // 2.0**52 + 2.0**40 = 0x43300100 00000000
43
44uint32_t TanhTbl[] = {
45 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
46 0x3FB00000, 0x00030807, 0x3FAFF559, 0x97E63AD9, 0xBB6CF9C0, 0x2437818A,
47 0x3FC00000, 0x0000284A, 0x3FBFD599, 0x2BC5078A, 0xBB77F9D4, 0x704D912A,
48 0x3FC80000, 0x00005880, 0x3FC7B8FF, 0x903C4CEC, 0x3B47716F, 0xB54B60B5,
49 0x3FD00000, 0x00000CDC, 0x3FCF597E, 0xA69A34B3, 0xBB794C01, 0xC101435A,
50 0x3FD40000, 0x0000313C, 0x3FD35F98, 0xA0EA91C7, 0xBB904F63, 0x26BD63B6,
51 0x3FD80000, 0x000021EA, 0x3FD6EF53, 0xDE8CAD3F, 0xBB91172E, 0x8FAFB7A2,
52 0x3FDC0000, 0x00005214, 0x3FDA5729, 0xEE48C464, 0x3B9543D8, 0xC4943292,
53 0x3FE00000, 0x00001E86, 0x3FDD9353, 0xD756BAF6, 0xBB8F03A0, 0x34F7B469,
54 0x3FE20000, 0x00007115, 0x3FE05086, 0xF2F72867, 0xBB95D918, 0x3E3269EE,
55 0x3FE40000, 0x00005720, 0x3FE1BF47, 0xEABBCBE9, 0x3B918CCE, 0xA6F1FA3A,
56 0x3FE60000, 0x000021CF, 0x3FE3157D, 0xFE9F8724, 0xBBA1AC2C, 0x2B395168,
57 0x3FE80000, 0x00016908, 0x3FE45323, 0xE553C98B, 0xBBA14AF9, 0xB571AFA7,
58 0x3FEA0000, 0x00005202, 0x3FE5788F, 0xF10D56AF, 0xBBA3C229, 0xC063AD80,
59 0x3FEC0000, 0x00005742, 0x3FE68665, 0x0B8C4C1B, 0x3B8BE9FC, 0xF1B4428B,
60 0x3FEE0000, 0x00008582, 0x3FE77D83, 0x8E34430D, 0x3B829FA7, 0xF1623176,
61 0x3FF00000, 0x00005F76, 0x3FE85EFA, 0xB51543C3, 0xBB4B3592, 0x2498C785,
62 0x3FF10000, 0x00001F8D, 0x3FE92BFB, 0x370DB380, 0x3BA29188, 0x377121C3,
63 0x3FF20000, 0x0000130F, 0x3FE9E5CB, 0x5BA45A90, 0x3B7DFB4E, 0xF67C44E2,
64 0x3FF30000, 0x00004F18, 0x3FEA8DBC, 0xBC31BABE, 0x3B99C45D, 0xAE5D7827,
65 0x3FF40000, 0x00005131, 0x3FEB2523, 0xBB6B5B77, 0x3B85A025, 0x161129AB,
66 0x3FF50000, 0x000029D4, 0x3FEBAD50, 0xA4A6A0D5, 0xBB983007, 0x72D2D609,
67 0x3FF60000, 0x000007E1, 0x3FEC278A, 0x52A4E807, 0x3BA451F7, 0x31DE764B,
68 0x3FF70000, 0x0000185B, 0x3FEC950A, 0x3340D299, 0x3BAABF14, 0xE874463C,
69 0x3FF80000, 0x000024C4, 0x3FECF6F9, 0x786E02C1, 0x3B6B3F8E, 0x2446EA6B,
70 0x3FF90000, 0x0000BF0C, 0x3FED4E6F, 0x4642C44F, 0x3BA657E4, 0xDE96E741,
71 0x3FFA0000, 0x00006986, 0x3FED9C6F, 0xAFE63ACE, 0x3BA71F1B, 0xC2E3ED65,
72 0x3FFB0000, 0x00003690, 0x3FEDE1EB, 0x59375F86, 0x3B94D798, 0x08425B54,
73 0x3FFC0000, 0x000068CA, 0x3FEE1FBF, 0x97E34D01, 0x3BAD07C7, 0xA6BF9B47,
74 0x3FFD0000, 0x00008A24, 0x3FEE56B6, 0xF3EFC7EE, 0xBBA26658, 0xA2EB55F0,
75 0x3FFE0000, 0x0003C04A, 0x3FEE8789, 0xECECBA51, 0xBBA8B27F, 0x9026BBD4,
76 0x3FFF0000, 0x00000D68, 0x3FEEB2DF, 0xEDD5EEB6, 0x3B9EE74B, 0x12149464,
77 0x40000000, 0x00004DFD, 0x3FEED950, 0x5E1BD9DE, 0x3B971596, 0x795B2F55
78};
79
80uint32_t TanhCoeff[] = {
81 0x3FF00000, 0x00000000, 0x00000000, 0x00000000,
82 0xBFD55555, 0x55555555, 0xBC755555, 0x55555554,
83 0x3FC11111, 0x11111111, 0x3C411111, 0x10FF0F6D,
84 0xBFABA1BA, 0x1BA1BA1C, 0x3C47917A, 0xA287E6B6,
85 0x3F9664F4, 0x882C10FA, 0xBC0A8F5F, 0x684BD9FF,
86 0xBF8226E3, 0x55E6C238, 0x3C01097B, 0x425ED098,
87 0x3F6D6D3D, 0x0E154787, 0xBC0BA781, 0x948B0FCF,
88 0xBF57DA36, 0x446C8BDA, 0xBBD2F684, 0xBDA12F6B,
89 0x3F435580, 0xBCDA12F7, 0xBBEED097, 0xB425ED0A,
90 0xBF2F545A, 0x781948B1, 0x3B7948B0, 0xFCD6E991,
91 0x3F17B291, 0x61F9ADD4, 0xBBAF9ADD, 0x3C0CA458
92};
93
94struct TanhTblEntry {
95 double One;
96 double Two;
97 double Three;
98} TanhTblEntry;
99
100struct TanhCoeffEntry {
101 double One;
102 double Two;
103} TanhCoeffEntry;
104
105
106extern long double _ExpInnerLD(double alpha, double beta, double gamma,
107 double *pextra, int normflag);
108
109long double sinhl(long double x)
110{
111 double xHead, xTail, xHeadx, xTailx;
112 double p, q, r1, r2, r3, r4, r5, t1, t2;
113 double residual, residual1, residual2, residual3, residual4, residual5, residual6;
114 double res, resnew, reshi, reslow, restiny, resbot, resmid;
115 double remainder, partial, extraword;
116 double argsq, argsq2;
117 double prod, prodlow, sum, suml, temp, temp2, templow;
118 double bottom;
119 uint32_t hibits;
120 int i;
121 double fpenv;
122 DblDbl ddx;
123 double coeff[16] = {
124 0.62132974171578525315635255289e-14, 0.577836659795680285435407874e-11,
125 0.469203367754092391773551193841e-8, 0.329380764163372859025032938e-5,
126 0.1976284584980237154150197628458498e-2, 1.0, 420.0, 143640.0, 39070080.0,
127 8204716800.0, 1279935820800.0, 140792940288000.0, 10137091700736000.0,
128 425757851430912000.0, 8515157028618240000.0, 51090942171709440000.0
129 };
130
131 ddx.f = x;
132 xHead = ddx.d[0];
133 xTail = ddx.d[1];
134 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t
135
136 // NaNs and infinities
137 if (hibits >= 0x7ff00000u) // special cases: NaN and infinity
138 return x;
139
140 // x = zero hence xHead is zero
141 if ((hibits | ddx.i[1]) == 0) return x;
142
143
144 // x is not zero, infinity, or NaN
145
146 // finite non-zero x
147 FEGETENVD(fpenv);
148 FESETENVD(0.0);
149
150
151 // for small x, |x| <= 0.693147 use power series
152
153 if (fabs(xHead) <= 0.693147 ) {
154 temp=2.0*xHead*xTail; // small argument
155 argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp; // use power series
156 argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail;
157 sum=coeff[0];
158 suml=0.0;
159 sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum;
160 sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum;
161 sum=__FMADD( argsq, sum, coeff[3] ); // coeff[3]+argsq*sum;
162 for ( i=4; i<15; i++) {
163 templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2;
164 temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow;
165 bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow;
166 sum=__FADD( coeff[i], temp );
167 residual=coeff[i]-sum+temp;
168 suml=bottom+residual;
169 }
170 prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2
171 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow;
172 prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow;
173 temp2=__FMADD( prodlow, xHead, prod*xTail ); // prodlow*xHead+prod*xTail; // mult. by xHead
174 temp=__FMADD( prod, xHead, temp2 ); // prod*xHead+temp2;
175 temp2=__FMSUB(prod, xHead, temp ) + temp2; // prod*xHead-temp+temp2;
176 sum=temp*recip; // sum of series ---
177 remainder=__FNMSUB( sum, coeff[15], temp ); // (temp-sum*coeff[15]);
178 partial=__FADD( remainder, temp2 );
179 residual=remainder-partial+temp2;
180 suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip);
181 res=__FADD( xHead, sum ); // except for argument
182 reslow=(xHead-res+sum); // exact
183 resmid=__FADD( xTail, suml );
184 restiny=xTail-resmid+suml;
185 p=__FADD( reslow, resmid ); // sum of middle terms
186 q=reslow-p+resmid; // possible residual
187 reshi=__FADD( res, p );
188 resbot=(res-reshi+p)+(q+restiny);
189 ddx.d[0] = __FADD( reshi, resbot ); // build a cannonical result
190 ddx.d[1] = (reshi - ddx.d[0]) + resbot;
191 FESETENVD(fpenv);
192 return (ddx.f);
193 }
194
195 // |x| > 0.693147
196 // else if (fabs(xHead) > 0.693147 ) { <-- Unnecessary test
197 else {
198 if (xHead < 0) {
199 xHeadx = -xHead;
200 xTailx = -xTail;
201 }
202 else {
203 xHeadx = xHead;
204 xTailx = xTail;
205 }
206
207 ddx.f = _ExpInnerLD(xHeadx, xTailx, 0.0 , &extraword , 1); //get .5*e^x
208
209 if (fabs(xHead) > 39.0) {
210
211 if (xHead < 0) ddx.f = - ddx.f;
212
213 reshi = ddx.d[0];
214 ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] );
215 if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) {
216 // special cases : NaN and infinity
217 ddx.d[1] = 0.0;
218 }
219 else {
220 ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1];
221 }
222 FESETENVD(fpenv);
223 return (ddx.f);
224 }
225 else {
226 t1 = ddx.d[0]; // zres = .5*e^x
227 t2 = ddx.d[1]; // Def. of sinh: (e^x - e^-x)/2
228 r1=0.25/t1;
229 residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1;
230 r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x
231 residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1;
232 r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign)
233 r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign)
234 reshi=__FSUB( t1, r1 );
235 reslow=__FSUB( t2, r2 );
236 residual=(t1-reshi)-r1; // exact
237 if (fabs(t2) > fabs(r2))
238 residual2=(t2-reslow)-r2; // exact
239 else
240 residual2=t2-(reslow+r2); // exact
241 r5=(r3+r4-residual1)*(4.0*r1);
242 resnew=__FADD( reshi, reslow );
243 residual3=reshi-resnew+reslow; // exact
244 residual4=__FADD( residual, residual3 );
245 reshi=__FADD( resnew, residual4 );
246 residual5=residual-residual4+residual3;
247 residual6=resnew -reshi+residual4;
248 reslow=(residual2+(extraword+r5))+residual5+residual6;
249 if (xHead < 0.0) { // Reverse signs for
250 reshi = -reshi; // negative arguments
251 reslow = -reslow;
252 }
253 ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result
254 ddx.d[1] = (reshi - ddx.d[0]) + reslow;
255 FESETENVD(fpenv);
256 return (ddx.f);
257 }
258 } // end of abs(xHead) > 0.693147
259
260}
261
262
263long double coshl(long double x)
264{
265 double xHead, xTail, xHeadx, xTailx;
266 double r1, r2, r3, r4, r5, t1, t2;
267 double residual, residual1, residual2, residual3, residual4, residual5, residual6;
268 double res, resnew, reshi, reslow;
269 double remainder, partial, extraword;
270 double argsq, argsq2;
271 double prod, prodlow, sum, suml, temp, templow;
272 double bottom;
273 uint32_t hibits, signx;
274 int i;
275 double fpenv;
276 DblDbl ddx;
277 double coeff[9] = {
278 306.0, 73440.0, 13366080.0, 1764322560.0, 158789030400.0, 8892185702400.0,
279 266765571072000.0, 3201186852864000.0, 6402373705728000.0
280 };
281 double recip = 1.561920696858622646221636435005e-16;
282
283 ddx.f = x;
284 xHead = ddx.d[0];
285 xTail = ddx.d[1];
286 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t
287 signx = (ddx.i[0] >> 31) & 1u; // get sign of head
288
289 // NaNs and infinities
290
291 if (hibits >= 0x7ff00000u){ // special cases: NaN and infinity
292 if (xHead != xHead) { // x is a NaN
293 return x;
294 }
295 else { // For +/- infinity return +infinity
296 if (signx)
297 return (-x);
298 else
299 return x;
300 }
301 }
302
303
304 // x = zero hence xHead is zero
305
306 if ((hibits | ddx.i[1]) == 0)
307 return (long double) 1.0;
308
309 // x is not zero, infinity nor a NaN
310
311 FEGETENVD(fpenv);
312 FESETENVD(0.0);
313
314 //for small x, |x| <= 0.125 use power series
315 if (hibits <= 0x3fc00000u) {
316 temp=2.0*xHead*xTail;
317 argsq=__FMADD( xHead, xHead, temp ); // xHead*xHead+temp;
318 argsq2=__FMADD( xTail, xTail, __FMSUB( xHead, xHead, argsq ) + temp ); // xHead*xHead-argsq+temp+xTail*xTail;
319 sum=1.0;
320 suml=0.0;
321 sum=__FMADD( argsq, sum, coeff[0] ); // coeff[0]+argsq*sum;
322 sum=__FMADD( argsq, sum, coeff[1] ); // coeff[1]+argsq*sum;
323 sum=__FMADD( argsq, sum, coeff[2] ); // coeff[2]+argsq*sum;
324 for (i=3; i< 8; i++) {
325 templow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2;
326 temp=__FMADD( sum, argsq, templow ); // sum*argsq+templow;
327 bottom=__FMSUB( sum, argsq, temp) + templow; // sum*argsq-temp+templow;
328 sum=__FADD( coeff[i], temp );
329 residual=coeff[i]-sum+temp;
330 suml=bottom+residual;
331 }
332 prodlow=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2; // mult. by arg^2
333 prod=__FMADD( sum, argsq, prodlow ); // sum*argsq+prodlow;
334 prodlow=__FMSUB( sum, argsq, prod) + prodlow; // sum*argsq-prod+prodlow;
335 sum=prod*recip; // sum of series ---
336 remainder=__FNMSUB( sum, coeff[8], prod ); // (prod-sum*coeff[8]);
337 partial=__FADD( remainder, prodlow );
338 residual=remainder-partial+prodlow;
339 suml=__FMADD( partial, recip, residual*recip ); // partial*recip+(residual*recip);
340 res=__FADD(1.0, sum ); // except for 1.0 term.
341 reslow=1.0 - res+sum+suml;
342 ddx.d[0] = __FADD( res, reslow ); // build a cannonical result
343 ddx.d[1] = (res - ddx.d[0]) + reslow;
344 FESETENVD(fpenv);
345 return (ddx.f);
346 }
347
348 // |x| > 0.125
349 // else if (hibits > 0x3fc00000u) { // abs(xHead) > 0.125 <-- Unnecessary test
350 else { // abs(xHead) > 0.125
351 if (signx) {
352 xHeadx = -xHead;
353 xTailx = -xTail;
354 }
355 else{
356 xHeadx = xHead;
357 xTailx = xTail;
358 }
359 ddx.f = _ExpInnerLD (xHeadx, xTailx, 0.0 , &extraword , 1); // get .5*e^x
360
361 // |x| > 40.0
362 if (hibits > 0x40440000u) { // abs(xHead) > 40.0
363
364 reshi = ddx.d[0];
365 ddx.d[0] = __FADD( ddx.d[0], ddx.d[1] );
366 if((ddx.i[0] & 0x7FFFFFFFu)>= 0x7ff00000u) { // special cases : NaN and infinity
367 ddx.d[1] = 0.0;
368 }
369 else {
370 ddx.d[1] = (reshi - ddx.d[0]) + ddx.d[1];
371 }
372 FESETENVD(fpenv);
373 return (ddx.f);
374 }
375 // 0.125 <= |x| < 40.0
376 else{
377 t1 = ddx.d[0]; // zres=.5*e^x
378 t2 = ddx.d[1]; // def. of cosh: (e^x + e^-x)/2
379 r1=0.25/t1;
380 residual=__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) ); // (0.25-t1*r1)-t2*r1;
381 r2=residual*(4.0*r1); // (r1,r2)=.5*e^-x + errors< 4 ulps
382 residual1 =__FNMSUB( t2, r1, __FNMSUB( t1, r1, 0.25 ) - residual ); // (0.25-t1*r1)-residual-t2*r1;
383 // rest of rem. from HO divisor only
384 r3=__FMSUB(t1, r2, residual ); //(t1*r2-residual); // (reversed sign)...LO quotient
385 r4=__FMADD( extraword, r1, t2*r2 ); //(extraword*r1+(t2*r2)); // (reversed sign)...LO quotient
386 reshi=__FADD( t1, r1 );
387 reslow=__FADD( t2, r2 );
388 residual=(t1-reshi)+r1; // exact
389 if (fabs(t2) > fabs(r2))
390 residual2=(t2-reslow)+r2; // exact
391 else
392 residual2=r2-reslow+t2; // exact
393 r5=(r3+r4-residual1)*(4.0*r1); // reversed sign
394 resnew=__FADD( reshi, reslow );
395 residual3=reshi-resnew+reslow; // exact
396 residual4=__FADD( residual, residual3 );
397 reshi=__FADD( resnew, residual4 );
398 residual5=residual-residual4+residual3;
399 residual6=resnew-reshi+residual4;
400 reslow=(residual2+(extraword-r5))+residual5+residual6;
401 ddx.d[0] = __FADD( reshi, reslow ); // build a cannonical result
402 ddx.d[1] = (reshi - ddx.d[0]) + reslow;
403 FESETENVD(fpenv);
404 return (ddx.f);
405 }
406 } // end of abs(xHead) > 0.125
407}
408
409
410long double tanhl(long double x)
411{
412
413 double r, res, res2, res3, res4,res5, resfin, reslow;
414 double rem, rem1, rem2;
415 uint32_t signx, hibits, ndx ;
416 double q, qq, qa, qb, qc, q1, quot, quot2, quot3;
417 double arg, arg1, arg2, argsq, argsq2;
418 double sum, sum2, suml, suml2;
419 double frac, frac2, fac, fac2;
420 double ra, rb, rc, rd, re, rf;
421 double ans, ansx, ansl, ansmid, anstiny, anslow, almost;
422 double prod, prodlow, prodx, prodlowx, proderr, pl;
423 double top, top2, den, den1, denf, denf2;
424 double rg, t2, t3, tsq, tsql;
425 double az, bz, temp, temp1, templow, carry, carry1, extra;
426 double bump, residual, tiny, recip;
427 Double DeN;
428 double fpenv;
429 double xHead, xTail;
430 DblDbl ddx;
431 int i;
432
433 struct TanhTblEntry *TanhTblPtr = (struct TanhTblEntry *)TanhTbl;
434 struct TanhCoeffEntry *TanhCoeffPtr = (struct TanhCoeffEntry *) TanhCoeff;
435
436 ddx.f = x;
437 xHead = ddx.d[0];
438 xTail = ddx.d[1];
439 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t
440 signx = (ddx.i[0] >> 31) & 1u; // get sign of head
441
442 // NaN or zero
443
444 if ((xHead != xHead)||((hibits | ddx.i[1]) == 0))
445 return x;
446
447 FEGETENVD(fpenv);
448 FESETENVD(0.0);
449
450 //for infinity or large x , |xHead| > 40.0
451
452 if (hibits > 0x40440000u) { // results = 1, for big args.
453 FESETENVD(fpenv);
454 if (signx)
455 return (long double) (-1.0); // sign of result= sign of arg
456 else
457 return (long double) (1.0);
458 }
459
460 // x is not zero, infinity nor a NaN
461
462 if (signx) {
463 xHead = -xHead;
464 xTail = -xTail;
465 }
466
467
468 // values of |xHead| with 2.0<= |xHead| <= 40.0
469
470 if (hibits > 0x40000000u) {
471 //****************************************************************************
472 // *
473 // 1.0 *
474 // tanh(x)= 1 - ------------- |x| > 2.0 *
475 // .5 +.5*e^(2x) *
476 // *
477 //****************************************************************************
478 xHead=2.0*xHead; // double arg
479 xTail=2.0*xTail;
480
481 ddx.f = _ExpInnerLD (xHead, xTail, 0.0 , &extra, 1 ); // compute e^(2*arg)
482 az = ddx.d[0];
483 bz = ddx.d[1];
484 temp=__FADD( az, 0.5 ); // az >= 0.5
485 carry=az-temp+0.5; // exact
486 temp1=__FADD( carry, bz );
487 carry1=carry-temp1+bz;
488 den=__FADD( temp, temp1 );
489 den1=temp-den+temp1+carry1;
490 r=1.0/den; // guess recip. of denominator
491 rem=__FNMSUB( den, r, 1.0 ); // 1.0-den*r; // first remainder
492 rem1=__FNMSUB( r, den1, rem ); // rem-r*den1; // (rem1,rem2) is better rem.
493 if (fabs(rem) > fabs(r*den1))
494 rem2=__FNMSUB( r, den1, rem-rem1 ); // rem-rem1-r*den1;
495 else {
496 rem1=__FNMSUB( r, den1, rem ); // rem-(r*den1);
497 /* rem2=-(r*den1)-rem1+rem -(r*den1-(r*den1)); */
498 rem2 = rem - __FMADD( r, den1, rem1 ) - __FMSUB( r, den1, (r*den1) );
499 }
500 qq=__FMADD( r, rem1, r ); // r+r*rem1; // going for the full quotient
501 qa=__FMADD( r, rem1, r-qq ); // r-qq+r*rem1;
502 qb=__FMADD( r, rem2, qa ); // qa+r*rem2;
503 q=__FADD( qq, qb );
504 // qc=qa-qb+r*rem2-(extra*r)*r;
505 qc = __FNMSUB( extra*r, r, __FMADD( r, rem2, qa - qb ) );
506 q1=qq-q+qb+qc;
507 res=__FSUB( 1.0, q ); // high order result
508 res2=1.0-res-q; // exact low order
509 res3=__FSUB( res2, q1 );
510 resfin=__FADD( res, res3 );
511 res4=res2-res3-q1;
512 res5=res-resfin+res3;
513 reslow=res4+res5;
514 if (signx) {
515 resfin=-resfin;
516 reslow=-reslow;
517 }
518 ddx.d[0] = __FADD( resfin, reslow ); // build a cannonical result
519 ddx.d[1] = (resfin - ddx.d[0]) + reslow;
520 FESETENVD(fpenv);
521 return (ddx.f);
522 }
523
524 // else if (hibits <= 0x40000000u) { // |xHead| < 2.0 <-- Unnecessary test
525 else { // |xHead| < 2.0
526
527 //***************************************************************************
528 // *
529 // Accurate table is used to reduce small arguments such that the *
530 // range of the |reduced argument| < 1/32. The tanh addition formula *
531 // is used to piece together three tanhs *
532 // *
533 //***************************************************************************
534
535 DeN.f = __FMADD( xHead, 16.0, kBig ); // kBig+xHead*16.0; // compute table lookup index
536 ndx = DeN.i[1];
537 if (hibits < 0x3fb00000u) ndx=0; // |xHead| < 0.0625
538 arg1=xHead-TanhTblPtr[(int32_t)ndx].One; // reduce argument by table value
539 // which is close to ndx/16 (exact)
540 arg=__FADD( arg1, xTail ); // full 53 bit argument
541 arg2=arg1-arg+xTail; // low order argument
542 //************************************************************************************
543 // The argument has been broken up as follows: *
544 // Zarg=TanhTblPtr[ndx].One +arg+arg2 *
545 // *
546 // tanh(TanhTblPtr[ndx].One) is read from tantbl(2,ndx),tantbl(3,ndx) *
547 // with at least 122 bits of precision *
548 // tanh(arg2)=arg2+o(2^-172) *
549 // *
550 // Compute tanh(arg) by economized power series. *
551 // the task is to piece together the three parts *
552 // *
553 // (tanh(xTail)+tanh(c))(1-tanh^2(xHead)) *
554 // tanh(xHead+xTail+c)=tanh(xHead) + ------------------------------------------ *
555 // 1+tanh(xHead) tanh(xTail)+tanh(c)(tanh(xHead)+tanh(xTail))*
556 // *
557 //************************************************************************************
558 sum=TanhCoeffPtr[10].One;
559 suml=TanhCoeffPtr[10].Two;
560 argsq=arg*arg;
561 argsq2=__FMSUB( arg, arg, argsq ); // arg*arg-argsq; // arg^2 for series
562 for (i = 9; i > 0; i--) {
563 pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2;
564 prod=__FMADD( sum, argsq, pl ); // sum*argsq+pl;
565 prodlow=__FMSUB( sum, argsq, prod ) + pl; // sum*argsq-prod+pl;
566 sum=__FADD( TanhCoeffPtr[i].One, prod ); // add in the next coefficient
567 sum2=TanhCoeffPtr[i].Two+prodlow;
568 suml=TanhCoeffPtr[i].One-sum+prod+sum2;
569 }
570 pl=__FMADD( suml, argsq, sum*argsq2 ); // suml*argsq+sum*argsq2;
571 temp=__FMADD( sum, argsq, pl ); // sum*argsq+pl;
572 templow=__FMSUB( sum, argsq, temp ) + pl; // sum*argsq-temp+pl;
573 pl=templow*arg; // last multiplication by arg
574 prodx=__FMADD( temp, arg, pl ); // temp*arg+pl;
575 prodlowx=__FMSUB( temp, arg, prodx ) + pl; // temp*arg-prodx+pl; // tanh(arg)-1.0 is done.
576 prod=__FADD( arg, prodx );
577 prodlow=arg-prod+prodx+prodlowx; // tanh(arg) is done.
578
579 if (hibits < 0x3fb00000u) { // |xHead| < 0.0625, trivial: tanh(xHead)=0 in formula
580 proderr=(arg-prod+prodx)-prodlow+prodlowx;
581 bump=__FNMSUB( prod, prod*arg2, arg2 ); // arg2-prod*prod*arg2;
582 reslow=__FADD( prodlow, bump );
583 residual=prodlow-reslow+bump+proderr;
584 res=__FADD( reslow, prod );
585 reslow=prod-res+reslow+residual;
586 if (signx) {
587 res=-res;
588 reslow=-reslow;
589 }
590
591 ddx.d[0] = __FADD( res, reslow ); // build a cannonical result
592 ddx.d[1] = (res - ddx.d[0]) + reslow;
593 FESETENVD(fpenv);
594 return (ddx.f);
595 }
596
597 tiny=arg2*(TanhTblPtr[(int32_t)ndx].Two+prod); // The last addend in denominator
598
599 // pl=TanhTblPtr[(int32_t)ndx].Two*prodlow+TanhTblPtr[(int32_t)ndx].Three*prod; // tanh(xHead)*tanh(xTail)
600 pl = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prodlow, TanhTblPtr[(int32_t)ndx].Three*prod );
601
602 // den=TanhTblPtr[(int32_t)ndx].Two*prod+pl; // denominator completed except
603 den = __FMADD( TanhTblPtr[(int32_t)ndx].Two, prod, pl );
604
605 // den1=TanhTblPtr[(int32_t)ndx].Two*prod-den+pl+tiny; // for the +1 term.
606 den1 = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, prod, den ) + pl + tiny;
607
608 suml2=prodlow+arg2; // start on numerator
609 t2=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three; // table look up value
610
611 // tsq=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two+t2; // squared.
612 tsq = __FMADD( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, t2 );
613
614 // tsql=TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Two-tsq+t2;
615 tsql = __FMSUB( TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Two, tsq ) + t2;
616
617 /* t3=2.0*TanhTblPtr[(int32_t)ndx].Two*TanhTblPtr[(int32_t)ndx].Three-t2 +
618 TanhTblPtr[(int32_t)ndx].Three*TanhTblPtr[(int32_t)ndx].Three; */
619 t3 = __FMADD( TanhTblPtr[(int32_t)ndx].Three, TanhTblPtr[(int32_t)ndx].Three,
620 __FMSUB( 2.0*TanhTblPtr[(int32_t)ndx].Two, TanhTblPtr[(int32_t)ndx].Three, t2 ) );
621
622 denf=__FADD( 1.0, den ); // denominator is really done now
623 denf2=1.0-denf+den+den1;
624 fac=__FSUB( 1.0, tsq ); // compute 1-tanhtble(ndx)^2 becomes
625 residual=1.0-fac-tsq; // (fac, fac2)
626 fac2=residual-tsql-t3;
627 pl=__FMADD( fac, suml2, fac2*prod ); // fac*suml2+fac2*prod; // (xTail+c)(1-xHead*xHead) ... above formula
628 top=__FMADD( fac, prod, pl ); // fac*prod+pl;
629 top2=__FMSUB( fac, prod, top ) + pl; // fac*prod-top+pl; // doing division
630 recip=1.0/denf;
631 quot=top*recip;
632 ra=__FNMSUB( quot, denf, top ); // top-quot*denf; // 3 part remainder:
633 rb=-(quot*denf2); // rem=ra+rb+top2
634 rc= - __FMADD(quot, denf2, rb ); // -(quot*denf2+rb); // 3rd order remainder
635 rd=__FADD( top2, rb ); // sum 2nd order rems. into rf
636 if (fabs(top2) > fabs(rb))
637 re=top2-rd+rb;
638 else
639 re=rb-rd+top2;
640 rf=__FADD( ra, rd );
641 if (fabs(ra) > fabs(rd))
642 rg=ra-rf+rd; // more 3rd order rems.
643 else // rc+re+rg
644 rg=rd-rf+ra;
645 quot2=rf*recip;
646 quot3=__FMSUB( rf, recip, quot2 ); // rf*recip-quot2;
647 frac=__FADD( quot, quot2 );
648 // frac2=quot-frac+quot2+(quot3+recip *(rc+re+rg));
649 frac2 = quot-frac+quot2 + __FMADD( recip, (rc+re+rg), quot3 );
650
651
652 ansx=__FADD( TanhTblPtr[(int32_t)ndx].Two, frac ); // paste together the result
653 ansl=__FADD( TanhTblPtr[(int32_t)ndx].Three, frac2 );
654 ansmid=TanhTblPtr[(int32_t)ndx].Two-ansx+frac;
655 anstiny=TanhTblPtr[(int32_t)ndx].Three-ansl+frac2;
656 almost=__FADD( ansmid, ansl );
657 ans=__FADD( ansx, almost );
658 anslow=(ansx-ans+almost)+((ansmid-almost+ansl) +anstiny);
659 if (signx) {
660 ans=-ans;
661 anslow=-anslow;
662 }
663
664 ddx.d[0] = __FADD( ans, anslow ); // build a cannonical result
665 ddx.d[1] = (ans - ddx.d[0]) + anslow;
666 FESETENVD(fpenv);
667 return (ddx.f);
668 }
669}
670#endif