/* this repo has no description */
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22//
23// ArcSinCosDD.c
24//
25// Double-double Function Library
//       Copyright: © 1995-96 by Apple Computer, Inc., all rights reserved
27//
28// long double asinl( long double x );
29// long double acosl( long double x );
30//
31
32#include "math.h"
33#include "fp_private.h"
34#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L)
35#include "DD.h"
36
37long double __sqrtldextra( double x, double y, double *pextra);
38
39// Floating-point constants
40static const double kPiBy2 = 1.570796326794896619231322; // 0x1.921FB54442D18p0
41static const double kPiBy2Tail1 = 6.123233995736766e-17; // 0x1.1A62633145C07p-54
42static const double kPiBy2Tail2 = -1.4973849048591698e-33; // 0x1.F1976B7ED8FBCp-110
43
44static const double kPiBy4 = 7.85398163397448279e-01; // 0x1.921FB54442D18p-1
45static const double kPiBy4Tail1 = 3.061616997868383018e-17; // 0x1.1A62633145C07p-55
46static const double kPiBy4Tail2 = -7.486924524295849165e-34; // 0x1.F1976B7ED8FBCp-111
47
48static const double k3PiBy4 = 2.356194490192344837; // 0x1.2D97C7F3321D2p1
49static const double k3PiBy4Tail1 = 9.184850993605148438e-17; // 0x1.A79394C9E8A0Ap-54
50static const double k3PiBy4Tail2 = 3.916898464750400322e-33; // 0x1.456737B06EA1Ap-108
51
52static const double kPi = 3.141592653589793116; // 0x1.921FB54442D18p1
53static const double kPiTail1 = 1.224646799147353207e-16; // 0x1.1A62633145C07p-53
54static const double kPiTail2 = -2.994769809718339666e-33; // 0x1.F1976B7ED8FBCp-109
55
56static const double kSqrt3By2 = 0.866025403684439; // 0x1.bb67ae84a8e3ep-1
57
58static const int kNterms = 23;
59
60static const Float kNaNu = {0x7fc00000};
61
62static const uint32_t asinCoeff[] __attribute__ ((aligned(8))) = {
63 0x3FF00000, 0x00000000, 0x00000000, 0x00000000,
64 0x3FC55555, 0x55555555, 0x3C655555, 0x55555555,
65 0x3FB33333, 0x33333333, 0x3C499999, 0x9999999A,
66 0x3FA6DB6D, 0xB6DB6DB7, 0xBC324924, 0x92492492,
67 0x3F9F1C71, 0xC71C71C7, 0x3C1C71C7, 0x1C71C71C,
68 0x3F96E8BA, 0x2E8BA2E9, 0xBC31745D, 0x1745D174,
69 0x3F91C4EC, 0x4EC4EC4F, 0xBC2D89D8, 0x9D89D89E,
70 0x3F8C9999, 0x9999999A, 0xBC299999, 0x9999999A,
71 0x3F87A878, 0x78787878, 0x3C2E1E1E, 0x1E1E1E1E,
72 0x3F83FDE5, 0x0D79435E, 0x3C2435E5, 0x0D79435E,
73 0x3F812EF3, 0xCF3CF3CF, 0x3C1E79E7, 0x9E79E79E,
74 0x3F7DF3BD, 0x37A6F4DF, 0xBC190B21, 0x642C8591,
75 0x3F7A6863, 0xD70A3D71, 0xBC170A3D, 0x70A3D70A,
76 0x3F7782DD, 0xA12F684C, 0xBC02F684, 0xBDA12F68,
77 0x3F751BA3, 0x08D3DCB1, 0xBC1CB08D, 0x3DCB08D4,
78 0x3F731683, 0xBDEF7BDF, 0xBBE08421, 0x08421084,
79 0x3F715EE9, 0xD45D1746, 0xBC0745D1, 0x745D1746,
80 0x3F6FCAF8, 0xFB6DB6DB, 0x3C0B6DB6, 0xDB6DB6DB,
81 0x3F6D3D2A, 0x8E0DD67D, 0xBC0D67C8, 0xA60DD67D,
82 0x3F6B026F, 0x57B13B14, 0xBC03B13B, 0x13B13B14,
83 0x3F690CB7, 0x7F60C7CE, 0x3BD8F9C1, 0x8F9C18FA,
84 0x3F6750DE, 0x64D7D05F, 0x3C005F41, 0x7D05F418,
85 0x3F65C5F5, 0x6EFAAAAB, 0xBC055555, 0x55555555,
86 0x3F6464C0, 0x950F7D47, 0xBBF882B9, 0x31057262,
87 0x3F632755, 0x86C5F2F0, 0x3C04E5E0, 0xA72F0539,
88 0x3F6208D3, 0x570AE5A6, 0xBC069696, 0x96969697,
89 0x3F61052B, 0xC5FA960A, 0xBC05BC60, 0x9A90E7D9,
90 0x3F6018F9, 0x63C229BF, 0xBBF4F209, 0x4F2094F2,
91 0x3F5E82BE, 0x60D9127E, 0xBBEF7047, 0xDC11F704,
92 0x3F5CF7DE, 0xA5B6E830, 0xBBF15B1E, 0x5F75270D,
93 0x3F5B8D2E, 0x5667CE6C, 0x3BD0C971, 0x4FBCDA3B,
94 0x3F5A3F1E, 0xF82137EE, 0x3BEC71C7, 0x1C71C71C,
95 0x3F590A9F, 0x747DB95D, 0xBBE6A56A, 0x56A56A57,
96 0x3F57ED07, 0x9ED4C037, 0xBBD03D22, 0x6357E16F,
97 0x3F56E407, 0x90442038, 0x3BCA6F4D, 0xE9BD37A7,
98 0x3F55ED9A, 0x0BD901B6, 0xBBF040E6, 0xC2B4481D,
99 0x3F5507F9, 0x4C2470BD, 0x3BE56070, 0x381C0E07,
100 0x3F543195, 0xBF54E5D7, 0xBBFB9E4B, 0x17E4B17E,
101 0x3F53690E, 0x51F04536, 0x3BF9D974, 0x5D1745D1,
102 0x3F52AD29, 0xFCD49D54, 0x3BF82FBF, 0x309B8B57,
103 0x3F51FCD2, 0x5AE4A26E, 0x3BECA730, 0xFCD6E9E0,
104 0x3F51570F, 0x16ECE10A, 0x3BFA9E4B, 0xF3A9A378,
105 0x3F50BB02, 0x0BC1EAA0, 0x3BF8C4B0, 0x78787878,
106 0x3F5027E3, 0xF7FCD8BF, 0x3BF67F63, 0x2C234F73,
107 0x3F4F3A03, 0x591C2915, 0x3BE22B2B, 0xE05C0B81,
108 0x3F4E3373, 0x43F5C1F5, 0x3BEEF13D, 0x589D89D9,
109 0x3F4D3AF3, 0xC78CE2E4, 0x3BB8EEC6, 0x4A5294A5,
110 0x3F4C4F7C, 0x88F08B5E, 0x3BD4D455, 0x26BCA1AF,
111 0x3F4B701D, 0x9E1F038E, 0xBBE2BB23, 0xE09FAB8C,
112 0x3F4A9BFC, 0xD93A26FD, 0x3BC437EC, 0xD445D174,
113 0x3F49D253, 0x6C99619A, 0xBB9FD25B, 0x093F5DC8
114};
115
116struct asinCoeffEntry {
117 double Head;
118 double Tail;
119} asinCoeffEntry;
120
121long double asinl( long double x )
122{
123 double arg, argl, argsq, argsq2;
124 double temp,temp2, temp3;
125 double sum, sumt, suml;
126 double xHead, xTail;
127 double sq, sq2, sq3;
128 double res, reslow, resmid, restiny, reshi, resbot, resinf;
129 double h, h2, g2, g3, g4, p, q, t;
130 double prod, prodlow;
131 double fenv;
132 DblDbl ddx;
133
134 int i;
135 uint32_t hibits;
136 struct asinCoeffEntry *asinPtr = (struct asinCoeffEntry *) asinCoeff;
137
138 FEGETENVD(fenv);
139 FESETENVD(0.0);
140
141 ddx.f = x;
142
143 xHead = ddx.d[0];
144 xTail = ddx.d[1];
145
146 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t
147
148 // This filters most of the invalid cases
149 if (hibits > 0x3ff00000u) { // |x| > 1.0 or NaN
150 if (xHead != xHead) { // x is a NaN
151 FESETENVD(fenv);
152 return x;
153 }
154 else { // |x| > 1.0
155 ddx.d[0] = kNaNu.f; // NaN
156 ddx.d[1] = ddx.d[0];
157 FESETENVD(fenv);
158 return (ddx.f);
159 }
160 }
161 // For values of |x| very close to unity, xTail needs to be considered.
162 if ((xHead > 0.0) && (xHead -1.0 + xTail > 0.0)) { // x > 1 out of range
163 ddx.d[0] = kNaNu.f; // NaN
164 ddx.d[1] = ddx.d[0];
165 FESETENVD(fenv);
166 return (ddx.f);
167 }
168 if ((xHead < 0.0) && (xHead +1.0 + xTail < 0.0)) { // x < -1 out of range
169 ddx.d[0] = kNaNu.f; // NaN
170 ddx.d[1] = ddx.d[0];
171 FESETENVD(fenv);
172 return (ddx.f);
173 }
174
175 // finite x, with |x| <= 1
176
177 //for tiny x, ArcSin x = x
178 if (hibits < 0x3c800000u) { // 2^(-55);
179 FESETENVD(fenv);
180 return x;
181 }
182
183
184 // finite result but not tiny
185
186 //Argument is valid, i.e., |x| <= 1 range reduce
187
188 if (hibits <= 0x3fe00000u) { // |xHead| <= 0.5
189
190 temp = __FMUL( (2.0*xHead), xTail );
191 argsq = __FMADD( xHead, xHead, temp ); // temp + xHead*xHead;
192 /* argsq2 = xHead*xHead - argsq + temp + ((2.0*xHead)*xTail-temp); */
193 argsq2 = __FMSUB( xHead, xHead, argsq ) + temp + __FMSUB( (2.0*xHead), xTail, temp );
194
195 sum = asinPtr[50].Head; // Use Taylor Series
196 sum = __FMADD( sum, argsq, asinPtr[49].Head ); // asinPtr[49].Head + sum*argsq;
197 sum = __FMADD( sum, argsq, asinPtr[48].Head ); // asinPtr[48].Head + sum*argsq;
198 sum = __FMADD( sum, argsq, asinPtr[47].Head ); // asinPtr[47].Head + sum*argsq;
199 sum = __FMADD( sum, argsq, asinPtr[46].Head ); // asinPtr[46].Head + sum*argsq;
200
201 for (i = 45; i > kNterms+1; i--) // First 25 terms in working precison
202 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq;
203
204 sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );// asinPtr[kNterms+1].Head + sum*argsq;
205 suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );// asinPtr[kNterms+1].Head - sumt + sum*argsq;
206 sum = sumt;
207
208 for (i = 1; i <= kNterms; i++) {
209 temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head ); // asinPtr[kNterms-i+1].Head + sum*argsq;
210 /* suml = asinPtr[kNterms-i+1].Head - temp+sum*argsq +
211 (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
212 suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
213 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
214 sum = temp;
215 }
216
217 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
218 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
219 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
220 temp2 = __FMADD( prodlow, xHead, prod*xTail ); // prodlow*xHead + prod*xTail; // mult. by arg
221 temp = __FMADD( prod, xHead, temp2 ); // prod*xHead + temp2;
222 temp2 = __FMSUB( prod, xHead, temp ) + temp2; // prod*xHead - temp+temp2;
223
224 res = __FADD( xHead, temp ); // except for argument
225 reslow = (xHead - res + temp); // exact
226 resmid = __FADD( xTail, temp2 );
227 restiny = xTail - resmid + temp2;
228 p = __FADD( reslow, resmid ); // sum of middle terms
229 q = reslow - p + resmid; // possible residual
230 reshi = __FADD( res, p );
231 resbot = (res - reshi + p) + (q + restiny);
232
233 ddx.d[0] = reshi;
234 ddx.d[1] = resbot;
235 FESETENVD(fenv);
236 return (ddx.f);
237 }
238 else if (fabs(xHead) < kSqrt3By2) { // Pi/6 < |result| < Pi/3
239
240 // Use 1-2.*(xHead,xTail)^2 as arg.
241 h = __FMUL( xHead, xHead ); // careful computation of
242 h2 = __FMSUB( xHead, xHead, h ); // xHead*xHead - h; // square of original argument
243 g2 = __FMUL( (2.0*xHead), xTail );
244 g3 = __FMADD( xTail, xTail, __FMSUB( (2.0*xHead), xTail, g2 ) ); // (2.0*xHead)*xTail - g2 + xTail*xTail;
245 t = __FADD( h2 , g2 ); //sum of middle parts
246 sq = __FADD( h, t );
247
248 if (fabs(h2) > fabs(g2)) // More than 107 bits are needed,
249 g4 = h2 - t + g2 + g3; // because the square will be
250 else // subtracted from small constants,
251 g4 = g2 - t + h2 + g3; // causing leading bit cancellations,
252 // which must be filled in from the
253 sq2 = __FADD( (h - sq + t), g4 ); // right
254 sq3 = (h - sq + t) - sq2 + g4; // This captures extra low order bits
255 temp = __FNMSUB( 2.0, sq, 1.0 ); // 1.0 - 2.0 * sq;
256 temp2 = __FNMSUB( 2.0, sq2, __FNMSUB( 2.0, sq, 1.0 - temp ) ); // 1.0 - temp - 2.0*sq - 2.0*sq2;
257 arg = __FADD( temp, temp2 );
258 argl = __FNMSUB( 2.0, sq3, temp - arg + temp2 ); // temp - arg + temp2 - 2.0*sq3;
259 temp = 2.0*arg*argl;
260 argsq = __FMADD( arg, arg, temp ); // __FADD( arg*arg, temp ); // Square of new argument
261 argsq2 = __FMSUB( arg, arg, argsq ) + temp; // arg*arg - argsq + temp;
262 // Compute result as
263 // Pi/4-0.5*asin(arg,argl)
264 // Compute asin with
265 sum = asinPtr[50].Head; // Taylor Series
266
267 for (i = 49; i > kNterms+1; i--) // First 25 terms in working precison
268 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq; // precison
269
270 sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );// asinPtr[kNterms+1].Head + sum*argsq;
271 suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );// asinPtr[kNterms+1].Head - sumt + sum*argsq;
272 sum = sumt;
273
274 for (i = 1; i <= kNterms; i++) { // remaining terms in quad precision
275 temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head ); // asinPtr[kNterms-i+1].Head + sum*argsq;
276 /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
277 (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
278 suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
279 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
280 sum = temp;
281 }
282 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
283 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
284 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
285 temp2 = __FMADD( prodlow, arg, prod*argl ); // prodlow*arg + prod*argl; // mult. by arg
286 temp = __FMADD( prod, arg, temp2 ); // prod*arg + temp2;
287 temp2 = __FMSUB( prod, arg, temp ) + temp2; // prod*arg - temp+temp2;
288
289 res = __FADD( arg, temp ); // add arg for asin(arg)
290 reslow = (arg - res + temp); // exact
291 resmid = __FADD( argl, temp2 );
292 restiny = argl - resmid + temp2;
293 p = __FADD( reslow, resmid ); // sum of middle terms
294 q = reslow - p + resmid; // possible residual
295 reshi = __FADD( res, p );
296 resbot = __FADD( (res - reshi + p), (q + restiny) ); //get ready to subtract from pi/4
297 resinf = (res - reshi + p) - resbot + (q + restiny);
298 temp = kPiBy4;
299 temp2 = kPiBy4Tail1;
300 temp3 = kPiBy4Tail2;
301 res = __FNMSUB( 0.5, reshi, temp ); // temp - 0.5*reshi;
302 reslow = __FNMSUB( 0.5, reshi, temp - res ) ; // (temp - res - 0.5*reshi);
303 resmid = __FNMSUB( 0.5, resbot, temp2 ); // temp2 - 0.5*resbot;
304 if (fabs(temp2) > fabs(0.5*resbot))
305 /* restiny = temp2 - resmid - 0.5*resbot + temp3 - 0.5*resinf; */
306 restiny = __FNMSUB( 0.5, resbot, temp2 - resmid ) + __FNMSUB( 0.5, resinf, temp3 );
307 else
308 /* restiny = temp2 - (0.5*resbot + resmid) + temp3 - 0.5*resinf; */
309 restiny = temp2 - __FMADD( 0.5, resbot, resmid ) + __FNMSUB( 0.5, resinf, temp3 );
310 p = __FADD( reslow, resmid );
311 q = reslow - p + resmid;
312 reshi = __FADD( res, p );
313 resbot = (res - reshi + p) + (q + restiny);
314
315 if (xHead > 0.0) {
316 ddx.d[0] = reshi;
317 ddx.d[1] = resbot;
318 }
319 else {
320 ddx.d[0] = -reshi;
321 ddx.d[1] = -resbot;
322 }
323
324 FESETENVD(fenv);
325 return (ddx.f);
326 }
327 else { // "large arguments", .866 < Abs(xHead,xTail) <= 1.0
328
329 if ((xHead - 1.0 + xTail) == 0.0) { //for input of 1.0, return Pi/2
330 ddx.d[0] = kPiBy2;
331 ddx.d[1] = kPiBy2Tail1;
332 FESETENVD(fenv);
333 return ddx.f;
334 }
335 if ((xHead + 1.0 - xTail) == 0.0) { //for input of -1.0, return -Pi/2
336 ddx.d[0] = - kPiBy2;
337 ddx.d[1] = - kPiBy2Tail1;
338 FESETENVD(fenv);
339 return ddx.f;
340 }
341 if (xHead < 0.0) { // Use absolute value of input
342 temp = -xHead; // to work only in the first
343 temp2 = -xTail; // quadrant
344 }
345 else { // For arg x > 0.5, use identity
346 temp = xHead; // asin(x)=pi/2-2asin((1-x)/2)^.5)
347 temp2 = xTail;
348 }
349 h = __FNMSUB( 0.5, temp, 0.5 ); // 0.5 - 0.5*temp; // exact
350 argsq = __FNMSUB( 0.5, temp2, h ); // h - 0.5*temp2;
351 argsq2 = __FNMSUB( 0.5, temp2, h - argsq ); // __FSUB( h, argsq ) - 0.5*temp2; //square of reduced arg, exact
352
353 ddx.f = __sqrtldextra(argsq, argsq2 , &temp); // The new argument
354
355 sum = asinPtr[25].Head; // Taylor Series
356
357 for (i = 24; i > kNterms/2+1; i--) // First 25 terms in working precison
358 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq; // precison
359
360 sumt = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head );// asinPtr[kNterms/2+1].Head + sum*argsq;
361 suml = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head - sumt );// __FSUB( asinPtr[kNterms/2+1].Head, sumt ) + sum*argsq;
362 sum = sumt;
363
364 for (i = 1; i <= kNterms/2; i++) { //remaining terms in quad
365 temp = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head ); // asinPtr[kNterms/2-i+1].Head + sum*argsq;
366 /* suml = asinPtr[kNterms/2-i+1].Head - temp + sum*argsq +
367 (asinPtr[kNterms/2-i+1].Tail + sum*argsq2 + suml*argsq); */
368 suml = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head - temp ) +
369 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms/2-i+1].Tail ) );
370 sum = temp;
371 }
372
373 arg = ddx.d[0];
374 argl = ddx.d[1];
375
376 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
377 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
378 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
379 temp2 = __FMADD( prodlow, arg, prod*argl ); // prodlow*arg + prod*argl; // multiply by arg
380 temp = __FMADD( prod, arg, temp2 ); // prod*arg + temp2;
381 temp2 = __FMSUB( prod, arg, temp ) + temp2; // prod*arg - temp + temp2;
382
383 res = __FADD( arg, temp ); // add arg for asin(arg)
384 reslow = (arg - res + temp); // exact
385 resmid = __FADD( argl, temp2 );
386 restiny = argl - resmid + temp2;
387 p = __FADD( reslow, resmid ); // sum of middle terms
388 q = reslow - p + resmid; // possible residual
389 reshi = __FADD( res, p );
390 resbot = __FADD( (res - reshi + p), (q + restiny) ); //get ready to subtract from pi/4
391 resinf = (res - reshi + p) - resbot + (q + restiny);
392
393 res = __FNMSUB( 2.0, reshi, kPiBy2 ); // __FSUB( kPiBy2, (2.0*reshi) );
394 reslow = __FNMSUB( 2.0, reshi, kPiBy2 - res ); // (kPiBy2 - res - (2.0*reshi));
395 resmid = __FNMSUB( 2.0, resbot, kPiBy2Tail1 ); // __FSUB( kPiBy2Tail1, (2.0*resbot) );
396
397 if (kPiBy2Tail1 > fabs(2.0*resbot))
398 /* restiny = kPiBy2Tail1 - resmid - (2.0*resbot) + (kPiBy2Tail2 - 2.0*resinf); */
399 restiny = __FNMSUB( 2.0, resbot, kPiBy2Tail1 - resmid ) + __FNMSUB( 2.0, resinf, kPiBy2Tail2 );
400 else
401 /* restiny = kPiBy2Tail1 - (resmid + (2.0*resbot)) + (kPiBy2Tail2 - 2.0*resinf); */
402 restiny = kPiBy2Tail1 - __FMADD( 2.0, resbot, resmid ) + __FNMSUB( 2.0, resinf, kPiBy2Tail2 );
403 p = __FADD( reslow, resmid );
404 q = reslow - p+resmid;
405 reshi = __FADD( res, p );
406 resbot = (res - reshi + p) + (q + restiny);
407 if (xHead < 0) {
408 ddx.d[0] = -reshi;
409 ddx.d[1] = -resbot;
410 }
411 else {
412 ddx.d[0] = reshi;
413 ddx.d[1] = resbot;
414 }
415 FESETENVD(fenv);
416 return (ddx.f);
417 }
418}
419
420long double acosl( long double x )
421{
422 double arg, argl, argsq, argsq2;
423 double temp,temp2, temp3;
424 double sum, sumt, suml;
425 double xHead, xTail;
426 double sq, sq2, sq3;
427 double sqrtextra;
428 double res, reslow, resmid, restiny, reshi, resbot, resinf;
429 double h, h2, g2, g3, g4, p, q, t;
430 double prod, prodlow;
431 double fenv;
432 DblDbl ddx;
433
434 int i;
435 uint32_t hibits;
436
437 struct asinCoeffEntry *asinPtr = (struct asinCoeffEntry *) asinCoeff;
438
439 FEGETENVD(fenv);
440 FESETENVD(0.0);
441
442 ddx.f = x;
443
444 xHead = ddx.d[0];
445 xTail = ddx.d[1];
446
447 hibits = ddx.i[0] & 0x7FFFFFFFu; // highest 32 bits as uint32_t
448
449 // This filters most of the invalid cases (NaNs, inf and |x| > 1)
450
451 if (hibits > 0x3ff00000u) { // |x| > 1.0 or NaN
452 if (xHead != xHead) { // x is a NaN
453 FESETENVD(fenv);
454 return x;
455 }
456 else { // |x| > 1.0
457 ddx.d[0] = kNaNu.f; // NaN
458 ddx.d[1] = ddx.d[0];
459 FESETENVD(fenv);
460 return (ddx.f);
461 }
462 }
463
464 // For values of |x| very close to unity, xTail needs to be considered.
465
466 if ((xHead > 0.0) && (xHead -1.0 + xTail > 0.0)) { // x > 1 out of range
467 ddx.d[0] = kNaNu.f; // NaN
468 ddx.d[1] = ddx.d[0];
469 FESETENVD(fenv);
470 return (ddx.f);
471 }
472
473 if ((xHead < 0.0) && (xHead +1.0 + xTail < 0.0)) { // x < -1 out of range
474 ddx.d[0] = kNaNu.f; // NaN
475 ddx.d[1] = ddx.d[0];
476 FESETENVD(fenv);
477 return (ddx.f);
478 }
479
480 //finite result
481
482 // finite tiny x
483 if (hibits < 0x3c800000u) { // Tiny argument -- |x| < 2^-55
484 //subnormal case underflow. Zero must be excluded.
485 res = kPiBy2;
486 reslow = -xHead;
487 resmid = __FSUB( kPiBy2Tail1, xTail );
488 restiny = kPiBy2Tail1 - resmid - xTail + kPiBy2Tail2;
489 p = __FADD( reslow, resmid );
490 reshi = __FADD( res, p );
491 if (fabs (reslow) > fabs (resmid))
492 q = reslow - p + resmid;
493 else
494 q = resmid - p + reslow;
495 resbot = (res-reshi+p) + (q+restiny);
496 ddx.d[0] = __FADD( reshi, resbot );
497 ddx.d[1] = (reshi - ddx.d[0]) + resbot;
498 FESETENVD(fenv);
499 return (ddx.f);
500 }
501
502 // Argument is valid, i.e., |x| <= 1 range reduce
503
504 if (hibits <= 0x3fe00000u) { // |xHead| <= 0.5
505 // in power series for ASIN
506 temp = __FMUL( (2.0*xHead), xTail ); // Compute acos(x)=pi/2-asin(x)
507 argsq = __FMADD( xHead, xHead, temp ); // temp + xHead*xHead;
508 /* argsq2 = xHead*xHead - argsq + temp + ((2.0*xHead)*xTail-temp); */
509 argsq2 = __FMSUB( xHead, xHead, argsq ) + temp + __FMSUB( (2.0*xHead), xTail, temp );
510
511 sum = asinPtr[50].Head; // Use Taylor Series
512 sum = __FMADD( sum, argsq, asinPtr[49].Head ); // asinPtr[49].Head + sum*argsq;
513 sum = __FMADD( sum, argsq, asinPtr[48].Head ); // asinPtr[48].Head + sum*argsq;
514 sum = __FMADD( sum, argsq, asinPtr[47].Head ); // asinPtr[47].Head + sum*argsq;
515 sum = __FMADD( sum, argsq, asinPtr[46].Head ); // asinPtr[46].Head + sum*argsq;
516
517 for (i = 45; i >= kNterms+2; i--) // First 25 terms in working
518 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq;
519
520 sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );// asinPtr[kNterms+1].Head + sum*argsq;
521 suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );// asinPtr[kNterms+1].Head - sumt + sum*argsq;
522 sum = sumt;
523
524 for (i = 1; i <= kNterms; i++) {
525 temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head ); // asinPtr[kNterms-i+1].Head + sum*argsq;
526 /* suml = asinPtr[kNterms-i+1].Head - temp+sum*argsq +
527 (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
528 suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
529 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
530 sum = temp;
531 }
532
533 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
534 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
535 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
536 temp2 = __FMADD( prodlow, xHead, prod*xTail ); // prodlow*xHead + prod*xTail; // mult. by arg
537 temp = __FMADD( prod, xHead, temp2 ); // prod*xHead + temp2;
538 temp2 = __FMSUB( prod, xHead, temp ) + temp2; // prod*xHead - temp+temp2;
539
540 res = __FADD( xHead, temp ); // add argument
541 reslow = (xHead - res + temp);
542 resmid = __FADD( xTail, temp2 );
543 restiny = xTail - resmid + temp2;
544 p = __FADD( reslow, resmid ); // sum of middle terms
545 q = reslow - p + resmid; // possible residual
546 reshi = __FADD( res, p );
547 resbot = __FADD( (res - reshi + p), (q + restiny) );
548 resinf = (res - reshi + p) - resbot + (q + restiny);
549
550 res = __FSUB( kPiBy2, reshi ); // and subtract from Pi/2
551 reslow = kPiBy2 - res - reshi;
552 resmid = __FSUB( kPiBy2Tail1, resbot );
553
554 if (kPiBy2Tail1 > fabs (resbot))
555 restiny = kPiBy2Tail1 - resmid - resbot + (kPiBy2Tail2 - resinf);
556 else
557 restiny = kPiBy2Tail1 - (resmid + resbot) + (kPiBy2Tail2 - resinf);
558 p = __FADD( reslow, resmid );
559 q = reslow - p + resmid;
560 reshi = __FADD( res, p );
561 resbot = (res - reshi + p) + (q + restiny);
562
563 ddx.d[0] = __FADD( reshi, resbot );
564 ddx.d[1] = (reshi - ddx.d[0]) + resbot;
565
566 FESETENVD(fenv);
567 return (ddx.f);
568 }
569
570
571 if (fabs (xHead) < kSqrt3By2) { // Pi/6 < |result| < Pi/3
572 //Use 2.*(xHead ,xTail )^2-1 as arg.
573 h = __FMUL( xHead, xHead ); // careful computation of
574 h2 = __FMSUB( xHead, xHead, h ); // xHead*xHead - h; // square of original argument
575 g2 = __FMUL( (2.0*xHead), xTail );
576 g3 = __FMADD( xTail, xTail, __FMSUB( (2.0*xHead), xTail, g2 ) ); // (2.0*xHead)*xTail - g2 + xTail*xTail;
577 t = __FADD( h2 , g2 ); //sum of middle parts
578 sq = __FADD( h, t );
579
580 if (fabs (h2) > fabs (g2)) // More than 107 bits are needed,
581 g4 = h2 - t + g2 + g3; // because the square will be
582 else // subtracted from small constants,
583 g4 = g2 - t + h2 + g3; // causing leading bit cancel-
584 // lations, which must be filled
585 sq2 = __FADD( (h - sq + t), g4 ); // in from the right
586 sq3 = (h - sq + t) - sq2 + g4; // This captures extra l.o. bits
587 temp = __FMSUB( 2.0, sq, 1.0 ); // 2.0*sq - 1.0;
588 temp2 = __FMADD( 2.0, sq2, __FMSUB( 2.0, sq, 1.0 + temp ) ); // 2.0*sq - (1.0 + temp) + 2.0*sq2;
589 arg = __FADD( temp, temp2 );
590 argl = __FMADD( 2.0, sq3, temp - arg + temp2 ); // temp - arg + temp2 + 2.0*sq3;
591 temp = 2.0*arg*argl;
592 argsq = __FMADD( arg, arg, temp ); // arg*arg + temp; // Square of new argument
593 argsq2 = __FMSUB( arg, arg, argsq ) + temp; // arg*arg - argsq + temp;
594 // Compute result as
595 // Pi/4-0.5*asin(arg,argl)
596 // Compute asin with
597 sum = asinPtr[50].Head; // Taylor Series
598
599 for (i = 49;i >= kNterms+2;i--) // First half(approx) terms in
600 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq; // precison
601
602 sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );// asinPtr[kNterms+1].Head + sum*argsq;
603 suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );// asinPtr[kNterms+1].Head - sumt + sum*argsq;
604 sum = sumt;
605
606 for (i = 1;i <= kNterms;i++) { // remaining terms in quad prec.
607 temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head ); // asinPtr[kNterms-i+1].Head + sum*argsq;
608 /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
609 (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
610 suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
611 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
612 sum = temp;
613 }
614
615 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
616 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
617 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
618 temp2 = __FMADD( prodlow, arg, prod*argl ); // prodlow*arg + prod*argl; // mult. by arg
619 temp = __FMADD( prod, arg, temp2 ); // prod*arg + temp2;
620 temp2 = __FMSUB( prod, arg, temp ) + temp2; // prod*arg - temp+temp2;
621
622 res = __FADD( arg, temp ); // add arg for asin(arg)
623 reslow = (arg - res + temp); // exact
624 resmid = __FADD( argl, temp2 );
625 restiny = argl - resmid + temp2;
626 p = __FADD( reslow, resmid ); // sum of middle terms
627 q = reslow - p + resmid; // possible residual
628 reshi = __FADD( res, p );
629 resbot = __FADD( (res - reshi + p), (q + restiny) ); // prepare to subtract from pi/4
630 resinf = (res - reshi + p) - resbot + (q + restiny);
631
632 if (xHead>0.0) {
633 temp = kPiBy4; // positive argument
634 temp2 = kPiBy4Tail1; // acos(x)=pi/4-.5 asin(arg,arg1)
635 temp3 = kPiBy4Tail2;
636 }
637 else {
638 temp = k3PiBy4; // negative argument
639 temp2 = k3PiBy4Tail1; // acos(x)=3pi/4 +
640 temp3 = k3PiBy4Tail2; // 0.5 asin(arg,argl)
641 reshi = -reshi;
642 resbot = -resbot;
643 resinf = -resinf;
644 }
645
646 res = __FNMSUB( 0.5, reshi, temp ); // temp - 0.5*reshi;
647 reslow = __FNMSUB( 0.5, reshi, temp - res ) ; // (temp - res - 0.5*reshi);
648 resmid = __FNMSUB( 0.5, resbot, temp2 ); // temp2 - 0.5*resbot;
649
650 if (fabs (temp2) > fabs (0.5*resbot)) {
651 /* restiny = temp2 - resmid - 0.5*resbot + temp3 - 0.5*resinf; */
652 restiny = __FNMSUB( 0.5, resbot, temp2 - resmid ) + __FNMSUB( 0.5, resinf, temp3 );
653 }
654 else {
655 /* restiny = temp2 - (0.5*resbot + resmid) + temp3 - 0.5*resinf; */
656 restiny = temp2 - __FMADD( 0.5, resbot, resmid ) + __FNMSUB( 0.5, resinf, temp3 );
657 }
658
659 p = __FADD( reslow, resmid );
660 q = reslow - p + resmid;
661 reshi = __FADD( res, p );
662 resbot = (res - reshi + p) + (q + restiny);
663
664 ddx.d[0] = __FADD( reshi, resbot );
665 ddx.d[1] = (reshi - ddx.d[0]) + resbot;
666
667 FESETENVD(fenv);
668 return (ddx.f);
669 }
670 else { // "large arguments", .866 < abs(xHead,xTail) <= 1.0
671
672 if ((xHead - 1.0 + xTail) == 0.0) { // for input of 1.0, return zero
673 FESETENVD(fenv);
674 return (long double) 0.0;
675 }
676
677 if ((xHead + 1.0 - xTail) == 0.0) { // for input of -1.0, return Pi
678 ddx.d[0] = kPi;
679 ddx.d[1] = kPiTail1;
680 FESETENVD(fenv);
681 return ddx.f;
682 }
683
684 if (xHead < 0.0) { // Use absolute value of input
685 temp = -xHead; // to work only in the first
686 temp2 = -xTail; // quadrant
687 }
688 else { // For input x > 0.5, use identity
689 temp = xHead; // asin(x) =
690 temp2 = xTail; // pi/2-2asin(sqrt((1-x)/2))
691 }
692
693 h = __FNMSUB( 0.5, temp, 0.5 ); // 0.5 - 0.5*temp; // exact
694 argsq = __FNMSUB( 0.5, temp2, h ); // h - 0.5*temp2;
695 argsq2 = __FNMSUB( 0.5, temp2, h - argsq ); // __FSUB( h, argsq ) - 0.5*temp2; //square of reduced arg, exact
696
697 ddx.f = __sqrtldextra(argsq , argsq2, &sqrtextra);
698
699 sum = asinPtr[25].Head; // Taylor Series
700
701 for (i = 24; i >= kNterms/2+2; i--)
702 sum = __FMADD( sum, argsq, asinPtr[i].Head );// asinPtr[i].Head + sum*argsq; // working precison
703
704 sumt = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head );// asinPtr[kNterms/2+1].Head + sum*argsq;
705 suml = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head - sumt );// __FSUB( asinPtr[kNterms/2+1].Head, sumt ) + sum*argsq;
706 sum = sumt;
707
708 for (i = 1; i <= kNterms/2; i++) { // remaining terms in quad prec.
709 temp = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head ); // asinPtr[kNterms/2-i+1].Head + sum*argsq;
710 /* suml = asinPtr[kNterms/2-i+1].Head - temp + sum*argsq +
711 (asinPtr[kNterms/2-i+1].Tail + sum*argsq2 + suml*argsq); */
712 suml = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head - temp ) +
713 __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms/2-i+1].Tail ) );
714 sum = temp;
715 }
716
717 arg = ddx.d[0];
718 argl = ddx.d[1];
719
720 prodlow = __FMADD( suml, argsq, sum*argsq2 ); // suml*argsq + sum*argsq2; // mult. by arg^2
721 prod = __FMADD( sum, argsq, prodlow ); // sum*argsq + prodlow;
722 prodlow = __FMSUB( sum, argsq, prod ) + prodlow;// sum*argsq - prod+prodlow;
723 temp2 = __FMADD( prodlow, arg, prod*argl ); // prodlow*arg + prod*argl; // multiply by arg
724 temp = __FMADD( prod, arg, temp2 ); // prod*arg + temp2;
725 temp2 = __FMSUB( prod, arg, temp ) + temp2; // prod*arg - temp + temp2;
726
727 res = __FADD( arg, temp ); // add arg for asin(arg)
728 reslow = (arg - res + temp); // exact
729 resmid = __FADD( argl, temp2 );
730 restiny = argl - resmid + temp2 + sqrtextra;
731 p = __FADD( reslow, resmid ); // sum of middle terms
732 q = reslow - p + resmid; // possible residual
733 reshi = __FADD( res, p );
734 resbot = __FADD( (res - reshi + p), (q + restiny) ); //get ready to subtract from pi/4
735
736 if (xHead > 0) { // for first quadrant angles,
737 ddx.d[0] = __FMADD( 2.0, reshi, 2.0*resbot ); // __FADD( 2.0*reshi, 2.0*resbot ); // we are finished
738 ddx.d[1] = __FMADD( 2.0, resbot, __FMSUB( 2.0, reshi, ddx.d[0] ) ); // (2.0*reshi - ddx.d[0]) + 2.0*resbot;
739 FESETENVD(fenv);
740 return (ddx.f);
741 }
742
743 resinf = (res - reshi +p ) - resbot + (q + restiny); // for second quadrant need to
744 res = __FNMSUB( 2.0, reshi, kPi ); // __FSUB( kPi, (2.0*reshi) ); // subtract from Pi.
745 reslow = __FNMSUB( 2.0, reshi, kPi - res ); // (kPi - res - (2.0*reshi));
746 resmid = __FNMSUB( 2.0, resbot, kPiTail1 ); // __FSUB( kPiTail1, (2.0*resbot) );
747
748 if (kPiTail1 > fabs(2.0*resbot))
749 /* restiny = kPiTail1 - resmid - (2.0*resbot) + (kPiTail2 - 2.0*resinf); */
750 restiny = __FNMSUB( 2.0, resbot, kPiTail1 - resmid ) + __FNMSUB( 2.0, resinf, kPiTail2 );
751 else
752 /* restiny = kPiTail1 - (resmid + (2.0*resbot)) + (kPiTail2 - 2.0*resinf); */
753 restiny = kPiTail1 - __FMADD( 2.0, resbot, resmid ) + __FNMSUB( 2.0, resinf, kPiTail2 );
754
755 p = __FADD( reslow, resmid );
756 q = reslow - p + resmid;
757 reshi = __FADD( res, p );
758 resbot = (res - reshi + p) + (q + restiny);
759
760 ddx.d[0] = __FADD( reshi, resbot );
761 ddx.d[1] = (reshi - ddx.d[0]) + resbot;
762
763 FESETENVD(fenv);
764 return (ddx.f);
765 }
766}
767#endif