this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* File asinacos.c, *
25* Implementation of asin and acos for the PowerPC. *
26* *
27* Copyright � 1991-2001 Apple Computer, Inc. All rights reserved. *
28* *
29* Written and ported by Robert A. Murley (ram) for Mac OS X. *
30* Modified by A. Sazegari, September 2001. *
31* *
32* A MathLib v4 file. *
33* *
34* Based on the trigonometric functions from IBM/Taligent. *
35* *
36* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. *
37* replaced DblInHex typedef with hexdouble. *
38* September 07 2001: added #ifdef __ppc__. *
39* September 09 2001: added more comments. *
40* September 10 2001: added macros to detect PowerPC and correct compiler. *
41* September 21 2001: added <CoreServices/CoreServices.h> to get to <fp.h> *
42* and <fenv.h>. *
43* September 24 2001: used standard symbols for environment flags. *
44* September 25 2001: added FESETENVD(0.0) at start of acos. *
45* October 08 2001: removed <CoreServices/CoreServices.h>. *
46* changed compiler errors to warnings. *
47* November 06 2001: commented out warning about Intel architectures. *
48* *
49* W A R N I N G: *
50* These routines require a 64-bit double precision IEEE-754 model. *
51* They are written for PowerPC only and are expecting the compiler *
52* to generate the correct sequence of multiply-add fused instructions. *
53* *
54* These routines are not intended for 32-bit Intel architectures. *
55* *
56* A version of gcc higher than 932 is required. *
57* *
58* GCC compiler options: *
59* optimization level 3 (-O3) *
60* -fschedule-insns -finline-functions -funroll-all-loops *
61* *
62*******************************************************************************/
63
64#include "fp_private.h"
65#include "fenv_private.h"
66#include "math.h"
67
68extern const unsigned short SqrtTable[];
69
70// Floating-point constants
71static const double kPiBy2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54
72static const hexdouble kPiu = HEXDOUBLE(0x400921fb, 0x54442d18);
73
74#define INVERSE_TRIGONOMETRIC_NAN "34"
75
76/****************************************************************************
77
78FUNCTION: double asin (double x)
79
80DESCRIPTION: returns the inverse sine of its argument in the range of
81 -pi/2 to pi/2 (radians).
82
83CALLS: __FABS
84
85EXCEPTIONS raised: INEXACT, UNDERFLOW/INEXACT or INVALID.
86
87ACCURACY: Error is less than an ulp and usually less than half an ulp.
88 Caller's rounding direction is honored.
89
90****************************************************************************/
91static struct kBlock {
92 const double k2ToM26 /* = 1.490116119384765625e-8 */ ; // 0x1.0p-26 */ ;
93 const double kPiBy2 /* = 1.570796326794896619231322 */ ; // 0x1.921fb54442d18p0
94 const double kPiBy2Tail /* = 6.1232339957367660e-17 */ ; // 0x1.1a62633145c07p-54
95 const double kMinNormal /* = 2.2250738585072014e-308 */ ; // 0x1.0p-1022
96
97 const double a14 /* = 0.03085091303188211304259 */ ;
98 const double a13 /* =-0.02425125216179617744614 */ ;
99 const double a12 /* = 0.02273334623573271023373 */ ;
100 const double a11 /* = 0.0002983797813053653120360 */ ;
101 const double a10 /* = 0.008819738782817955152102 */ ;
102 const double a9 /* = 0.008130738583790024803650 */ ;
103 const double a8 /* = 0.009793486386035222577675 */ ;
104 const double a7 /* = 0.01154901189590083099584 */ ;
105 const double a6 /* = 0.01396501522140471126730 */ ;
106 const double a5 /* = 0.01735275722383019335276 */ ;
107 const double a4 /* = 0.02237215928757573539490 */ ;
108 const double a3 /* = 0.03038194444121688698408 */ ;
109 const double a2 /* = 0.04464285714288482921087 */ ;
110 const double a1 /* = 0.07499999999999990640378 */ ;
111 const double a0 /* = 0.1666666666666666667191 */ ;
112 const double aa11 /* = 0.0002983797813053653120360 */ ;
113 const double aa10 /* = 0.008819738782817955152102 */ ;
114 const double aa9 /* = 0.008130738583790024803650 */ ;
115 const double aa8 /* = 0.009793486386035222577675 */ ;
116 const double aa7 /* = 0.01154901189590083099584 */ ;
117 const double aa6 /* = 0.01396501522140471126730 */ ;
118 const double aa5 /* = 0.01735275722383019335276 */ ;
119 const double aa4 /* = 0.02237215928757573539490 */ ;
120 const double aa3 /* = 0.03038194444121688698408 */ ;
121 const double aa2 /* = 0.04464285714288482921087 */ ;
122 const double aa1 /* = 0.07499999999999990640378 */ ;
123 const double aa0 /* = 0.1666666666666666667191 */ ;
124} kBlock =
125{
126/* static const double k2ToM26 = */ 0x1.0p-26, // 1.490116119384765625e-8
127/* static const double kPiBy2 = */ 1.570796326794896619231322, // 0x1.921fb54442d18p0
128/* static const double kPiBy2Tail = */ 6.1232339957367660e-17, // 0x1.1a62633145c07p-54
129/* static const double kMinNormal = */ 0x1.0p-1022, // 2.2250738585072014e-308
130
131/* const static double a14 = */ 0.03085091303188211304259,
132/* const static double a13 = */-0.02425125216179617744614,
133/* const static double a12 = */ 0.02273334623573271023373,
134/* const static double a11 = */ 0.0002983797813053653120360,
135/* const static double a10 = */ 0.008819738782817955152102,
136/* const static double a9 = */ 0.008130738583790024803650,
137/* const static double a8 = */ 0.009793486386035222577675,
138/* const static double a7 = */ 0.01154901189590083099584,
139/* const static double a6 = */ 0.01396501522140471126730,
140/* const static double a5 = */ 0.01735275722383019335276,
141/* const static double a4 = */ 0.02237215928757573539490,
142/* const static double a3 = */ 0.03038194444121688698408,
143/* const static double a2 = */ 0.04464285714288482921087,
144/* const static double a1 = */ 0.07499999999999990640378,
145/* const static double a0 = */ 0.1666666666666666667191,
146/* const static double aa11 = */ 0.0002983797813053653120360,
147/* const static double aa10 = */ 0.008819738782817955152102,
148/* const static double aa9 = */ 0.008130738583790024803650,
149/* const static double aa8 = */ 0.009793486386035222577675,
150/* const static double aa7 = */ 0.01154901189590083099584,
151/* const static double aa6 = */ 0.01396501522140471126730,
152/* const static double aa5 = */ 0.01735275722383019335276,
153/* const static double aa4 = */ 0.02237215928757573539490,
154/* const static double aa3 = */ 0.03038194444121688698408,
155/* const static double aa2 = */ 0.04464285714288482921087,
156/* const static double aa1 = */ 0.07499999999999990640378,
157/* const static double aa0 = */ 0.1666666666666666667191
158};
159
160double asin (double x)
161{
162 hexdouble cD, yD ,gD , fpenv;
163 double absx, x2, x3, x4, temp1, temp2, u, v;
164 double g, y, d, y2, e;
165 register int index;
166 register uint32_t ghead, yhead, rnddir;
167
168 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_yD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail;
169 register struct kBlock *pK;
170
171 pK = &kBlock;
172
173 FPR_z = 0.0; FPR_half = 0.5;
174 FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal;
175 FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0;
176
177 if (unlikely(x != x)) // NaN argument
178 return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) );
179
180 absx = __FABS( x );
181
182 FEGETENVD ( FPR_env ); // save env, set default
183 __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads
184 FPR_cD = __FNMSUB( FPR_half, absx, FPR_half );
185 FESETENVD ( FPR_z );
186
187 x2 = __FMUL( x, x ); // under/overflows are masked
188 cD.d = FPR_cD;
189
190 if ( absx <= FPR_half )
191 { // |x| <= 0.5
192 if (unlikely( absx < FPR_k2ToM26 ))
193 { // |x| < 2^(-26)
194 if ( absx == FPR_z )
195 { // x == 0.0 case is exact
196 FESETENVD ( FPR_env );
197 return ( x );
198 }
199
200 fpenv.d = FPR_env;
201 __NOOP;
202 __NOOP;
203 __NOOP;
204 rnddir = fpenv.i.lo & FE_ALL_RND; // caller's rounding direction
205 FPR_yD = x;
206
207 // Rounding away from zero: return NextDouble(x,x+x)
208 if (((rnddir == FE_DOWNWARD) && (x < FPR_z)) || ((rnddir == FE_UPWARD) && (x > FPR_z)))
209 {
210 yD.d = FPR_yD;
211 __NOOP;
212 __NOOP;
213 __NOOP;
214 yD.i.lo += 1u;
215 if (yD.i.lo == 0u)
216 yD.i.hi += 1u;
217 __NOOP;
218 __NOOP;
219 __NOOP;
220 FPR_yD = yD.d;
221 absx = __FABS( FPR_yD );
222 }
223
224 FESETENVD ( FPR_env ); // restore caller environment
225 __PROG_INEXACT( FPR_kPiBy2 );
226
227 // Set UNDERFLOW if result is subnormal
228 if (absx < FPR_kMinNormal) // subnormal case
229 __PROG_UF_INEXACT( FPR_kMinNormal ); // set UNDERFLOW
230
231 return ( FPR_yD ); // return small result
232 } // [|x| < 2^(-26)]
233
234 {
235 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
236 register double r10, r11, r12, r13, r14;
237
238 r14 = pK->a14; r13 = pK->a13;
239 temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 );
240
241 x3 = __FMUL( x, x2 ); __NOOP;
242 r11 = pK->a11; r12 = pK->a12;
243
244 r9 = pK->a9; r10 = pK->a10;
245 temp1 = __FMADD( temp1, x4, r11 ); __NOOP;
246
247 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 );
248 r7 = pK->a7; r8 = pK->a8;
249
250 r5 = pK->a5; r6 = pK->a6;
251 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 );
252
253 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 );
254 r3 = pK->a3; r4 = pK->a4;
255
256 r1 = pK->a1; r2 = pK->a2;
257 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 );
258
259 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 );
260 r0 = pK->a0; __NOOP;
261
262 temp2 = __FMADD( temp2, x2, temp1 );
263 temp1 = __FMADD( x2, temp2, r0 );
264 }
265
266 FESETENVD ( FPR_env ); // restore caller's mode
267 __PROG_INEXACT( FPR_kPiBy2 );
268
269 return (__FMADD( x3, temp1, x ));
270 } // [|x| <= 0.5]
271
272/****************************************************************************
273 For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate
274 the power series resulting from the identity:
275 ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))).
276 The square root is evaluated in-line and concurrently with the
277 evaluation of the ArcSin.
278 ***************************************************************************/
279
280 if (absx < FPR_one)
281 { // |x| < 1.0
282 register double FPR_gD, FPR_two;
283 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
284 register double r10, r11, r12, r13, r14;
285 register uint32_t GPR_p, GPR_q, GPR_r, GPR_s, GPR_t;
286
287 x2 = FPR_cD; /* fmr */ x4 = __FMUL( FPR_cD, FPR_cD); // Was x2*x2;
288 GPR_t = cD.i.hi; r0 = pK->aa0;
289
290 GPR_s = GPR_t + 0x3ff00000u; __NOOP;
291 r14 = pK->a14; r13 = pK->a13;
292
293 r12 = pK->a12; r11 = pK->a11;
294 gD.i.lo = 0u;
295
296 index = (GPR_t >> 13) & 0xffu;
297 GPR_p = SqrtTable[index]; // 3 instrs issue
298
299 temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 );
300 r10 = pK->a10; r9 = pK->a9;
301
302 GPR_r = ((0xff00u & GPR_p) << 4);
303 ghead = (GPR_s >> 1) & 0x7ff00000u;
304 gD.i.hi = ghead + GPR_r;
305
306 r8 = pK->a8; r7 = pK->aa7;
307 yhead = 0x7fc00000u - ghead;
308
309 yD.i.lo = 0u; GPR_q = ((0xffu & GPR_p) << 12);
310 yD.i.hi = yhead + GPR_q;
311
312 r6 = pK->aa6; r5 = pK->aa5;
313 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 );
314
315 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 );
316 r4 = pK->aa4; r3 = pK->aa3;
317
318 r2 = pK->aa2; r1 = pK->aa1;
319 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 );
320
321 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 );
322 FPR_two = 2.0;
323
324 FPR_gD = gD.d; y = yD.d;
325 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 );
326
327 d = __FNMSUB( FPR_gD, FPR_gD, FPR_cD ); FPR_kPiBy2Tail = pK->kPiBy2Tail;
328 g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g
329 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD );
330 y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 ); // 16-bit y
331 g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g
332
333 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, FPR_cD );
334 y = __FMADD( e, y2, y ); temp1 = __FMADD( x2, temp1, r0 ); // 32-bit y
335 g = __FMADD( y, d, g); // 64-bit Sqrt
336 d = __FNMSUB( g, g, FPR_cD ); u = __FNMSUB( FPR_two, g, FPR_kPiBy2 ); // pi/2 - 2.0*Sqrt (head)
337 d = __FMUL( d, y ); v = __FNMSUB( FPR_two, g, (FPR_kPiBy2 - u) );// tail of 0.5*Sqrt (32 bits)
338 g = __FMUL( g, x2 ); d = __FNMSUB( FPR_half, FPR_kPiBy2Tail, d );
339
340 temp2 = __FNMSUB( FPR_half, v, d );
341 temp2 = __FMADD( g, temp1, temp2 );
342
343 FESETENVD ( FPR_env ); // restore caller's mode
344 __PROG_INEXACT( FPR_kPiBy2 );
345
346 if (x > FPR_z)
347 { // take care of sign of result
348 return __FNMSUB( FPR_two, temp2, u );
349 }
350 else
351 return __FMSUB( FPR_two, temp2, u );
352 } // [|x| < 1.0]
353
354
355 // If |x| == 1.0, return properly signed pi/2.
356 if (absx == FPR_one)
357 {
358 FPR_kPiBy2Tail = pK->kPiBy2Tail;
359
360 FESETENVD (FPR_env); // restore caller's mode
361 if (x > FPR_z)
362 return __FMADD( x, FPR_kPiBy2Tail, FPR_kPiBy2 );
363 else
364 return __FMSUB( x, FPR_kPiBy2Tail, FPR_kPiBy2 );
365 }
366
367 // |x| > 1.0 signals INVALID and returns a NaN
368 fpenv.d = FPR_env;
369 __NOOP;
370 __NOOP;
371 __NOOP;
372 fpenv.i.lo |= SET_INVALID;
373 FESETENVD_GRP (fpenv.d);
374 return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) );
375} // ArcSin(x)
376
377/****************************************************************************
378
379FUNCTION: double ArcCos(double x)
380
381DESCRIPTION: returns the inverse cosine of its argument in the range of
382 0 to pi (radians).
383
384CALLS: __FABS
385
386EXCEPTIONS raised: INEXACT or INVALID.
387
388ACCURACY: Error is less than an ulp and usually less than half an ulp.
389 Caller's rounding direction is honored.
390
391****************************************************************************/
392
393double acos(double x)
394{
395 hexdouble cD, yD, gD, fpenv;
396 double absx, x2, x3, x4, temp1, temp2, s, u, v, w;
397 double g, y, d, y2, e;
398 int index;
399 uint32_t ghead, yhead;
400
401 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_k2ToM26, FPR_cD, FPR_kMinNormal, FPR_kPiBy2, FPR_kPiBy2Tail;
402 register struct kBlock *pK;
403
404 pK = &kBlock;
405
406 FPR_z = 0.0; FPR_half = 0.5;
407 FPR_k2ToM26 = pK->k2ToM26; FPR_kMinNormal = pK->kMinNormal;
408 FPR_kPiBy2 = pK->kPiBy2; FPR_one = 1.0;
409
410 if (unlikely(x != x)) // NaN
411 return ( copysign ( nan ( INVERSE_TRIGONOMETRIC_NAN ), x ) );
412
413 absx = __FABS(x);
414
415 FEGETENVD ( FPR_env ); // save env, set default
416 __ENSURE( FPR_z, FPR_half, FPR_k2ToM26 ); __ENSURE( FPR_one, FPR_kMinNormal, FPR_kPiBy2 ); // ensure register loads
417 FPR_cD = __FNMSUB( FPR_half, absx, FPR_half );
418 FESETENVD ( FPR_z );
419
420 x2 = __FMUL( x, x ); // under/overflows are masked
421 cD.d = FPR_cD;
422
423 if ( absx <= FPR_half )
424 { // |x| <= 0.5
425 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
426 register double r10, r11, r12, r13, r14;
427
428 r14 = pK->a14; r13 = pK->a13;
429 temp1 = __FMADD( r14, x2, r13 ); x4 = __FMUL( x2, x2 );
430
431 x3 = __FMUL( x, x2 ); u = FPR_kPiBy2 - x; // pi/2 - x (high-order term)
432 r11 = pK->a11; r12 = pK->a12;
433
434 r9 = pK->a9; r10 = pK->a10;
435 temp1 = __FMADD( temp1, x4, r11 ); __NOOP;
436
437 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( r12, x4, r10 );
438 r7 = pK->a7; r8 = pK->a8;
439
440 r5 = pK->a5; r6 = pK->a6;
441 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 );
442
443 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 );
444 r3 = pK->a3; r4 = pK->a4;
445
446 r1 = pK->a1; r2 = pK->a2;
447 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 );
448
449 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 );
450 r0 = pK->a0; FPR_kPiBy2Tail = pK->kPiBy2Tail;
451
452 // finish up ArcCos(x) = pi/2 - ArcSin(x)
453
454 temp1 = __FMADD( x2, temp2, temp1 );
455 v = FPR_kPiBy2 - u - x; // pi/2 - x (tail)
456 temp1 = __FMADD( x2, temp1, pK->a0 );
457 w = __FNMSUB( x3, temp1, (v + FPR_kPiBy2Tail) );// combine low order terms
458
459 FESETENVD ( FPR_env ); // restore caller's mode
460 __PROG_INEXACT( FPR_kPiBy2 );
461
462 return ( u + w );
463 } // [|x| <= 0.5]
464
465/****************************************************************************
466 For 0.5 < |x| < 1.0, a polynomial approximation is used to estimate
467 the power series resulting from the identity:
468 ArcSin(x) = pi/2 - 2*ArcSin(Sqrt(0.5(1.0 - x))).
469 The square root is evaluated in-line and concurrently with the
470 evaluation of the ArcSin.
471 ****************************************************************************/
472 if ( absx < FPR_one )
473 { // 0.5 < |x| < 1.0
474 register double FPR_gD, FPR_two;
475 register double r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
476 register double r10, r11, r12, r13, r14;
477 register uint32_t GPR_p, GPR_q, GPR_r, GPR_s, GPR_t;
478
479 x2 = FPR_cD; /* fmr */ x4 = __FMUL( x2, x2);
480 GPR_t = cD.i.hi; r0 = pK->aa0;
481
482 GPR_s = GPR_t + 0x3ff00000u; __NOOP;
483 r14 = pK->a14; r13 = pK->a13;
484
485 r12 = pK->a12; r11 = pK->aa11;
486 gD.i.lo = 0u;
487
488 index = (GPR_t >> 13) & 0xffu;
489 GPR_p = SqrtTable[index]; // 3 instrs issue
490
491 temp1 = __FMADD( r13, x4, r11 ); temp2 = __FMADD( r14, x4, r12 );
492 r10 = pK->aa10; r9 = pK->aa9;
493
494 GPR_r = ((0xff00u & GPR_p) << 4);
495 ghead = (GPR_s >> 1) & 0x7ff00000u;
496 gD.i.hi = ghead + GPR_r;
497
498 r8 = pK->aa8; r7 = pK->aa7;
499 yhead = 0x7fc00000u - ghead;
500
501 yD.i.lo = 0u; GPR_q = ((0xffu & GPR_p) << 12);
502 yD.i.hi = yhead + GPR_q;
503
504 r6 = pK->aa6; r5 = pK->aa5;
505 temp1 = __FMADD( temp1, x4, r9 ); temp2 = __FMADD( temp2, x4, r10 );
506
507 temp1 = __FMADD( temp1, x4, r7 ); temp2 = __FMADD( temp2, x4, r8 );
508 r4 = pK->aa4; r3 = pK->aa3;
509
510 r2 = pK->aa2; r1 = pK->aa1;
511 temp1 = __FMADD( temp1, x4, r5 ); temp2 = __FMADD( temp2, x4, r6 );
512
513 temp1 = __FMADD( temp1, x4, r3 ); temp2 = __FMADD( temp2, x4, r4 );
514 FPR_two = 2.0;
515
516 FPR_gD = gD.d; y = yD.d;
517 temp1 = __FMADD( temp1, x4, r1 ); temp2 = __FMADD( temp2, x4, r2 );
518
519 d = __FNMSUB( FPR_gD, FPR_gD, x2 );
520 g = __FMADD( y, d, FPR_gD ); y2 = y + y; // 16-bit g
521 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 );
522 y = __FMADD( e, y2, y); temp1 = __FMADD( x2, temp2, temp1 );// 16-bit y
523 g = __FMADD( y, d, g ); y2 = y + y; // 32-bit g
524 e = __FNMSUB( y, g, FPR_half ); d = __FNMSUB( g, g, x2 );
525 y = __FMADD( e, y2, y ); // 32-bit y
526 g = __FMADD( y, d, g); // 64-bit Sqrt
527 d = __FNMSUB( g, g, x2 ); // tail of 0.5*Sqrt (32 bits)
528 y2 = g + g; // 2*Sqrt
529
530 if (x > FPR_z)
531 { // positive x
532 s = __FMADD( __FMUL( g, x2 ), __FMADD( x2, temp1, pK->aa0), d );// low order terms
533
534 FESETENVD ( FPR_env ); // restore caller's mode
535 __PROG_INEXACT( FPR_kPiBy2 );
536
537 return __FMADD( FPR_two, s, y2); // combine terms
538 } // [positive x]
539 else
540 { // negative x
541 register double q,r,ss,t;
542
543 u = __FMSUB( FPR_two, FPR_kPiBy2, y2 ); // high-order term
544 r = d - kPiBy2Tail;
545 ss = __FMADD( x2, temp1, pK->aa0 );
546 t = __FMUL( g, x2 );
547 v = __FMSUB( FPR_two, FPR_kPiBy2, u ) - y2; // tail correction
548 q = __FNMSUB( FPR_half, v, r );
549 s = __FMADD( ss, t, q ); // collect low-order terms
550
551 FESETENVD ( FPR_env ); // restore caller's mode
552 __PROG_INEXACT( FPR_kPiBy2 );
553
554 return __FNMSUB( FPR_two, s, u); // combine terms
555 } // [negative x]
556 } // [0.5 < |x| < 1.0]
557
558
559 // |x| == 1.0: return exact +0.0 or inexact pi
560 if ( x == FPR_one )
561 {
562 FESETENVD ( FPR_env ); // restore caller's mode
563 return ( FPR_z );
564 }
565
566 if ( x == -1.0 )
567 {
568 FESETENVD ( FPR_env ); // restore caller's mode
569 __PROG_INEXACT( FPR_kPiBy2 );
570 return (kPiu.d + 2.0*kPiBy2Tail);
571 }
572
573 // |x| > 1.0 signals INVALID and returns a NaN
574 fpenv.d = FPR_env;
575 __NOOP;
576 __NOOP;
577 __NOOP;
578 fpenv.i.lo |= SET_INVALID;
579 FESETENVD_GRP ( fpenv.d );
580 return ( nan ( INVERSE_TRIGONOMETRIC_NAN ) );
581} // ArcCos(x)