this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* File atan.c, *
25* Function atan. *
26* *
27* Implementation of arctg based on the approximation provided by *
28* Taligent, Inc. who based it on sources from IBM. *
29* *
30* Copyright � 1996-2001 Apple Computer, Inc. All rights reserved. *
31* *
32* Written by A. Sazegari, started on June 1996. *
33* Modified and ported by Robert A. Murley (ram) for Mac OS X. *
34* *
35* A MathLib v4 file. *
36* *
37* The principal values of the inverse tangent function are: *
38* *
39* -�/2 � atan(x) � �/2, -� � x � +� *
40* *
41* August 26 1996: produced a PowerMathLib� version. this routine *
42* does not handle edge cases and does not set the *
43* ieee flags. its computation is suseptible to the *
44* inherited rounding direction. it is assumed that *
45* the rounding direction is set to the ieee default. *
46* October 16 1996: fixed a grave mistake by replacing x2 with xSquared. *
47* April 14 1997: added the environmental controls for mathlib v3. *
48* unlike mathlib v2, v3 will honor the inherited *
49* rounding mode since it is easier to set the flags *
50* computationally. *
51* July 01 1997: this function no longer honors the rounding mode. *
52* corrected an error for nan arguments. *
53* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. *
54* replaced DblInHex typedef with hexdouble. *
55* September 07 2001: added #ifdef __ppc__. *
56* September 09 2001: added more comments. *
57* September 10 2001: added macros to detect PowerPC and correct compiler. *
58* September 18 2001: added <CoreServices/CoreServices.h> to get to <fp.h> *
59* and <fenv.h>. *
60* October 08 2001: removed <CoreServices/CoreServices.h>. *
61* changed compiler errors to warnings. *
62* November 06 2001: commented out warning about Intel architectures. *
63* *
64* W A R N I N G: *
65* These routines require a 64-bit double precision IEEE-754 model. *
66* They are written for PowerPC only and are expecting the compiler *
67* to generate the correct sequence of multiply-add fused instructions. *
68* *
69* These routines are not intended for 32-bit Intel architectures. *
70* *
71* A version of gcc higher than 932 is required. *
72* *
73* GCC compiler options: *
74* optimization level 3 (-O3) *
75* -fschedule-insns -finline-functions -funroll-all-loops *
76* *
77*******************************************************************************/
78
79#include "fenv_private.h"
80#include "fp_private.h"
81#include "math.h"
82
83extern const unsigned short SqrtTable[];
84
85static const double c13 = 7.6018324085327799e-02; /* 0x1.375efd7d7dcbep-4 */
86static const double c11 = -9.0904243427904791e-02; /* -0x1.745802097294ep-4 */
87static const double c9 = 1.1111109821671356e-01; /* 0x1.c71c6e5103cddp-4 */
88static const double c7 = -1.4285714283952728e-01; /* -0x1.24924923f7566p-3 */
89static const double c5 = 1.9999999999998854e-01; /* 0x1.99999999997fdp-3 */
90static const double c3 = -3.3333333333333330e-01; /* -0x1.5555555555555p-2 */
91
92static const double Rint = 6.755399441055744e15; /* 0x1.8p52 */
93static const double PiOver2 = 1.570796326794896619231322; /* head of �/2 */
94static const double PiOver2Tail = 6.1232339957367660e-17; /* tail of �/2 */
95static const hexdouble Pi = HEXDOUBLE(0x400921fb, 0x54442d18);
96static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308;
97
98/*******************************************************************************
99********************************************************************************
100* A T A N *
101********************************************************************************
102*******************************************************************************/
103
104struct tableEntry /* tanatantable entry structure */
105 {
106 double p;
107 double f5;
108 double f4;
109 double f3;
110 double f2;
111 double f1;
112 double f0;
113 };
114
115extern const uint32_t tanatantable[];
116
117static const hexdouble Big = HEXDOUBLE(0x434d0297, 0x00000000);
118
119double atan ( double x )
120{
121 hexdouble reducedX;
122 register double fabsOfx, xSquared, xFourth, xThird, temp1, temp2, result, y, z;
123 struct tableEntry *tablePointer;
124
125 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_256, FPR_kMinNormal;
126 register double FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRint;
127 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_t;
128 register struct tableEntry *pT;
129
130 fabsOfx = __FABS ( x );
131
132 if (unlikely(x != x))
133 return x;
134
135 FPR_z = 0.0; FPR_half = 0.5;
136
137 FPR_one = 1.0; FPR_256 = 256.0;
138
139 FPR_PiDiv2 = PiOver2; FPR_PiDiv2Tail = PiOver2Tail;
140
141 FPR_kRint = Rint; FPR_t = Big.d;
142
143/*******************************************************************************
144* initialization of table pointer. *
145*******************************************************************************/
146
147 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) );
148
149/*******************************************************************************
150* |x| > 1.0 or NaN. *
151*******************************************************************************/
152
153 FEGETENVD( FPR_env );
154 __ENSURE( FPR_z, FPR_kRint, FPR_256 ); __ENSURE( FPR_half, FPR_one, FPR_PiDiv2 );
155
156 if ( fabsOfx > FPR_one )
157 {
158 __NOOP;
159 y = FPR_one / fabsOfx; // Executes in issue slot 1 hence FPU 1
160 __NOOP;
161
162/*******************************************************************************
163* |x| is nontrivial. *
164*******************************************************************************/
165
166 if (likely( fabsOfx < FPR_t ))
167 {
168 register double yTail, z, resultHead, resultTail;
169
170 xSquared = __FMUL( y, y );
171 FPR_t = 16.0;
172 FPR_r = __FMADD( FPR_256, y, FPR_kRint );
173
174 __NOOP;
175 __NOOP;
176 __NOOP;
177 reducedX.d = FPR_r;
178
179/*******************************************************************************
180* |x| > 16.0 *
181*******************************************************************************/
182 if ( fabsOfx > FPR_t )
183 {
184 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, y );
185 FPR_c13 = c13;
186
187 FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); resultHead = FPR_PiDiv2 - y;
188 FPR_c11 = c11;
189
190 FPR_c9 = c9;
191 yTail = __FMUL( y, FPR_t ); resultTail = ( resultHead - FPR_PiDiv2 ) + y;
192
193 FPR_c7 = c7;
194 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 );
195
196 FPR_c5 = c5; FPR_c3 = c3;
197 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 );
198
199 temp1 = __FMADD( xSquared, temp1, temp2 );
200
201 temp1 = __FNMSUB( xThird, temp1, FPR_PiDiv2Tail ); /* correction for �/2 */
202
203 if ( x > FPR_z ) /* adjust sign of result */
204 result = ( ( ( temp1 - yTail ) - resultTail ) + resultHead );
205 else
206 result = ( ( ( yTail - temp1 ) + resultTail ) - resultHead );
207
208 FESETENVD( FPR_env ); /* restore the environment */
209 __PROG_INEXACT( FPR_PiDiv2 );
210
211 return result;
212 }
213
214/*******************************************************************************
215* 1 <= |x| <= 16 use table-driven approximation for arctg(y = 1/|x|). *
216*******************************************************************************/
217
218 pT = &(tablePointer[reducedX.i.lo]); FPR_t = __FNMSUB( fabsOfx, y, FPR_one );
219
220 FPR_c13 = pT->p; FPR_c11 = pT->f5;
221 yTail = __FMUL( y, FPR_t ); z = y - FPR_c13 + yTail; /* x delta */
222
223 FPR_c9 = pT->f4; FPR_c7 = pT->f3;
224 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP;
225
226 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP;
227 FPR_c5 = pT->f2; FPR_c3 = pT->f1;
228
229 FPR_t = pT->f0;
230 temp1 = __FMADD( temp1, z, FPR_c5 );
231 temp1 = __FMADD( temp1, z, FPR_c3 );
232
233 resultHead = FPR_PiDiv2 - FPR_t; /* zeroth order term */
234
235 if ( x > FPR_z ) /* adjust for sign of x */
236 result = ( __FNMSUB( z, temp1, FPR_PiDiv2Tail ) + resultHead );
237 else
238 result = ( __FMSUB( z, temp1, FPR_PiDiv2Tail ) - resultHead );
239
240 FESETENVD( FPR_env ); /* restore the environment */
241 __PROG_INEXACT( FPR_PiDiv2 );
242
243 return result;
244 }
245
246/*******************************************************************************
247* |x| is huge, or INF. *
248* For x = INF, then the expression �/2 � �tail would return the round up *
249* or down version of atan if rounding is taken into consideration. *
250* otherwise, just ��/2 would be delivered. *
251*******************************************************************************/
252 else
253 { /* |x| is huge, or INF */
254 FESETENVD( FPR_env ); /* restore the environment */
255 FPR_t = Pi.d;
256 if ( x > FPR_z ) /* positive x returns �/2 */
257 result = __FMADD( FPR_t, FPR_half, FPR_PiDiv2Tail );
258 else /* negative x returns -�/2 */
259 result = ( - FPR_t * FPR_half - FPR_PiDiv2Tail );
260
261 __PROG_INEXACT( FPR_PiDiv2 );
262 return result;
263 }
264 }
265
266
267/*******************************************************************************
268* |x| <= 1.0. *
269*******************************************************************************/
270
271 FPR_t = .0625;
272 reducedX.d = __FMADD( FPR_256, fabsOfx, FPR_kRint );
273 xSquared = __FMUL( x, x );
274
275/*******************************************************************************
276* 1.0/16 < |x| < 1 use table-driven approximation for arctg(x). *
277*******************************************************************************/
278 if ( fabsOfx > FPR_t )
279 {
280 pT = &(tablePointer[reducedX.i.lo]); __NOOP;
281
282 FPR_c13 = pT->p; FPR_c11 = pT->f5;
283 z = fabsOfx - FPR_c13; __NOOP; /* x delta */
284
285 FPR_c9 = pT->f4; FPR_c7 = pT->f3;
286 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP;
287
288 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP;
289 FPR_c5 = pT->f2; FPR_c3 = pT->f1;
290
291 FPR_t = pT->f0;
292 temp1 = __FMADD( temp1, z, FPR_c5 );
293 temp1 = __FMADD( temp1, z, FPR_c3 );
294
295 if ( x > FPR_z ) /* adjust for sign of x */
296 result = __FMADD( z, temp1, FPR_t );
297 else
298 result = - z * temp1 - FPR_t;
299
300 FESETENVD( FPR_env ); /* restore the environment */
301 __PROG_INEXACT( FPR_PiDiv2 );
302
303 return result;
304 }
305
306/*******************************************************************************
307* |x| <= 1.0/16 fast, simple polynomial approximation. *
308*******************************************************************************/
309
310 if (unlikely( fabsOfx == FPR_z ))
311 {
312 FESETENVD( FPR_env ); /* restore the environment */
313 return x; /* +0 or -0 preserved */
314 }
315
316 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, x );
317 FPR_c13 = c13; FPR_c11 = c11;
318
319 FPR_c9 = c9; FPR_c7 = c7;
320 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 );
321
322 FPR_c5 = c5; FPR_c3 = c3;
323 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 );
324
325 temp1 = __FMADD( xSquared, temp1, temp2 );
326 FPR_kMinNormal = kMinNormal;
327
328 result = __FMADD( xThird, temp1, x );
329
330 FESETENVD( FPR_env ); /* restore the environment */
331 if (likely( __FABS( result ) >= FPR_kMinNormal ))
332 {
333 __PROG_INEXACT( FPR_PiDiv2 );
334 return result;
335 }
336 else
337 {
338 __PROG_UF_INEXACT( FPR_kMinNormal );
339 return result;
340 }
341}
342
343//
344// For atan2(). Mean and lean.
345//
346double atanCore ( double fabsOfx ) // absolute value is passed by caller!
347{
348 hexdouble reducedX;
349 register double xSquared, xFourth, xThird, temp1, temp2, result, z;
350 struct tableEntry *tablePointer;
351
352 register double FPR_z, FPR_256, FPR_kRint;
353 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_s, FPR_t;
354 register struct tableEntry *pT;
355
356
357 FPR_256 = 256.0; FPR_kRint = Rint;
358
359 FPR_r = __FMADD( FPR_256, fabsOfx, FPR_kRint );
360 reducedX.d = FPR_r;
361
362 FPR_s = .0625; FPR_z = 0.0;
363
364/*******************************************************************************
365* initialization of table pointer. *
366*******************************************************************************/
367
368 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) );
369 xSquared = __FMUL( fabsOfx, fabsOfx ); __ENSURE( FPR_z, FPR_z, FPR_s );
370
371/*******************************************************************************
372* |x| <= 1.0. *
373*******************************************************************************/
374
375/*******************************************************************************
376* 1.0/16 < |x| < 1 use table-driven approximation for arctg(x). *
377*******************************************************************************/
378
379 if ( fabsOfx > FPR_s )
380 {
381 pT = &(tablePointer[reducedX.i.lo]);
382
383 FPR_c13 = pT->p; FPR_c11 = pT->f5;
384 z = fabsOfx - FPR_c13; __NOOP; /* x delta */
385
386 FPR_c9 = pT->f4; FPR_c7 = pT->f3;
387 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP;
388
389 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP;
390 FPR_c5 = pT->f2; FPR_c3 = pT->f1;
391
392 FPR_t = pT->f0;
393 temp1 = __FMADD( temp1, z, FPR_c5 );
394 temp1 = __FMADD( temp1, z, FPR_c3 );
395
396 result = __FMADD( z, temp1, FPR_t );
397
398 return result;
399 }
400
401/*******************************************************************************
402* |x| <= 1.0/16 fast, simple polynomial approximation. *
403*******************************************************************************/
404
405 if (unlikely( fabsOfx == FPR_z ))
406 return fabsOfx;
407
408 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, fabsOfx );
409 FPR_c13 = c13; FPR_c11 = c11;
410
411 FPR_c9 = c9; FPR_c7 = c7;
412 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 );
413
414 FPR_c5 = c5; FPR_c3 = c3;
415 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 );
416
417 temp1 = __FMADD( xSquared, temp1, temp2 );
418
419 result = __FMADD( xThird, temp1, fabsOfx );
420
421 return result;
422}
423
424double atanCoreInv ( double fabsOfx ) // absolute value is passed by caller!
425{
426 hexdouble reducedX;
427 register double xSquared, xFourth, xThird, temp1, temp2, result, y;
428 struct tableEntry *tablePointer;
429
430 register double FPR_z, FPR_half, FPR_one, FPR_256;
431 register double FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRint;
432 register double FPR_c13, FPR_c11, FPR_c9, FPR_c7, FPR_c5, FPR_c3, FPR_r, FPR_s, FPR_t;
433 register struct tableEntry *pT;
434
435 FPR_s = Big.d; y = 1.0 / fabsOfx; // slot 1 hence fpu1
436
437 FPR_z = 0.0; FPR_half = 0.5;
438
439 FPR_one = 1.0; FPR_256 = 256.0;
440
441 FPR_PiDiv2 = PiOver2; FPR_PiDiv2Tail = PiOver2Tail;
442
443 __ENSURE( FPR_half, FPR_one, FPR_PiDiv2 ); //slot 0 hence fpu0
444 FPR_kRint = Rint;
445 __ENSURE( FPR_z, FPR_kRint, FPR_256 ); //slot 3 hence fpu0
446
447
448/*******************************************************************************
449* initialization of table pointer. *
450*******************************************************************************/
451
452 tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) );
453
454/*******************************************************************************
455* |x| > 1.0 or NaN. *
456*******************************************************************************/
457 {
458
459/*******************************************************************************
460* |x| is nontrivial. *
461*******************************************************************************/
462
463 if (likely( fabsOfx < FPR_s ))
464 {
465 register double yTail, z, resultHead, resultTail;
466
467 FPR_r = __FMADD( FPR_256, y, FPR_kRint );
468 FPR_s = 16.0;
469 xSquared = __FMUL( y, y );
470
471 __NOOP;
472 __NOOP;
473 __NOOP;
474 reducedX.d = FPR_r;
475
476/*******************************************************************************
477* |x| > 16.0 *
478*******************************************************************************/
479 if ( fabsOfx > FPR_s )
480 {
481 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( xSquared, y );
482 FPR_c13 = c13;
483
484 FPR_t = __FNMSUB( fabsOfx, y, FPR_one ); resultHead = FPR_PiDiv2 - y;
485 FPR_c11 = c11;
486
487 FPR_c9 = c9;
488 yTail = __FMUL( y, FPR_t ); resultTail = ( resultHead - FPR_PiDiv2 ) + y;
489
490 FPR_c7 = c7;
491 temp1 = __FMADD( xFourth, FPR_c13, FPR_c9); temp2 = __FMADD( xFourth, FPR_c11, FPR_c7 );
492
493 FPR_c5 = c5; FPR_c3 = c3;
494 temp1 = __FMADD( xFourth, temp1, FPR_c5 ); temp2 = __FMADD( xFourth, temp2, FPR_c3 );
495
496 temp1 = __FMADD( xSquared, temp1, temp2 );
497
498 temp1 = __FNMSUB( xThird, temp1, FPR_PiDiv2Tail ); /* correction for �/2 */
499
500 result = ( ( ( temp1 - yTail ) - resultTail ) + resultHead );
501
502 return result;
503 }
504
505/*******************************************************************************
506* 1 <= |x| <= 16 use table-driven approximation for arctg(y = 1/|x|). *
507*******************************************************************************/
508
509 pT = &(tablePointer[reducedX.i.lo]); FPR_t = __FNMSUB( fabsOfx, y, FPR_one );
510
511 FPR_c13 = pT->p; FPR_c11 = pT->f5;
512 yTail = __FMUL( y, FPR_t ); z = y - FPR_c13 + yTail; /* x delta */
513
514 FPR_c9 = pT->f4; FPR_c7 = pT->f3;
515 temp1 = __FMADD( FPR_c11, z, FPR_c9 ); __NOOP;
516
517 temp1 = __FMADD( temp1, z, FPR_c7 ); __NOOP;
518 FPR_c5 = pT->f2; FPR_c3 = pT->f1;
519
520 FPR_t = pT->f0;
521 temp1 = __FMADD( temp1, z, FPR_c5 );
522 temp1 = __FMADD( temp1, z, FPR_c3 );
523
524 resultHead = FPR_PiDiv2 - FPR_t; /* zeroth order term */
525
526 result = ( __FNMSUB( z, temp1, FPR_PiDiv2Tail ) + resultHead );
527
528 return result;
529 }
530
531/*******************************************************************************
532* |x| is huge, INF, or NaN. *
533* For x = INF, then the expression �/2 � �tail would return the round up *
534* or down version of atan if rounding is taken into consideration. *
535* otherwise, just ��/2 would be delivered. *
536*******************************************************************************/
537 else
538 { /* |x| is huge, INF, or NaN */
539 if ( fabsOfx != fabsOfx ) /* NaN argument is returned */
540 result = fabsOfx;
541 else
542 {
543 /* positive x returns �/2 */
544 FPR_t = Pi.d;
545 result = __FMADD( FPR_t, FPR_half, FPR_PiDiv2Tail );
546 }
547 return result;
548 }
549 }
550}