this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* tg.c *
25* *
26* Double precision Tangent *
27* *
28* Copyright: � 1993-1997 by Apple Computer, Inc., all rights reserved *
29* *
30* Written and ported by A. Sazegari, started on June 1997. *
31* Modified by Robert Murley, 2001 *
32* *
33* A MathLib v4 file. *
34* *
35* Based on the trigonometric functions from IBM/Taligent. *
36* *
37* Modification history: *
38* November 06 2001: commented out warning about Intel architectures. *
39* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. *
40* replaced DblInHex typedef with hexdouble. *
41* September 07 2001: added #ifdef __ppc__. *
42* September 19 2001: added macros to detect PowerPC and correct compiler. *
43* September 19 2001: added <CoreServices/CoreServices.h> to get to <fp.h> *
44* and <fenv.h>, removed denormal comments. *
45* September 25 2001: removed more denormal comments. *
46* October 08 2001: removed <CoreServices/CoreServices.h>. *
47* changed compiler errors to warnings. *
48* *
49* W A R N I N G: *
50* This routine requires a 64-bit double precision IEEE-754 model. *
51* They are written for PowerPC only and are expecting the compiler *
52* to generate the correct sequence of multiply-add fused instructions. *
53* *
54* This routine is not intended for 32-bit Intel architectures. *
55* *
56* 08 Oct 01 ram changed compiler errors to warnings. *
57* *
58* GCC compiler options: *
59* optimization level 3 (-O3) *
60* -fschedule-insns -finline-functions -funroll-all-loops *
61* *
62*******************************************************************************/
63
64#include "math.h"
65#include "fenv_private.h"
66#include "fp_private.h"
67
68#define TRIG_NAN "33"
69
70struct tableEntry /* tanatantable entry structure */
71 {
72 double p;
73 double f5;
74 double f4;
75 double f3;
76 double f2;
77 double f1;
78 double f0;
79 };
80
81extern const uint32_t tanatantable[];
82
83static const double kPiBy2 = 1.570796326794896619231322; // 0x1.921fb54442d18p0
84static const double kPiBy2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54
85static const double k2ByPi = 0.636619772367581382; // 0x1.45f306dc9c883p-1
86static const double kRint = 6.755399441055744e15; // 0x1.8p52
87static const double kRintBig = 2.7021597764222976e16; // 0x1.8p54
88static const double kPiScale42 = 1.38168706094305449e13; // 0x1.921fb54442d17p43
89static const double kPiScale53 = 2.829695100811376e16; // 0x1.921fb54442d18p54
90static const hexdouble infinity = HEXDOUBLE(0x7ff00000, 0x00000000);
91
92/*******************************************************************************
93********************************************************************************
94* T A N *
95********************************************************************************
96*******************************************************************************/
97
98static const double ts11 = 8.898406739539066157565e-3,
99 ts9 = 2.186936821731655951177e-2,
100 ts7 = 5.396825413618260185395e-2,
101 ts5 = 0.1333333333332513155016,
102 ts3 = 0.3333333333333333333333;
103static const double tt7 = 0.05396875452473400572649,
104 tt5 = 0.1333333333304691192925,
105 tt3 = 0.3333333333333333357614;
106static const double k2ToM1023 = 0x1.0p-1023; // 1.112536929253600692e-308
107static const uint32_t indexTable[] =
108 {
109 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
110 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
111 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49,
112 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
113 61, 62, 63, 64, 65, 66, 68, 69, 70, 71, 72,
114 73, 74, 75, 76, 77, 78, 79, 81, 82, 83, 84,
115 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96,
116 97, 98, 100, 101, 102, 103, 104, 105, 107, 108, 109,
117 110, 111, 113, 114, 115, 116, 117, 119, 120, 121, 122,
118 123, 125, 126, 127, 128, 130, 131, 132, 133, 135, 136,
119 137, 139, 140, 141, 142, 144, 145, 146, 148, 149, 150,
120 152, 153, 154, 156, 157, 159, 160, 161, 163, 164, 166,
121 167, 168, 170, 171, 173, 174, 176, 177, 179, 180, 182,
122 183, 185, 186, 188, 189, 191, 192, 194, 196, 197, 199,
123 200, 202, 204, 205, 207, 209, 210, 212, 214, 215, 217,
124 219, 220, 222, 224, 226, 228, 229, 231, 233, 235, 237,
125 238, 240, 242, 244, 246, 248, 250, 252, 254, 256, 256
126 };
127
128static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308;
129static const double kPiDiv4 = 0.7853980064392090732;
130
131double tan ( double x )
132{
133 register double absOfX, intquo ,arg, argtail, aSquared, aThird, aFourth,
134 temp1, temp2, s, u, v, w, y, result;
135 int tableIndex;
136 uint32_t iz;
137 hexdouble zD, aD, OldEnvironment;
138 struct tableEntry *tablePointer;
139
140 register double FPR_env, FPR_z, FPR_one, FPR_PiDiv4, FPR_256, FPR_piScale;
141 register double FPR_t, FPR_u, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint;
142 register uint32_t GPR_f;
143 register struct tableEntry *pT;
144
145 FEGETENVD( FPR_env ); // save env, set default
146
147 FPR_z = 0.0; tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) );
148 absOfX = __FABS( x );
149 FPR_PiDiv4 = kPiDiv4; FPR_256 = 256.0;
150 FPR_kRint = kRint; FPR_one = 1.0;
151 FPR_PiDiv2 = kPiBy2;
152
153 __ENSURE( FPR_PiDiv2, FPR_PiDiv4, FPR_256);
154 __ENSURE( FPR_z, FPR_kRint, FPR_one);
155
156 FESETENVD( FPR_z );
157
158/*******************************************************************************
159* |x| < 0.7853980064392090732 *
160*******************************************************************************/
161
162 if ( absOfX < FPR_PiDiv4 )
163 {
164
165 FPR_t = __FMADD( FPR_256, absOfX, FPR_kRint );
166 aD.d = FPR_t;
167
168 if (unlikely( absOfX == FPR_z ))
169 {
170 FESETENVD( FPR_env ); // restore caller's mode
171 return x; // +0 -0 preserved
172 }
173
174
175/*******************************************************************************
176* |x| < 0.0625 *
177*******************************************************************************/
178
179 if ( aD.i.lo < 16u ) // No Store/Load hazard thanks to the imediately preceding branch
180 {
181 register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11, FPR_Min;
182
183 aSquared = __FMUL( absOfX, absOfX ); FPR_Min = kMinNormal;
184 __NOOP;
185
186 FPR_ts11 = ts11; FPR_ts9 = ts9;
187
188 FPR_ts7 = ts7; FPR_ts5 = ts5;
189
190 FPR_ts3 = ts3; aThird = __FMUL( absOfX, aSquared );
191
192 temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 );
193
194 temp1 = __FMADD( temp1, aSquared, FPR_ts7 );
195
196 temp1 = __FMADD( temp1, aSquared, FPR_ts5 );
197
198 temp1 = __FMADD( temp1, aSquared, FPR_ts3 );
199
200 if ( x > FPR_z )
201 result = __FMADD( temp1, aThird, absOfX );
202 else
203 {
204 aThird = -aThird;
205 result = __FMSUB( temp1, aThird, absOfX );
206 }
207
208 FESETENVD( FPR_env ); // restore caller's mode
209
210 if ( __FABS ( result ) < FPR_Min )
211 __PROG_UF_INEXACT( FPR_Min );
212 else
213 __PROG_INEXACT( FPR_PiDiv2 );
214
215 return result;
216 }
217
218/*******************************************************************************
219* .0625 <= x < .7853980064392090732. *
220*******************************************************************************/
221 else
222 {
223 register double FPR_tt3, FPR_tt5, FPR_tt7, FPR_sqr, FPR_f0;
224
225 tableIndex = indexTable[aD.i.lo - 16];
226 pT = &(tablePointer[tableIndex]);
227 FPR_f0 = pT->f0;
228
229 w = absOfX - FPR_f0; // w = deltax
230 y = pT->p; // y = Tan from table
231 FPR_tt3 = tt3;
232
233 FPR_tt7 = tt7; FPR_tt5 = tt5;
234
235 FPR_sqr = __FMUL( w, w ); // calculate delta Tan
236
237 temp1 = __FMADD( FPR_tt7, FPR_sqr, FPR_tt5 );
238
239 temp1 = __FMADD( temp1, FPR_sqr, FPR_tt3 );
240
241 temp1 = __FMADD( temp1, __FMUL( FPR_sqr, w ), w );
242
243 v = __FMUL(y, temp1 );
244
245 aSquared = __FMUL( v, v ); aThird = FPR_one + v;
246
247 aFourth = __FMUL( aSquared,aSquared );w = __FMADD( aThird, aSquared, aThird );
248
249 aThird = __FMADD( w, aFourth, w ); temp1 = __FMADD( v, y, temp1 );
250
251 if ( x > FPR_z ) // fix final sign
252 result = __FMADD( temp1, aThird, y );
253 else
254 {
255 aThird = -aThird;
256 result = __FMSUB( temp1, aThird, y );
257 }
258
259 FESETENVD( FPR_env ); // restore caller's mode
260 __PROG_INEXACT( FPR_PiDiv2 );
261
262 return result;
263 }
264 }
265
266 if (unlikely( x != x )) // x is a NaN
267 {
268 FESETENVD( FPR_env ); // restore caller's mode
269 return ( x );
270 }
271
272/*******************************************************************************
273* |x| > �/4 and perhaps |x| > kPiScale42. *
274*******************************************************************************/
275
276 FPR_piScale = kPiScale42; FPR_PiDiv2Tail = kPiBy2Tail;
277 FPR_2divPi = k2ByPi; FPR_pi53 = kPiScale53;
278 FPR_inf = infinity.d; FPR_kRintBig = kRintBig;
279 __ENSURE( FPR_2divPi, FPR_PiDiv2, FPR_piScale );
280
281 if (unlikely( absOfX > FPR_piScale ))
282 { // |x| is huge or infinite
283 if ( absOfX == FPR_inf )
284 { // infinite case is invalid
285 OldEnvironment.d = FPR_env;
286 __NOOP;
287 __NOOP;
288 __NOOP;
289
290 OldEnvironment.i.lo |= SET_INVALID;
291 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode
292 return ( nan ( TRIG_NAN ) ); // return NaN
293 }
294
295 while ( absOfX > FPR_pi53 )
296 { // loop to reduce x below
297 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude
298 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
299 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
300 absOfX = __FABS ( x ) ;
301 }
302
303/*******************************************************************************
304* final reduction of x to magnitude between 0 and 2*�. *
305*******************************************************************************/
306 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig );
307 intquo = FPR_t - FPR_kRintBig;
308 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
309 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
310 absOfX = __FABS( x );
311 }
312
313/*******************************************************************************
314* �/4 < x < �*2^42 *
315*******************************************************************************/
316
317/*******************************************************************************
318* Further argument reduction is probably necessary. A double-double *
319* reduced argument is determined ( arg|argtail ). *
320*******************************************************************************/
321 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint );
322 intquo = FPR_t - FPR_kRint;
323 zD.d = FPR_t;
324
325 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
326 arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
327
328 absOfX = __FABS( arg ); FPR_t -= arg;
329 GPR_f = zD.i.lo;
330
331 FPR_u = __FMADD( FPR_256, absOfX, FPR_kRint );
332 aD.d = FPR_u;
333 __NOOP;
334 __NOOP;
335
336 argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
337
338/*******************************************************************************
339* |arg| < .0625. *
340*******************************************************************************/
341
342 if ( aD.i.lo < 16u ) // No Store/Load hazard, NOOPs above push this issue to next cycle.
343 {
344 register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11;
345
346 aSquared = __FMUL( absOfX, absOfX );
347 u = absOfX;
348 v = argtail;
349
350 FPR_ts11 = ts11; FPR_ts9 = ts9;
351
352 FPR_ts7 = ts7; FPR_ts5 = ts5;
353
354 FPR_ts3 = ts3; iz = GPR_f & 1u; // quo modulo 2
355
356 temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 );
357
358 temp1 = __FMADD( temp1, aSquared, FPR_ts7 );
359
360 temp1 = __FMADD( temp1, aSquared, FPR_ts5 );
361
362 temp1 = __FMADD( temp1, aSquared, FPR_ts3 );
363
364 aThird = __FMUL( aSquared, u );
365
366/*******************************************************************************
367* tangent approximation starts here. *
368*******************************************************************************/
369
370 if ( iz == 0 )
371 {
372 if ( arg > FPR_z )
373 { // positive arg
374 w = __FMADD( temp1, aThird, v );
375 result = ( w + u );
376 }
377 else
378 { // negative arg
379 w = __FNMSUB( temp1, aThird, v );
380 result = ( w - u );
381 }
382
383 FESETENVD( FPR_env ); // restore caller's mode
384 __PROG_INEXACT( FPR_PiDiv2 );
385
386 return result;
387 }
388
389/*******************************************************************************
390* cotangent approximation starts here. *
391*******************************************************************************/
392 else
393 {
394 register double FPR_k2ToM1023;
395
396 FPR_k2ToM1023 = k2ToM1023;
397
398 if ( arg > FPR_z )
399 { // positive argument
400 s = __FMADD( temp1, aThird, v );
401 temp2 = s + u;
402 }
403 else
404 { // negative argument
405 s = __FMSUB( temp1, aThird, v );
406 temp2 = s + u;
407 }
408
409 aFourth = ( u - temp2 ) + s;
410
411 if ( __FABS ( temp2 ) < k2ToM1023 )
412 { // huge result
413 if ( arg > FPR_z )
414 result = ( FPR_one / temp2 );
415 else
416 result = ( FPR_one / ( -temp2 ) );
417
418 FESETENVD( FPR_env ); // restore caller's mode
419 __PROG_INEXACT( FPR_PiDiv2 );
420
421 return result;
422 }
423
424 u = FPR_one / temp2;
425 v = __FNMSUB( u, temp2, FPR_one );
426 temp1 = __FMSUB( aFourth, u, v );
427
428 if ( arg > FPR_z )
429 result = __FMSUB( temp1, u, u );
430 else
431 result = __FNMSUB( temp1, u, u );
432
433 FESETENVD( FPR_env ); // restore caller's mode
434 __PROG_INEXACT( FPR_PiDiv2 );
435
436 return result;
437 }
438 }
439
440/*******************************************************************************
441* The following code covers the case where the reduced argument has *
442* magnitude greater than 0.0625 but less than �/4. *
443*******************************************************************************/
444
445 tableIndex = indexTable [ aD.i.lo - 16 ];
446
447 if ( arg > FPR_z )
448 { // positive argument
449 w = absOfX - tablePointer [tableIndex].f0 + argtail; // argument delta
450 aSquared = __FMUL( w, w );
451 v = absOfX - tablePointer [tableIndex].f0 - w + argtail; // tail of argument delta
452 }
453 else
454 { // negative argument
455 w = absOfX - tablePointer [tableIndex].f0 - argtail; // argument delta
456 aSquared = __FMUL( w, w );
457 v = absOfX - tablePointer [tableIndex].f0 - w - argtail; // tail of argument delta
458 }
459 {
460 register double FPR_tt3, FPR_tt5, FPR_tt7;
461
462 FPR_t = v + w; FPR_u = __FMUL( aSquared, w );
463 y = tablePointer[tableIndex].p; FPR_tt3 = tt3;
464
465 FPR_tt7 = tt7; FPR_tt5 = tt5;
466
467 temp1 = __FMADD( FPR_tt7, aSquared, FPR_tt5 );
468
469 temp1 = __FMADD( temp1, aSquared, FPR_tt3 );
470
471 temp1 = __FMADD( temp1, FPR_u, FPR_t );
472
473 v = __FMUL( y, temp1 ); // Tan( delta )
474 }
475
476/*******************************************************************************
477* polynomial approx of 1/(1-v) *
478*******************************************************************************/
479
480 aSquared = __FMUL( v, v ); iz = GPR_f & 1u; // quo modulo 2
481 aThird = FPR_one + v;
482 aFourth = __FMUL( aSquared, aSquared );
483 w = __FMADD( aThird, aSquared, aThird );
484 aThird = __FMADD( w, aFourth, w ); // aThird = 1/(1-v)
485 s = __FMUL( __FMADD( y, y, FPR_one ), aThird );
486
487 if ( iz == 0 )
488 { // tangent approximation
489 if ( arg > FPR_z )
490 result = __FMADD( temp1, s, y );
491 else
492 {
493 y = -y;
494 result = __FNMSUB( temp1, s, y );
495 }
496
497 FESETENVD( FPR_env ); // restore caller's mode
498 __PROG_INEXACT( FPR_PiDiv2 );
499
500 return result;
501 }
502 else
503 { // cotangent approx
504 w = __FMADD( temp1, s, y );
505 aFourth = __FMADD( temp1, s, ( y - w ) );
506 u = FPR_one / w;
507 v = __FNMSUB( u, w, FPR_one);
508 w = __FMSUB( u, aFourth, v );
509
510 if ( arg > FPR_z )
511 result = __FMSUB( u, w, u );
512 else
513 result = __FNMSUB( u, w, u );
514
515 FESETENVD( FPR_env ); // restore caller's mode
516 __PROG_INEXACT( FPR_PiDiv2 );
517
518 return result;
519 }
520}