this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* FastSinCos.c *
25* *
26* Double precision Sine and Cosine. *
27* *
28* Copyright � 1997-2001 by Apple Computer, Inc. All rights reserved. *
29* *
30* Written by A. Sazegari, started on June 1997. *
31* Modified and ported by Robert A. Murley (ram) for Mac OS X. *
32* *
33* A MathLib v4 file. *
34* *
35* Based on the trigonometric functions from IBM/Taligent. *
36* *
37* November 06 2001: commented out warning about Intel architectures. *
38* July 20 2001: replaced __setflm with FEGETENVD/FESETENVD. *
39* replaced DblInHex typedef with hexdouble. *
40* September 07 2001: added #ifdef __ppc__. *
41* September 09 2001: added more comments. *
42* September 10 2001: added macros to detect PowerPC and correct compiler. *
43* September 18 2001: added <CoreServices/CoreServices.h> to get to <fp.h> *
44* and <fenv.h>, removed denormal comments. *
45* October 08 2001: removed <CoreServices/CoreServices.h>. *
46* changed compiler errors to warnings. *
47* *
48* These routines have a long double (107-bits) argument reduction to *
49* better match their long double counterpart. *
50* *
51* W A R N I N G: *
52* These routines require a 64-bit double precision IEEE-754 model. *
53* They are written for PowerPC only and are expecting the compiler *
54* to generate the correct sequence of multiply-add fused instructions. *
55* *
56* These routines are not intended for 32-bit Intel architectures. *
57* *
58* A version of gcc higher than 932 is required. *
59* *
60* GCC compiler options: *
61* optimization level 3 (-O3) *
62* -fschedule-insns -finline-functions -funroll-all-loops *
63* *
64*******************************************************************************/
65
66#include "fenv_private.h"
67#include "fp_private.h"
68#include "math.h"
69
70#define TRIG_NAN "33"
71
72/*******************************************************************************
73* Floating-point constants. *
74*******************************************************************************/
75
76static const double kPiScale42 = 1.38168706094305449e13; // 0x1.921fb54442d17p43
77static const double kPiScale53 = 2.829695100811376e16; // 0x1.921fb54442d18p54
78static const double piOver4 = 0.785398163397448390; // 0x1.921fb54442d19p-1
79static const double piOver2 = 1.570796326794896619231322; // 0x1.921fb54442d18p0
80static const double piOver2Tail = 6.1232339957367660e-17; // 0x1.1a62633145c07p-54
81static const double twoOverPi = 0.636619772367581382; // 0x1.45f306dc9c883p-1
82//static const double k2ToM26 = 1.490116119384765625e-8; // 0x1.0p-26;
83static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308;
84static const double kRintBig = 2.7021597764222976e16; // 0x1.8p54
85static const double kRint = 6.755399441055744e15; // 0x1.8p52
86static const hexdouble infinity = HEXDOUBLE(0x7ff00000, 0x00000000);
87
88/*******************************************************************************
89* Approximation coefficients. *
90*******************************************************************************/
91
92static const double s13 = 1.5868926979889205164e-10; // 1.0/13!
93static const double s11 = -2.5050225177523807003e-8; // -1.0/11!
94static const double s9 = 2.7557309793219876880e-6; // 1.0/9!
95static const double s7 = -1.9841269816180999116e-4; // -1.0/7!
96static const double s5 = 8.3333333332992771264e-3; // 1.0/5!
97static const double s3 = -0.16666666666666463126; // 1.0/3!
98static const double c14 = -1.138218794258068723867e-11; // -1.0/14!
99static const double c12 = 2.087614008917893178252e-9; // 1.0/12!
100static const double c10 = -2.755731724204127572108e-7; // -1.0/10!
101static const double c8 = 2.480158729870839541888e-5; // 1.0/8!
102static const double c6 = -1.388888888888735934799e-3; // -1.0/6!
103static const double c4 = 4.166666666666666534980e-2; // 1.0/4!
104static const double c2 = -.5; // -1.0/2!
105
106double sin ( double x )
107{
108 register double absOfX, intquo, arg, argtail, xSquared, xThird, xFourth, temp1, temp2, result;
109 register uint32_t ltable;
110 hexdouble z, OldEnvironment;
111
112 register double FPR_env, FPR_z, FPR_Min, FPR_pi4, FPR_piScale;
113 register double FPR_t, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint;
114 register uint32_t GPR_f;
115
116 FEGETENVD( FPR_env ); // save env, set default
117 FPR_z = 0.0;
118
119 FPR_pi4 = piOver4;
120 absOfX = __FABS ( x ); __ENSURE( FPR_z, FPR_z, FPR_pi4 ); // ensure z and pi4 are in registers
121
122 FESETENVD( FPR_z );
123
124 if ( absOfX < FPR_pi4 ) // |x| < �/4
125 {
126 if (likely( absOfX != FPR_z ))
127 {
128 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13;
129
130/*******************************************************************************
131* at this point, x is normal with magnitude between 0 and �/4. *
132*******************************************************************************/
133 // sin polynomial approximation
134
135 xSquared = __FMUL( x, x );
136 FPR_Min = kMinNormal;
137 __ENSURE( FPR_z, FPR_z, FPR_Min ); // ensure FPR_Min is loaded into register
138
139 FPR_s9 = s9; FPR_s13 = s13;
140
141 FPR_s7 = s7; FPR_s11 = s11;
142
143 xFourth = __FMUL( xSquared, xSquared ); xThird = __FMUL( x, xSquared );
144 FPR_s5 = s5;
145
146 FPR_s3 = s3;
147 temp1 = __FMADD( s13, xFourth, s9 ); temp2 = __FMADD( s11, xFourth, s7 );
148
149 temp1 = __FMADD( temp1, xFourth, s5 ); temp2 = __FMADD( temp2, xFourth, s3 );
150
151 temp1 = __FMADD( temp1, xSquared, temp2 ); result = __FMADD( temp1, xThird, x );
152
153 FESETENVD( FPR_env ); // restore caller's mode
154
155 if (unlikely( __FABS( result ) < FPR_Min ))
156 __PROG_UF_INEXACT( FPR_Min );
157 else
158 __PROG_INEXACT( FPR_pi4 );
159
160 return ( result );
161 }
162 else
163 {
164 FESETENVD( FPR_env ); // restore caller's mode
165 return x; // +0 -0 preserved
166 }
167 }
168
169
170 if (unlikely( x != x )) // x is a NaN
171 {
172 FESETENVD( FPR_env ); // restore caller's mode
173 return ( x );
174 }
175
176/*******************************************************************************
177* x has magnitude > �/4. *
178*******************************************************************************/
179 FPR_piScale = kPiScale42;
180 FPR_piScale = __FMADD( FPR_z, FPR_z, FPR_piScale );
181 FPR_inf = infinity.d; FPR_pi53 = kPiScale53;
182 FPR_2divPi = twoOverPi; FPR_PiDiv2 = piOver2;
183 FPR_PiDiv2Tail = piOver2Tail; FPR_kRintBig = kRintBig;
184 FPR_kRint = kRint;
185
186 if (unlikely( absOfX > FPR_piScale ))
187 {
188/*******************************************************************************
189* |x| is huge or infinite. *
190*******************************************************************************/
191 if ( absOfX == FPR_inf )
192 { // infinite case is invalid
193 OldEnvironment.d = FPR_env;
194 __NOOP;
195 __NOOP;
196 __NOOP;
197 OldEnvironment.i.lo |= SET_INVALID;
198 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode
199 return ( nan ( TRIG_NAN ) ); // return NaN
200 }
201
202 while ( absOfX > FPR_pi53 )
203 { // loop to reduce x below
204 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude
205 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
206 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
207 absOfX = __FABS ( x ) ;
208 }
209
210/*******************************************************************************
211* final reduction of x to magnitude between 0 and 2*�. *
212*******************************************************************************/
213 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig );
214 intquo = FPR_t - FPR_kRintBig;
215 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
216 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
217 absOfX = __FABS( x );
218 }
219
220/*******************************************************************************
221* |x| < pi*2^42: further reduction is probably necessary. A double-double *
222* reduced argument is determined ( arg:argtail ) . It is possible that x *
223* has been reduced below pi/4 in magnitude, but x is definitely nonzero *
224* and safely in the normal range. *
225*******************************************************************************/
226 // find integer quotient of x/(�/2)
227 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint ); intquo = FPR_t - FPR_kRint;
228 z.d = FPR_t;
229
230 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x ); arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
231 xSquared = __FMUL( arg, arg ); FPR_t -= arg;
232
233 GPR_f = z.i.lo;
234 xFourth = __FMUL( xSquared, xSquared ); argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
235
236/*******************************************************************************
237* multiple of �/2 ( mod 4) determines approx used and sign of result. *
238*******************************************************************************/
239
240 ltable = GPR_f & FE_ALL_RND;
241
242 if ( ltable & 0x1u )
243 { // argument closest to ��/2
244/*******************************************************************************
245* use cosine approximation. *
246*******************************************************************************/
247 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14;
248
249 FPR_c10 = c10; FPR_c14 = c14;
250 FPR_c8 = c8; FPR_c12 = c12;
251
252 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8);
253 FPR_c6 = c6;
254
255 FPR_c4 = c4;
256 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4);
257
258 FPR_c2 = c2;
259 temp1 = __FMADD( temp1, xFourth, FPR_c2 );
260
261 temp1 = __FMADD( xSquared, temp2, temp1 );
262 temp1 = __FMSUB( arg, temp1, argtail ); // second-order correction
263
264 if ( ltable < 2 ) // adjust sign of result
265 result = __FMADD( arg, temp1, 1.0 ); // positive
266 else
267 {
268 arg = - arg;
269 result =__FMSUB( arg, temp1, 1.0 ); // negative
270 }
271 FESETENVD( FPR_env ); // restore caller's mode
272 __PROG_INEXACT( FPR_PiDiv2 );
273 }
274 else
275 {
276/*******************************************************************************
277* use sine approximation. *
278*******************************************************************************/
279 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13;
280
281 FPR_s9 = s9; FPR_s13 = s13;
282 FPR_s7 = s7; FPR_s11 = s11;
283
284 temp1 = __FMADD( FPR_s13, xFourth, FPR_s9 );temp2 = __FMADD( FPR_s11, xFourth, FPR_s7 );
285 FPR_s5 = s5;
286
287 FPR_s3 = s3;
288 temp1 = __FMADD( temp1, xFourth, FPR_s5 ); temp2 = __FMADD( temp2, xFourth, FPR_s3 );
289
290 temp1 = __FMADD( temp1, xSquared, temp2 ); xThird = __FMUL( arg, xSquared );
291
292 temp1 = __FMADD( temp1, xThird, argtail ); // second-order correction
293
294 if ( ltable < 2 ) // adjust sign of final result
295 result = arg + temp1 ; // positive
296 else
297 {
298 arg = - arg;
299 result = arg - temp1; // negative
300 }
301
302 FESETENVD( FPR_env ); // restore caller's mode
303 __PROG_INEXACT( FPR_PiDiv2 );
304 }
305
306 return ( result ) ;
307}
308
309/*******************************************************************************
310* Cosine section. *
311*******************************************************************************/
312double cos ( double x )
313{
314 register double absOfX, intquo, arg, argtail, xSquared, xThird, xFourth,
315 temp1, temp2, result;
316 register uint32_t iquad;
317 hexdouble z, OldEnvironment;
318
319 register double FPR_env, FPR_z, FPR_pi4, FPR_piScale;
320 register double FPR_t, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint;
321 register uint32_t GPR_f;
322
323 FEGETENVD( FPR_env ); // save env, set default
324 FPR_z = 0.0;
325
326 FPR_pi4 = piOver4;
327 absOfX = __FABS ( x ); __ENSURE( FPR_z, FPR_z, FPR_pi4 ); // ensure z and pi4 are in registers
328
329 FESETENVD( FPR_z );
330
331 if ( absOfX < FPR_pi4 ) // |x| < �/4
332 {
333 if (likely( absOfX != FPR_z ))
334 {
335 // cos polynomial approximation
336 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14, FPR_One;
337
338 xSquared = __FMUL( x, x );
339 FPR_One = 1.0;
340 __ENSURE( FPR_z, FPR_z, FPR_One );
341
342 FPR_c10 = c10; FPR_c14 = c14;
343
344 FPR_c8 = c8; FPR_c12 = c12;
345
346 FPR_c6 = c6; FPR_c4 = c4;
347
348 xFourth = __FMUL( xSquared, xSquared );
349 __NOOP;
350 FPR_c2 = c2;
351
352 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8);
353 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4);
354
355 temp1 = __FMADD( temp1, xFourth, FPR_c2 ); temp1 = __FMADD( xSquared, temp2, temp1 );
356 result = __FMADD( xSquared, temp1, FPR_One );
357
358 FESETENVD( FPR_env ); // restore caller's mode
359 __PROG_INEXACT( FPR_pi4 );
360
361 return ( result );
362 }
363 else
364 {
365 FESETENVD( FPR_env ); // restore caller's mode
366 return 1.0;
367 }
368 }
369
370 if (unlikely( x != x )) // x is a NaN
371 {
372 FESETENVD( FPR_env ); // restore caller's mode
373 return ( x );
374 }
375
376/*******************************************************************************
377* x has magnitude > �/4. *
378*******************************************************************************/
379 FPR_piScale = kPiScale42;
380 FPR_piScale = __FMADD( FPR_z, FPR_z, FPR_piScale );
381 FPR_inf = infinity.d; FPR_pi53 = kPiScale53;
382 FPR_2divPi = twoOverPi; FPR_PiDiv2 = piOver2;
383 FPR_PiDiv2Tail = piOver2Tail; FPR_kRintBig = kRintBig;
384 FPR_kRint = kRint;
385
386 if (unlikely( absOfX > FPR_piScale ))
387
388/*******************************************************************************
389* |x| is huge or infinite. *
390*******************************************************************************/
391 {
392 if ( absOfX == infinity.d )
393 { // infinite case is invalid
394 OldEnvironment.d = FPR_env;
395 __NOOP;
396 __NOOP;
397 __NOOP;
398 OldEnvironment.i.lo |= SET_INVALID;
399 FESETENVD_GRP( OldEnvironment.d ); // restore caller's mode
400 return ( nan ( TRIG_NAN ) ); // return NaN
401 }
402
403 while ( absOfX > FPR_pi53 )
404 { // loop to reduce x below
405 intquo = __FMUL( x, FPR_2divPi ); // �*2^53 in magnitude
406 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
407 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
408 absOfX = __FABS ( x ) ;
409 }
410
411/*******************************************************************************
412* final reduction of x to magnitude between 0 and 2*�. *
413*******************************************************************************/
414 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig );
415 intquo = FPR_t - FPR_kRintBig;
416 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
417 x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
418 absOfX = __FABS( x );
419 }
420
421/*******************************************************************************
422* |x| < pi*2^42: further reduction is probably necessary. A double-double *
423* reduced argument is determined ( arg:argtail ) . It is possible that x *
424* has been reduced below pi/4 in magnitude, but x is definitely nonzero *
425* and safely in the normal range. *
426*******************************************************************************/
427
428 // find integer quotient of x/(�/2)
429 FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint );
430
431 intquo = FPR_t - FPR_kRint;
432 z.d = FPR_t;
433
434 FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
435
436 arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
437
438 xSquared = __FMUL( arg, arg ); FPR_t -= arg;
439 GPR_f = z.i.lo;
440
441 xFourth = __FMUL( xSquared, xSquared ); argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
442
443 iquad = (GPR_f + 1 ) & FE_ALL_RND; // iquad = int multiple mod 4
444
445/*******************************************************************************
446* multiple of �/2 ( mod 4) determines approx used and sign of result. *
447*******************************************************************************/
448
449 if ( iquad & 0x1u)
450 { // arg closest to 0 or �
451/*******************************************************************************
452* use cosine approximation. *
453*******************************************************************************/
454 register double FPR_c2, FPR_c4, FPR_c6, FPR_c8, FPR_c10, FPR_c12, FPR_c14;
455
456 FPR_c10 = c10; FPR_c14 = c14;
457 FPR_c8 = c8; FPR_c12 = c12;
458
459 temp1 = __FMADD( FPR_c14, xFourth, FPR_c10 ); temp2 = __FMADD( FPR_c12, xFourth, FPR_c8);
460 FPR_c6 = c6;
461
462 FPR_c4 = c4;
463 temp1 = __FMADD( temp1, xFourth, FPR_c6 ); temp2 = __FMADD( temp2, xFourth, FPR_c4);
464
465 FPR_c2 = c2;
466 temp1 = __FMADD( temp1, xFourth, FPR_c2 ); temp1 = __FMADD( xSquared, temp2, temp1 );
467
468 temp1 = __FMSUB( arg, temp1, argtail ); // second-order correction
469 if ( iquad < 2 ) // adjust sign of result
470 result = __FMADD( arg, temp1, 1.0 ); // positive
471 else
472 {
473 arg = - arg;
474 result =__FMSUB( arg, temp1, 1.0 ); // negative
475 }
476 }
477 else
478 {
479/*******************************************************************************
480* use sine approximation. *
481*******************************************************************************/
482 register double FPR_s3, FPR_s5, FPR_s7, FPR_s9, FPR_s11, FPR_s13;
483
484 FPR_s9 = s9; FPR_s13 = s13;
485 FPR_s7 = s7; FPR_s11 = s11;
486
487 temp1 = __FMADD( FPR_s13, xFourth, FPR_s9 ); temp2 = __FMADD( FPR_s11, xFourth, FPR_s7 );
488 FPR_s5 = s5;
489
490 FPR_s3 = s3;
491 temp1 = __FMADD( temp1, xFourth, FPR_s5 ); temp2 = __FMADD( temp2, xFourth, FPR_s3 );
492
493 temp1 = __FMADD( temp1, xSquared, temp2 ); xThird = __FMUL( arg, xSquared );
494
495 temp1 = __FMADD( temp1, xThird, argtail ); // second-order correction
496
497 if ( iquad < 2 ) // adjust sign of result
498 result = temp1 + arg;
499 else
500 {
501 arg = - arg;
502 result = arg - temp1;
503 }
504 }
505
506 FESETENVD( FPR_env ); // restore caller's mode
507 __PROG_INEXACT( FPR_PiDiv2 );
508
509 return ( result ) ;
510}
511