src/libm/Source/PowerPC/tg.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / tg.c
at fixPythonPipStalling 520 lines 23 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*    tg.c                                                                      *
 25*                                                                              *
 26*    Double precision Tangent                                                  *
 27*                                                                              *
 28*    Copyright: � 1993-1997 by Apple Computer, Inc., all rights reserved       *
 29*                                                                              *
 30*    Written and ported by A. Sazegari, started on June 1997.                  *
 31*    Modified by Robert Murley, 2001                                           *
 32*                                                                              *
 33*    A MathLib v4 file.                                                        *
 34*                                                                              *
 35*    Based on the trigonometric functions from IBM/Taligent.                   *
 36*                                                                              *
 37*    Modification history:                                                     *
 38*    November  06 2001: commented out warning about Intel architectures.       *
 39*    July      20 2001: replaced __setflm with FEGETENVD/FESETENVD.            *
 40*                       replaced DblInHex typedef with hexdouble.              *
 41*    September 07 2001: added #ifdef __ppc__.                                  *
 42*    September 19 2001: added macros to detect PowerPC and correct compiler.   *
 43*    September 19 2001: added <CoreServices/CoreServices.h> to get to <fp.h>   *
 44*                       and <fenv.h>, removed denormal comments.               *
 45*    September 25 2001: removed more denormal comments.                        *
 46*    October   08 2001: removed <CoreServices/CoreServices.h>.                 *
 47*                       changed compiler errors to warnings.                   *
 48*                                                                              *
 49*    W A R N I N G:                                                            *
 50*    This routine requires a 64-bit double precision IEEE-754 model.           *
 51*    They are written for PowerPC only and are expecting the compiler          *
 52*    to generate the correct sequence of multiply-add fused instructions.      *
 53*                                                                              *
 54*    This routine is not intended for 32-bit Intel architectures.              *
 55*                                                                              *
 56*     08 Oct 01   ram   changed compiler errors to warnings.                   *
 57*                                                                              *
 58*      GCC compiler options:                                                   *
 59*            optimization level 3 (-O3)                                        *
 60*            -fschedule-insns -finline-functions -funroll-all-loops            *
 61*                                                                              *
 62*******************************************************************************/
 63
 64#include    "math.h"
 65#include    "fenv_private.h"
 66#include    "fp_private.h"
 67
 68#define TRIG_NAN  "33"
 69
 70struct tableEntry                             /* tanatantable entry structure */
 71      {
 72      double p;
 73      double f5;
 74      double f4;
 75      double f3;
 76      double f2;
 77      double f1;
 78      double f0;
 79      };
 80
 81extern const uint32_t tanatantable[];
 82
 83static const double kPiBy2 = 1.570796326794896619231322;      // 0x1.921fb54442d18p0
 84static const double kPiBy2Tail = 6.1232339957367660e-17;      // 0x1.1a62633145c07p-54
 85static const double k2ByPi = 0.636619772367581382;            // 0x1.45f306dc9c883p-1
 86static const double kRint = 6.755399441055744e15;             // 0x1.8p52
 87static const double kRintBig = 2.7021597764222976e16;         // 0x1.8p54
 88static const double kPiScale42 = 1.38168706094305449e13;      // 0x1.921fb54442d17p43
 89static const double kPiScale53 = 2.829695100811376e16;        // 0x1.921fb54442d18p54
 90static const hexdouble infinity = HEXDOUBLE(0x7ff00000, 0x00000000);
 91
 92/*******************************************************************************
 93********************************************************************************
 94*                                 T   A   N                                    *
 95********************************************************************************
 96*******************************************************************************/
 97
 98static const double ts11 = 8.898406739539066157565e-3,
 99            ts9  = 2.186936821731655951177e-2,
100            ts7  = 5.396825413618260185395e-2,
101            ts5  = 0.1333333333332513155016,
102            ts3  = 0.3333333333333333333333;
103static const double tt7  = 0.05396875452473400572649,
104            tt5  = 0.1333333333304691192925,
105            tt3  = 0.3333333333333333357614;
106static const double k2ToM1023 = 0x1.0p-1023; // 1.112536929253600692e-308
107static const uint32_t indexTable[] =
108    {  
109    16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
110    27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,
111    38,  39,  40,  41,  42,  43,  44,  45,  47,  48,  49,
112    50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,
113    61,  62,  63,  64,  65,  66,  68,  69,  70,  71,  72,
114    73,  74,  75,  76,  77,  78,  79,  81,  82,  83,  84,
115    85,  86,  87,  88,  89,  91,  92,  93,  94,  95,  96,
116    97,  98,  100, 101, 102, 103, 104, 105, 107, 108, 109,
117    110, 111, 113, 114, 115, 116, 117, 119, 120, 121, 122,
118    123, 125, 126, 127, 128, 130, 131, 132, 133, 135, 136,
119    137, 139, 140, 141, 142, 144, 145, 146, 148, 149, 150,
120    152, 153, 154, 156, 157, 159, 160, 161, 163, 164, 166,
121    167, 168, 170, 171, 173, 174, 176, 177, 179, 180, 182,
122    183, 185, 186, 188, 189, 191, 192, 194, 196, 197, 199,
123    200, 202, 204, 205, 207, 209, 210, 212, 214, 215, 217,
124    219, 220, 222, 224, 226, 228, 229, 231, 233, 235, 237,
125    238, 240, 242, 244, 246, 248, 250, 252, 254, 256, 256
126    };
127    
128static const double kMinNormal = 0x1.0p-1022;                 // 2.2250738585072014e-308;
129static const double kPiDiv4 = 0.7853980064392090732;
130
131double tan (   double x  )
132{
133      register double absOfX, intquo ,arg, argtail, aSquared, aThird, aFourth,
134                      temp1, temp2, s, u, v, w, y, result;
135      int tableIndex;
136      uint32_t iz;
137      hexdouble zD, aD, OldEnvironment;
138      struct tableEntry *tablePointer;
139     
140      register double FPR_env, FPR_z, FPR_one, FPR_PiDiv4, FPR_256, FPR_piScale;
141      register double FPR_t, FPR_u, FPR_inf, FPR_pi53, FPR_2divPi, FPR_PiDiv2, FPR_PiDiv2Tail, FPR_kRintBig, FPR_kRint;
142      register uint32_t GPR_f;
143      register struct tableEntry *pT;
144
145      FEGETENVD( FPR_env );                     	// save env, set default
146      
147      FPR_z = 0.0;			 		tablePointer = ( struct tableEntry * ) ( tanatantable - ( 16 * 14 ) );      
148      absOfX = __FABS( x );
149      FPR_PiDiv4 = kPiDiv4;		 		FPR_256 = 256.0;
150      FPR_kRint = kRint;		 		FPR_one = 1.0;
151      FPR_PiDiv2 = kPiBy2;
152
153      __ENSURE( FPR_PiDiv2, FPR_PiDiv4, FPR_256); 
154      __ENSURE( FPR_z, FPR_kRint, FPR_one); 
155      
156      FESETENVD( FPR_z );
157      
158/*******************************************************************************
159*     |x| < 0.7853980064392090732                                              *
160*******************************************************************************/
161
162      if ( absOfX < FPR_PiDiv4 ) 
163      {  
164
165            FPR_t = __FMADD( FPR_256, absOfX, FPR_kRint ); 
166            aD.d = FPR_t;
167            
168            if (unlikely( absOfX == FPR_z ))
169            {
170                FESETENVD( FPR_env );       		// restore caller's mode
171                return x; 				// +0 -0 preserved
172            }
173                
174
175/*******************************************************************************
176*     |x| < 0.0625                                                             *
177*******************************************************************************/
178            
179            if ( aD.i.lo < 16u ) // No Store/Load hazard thanks to the imediately preceding branch
180            {
181                  register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11, FPR_Min;
182                  
183                  aSquared = __FMUL( absOfX, absOfX );	FPR_Min = kMinNormal;
184                  __NOOP;
185
186                  FPR_ts11 = ts11;		 	FPR_ts9 = ts9;
187                  
188                  FPR_ts7 = ts7;		 	FPR_ts5 = ts5;
189                  
190                  FPR_ts3 = ts3;			aThird = __FMUL( absOfX, aSquared );
191                  
192                  temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 );
193                  
194                  temp1 = __FMADD( temp1, aSquared, FPR_ts7 );
195                  
196                  temp1 = __FMADD( temp1, aSquared, FPR_ts5 );
197                  
198                  temp1 = __FMADD( temp1, aSquared, FPR_ts3 );
199                                    
200                  if ( x > FPR_z )
201                        result = __FMADD( temp1, aThird, absOfX );
202                  else 
203                  {
204                        aThird = -aThird;
205                        result = __FMSUB( temp1, aThird, absOfX );
206                  }
207                        
208                  FESETENVD( FPR_env );             	// restore caller's mode
209                  
210                  if ( __FABS ( result ) < FPR_Min ) 
211                        __PROG_UF_INEXACT( FPR_Min );
212                  else
213                        __PROG_INEXACT( FPR_PiDiv2 );
214
215                  return result;
216            }
217            
218/*******************************************************************************
219*     .0625 <= x < .7853980064392090732.                                       *
220*******************************************************************************/
221            else 
222            {
223                  register double FPR_tt3, FPR_tt5, FPR_tt7, FPR_sqr, FPR_f0;
224                  
225                  tableIndex = indexTable[aD.i.lo - 16];
226                  pT = &(tablePointer[tableIndex]);
227                  FPR_f0 = pT->f0;
228                  
229                  w = absOfX - FPR_f0;  		// w = deltax
230                  y = pT->p;            		// y = Tan from table
231                  FPR_tt3 = tt3;
232
233                  FPR_tt7 = tt7;			FPR_tt5 = tt5;
234                  
235                  FPR_sqr = __FMUL( w, w );		// calculate delta Tan
236                  
237                  temp1 = __FMADD( FPR_tt7, FPR_sqr, FPR_tt5 );
238                  
239                  temp1 = __FMADD( temp1, FPR_sqr, FPR_tt3 );
240                  
241                  temp1 = __FMADD( temp1, __FMUL( FPR_sqr, w ), w );
242                  
243                  v = __FMUL(y, temp1 );
244                  
245                  aSquared = __FMUL( v, v );		aThird = FPR_one + v;
246                  
247                  aFourth = __FMUL( aSquared,aSquared );w = __FMADD( aThird, aSquared, aThird );
248                  
249                  aThird = __FMADD( w, aFourth, w );	temp1 = __FMADD( v, y, temp1 );
250                  
251                  if ( x > FPR_z )			// fix final sign
252                        result = __FMADD( temp1, aThird, y );
253                  else 
254                  {
255                        aThird = -aThird;
256                        result = __FMSUB( temp1, aThird, y );
257                  }
258                  
259                  FESETENVD( FPR_env );            	 // restore caller's mode
260                  __PROG_INEXACT( FPR_PiDiv2 );
261                  
262                  return result;
263            }
264      }
265      
266      if (unlikely( x != x ))                           // x is a NaN
267      {
268            FESETENVD( FPR_env );                   	// restore caller's mode
269            return ( x );
270      }
271      
272/*******************************************************************************
273*     |x| > �/4 and perhaps |x| > kPiScale42.                                  *
274*******************************************************************************/
275
276      FPR_piScale = kPiScale42;	 			FPR_PiDiv2Tail = kPiBy2Tail;
277      FPR_2divPi = k2ByPi; 			 	FPR_pi53 = kPiScale53;
278      FPR_inf = infinity.d; 		 		FPR_kRintBig = kRintBig;
279      __ENSURE( FPR_2divPi, FPR_PiDiv2, FPR_piScale );
280
281      if (unlikely( absOfX > FPR_piScale )) 
282      {                                                	// |x| is huge or infinite
283            if ( absOfX == FPR_inf )  
284            {                               	  	// infinite case is invalid
285                  OldEnvironment.d = FPR_env;
286				  __NOOP;
287				  __NOOP;
288				  __NOOP;
289      
290                  OldEnvironment.i.lo |= SET_INVALID;
291                  FESETENVD_GRP( OldEnvironment.d );  	// restore caller's mode
292                  return ( nan ( TRIG_NAN ) );    	// return NaN
293            }
294            
295            while ( absOfX > FPR_pi53 )  
296            {                               	  	// loop to reduce x below
297                  intquo = __FMUL( x, FPR_2divPi );	// �*2^53 in magnitude
298                  FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
299                  x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
300                  absOfX = __FABS ( x ) ;
301            }
302
303/*******************************************************************************
304*     final reduction of x to magnitude between 0 and 2*�.                     *
305*******************************************************************************/
306            FPR_t = __FMADD( x, FPR_2divPi, FPR_kRintBig );
307            intquo = FPR_t - FPR_kRintBig;
308            FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
309            x = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
310            absOfX = __FABS( x );
311      }
312      
313/*******************************************************************************
314*     �/4 < x < �*2^42                                                         *
315*******************************************************************************/
316      
317/*******************************************************************************
318*     Further argument reduction is probably necessary.  A double-double       *
319*     reduced argument is determined ( arg|argtail ).                          *
320*******************************************************************************/
321      FPR_t = __FMADD( x, FPR_2divPi, FPR_kRint );
322      intquo = FPR_t - FPR_kRint; 
323      zD.d = FPR_t;
324      
325      FPR_t = __FNMSUB( intquo, FPR_PiDiv2, x );
326      arg = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
327      
328      absOfX = __FABS( arg );	 			FPR_t -= arg;
329      GPR_f = zD.i.lo;
330      
331      FPR_u = __FMADD( FPR_256, absOfX, FPR_kRint ); 
332      aD.d = FPR_u;
333      __NOOP;
334      __NOOP;
335      
336      argtail = __FNMSUB( intquo, FPR_PiDiv2Tail, FPR_t );
337      
338/*******************************************************************************
339*     |arg| < .0625.                                                           *
340*******************************************************************************/
341
342      if ( aD.i.lo < 16u ) // No Store/Load hazard, NOOPs above push this issue to next cycle.
343      {
344            register double FPR_ts3, FPR_ts5, FPR_ts7, FPR_ts9, FPR_ts11;
345            
346            aSquared = __FMUL( absOfX, absOfX );
347            u = absOfX;
348            v = argtail; 
349            
350            FPR_ts11 = ts11;		 		FPR_ts9 = ts9;
351            
352            FPR_ts7 = ts7;		 		FPR_ts5 = ts5;
353            
354            FPR_ts3 = ts3;           			iz = GPR_f & 1u; // quo modulo 2
355
356            temp1 = __FMADD( FPR_ts11, aSquared, FPR_ts9 );
357            
358            temp1 = __FMADD( temp1, aSquared, FPR_ts7 );
359            
360            temp1 = __FMADD( temp1, aSquared, FPR_ts5 );
361            
362            temp1 = __FMADD( temp1, aSquared, FPR_ts3 );
363            
364            aThird = __FMUL( aSquared, u );
365            
366/*******************************************************************************
367*     tangent approximation starts here.                                       *
368*******************************************************************************/
369
370            if ( iz == 0 ) 
371            {
372                  if ( arg > FPR_z ) 
373                  {                                    	// positive arg
374                        w = __FMADD( temp1, aThird, v );
375                        result =  ( w + u );
376                  }
377                  else 
378                  {                                    	// negative arg
379                        w = __FNMSUB( temp1, aThird, v );
380                        result = ( w - u );
381                  }
382                        
383                  FESETENVD( FPR_env );             	// restore caller's mode
384                  __PROG_INEXACT( FPR_PiDiv2 );
385                  
386                  return result;   
387            }
388            
389/*******************************************************************************
390*     cotangent approximation starts here.                                     *
391*******************************************************************************/
392            else 
393            {
394                  register double FPR_k2ToM1023;
395                  
396                  FPR_k2ToM1023 = k2ToM1023;
397                  
398                  if ( arg > FPR_z ) 
399                  {                                    	// positive argument
400                        s = __FMADD( temp1, aThird, v );
401                        temp2 = s + u;
402                  }
403                  else 
404                  {                                    	// negative argument
405                        s = __FMSUB( temp1, aThird, v );
406                        temp2 = s + u;
407                  }
408                  
409                  aFourth = ( u - temp2 ) + s;
410                  
411                  if ( __FABS ( temp2 ) < k2ToM1023 ) 
412                  {                                    	// huge result
413                        if ( arg > FPR_z ) 
414                              result = ( FPR_one / temp2 );
415                        else 
416                              result = ( FPR_one / ( -temp2 ) );
417                              
418                        FESETENVD( FPR_env );		// restore caller's mode
419                        __PROG_INEXACT( FPR_PiDiv2 );
420                        
421                        return result;   
422                  }
423                  
424                  u = FPR_one / temp2;
425                  v = __FNMSUB( u, temp2, FPR_one );
426                  temp1 = __FMSUB( aFourth, u, v );
427                  
428                  if ( arg > FPR_z )
429                        result = __FMSUB( temp1, u, u );
430                  else 
431                        result = __FNMSUB( temp1, u, u );
432                  
433                  FESETENVD( FPR_env );             // restore caller's mode
434                  __PROG_INEXACT( FPR_PiDiv2 );
435                  
436                  return result;   
437            }
438      } 
439      
440/*******************************************************************************
441*     The following code covers the case where the reduced argument has        *
442*     magnitude greater than 0.0625 but less than �/4.                         *
443*******************************************************************************/
444      
445      tableIndex = indexTable [ aD.i.lo - 16 ];
446      
447      if ( arg > FPR_z ) 
448      {                                                        	     // positive argument
449            w = absOfX - tablePointer [tableIndex].f0 + argtail;     // argument delta
450            aSquared = __FMUL( w, w );
451            v = absOfX - tablePointer [tableIndex].f0 - w + argtail; // tail of argument delta
452      }
453      else 
454      {                                                        	     // negative argument
455            w = absOfX - tablePointer [tableIndex].f0 - argtail;     // argument delta
456            aSquared = __FMUL( w, w );
457            v = absOfX - tablePointer [tableIndex].f0 - w - argtail; // tail of argument delta
458      }
459      {
460      register double FPR_tt3, FPR_tt5, FPR_tt7;
461      
462      FPR_t = v + w;					FPR_u = __FMUL( aSquared, w );
463      y = tablePointer[tableIndex].p;			FPR_tt3 = tt3;
464
465      FPR_tt7 = tt7;					FPR_tt5 = tt5;
466      
467      temp1 = __FMADD( FPR_tt7, aSquared, FPR_tt5 );
468    
469      temp1 = __FMADD( temp1, aSquared, FPR_tt3 );
470    
471      temp1 = __FMADD( temp1, FPR_u, FPR_t );
472      
473      v = __FMUL( y, temp1 );                                         // Tan( delta )
474      }
475
476/*******************************************************************************
477*      polynomial approx of 1/(1-v)                                            *
478*******************************************************************************/
479
480      aSquared = __FMUL( v, v );	      		iz = GPR_f & 1u; // quo modulo 2
481      aThird = FPR_one + v;
482      aFourth = __FMUL( aSquared, aSquared );
483      w = __FMADD( aThird, aSquared, aThird );
484      aThird = __FMADD( w, aFourth, w );		// aThird = 1/(1-v)
485      s = __FMUL( __FMADD( y, y, FPR_one ), aThird );
486      
487      if ( iz == 0 ) 
488      {                                   		// tangent approximation
489            if ( arg > FPR_z )
490                  result = __FMADD( temp1, s, y );
491            else
492            {
493                  y = -y;
494                  result = __FNMSUB( temp1, s, y );
495            }
496                  
497            FESETENVD( FPR_env );             		// restore caller's mode
498            __PROG_INEXACT( FPR_PiDiv2 );
499            
500            return result;   
501      }
502      else 
503      {                                          	// cotangent approx
504            w = __FMADD( temp1, s, y );
505            aFourth = __FMADD( temp1, s, ( y - w ) );
506            u = FPR_one / w;
507            v = __FNMSUB( u, w, FPR_one);
508            w = __FMSUB( u, aFourth, v );
509            
510            if ( arg > FPR_z )
511                  result = __FMSUB( u, w, u );
512            else 
513                  result = __FNMSUB( u, w, u );
514            
515            FESETENVD( FPR_env );             		// restore caller's mode
516            __PROG_INEXACT( FPR_PiDiv2 );
517            
518            return result;   
519      }
520}