src/libm/Source/PowerPC/shchth.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / shchth.c
at fixPythonPipStalling 259 lines 13 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*      File shchth.c,                                                          *
 25*      Function Sinh(x), Cosh(x) and Tanh(x);                                  *
 26*      Implementation of sine, cosine and tangent hyperbolic for the PowerPC.  *
 27*                                                                              *
 28*      Copyright � 1991-2001 Apple Computer, Inc.  All rights reserved.        *
 29*                                                                              *
 30*      Written by Ali Sazegari, started on November 1991,                      *
 31*      Modified and ported by Robert A. Murley (ram) for Mac OS X.             *
 32*                                                                              *
 33*      A MathLib v4 file.                                                      *
 34*                                                                              *
 35*      January  06 1992: changed the value of SEAM to �epsilon to conserve     *
 36*                        function monotonicity:                                *
 37*                        sqrt ( epsilon ) = 1.646361269956798117e-10           *
 38*                                         = 3fde0000b504f333f9de6484           *
 39*                                                                              *
 40*      December 03 1992: first rs6000 port.                                    *
 41*      January  05 1993: added the environmental controls.                     *
 42*      July     11 1993: changed feholdexcept to _feprocentry to set rounding  *
 43*                        to zero. added fenv_access pragama and fp_private.h,  *
 44*                        corrected the argument of the classification switch.  *
 45*      July     18 1994: merged th with sh and ch.                             *
 46*      August   02 1994: corrected the flags that we missed for the 601 ROM.   *
 47*      August   08 2001: replaced __setflm with FEGETENVD/FESETENVD.           *
 48*                        replaced DblInHex typedef with hexdouble.             *
 49*      Sept     19 2001: added macros to detect PowerPC and correct compiler.  *
 50*      Sept     19 2001: added <CoreServices/CoreServices.h> to get to <fp.h>  *
 51*                        and <fenv.h>, removed denormal comments.              *
 52*      Sept     24 2001: removed fenv_access pragma and some old comments.     *
 53*      Oct      08 2001: removed <CoreServices/CoreServices.h>.                *
 54*                        changed compiler errors to warnings.                  *
 55*      Nov      06 2001: commented out warning about Intel architectures.      *
 56*                                                                              *
 57*      W A R N I N G:                                                          * 
 58*      These routines require a 64-bit double precision IEEE-754 model.        *
 59*      They are written for PowerPC only and are expecting the compiler        *
 60*      to generate the correct sequence of multiply-add fused instructions.    *
 61*                                                                              *
 62*      These routines are not intended for 32-bit Intel architectures.         *
 63*                                                                              *
 64*     A version of gcc higher than 932 is required.                            *
 65*                                                                              *
 66*      GCC compiler options:                                                   *
 67*            optimization level 3 (-O3)                                        *
 68*            -fschedule-insns -finline-functions -funroll-all-loops            *
 69*                                                                              *
 70*******************************************************************************/
 71
 72#include    "math.h"
 73#include    "fenv_private.h"
 74#include    "fp_private.h"
 75
 76/*******************************************************************************
 77*            Functions needed for the computation.                             *
 78********************************************************************************
 79*                                                                              *
 80*     the following fp.h functions are used:                                   *
 81*     __fpclassifyd, copysign, exp, expm1 and __FABS.                          *
 82*******************************************************************************/
 83
 84static const hexdouble SqrtNegEps = HEXDOUBLE(0x3e400000, 0x00000000); 
 85static const hexdouble Huge       = HEXDOUBLE(0x7ff00000, 0x00000000);
 86static const double kMinNormal    = 0x1.0p-1022;                 // 2.2250738585072014e-308;
 87static const double maxExp        = 7.0978271289338397000e+02; /* 0x40862e42, 0xfefa39ef */
 88
 89
 90/*******************************************************************************
 91*     The function is odd.  The positive interval is computed and for          *
 92*     negative values, the sign is reflected in the computation.               *
 93*     Some computational flags are set in the FPSCR.  It is important to fold  *
 94*     them in at the end.                                                      *
 95********************************************************************************
 96*                            S      I      N      H                            *
 97*******************************************************************************/
 98
 99static const hexdouble Log2        = HEXDOUBLE(0x3FE62E42, 0xFEFA39EF); /* = 6.9314718055994530942E-1 */
100static const double kMaxNormal = 1.7976931348623157e308;
101
102double sinh ( double x )
103{
104      register double PositiveX;
105      
106      register double result, FPR_env, FPR_z, FPR_kMinNormal, FPR_half, FPR_one, 
107                              FPR_ln2, FPR_sqreps, FPR_kMaxNormal, FPR_inf;
108      
109      PositiveX = __FABS ( x );
110      FPR_z = 0.0;				FPR_half = 0.5;
111      FPR_one = 1.0;				FPR_sqreps = SqrtNegEps.d;
112      FPR_inf = Huge.d;				FPR_kMinNormal = kMinNormal;
113      FPR_ln2 = Log2.d;				FPR_kMaxNormal = kMaxNormal;
114      
115      FEGETENVD ( FPR_env);
116      __ENSURE( FPR_half, FPR_sqreps, FPR_kMinNormal ); __ENSURE( FPR_z, FPR_kMaxNormal, FPR_ln2 );
117      FESETENVD ( FPR_z );
118      __ENSURE( FPR_z, FPR_one, FPR_inf );
119
120      if ( PositiveX > (maxExp - M_LN2) )
121	  {
122			result = exp ( FPR_half * PositiveX );
123			result = ( FPR_half * result ) * result;
124	  }
125	  else if ( PositiveX > FPR_sqreps )       	/* return the arg if too small  */
126      {                  
127            result = expm1 ( PositiveX );
128			result = FPR_half * ( result + result / ( FPR_one + result ) );
129      }
130      else
131            result = PositiveX;
132
133      FESETENVD ( FPR_env );
134      
135      if (unlikely( result != result))
136            ; /* NOTHING */
137      else if (unlikely( result == FPR_z ))		// iff x == 0.0
138            result = x; 			// Get +-0.0 right
139      else if (unlikely( result < FPR_kMinNormal ))
140            __PROG_UF_INEXACT( FPR_kMinNormal );
141      else if (likely( result < FPR_inf ))
142            __PROG_INEXACT( FPR_ln2 );
143      else if (likely( PositiveX < FPR_inf ))
144            __PROG_OF_INEXACT( FPR_kMaxNormal );
145      
146      if ( x < FPR_z)
147            result = -result;
148      
149      return result;
150}
151      
152/*******************************************************************************
153*                              C      O      S      H                          *
154*******************************************************************************/
155
156double cosh ( double x )
157{
158      hexdouble OldEnvironment, NewEnvironment;
159      
160      register double result, FPR_env, FPR_z, FPR_one, FPR_half, FPR_t, FPR_lim;
161      
162      FPR_t = __FABS ( x );
163      FPR_z = 0.0;				FPR_half = 0.5;
164      FPR_one = 1.0;
165
166      FEGETENVD ( FPR_env);
167      __ENSURE( FPR_z, FPR_half, FPR_one );
168      FPR_lim = maxExp - M_LN2; // gcc-3.5 doesn't fold. Emitted code raises inexact. Care for env!
169      FESETENVD ( FPR_z );
170      
171	  if ( FPR_t < FPR_lim )
172	  {
173		  FPR_t = exp ( FPR_t );
174		  
175		  result = FPR_half * (FPR_t + FPR_one / FPR_t);	OldEnvironment.d = FPR_env;
176	  }
177	  else
178	  {
179		  FPR_t = exp ( FPR_half * FPR_t );
180		  
181		  result = ( FPR_half * FPR_t ) * FPR_t;			OldEnvironment.d = FPR_env;
182	  }
183            
184      FEGETENVD_GRP ( NewEnvironment.d );
185	  OldEnvironment.i.lo |= ( NewEnvironment.i.lo & EXCEPT_MASK );
186      FESETENVD_GRP ( OldEnvironment.d );
187
188      return result;
189}
190
191/*******************************************************************************
192*     This function is odd.  The positive interval is computed and for         *
193*     negative values, the sign is reflected in the computation.               *
194*     This calculation has spurious flags that need to be cleared before final *
195*     exit.  Instead of clearing them, we keep the only set flag (inexact)     *
196*     and do not fold the sticky FPSCR excpeions.  It makes for a faster tanh  *
197*     and less errors with the test vectors.                                   *
198********************************************************************************
199*                            T      A      N      H                            *
200*******************************************************************************/
201
202double tanh ( double x )
203{
204      register double PositiveX;
205
206      register double result, FPR_env, FPR_z, FPR_kMinNormal, FPR_two, FPR_negTwo, 
207                              FPR_ln2, FPR_sqreps, FPR_kMaxNormal, FPR_inf, FPR_t;
208      
209      PositiveX = __FABS ( x );
210      FPR_z = 0.0;				FPR_inf = Huge.d;
211      FPR_two = 2.0;				FPR_negTwo = -2.0;
212      FPR_sqreps = SqrtNegEps.d;		FPR_kMinNormal = kMinNormal;
213      FPR_ln2 = Log2.d;				FPR_kMaxNormal = kMaxNormal;
214      
215      if (unlikely( PositiveX == FPR_inf ))
216            return (x >= FPR_z ? 1.0 : -1.0);
217
218      FEGETENVD ( FPR_env );
219      __ENSURE( FPR_negTwo, FPR_sqreps, FPR_kMinNormal ); __ENSURE( FPR_z, FPR_kMaxNormal, FPR_ln2 );
220      FESETENVD ( FPR_z );
221      __ENSURE( FPR_z, FPR_inf, FPR_two );
222      
223/*******************************************************************************
224*     Reduce the number of calls to expm1 function by using the identity:      *
225*     th(x) = ( e^x - e^-x ) / ( e^x + e^-x )                                  *
226*           = - ( e^-2x - 1 ) / ( 2 + ( e^-2x - 1 ) )                          *
227*******************************************************************************/
228      if ( PositiveX > FPR_sqreps) /* return the arg if too small  */
229      {                  
230            FPR_t = expm1 ( FPR_negTwo * PositiveX ); /* call exp1 once   */
231            result = - FPR_t / ( FPR_two + FPR_t );
232      }
233      else
234            result = PositiveX;
235
236/*******************************************************************************
237*     If the argument to expm1 above is 7fe0000000000000 or 40d0000000000000   *
238*     then expm1 will either set an overflow or an underflow which is          *
239*     undeserved for tanh.                                                     *
240*******************************************************************************/
241
242      FESETENVD ( FPR_env );
243      
244      if (unlikely( result != result ))
245            ; /* NOTHING */
246      else if (unlikely( result == FPR_z ))		// iff x == 0.0
247            result = x; 			// Get +-0.0 right
248      else if (unlikely( result < FPR_kMinNormal ))
249            __PROG_UF_INEXACT( FPR_kMinNormal );
250      else if (likely( result < FPR_inf ))
251            __PROG_INEXACT( FPR_ln2 );
252      else if (likely( PositiveX < FPR_inf ))
253            __PROG_OF_INEXACT( FPR_kMaxNormal );
254      
255      if ( x < FPR_z)
256            result = -result;
257      
258      return result;
259}