src/libm/Source/PowerPC/ashachath.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / ashachath.c
at fixPythonPipStalling 419 lines 20 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*      File ashachath.c,                                                       *
 25*      Function ArcSinh(x), ArcCosh(x), ArcTanh(x);                            *
 26*      Implementation of arc sine, arc cosine and arc tangent hyperbolic for   *
 27*      the PowerPC.                                                            *
 28*                                                                              *
 29*      Copyright � 1991 Apple Computer, Inc.  All rights reserved.             *
 30*                                                                              *
 31*      Written by Ali Sazegari, started on December 1991,                      *
 32*      Modified and ported by Robert A. Murley (ram) for Mac OS X.             *
 33*                                                                              *
 34*      A MathLib v4 file.                                                      *
 35*                                                                              *
 36*      January  28 1992: added velvel�s algorithms for ArcSinh and ArcCosh.    *
 37*                                                                              *
 38*      December 03 1992: first rs6000 port.                                    *
 39*      January  05 1993: added the environmental controls.                     *
 40*      July     07 1993: changed the comment for square root of epsilon,       *
 41*                        made atanh symmetric about zero using copysign,       *
 42*                        changed the exception switch argument for asinh from  *
 43*                        x to PositiveX, switched to the use of string         *
 44*                        oriented nan.                                         *
 45*      July     29 1993: corrected the string nan.                             *
 46*      July     18 1994: changed the logic to avoid checking for NaNs,         *
 47*                        introduced an automatic xMinusOne in acosh. Replaced  *
 48*                        fenv functions with __setflm.                         *
 49*      August   08 2001: replaced __setflm with FEGETENVD/FESETENVD.           *
 50*                        replaced DblInHex typedef with hexdouble.             *
 51*      Sept     19 2001: added macros to detect PowerPC and correct compiler.  *
 52*      Sept     19 2001: added <CoreServices/CoreServices.h> to get to <fp.h>  *
 53*                        and <fenv.h>, removed denormal comments.              *
 54*     October   08 2001: removed <CoreServices/CoreServices.h>.                *
 55*                        changed compiler errors to warnings.                  *
 56*     November  06 2001: commented out warning about Intel architectures.      *
 57*                                                                              *
 58*    W A R N I N G:                                                            *
 59*    These routines require a 64-bit double precision IEEE-754 model.          *
 60*    They are written for PowerPC only and are expecting the compiler          *
 61*    to generate the correct sequence of multiply-add fused instructions.      *
 62*                                                                              *
 63*    These routines are not intended for 32-bit Intel architectures.           *
 64*                                                                              *
 65*    A version of gcc higher than 932 is required.                            *
 66*                                                                              *
 67*      GCC compiler options:                                                   *
 68*            optimization level 3 (-O3)                                        *
 69*            -fschedule-insns -finline-functions -funroll-all-loops            *
 70*                                                                              *
 71*******************************************************************************/
 72
 73#include    "math.h"
 74#include    "fenv_private.h"
 75#include    "fp_private.h"
 76
 77#pragma STDC FENV_ACCESS ON
 78
 79/*******************************************************************************
 80*            Functions needed for the computation.                             *
 81*******************************************************************************/
 82
 83/*     the following fp.h functions are used:                                 */
 84/*     __fpclassifyd, log1p, log, sqrt, copysign and __FABS;                  */
 85
 86#define      INVERSE_HYPERBOLIC_NAN            "40"
 87
 88static const hexdouble SqrtNegEps  = HEXDOUBLE(0x3e400000, 0x00000000); /* = 7.4505805969238281e-09   */
 89static const hexdouble Log2        = HEXDOUBLE(0x3FE62E42, 0xFEFA39EF); /* = 6.9314718055994530942E-1 */
 90static const hexdouble FiveFourth  = HEXDOUBLE(0x3FF40000, 0x00000000); /* = 1.250000000000000E+00    */
 91
 92/*******************************************************************************
 93*                  A      R      C      T      A      N      H                 *
 94*******************************************************************************/
 95
 96static const double kMinNormal  = 0x1.0p-1022;                 // 2.2250738585072014e-308;
 97static const hexdouble InvSqrtNegEps  = HEXDOUBLE(0x41c00000, 0x00000000); 
 98static const hexdouble Huge       = HEXDOUBLE(0x7ff00000, 0x00000000);
 99
100double atanh ( double x )
101{
102      register double PositiveX;
103      hexdouble OldEnvironment;
104
105      register double result, FPR_env, FPR_z, FPR_half, FPR_ln2, FPR_one, FPR_two, 
106                      FPR_sqreps, FPR_kMinNormal, FPR_inf;
107      
108      PositiveX = __FABS ( x );			FPR_ln2 = Log2.d;
109      FPR_z = 0.0;				FPR_one = 1.0;
110      FPR_half = 0.5;				FPR_two = 2.0;
111      FPR_sqreps = SqrtNegEps.d;		FPR_kMinNormal = kMinNormal;
112      FPR_ln2 = Log2.d;				FPR_inf = Huge.d;
113      
114      FEGETENVD ( FPR_env );
115      __ENSURE( FPR_z, FPR_kMinNormal, FPR_sqreps );	__ENSURE( FPR_half, FPR_two, FPR_one );
116
117      FESETENVD ( FPR_z );
118      __ENSURE( FPR_z, FPR_inf, FPR_ln2 );
119      
120/*******************************************************************************
121*                                                                              *
122*     The expression for computing ArcTanh(x) is:                              *
123*                                                                              *
124*            ArcTanh(x) = 1/2log[(1+x)/(1-x)],      |x| < 1.                   *
125*                                                                              *
126*     We use the more accurate log(1+x) for the evaluation, then the ArcTanh   *
127*     representation is:                                                       *
128*                                                                              *
129*           ArcTanh(x) = 1/2log1p(2x/(1-x)),      |x| < 1,                     *
130*                                                                              *
131*     and for small enough x ( |x| < SqrtNegEps, where 1 - SqrtNegEps^2 = 1 )  *
132*     the first term of the taylor series expansion of log[(1+x)/(1-x)] is     *
133*     2x/(1-x) ~= 2x.  x is returned for ArcTanh(x).                           *
134*                                                                              *
135*     the value of SqrtNegEps is:                                              *
136*                                                                              *
137*     SqrtNegEps =  0x3e40000000000000, then 1-SqrtNegEps^2 = 1.               *
138*                                                                              *
139*     The monotonicity of the function has been preserved across double        *
140*     valued intervals.                                                        *
141*                                                                              *
142*     -inf    -1          -SqrtNegEps     0    +SqrtNegEps          +1   +inf  *
143*     ---------+--------------------+-----+-----+--------------------+-------  *
144*     < N a N >|-inf <= ArcTanh(x) >|< x  0  x >|< ArcTanh(x) => +inf|< N a N >*
145*                                                                              *
146*******************************************************************************/
147      if (likely( PositiveX < FPR_one ))
148      {
149            if (likely( PositiveX >= FPR_sqreps ))
150            {
151                  result = FPR_half * log1p ( FPR_two * PositiveX / ( FPR_one - PositiveX ) );
152                  if ( x < FPR_z )
153                    result = -result;
154            }
155            else
156                  result = x;
157                  
158            FESETENVD ( FPR_env );
159            if (unlikely( result == FPR_z ))
160            {
161                /* NOTHING */
162            }
163            else if (unlikely( __FABS( result ) < FPR_kMinNormal ))
164                  __PROG_UF_INEXACT( FPR_kMinNormal );
165            else
166                  __PROG_INEXACT( FPR_ln2 );
167      }
168      else if ( PositiveX == FPR_one )
169      {
170            result = (x > FPR_z) ? FPR_inf : -FPR_inf;
171            OldEnvironment.d = FPR_env;
172			__NOOP;
173			__NOOP;
174			__NOOP;
175            OldEnvironment.i.lo |= FE_DIVBYZERO;
176            FESETENVD_GRP ( OldEnvironment.d );
177      }
178/*******************************************************************************
179*      If argument is SNaN then a QNaN has to be returned and the invalid      *
180*      flag signaled for SNaN.  Otherwise, the argument is greater than 1.0 and*
181*      a hyperbolic nan is returned.                                           * 
182*******************************************************************************/
183      else if ( x != x )
184      {
185            FESETENVD ( FPR_env );
186            result = x + x;
187      }
188      else 
189      {
190            result = nan ( INVERSE_HYPERBOLIC_NAN );
191            OldEnvironment.d = FPR_env;
192			__NOOP;
193			__NOOP;
194			__NOOP;
195            OldEnvironment.i.lo |= SET_INVALID;
196            FESETENVD_GRP ( OldEnvironment.d );
197      }
198    
199      return result;
200}
201
202/*******************************************************************************
203*                  A      R      C      S      I      N      H                 *
204*******************************************************************************/
205
206double asinh ( double x )
207{
208      register double PositiveX, InvPositiveX;
209
210      register double result, FPR_env, FPR_z, FPR_sqreps, FPR_4div3, FPR_inf,
211                      FPR_one, FPR_two, FPR_ln2, FPR_invsqreps, FPR_kMinNormal;
212      
213      PositiveX = __FABS ( x );			FPR_inf = Huge.d;
214      FPR_z = 0.0;				FPR_sqreps = SqrtNegEps.d;
215      FPR_4div3 = 1.333333333333333333333;	FPR_one = 1.0;
216      FPR_two = 2.0;				FPR_kMinNormal = kMinNormal;
217      FPR_ln2 = Log2.d;				FPR_invsqreps = InvSqrtNegEps.d;
218      
219      FEGETENVD ( FPR_env );
220      __ENSURE( FPR_z, FPR_kMinNormal, FPR_sqreps );
221      __ENSURE( FPR_4div3, FPR_two, FPR_one );
222      FESETENVD ( FPR_z );
223      __ENSURE( FPR_ln2, FPR_inf, FPR_invsqreps );
224      
225/*******************************************************************************
226*                                                                              *
227*     The expression for computing ArcSinh(x) is:                              *
228*                                                                              *
229*           ArcSinh(x) = log(x+sqrt(x^2+1)).                                   *
230*                                                                              *
231*     SqrtNegEps =  0x3e40000000000000, then 1-SqrtNegEps^2 = 1.               *
232*                                                                              *
233*     Asymtotic behaviors, ( exact with respect to machine arithmetic )        *
234*     are filtered out and computed seperately to avoid spurious over and      *
235*     underflows.                                                              *
236*                                                                              *
237*-inf -1/sqrtNegEps -4/3  -sqrtNegEps  0  +sqrtNegEps +4/3  +1/sqrtNegEps  +inf*
238*------------+--------+--------+-------+-------+--------+----------+-----------*
239*<-log(2|x|)>|<  ArcSinh(x)   >|<     (x)     >|<    ArcSinh(x)   >|< log(2x) >*
240*                                                                              *
241*     The constant Log2 can be obtained using the 68040 instruction            *
242*                                                                              *
243*      FMOVECR.X  #$30, FP0 ; -=> FP0 = log(2) = 0x3FE62E42FEFA39EF (in double)*
244*                           ;                  = 6.9314718055994530940e-01     *
245*                                                                              *
246*******************************************************************************/
247      
248      if (unlikely( x == FPR_z ))
249      {
250            result = x;
251            FESETENVD ( FPR_env );
252      }
253      else if (unlikely( PositiveX < FPR_kMinNormal ))
254      {
255            result = x;
256            FESETENVD ( FPR_env );
257            __PROG_UF_INEXACT( FPR_kMinNormal );
258      }
259      else if ( PositiveX < FPR_sqreps )
260      {
261            result = x;
262            FESETENVD ( FPR_env );
263            __PROG_INEXACT( FPR_ln2 );
264      }
265      else if ( PositiveX <= FPR_4div3 )
266      {
267            InvPositiveX = FPR_one / PositiveX;
268            result = log1p ( PositiveX + PositiveX / ( InvPositiveX + 
269                        sqrt ( FPR_one + InvPositiveX * InvPositiveX ) ) );
270            if ( x < FPR_z ) 
271                result = -result;
272            FESETENVD ( FPR_env );
273            __PROG_INEXACT( FPR_ln2 );
274      }
275      else if ( PositiveX <= FPR_invsqreps )
276      {
277            result = log ( FPR_two * PositiveX + FPR_one / ( PositiveX + 
278                        sqrt ( FPR_one + PositiveX *  PositiveX ) ) );
279            if ( x < FPR_z ) 
280                result = -result;
281            FESETENVD ( FPR_env );
282            __PROG_INEXACT( FPR_ln2 );
283      }
284      else if (likely( PositiveX < FPR_inf ))
285      {
286            result = FPR_ln2 + log ( PositiveX );
287            if ( x < FPR_z ) 
288                result = -result;
289            FESETENVD ( FPR_env );
290            __PROG_INEXACT( FPR_ln2 );
291      }
292      else if ( x != x )
293      {
294            FESETENVD ( FPR_env );
295            result = x + x;
296      }
297      else
298      {
299            if ( x < FPR_z ) 
300                result = -FPR_inf;
301            else
302                result = FPR_inf;
303            FESETENVD ( FPR_env );
304      }
305      
306      return result;
307}
308
309/*******************************************************************************
310*                  A      R      C      C      O      S      H                 *
311*******************************************************************************/
312
313double acosh ( double x )
314{
315      register double xMinusOne;
316      hexdouble OldEnvironment;
317      
318      register double result, FPR_env, FPR_z, FPR_ln2, FPR_one, FPR_5div4, 
319                      FPR_two, FPR_invsqreps, FPR_inf;
320      
321      FPR_z = 0.0;				FPR_one = 1.0;
322      FPR_5div4 = FiveFourth.d;			FPR_two = 2.0;
323      FPR_ln2 = Log2.d;				FPR_invsqreps = InvSqrtNegEps.d;
324      FPR_inf = Huge.d;
325      
326      FEGETENVD ( FPR_env );
327      __ENSURE( FPR_z, FPR_ln2, FPR_invsqreps );
328      __ENSURE( FPR_5div4, FPR_two, FPR_one );
329      FESETENVD ( FPR_z );
330      __ENSURE( FPR_z, FPR_inf, FPR_z );    
331      
332      xMinusOne = x - FPR_one;
333
334/*******************************************************************************
335*                                                                              *
336*     The expression for computing ArcCosh(x) is:                              *
337*                                                                              *
338*           ArcCosh(x) = log(x+sqrt(x^2-1)), for x � 1.                        *
339*                                                                              *
340*     SqrtNegEps =  0x3e40000000000000, then 1-SqrtNegEps^2 = 1.               *
341*                                                                              *
342*     (1) if x is in [1, 5/4] then we would like to use the more accurate      *
343*     log1p.  Make the change of variable x => x-1 to handle operations on a   *
344*     lower binade.                                                            *
345*                                                                              *
346*     (2) If x is in a regular range (5/4,1/sqrteps], then multiply            *
347*     x+sqrt(x^2-1) by sqrt(x^2-1)/sqrt(x^2-1) and simplify to get             *
348*     2x-1/(x+sqrt(x^2-1).  This operation will increase the accuracy of the   *
349*     computation.                                                             *
350*                                                                              *
351*     (3) For large x, such that x^2 - 1 = x^2, ArcCosh(x) = log(2x).          *
352*     This asymtotical behavior ( exact with respect to machine arithmetic,    *
353*     is filtered out and computed seperately to avoid spurious overflows.     * 
354*                                                                              *
355*     -inf    +1                   +5/4              +1/sqrtNegEps       +inf  *
356*     ---------+---------------------+---------------------+---------------    *
357*     < N a N >|<  (1)  ArcCosh(x)  >|<  (2)  ArcCosh(x)  >|< (3) log(2x) >    *
358*                                                                              *
359*******************************************************************************/
360      
361      if (likely( x >= FPR_one ))
362      {
363            if (unlikely( x == FPR_one ))
364            {
365                  FESETENVD ( FPR_env );
366                  result = FPR_z;
367            }
368            else if ( x <= FPR_5div4 )
369            {
370                  result = log1p ( xMinusOne + sqrt ( FPR_two * xMinusOne + 
371                                                 xMinusOne * xMinusOne ) );
372                  FESETENVD ( FPR_env );
373                  __PROG_INEXACT( FPR_ln2 );
374            }
375            else if ( ( FPR_5div4 < x ) && ( x <= FPR_invsqreps ) )
376            {
377                  result = log ( FPR_two * x - FPR_one / ( x + sqrt ( x * x - FPR_one ) ) );
378                  FESETENVD ( FPR_env );
379                  __PROG_INEXACT( FPR_ln2 );
380            }
381            else if (likely( x < FPR_inf ))
382            {
383                  result = FPR_ln2 + log ( x );
384                  FESETENVD ( FPR_env );
385                  __PROG_INEXACT( FPR_ln2 );
386            }
387            else
388            {
389                result = FPR_inf;
390                FESETENVD ( FPR_env );
391            }
392      }
393/*******************************************************************************
394*      If argument is SNaN then a QNaN has to be returned and the invalid      *
395*      flag signaled for SNaN.  Otherwise, the argument is less than 1.0 and   *
396*      a hyperbolic nan is returned.                                           * 
397*******************************************************************************/
398
399      else
400      {
401            if ( x != x )              /* check for argument = NaN confirmed. */
402            {
403                  FESETENVD ( FPR_env );
404                  result = x + x;
405            }
406            else 
407            {
408                result = nan ( INVERSE_HYPERBOLIC_NAN );
409                OldEnvironment.d = FPR_env;
410				__NOOP;
411				__NOOP;
412				__NOOP;
413                OldEnvironment.i.lo |= SET_INVALID;
414                FESETENVD_GRP ( OldEnvironment.d );
415            }
416      }
417
418      return result;
419}