src/libm/Source/PowerPC/lgamma.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / lgamma.c
at fixPythonPipStalling 494 lines 24 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*      File lgamma.c,                                                          *
 25*      Functions lgamma(x),                                                    *
 26*      Implementation of the lgamma function for the PowerPC.                  *
 27*                                                                              *
 28*      Copyright c 1993 Apple Computer, Inc.  All rights reserved.             *
 29*                                                                              *
 30*      Written by Ali Sazegari, started on February 1993,                      *
 31*                                                                              *
 32*      based on FORTRAN routines in SpecFun by J. W. Cody and L. Stoltz of     *
 33*      Applied Mathematics Division of Argonne National Laboratory,            *
 34*      Argonne, IL 60439.                                                      *
 35*                                                                              *
 36*      W A R N I N G:  This routine expects a 64 bit double model.             *
 37*                                                                              *
 38*      January  29  1993: First C implementation for PowerPC.                  *
 39*      April    23  1993: Fixed an error in the interval [1.5,4).              *
 40*      July     14  1993: added #pragma fenv_access. This function is now      *
 41*                         using the the string oriented �nan�.  replaced       *
 42*                         feholdexcept by _feprocentry to guard rounding.      *
 43*      July     29  1993: corrected the string nan.                            *
 44*      October  07  1993: removed the raising of the overflow flag for arg= �. *
 45*      September19  1994: changed all environemental funtions to __setflm,     *
 46*                         changed HUGE_VAL to Huge.d for perfrormance.       *
 47*      January  11  1995: an error similar to the one in gamma.c corrected.    *
 48*                         in the interval [12,2.25e+76], the nonexistant       *
 49*                         array element c[7] is addressed.  it should be c[6]. *
 50*                         a short error analysis reveals that in double        *
 51*                         precision referencing c[7] instead of c[6] has no    *
 52*                         impact on the accuracy of the result, provided that  *
 53*                         the compiler assigns 0.0 to c[7], which the ibm xlc  *
 54*                         does.  this explains why the double precision        *
 55*                         lgamma function passed accuracy tests. the relative  *
 56*                         error in extended is at most 5.56E-17 and occurs     *
 57*                         for x=12.0.  the calculation is no longer affected   *
 58*                         for arguments x�19.                                  *
 59*                                                                              *
 60********************************************************************************
 61*                                                                              *
 62*                           L  G  A  M  M  A  (  X  )                          *
 63*                                                                              *
 64*      This routine calculates the lgamma function for a real positive         *
 65*      argument x.  Computation is based on an algorithm outlined in           *
 66*      reference 1 and 2 below.  The program uses rational functions that      *
 67*      approximate the gamma function to at least 18 significant decimal       *
 68*      digits.  The approximation for x > 12 is from reference 3, while        *
 69*      approximations for x < 12.0 are similar to those in reference 1,        *
 70*      but are unpublished.                                                    *
 71*                                                                              *
 72********************************************************************************
 73*                                                                              *
 74*      Explanation of machine-dependent constants:                             *
 75*                                                                              *
 76*      maxexp    - the smallest positive power of beta that overflows;         *
 77*      xbig      - the largest argument for which gamma(x) is representable    *
 78*                  in the machine, i.e., the solution to the equation          *
 79*                  Log ( gamma ( xbig ) ) = 2 ** maxexp;                       *
 80*      xinf      - the largest machine representable floating-point number     *
 81*                  approximately 2 ** maxexp;                                  *
 82*      eps       - the smallest positive floating-point number such that       *
 83*                  1.0 + eps > 1.0;                                            *
 84*      Root4xbig - Rough estimate of the fourth root of xbig.                  *
 85*                                                                              *
 86*      Approximate values for the macintosh and the powerpc are:               *
 87*                                                                              *
 88*                                base       maxexp        xbig                 *
 89*                                                                              *
 90*      Macintosh       (E.P.)     2         16383            ?                 *
 91*      PowerPC         (D.P.)     2          1024        2.55D+305             *
 92*                                                                              *
 93*                                 xinf         eps       Root4xbig             *
 94*                                                                              *
 95*      Macintosh       (E.P.)   1.19X+4932   5.42X-20        ?                 *
 96*      PowerPC         (D.P.)   1.79D+308    2.22D-16    2.23D-308             *
 97*                                                                              *
 98********************************************************************************
 99*                                                                              *
100*      The program returns a quiet NaN for singularities and infinity when     * 
101*      overflow occurs.  The computation is believed to be free of underflow   *
102*      and overflow.                                                           *
103*                                                                              *
104*      References:                                                             *
105*                                                                              *
106*      [1] "Chebyshev Approximations for the Natural Logarithm of the Gamma    *
107*           Function", W. J. Cody and K. E. Hillstrom, Math. Comp. 21, 1967,   *
108*           pp. 198-203.                                                       *
109*                                                                              *
110*      [2] K. E. Hillstrom, ANL/AMD Program ANLC366S, DGAMMA/DLGAMA, May 1969. *
111*                                                                              *
112*      [3] Hart, Et. Al., Computer Approximations, Wiley and sons, New York,   *
113*          1968.                                                               *
114*******************************************************************************/
115
116#ifndef _REENTRANT
117    #define _REENTRANT 1
118#endif
119
120#include      "math.h"
121#include      "fenv.h"
122#include      "fp_private.h"
123#include      "fenv_private.h"
124
125/*******************************************************************************
126*            Functions needed for the computation.                             *
127*******************************************************************************/
128
129/*     the following fp.h functions are used:                                 */
130/*     __fpclassifyd, nan and log;                                            */
131/*     the following environmental functions are used:                        */
132/*     __setflm.                                                              */
133
134/*******************************************************************************
135*     Coefficients for P1 in lgamma approximation over [0.5,1.5) in decreasing *
136*     order.                                                                   *
137*******************************************************************************/
138
139static const double d1 = -5.772156649015328605195174e-1;
140
141static const double p1[8] = { 4.945235359296727046734888e+0,
142                        2.018112620856775083915565e+2,
143                        2.290838373831346393026739e+3,
144                        1.131967205903380828685045e+4,
145                        2.855724635671635335736389e+4,
146                        3.848496228443793359990269e+4,
147                        2.637748787624195437963534e+4,
148                        7.225813979700288197698961e+3 };
149                       
150/*******************************************************************************
151*     Coefficients for Q1 in gamma approximation over [0.5,1.5) in decreasing  *
152*     order.                                                                   *
153*******************************************************************************/
154
155static const double q1[8] = { 6.748212550303777196073036e+1,
156                        1.113332393857199323513008e+3,
157                        7.738757056935398733233834e+3,
158                        2.763987074403340708898585e+4,
159                        5.499310206226157329794414e+4,
160                        6.161122180066002127833352e+4,
161                        3.635127591501940507276287e+4,
162                        8.785536302431013170870835e+3 };
163                        
164/*******************************************************************************
165*     Coefficients for P2 in lgamma approximation over [1.5,4) in decreasing   *
166*     order.                                                                   *
167*******************************************************************************/
168
169static const double d2 = 4.227843350984671393993777e-1;
170
171static const double p2[8] = { 4.974607845568932035012064e+0,
172                        5.424138599891070494101986e+2,
173                        1.550693864978364947665077e+4,
174                        1.847932904445632425417223e+5,
175                        1.088204769468828767498470e+6,
176                        3.338152967987029735917223e+6,
177                        5.106661678927352456275255e+6,
178                        3.074109054850539556250927e+6 };
179
180/*******************************************************************************
181*     Coefficients for Q2 in gamma approximation over [1.5,4) in decreasing    *
182*     order.                                                                   *
183*******************************************************************************/
184
185static const double q2[8] = { 1.830328399370592604055942e+2,
186                        7.765049321445005871323047e+3,
187                        1.331903827966074194402448e+5,
188                        1.136705821321969608938755e+6,
189                        5.267964117437946917577538e+6,
190                        1.346701454311101692290052e+7,
191                        1.782736530353274213975932e+7,
192                        9.533095591844353613395747e+6 };
193
194/*******************************************************************************
195*     Coefficients for P4 in lgamma approximation over [4,12) in decreasing    *
196*     order.                                                                   *
197*******************************************************************************/
198
199static const double d4 = 1.791759469228055000094023e+0;
200
201static const double p4[8] = { 1.474502166059939948905062e+04,
202                        2.426813369486704502836312e+06,
203                        1.214755574045093227939592e+08,
204                        2.663432449630976949898078e+09,
205                        2.940378956634553899906876e+10,
206                        1.702665737765398868392998e+11,
207                        4.926125793377430887588120e+11,
208                        5.606251856223951465078242e+11 };
209
210/*******************************************************************************
211*     Coefficients for Q4 in gamma approximation over [4,12) in decreasing     *
212*     order.                                                                   *
213*******************************************************************************/
214
215static const double q4[8] = { 2.690530175870899333379843e+03,
216                        6.393885654300092398984238e+05,
217                        4.135599930241388052042842e+07,
218                        1.120872109616147941376570e+09,
219                        1.488613728678813811542398e+10,
220                        1.016803586272438228077304e+11,
221                        3.417476345507377132798597e+11,
222                        4.463158187419713286462081e+11 };
223                        
224/*******************************************************************************
225*     Coefficients for minimax approximation over [12, xbig].                  *
226*******************************************************************************/
227
228static const double c[7] = { -1.910444077728e-03,
229                        8.4171387781295e-04,
230                       -5.952379913043012e-04,
231                        7.93650793500350248e-04,
232                       -2.777777777777681622553e-03,
233                        8.333333333333333331554247e-02,
234                        5.7083835261e-03 };
235
236static const double LogSqrt2pi = 0.9189385332046727417803297e+0;
237static const double xbig       = 2.55e+305;
238static const double Root4xbig  = 2.25e+76;
239static const double eps        = 2.22e-16;
240static const double pnt68      = 0.6796875e+0;
241static const hexdouble Huge    = HEXDOUBLE(0x7FF00000, 0x00000000);
242
243static const double twoTo52      = 0x1.0p+52; // 4503599627370496.0;
244static const double pi  =  3.14159265358979311600e+00; /* 0x400921FB, 0x54442D18 */
245
246/*******************************************************************************
247*            Value of special function NaN.                                    *
248*******************************************************************************/
249
250#define      SET_INVALID    0x01000000
251#define      GAMMA_NAN      "42"
252
253#pragma STDC FENV_ACCESS ON
254
255static double lgammaApprox ( double x )
256      {
257      register int i;
258      register double y, result, numerator, denominator, ysquared, 
259                      corrector, xMinus1, xMinus2, xMinus4; 
260      hexdouble OldEnvironment;
261      
262      FEGETENVD( OldEnvironment.d );               // save environment, set default
263      FESETENVD( 0.0 );
264      
265/*******************************************************************************
266*     The next switch will decipher what sort of argument we have. If argument *
267*     is SNaN then a QNaN has to be returned and the invalid flag signaled.    * 
268*******************************************************************************/
269
270      switch ( __fpclassifyd ( x ) )
271            {
272            case FP_NAN:
273                  x *= 2.0;                  /* quiets NaN */
274                  FESETENVD( OldEnvironment.d );         //   restore caller's environment
275                  return x;
276                  
277            case FP_ZERO:
278                  x = Huge.d;
279                  OldEnvironment.i.lo |= FE_DIVBYZERO;
280                  FESETENVD( OldEnvironment.d );
281                  return x;
282
283            case FP_INFINITE:
284                  x = Huge.d;
285                  FESETENVD( OldEnvironment.d );
286                  return x;
287                  
288            default:                  /*      NORMALNUM and DENORMALNUM      */
289                  break;
290            }
291      
292/*******************************************************************************
293 *      For negative x, since (G is gamma function)
294 *		-x*G(-x)*G(x) = pi/sin(pi*x),
295 * 	we have
296 * 		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
297 *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
298 *	Hence, for x<0, signgam = sign(sin(pi*x)) and
299 *		lgamma(x) = log(|Gamma(x)|)
300 *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
301 *******************************************************************************/
302
303      if ( x < 0.0 )
304            {
305            register double a, y1, IsItAnInt;
306            
307            if ( x <= -twoTo52 ) // big negative integer?
308                {
309                OldEnvironment.i.lo |= FE_DIVBYZERO;
310                FESETENVD( OldEnvironment.d );
311                return Huge.d;
312                }
313                
314            y = - x;
315            y1 = trunc ( y );
316            IsItAnInt = y - y1; // excess over the boundary
317            
318            if ( IsItAnInt == 0.0 ) // negative integer?
319                {
320                OldEnvironment.i.lo |= FE_DIVBYZERO;
321                FESETENVD( OldEnvironment.d );
322                return Huge.d;
323                }
324            else
325                a = sin ( pi * IsItAnInt );
326            
327            FESETENVD( OldEnvironment.d );
328            return log ( pi / fabs ( a * x ) ) - lgammaApprox ( -x );
329            }
330      
331/*******************************************************************************
332*     The argument is positive, if it is bigger than xbig = 2.55e+305 then     *
333*     overflow.                                                                *
334*******************************************************************************/
335
336      if ( x > xbig )
337            {
338            OldEnvironment.i.lo |= FE_OVERFLOW;
339            FESETENVD( OldEnvironment.d );
340            return Huge.d;
341            }
342
343      y = x;
344
345/*******************************************************************************
346*     x <= eps then return the approximation log(x).                           *
347*******************************************************************************/
348
349      if ( y <= eps )
350            {
351            FESETENVD( OldEnvironment.d );
352            return ( - log ( y ) );
353            }
354
355/*******************************************************************************
356*     x is in (eps,1.5] then use d1, p1 and q1 coefficients.                   *
357*******************************************************************************/
358
359      else if ( y <= 1.5 )
360            {
361            if ( y < pnt68 )
362                  {
363                  corrector = - log ( y );
364                  xMinus1 = y;
365                  }
366            else
367                  {
368                  corrector = 0.0;
369                  xMinus1 = ( y - 0.5 ) - 0.5;
370                  }
371            if ( ( y <= 0.5 ) || ( y >= pnt68 ) )
372                  {
373                  denominator = 1.0;
374                  numerator = 0.0;
375                  for ( i = 0; i < 8; i++ )
376                        {
377                        numerator = numerator * xMinus1 + p1[i];
378                        denominator = denominator * xMinus1 + q1[i];
379                        }
380                  result = corrector + ( xMinus1 * ( d1 + xMinus1 * ( numerator / denominator ) ) );
381                  }
382            else
383                  {
384                  xMinus2 = ( y - 0.5 ) - 0.5;
385                  denominator = 1.0;
386                  numerator = 0.0;
387                  for ( i = 0; i < 8; i++ )
388                        {
389                        numerator = numerator * xMinus2 + p2[i];
390                        denominator = denominator * xMinus2 + q2[i];
391                        }
392                  result = corrector + ( xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) ) );
393                  }
394            }
395
396/*******************************************************************************
397*     x is in (1.5,4.0] then use d2, p2 and q2 coefficients.                   *
398*******************************************************************************/
399
400      else if ( y <= 4.0 )
401            {
402            xMinus2 = y - 2.0;
403            denominator = 1.0;
404            numerator = 0.0;
405            for ( i = 0; i < 8; i++ )
406                  {
407                  numerator = numerator * xMinus2 + p2[i];
408                  denominator = denominator * xMinus2 + q2[i];
409                  }
410            result = xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) );
411            }
412            
413/*******************************************************************************
414*     x is in (4.0,12.0] then use d4, p4 and q4 coefficients.                  *
415*******************************************************************************/
416
417      else if ( y <= 12.0 )
418            {
419            xMinus4 = y - 4.0;
420            denominator = - 1.0;
421            numerator = 0.0;
422            for ( i = 0; i < 8; i++ )
423                  {
424                  numerator = numerator * xMinus4 + p4[i];
425                  denominator = denominator * xMinus4 + q4[i];
426                  }
427            result = d4 + xMinus4 * ( numerator / denominator );
428            }
429      else  /* ( y >= 12.0 ) */
430            {
431            result = 0.0;
432            if ( y <= Root4xbig )
433                  {
434                  result = c[6];
435                  ysquared = y * y;
436                  for ( i = 0; i < 6; i++ )
437                        result = result / ysquared + c[i];
438                  }
439            result /= y;
440            corrector = log ( y );
441            result += LogSqrt2pi - 0.5 * corrector;
442            result += y * ( corrector - 1.0 );
443            }
444      
445      FESETENVD( OldEnvironment.d );
446      x = rint ( x ); // INEXACT set as a side effect for non integer x  
447      return result;
448      }
449      
450double lgamma ( double x )
451{
452    double g = lgammaApprox ( x );
453
454    signgam = 1;
455    if ( x < 0.0 && (g == g))
456    {
457        double y1 = trunc ( -x );
458        hexdouble OldEnvironment;
459        
460        FEGETENVD( OldEnvironment.d );               // save environment, set default
461        FESETENVD( 0.0 );
462        
463        if ( y1 == trunc ( y1 * 0.5 ) * 2.0 ) 
464            signgam = -1;
465            
466        FESETENVD( OldEnvironment.d );
467    }
468    
469    return g;
470}
471
472double lgamma_r ( double x, int *psigngam )
473{
474    double g = lgammaApprox ( x );
475
476    if (psigngam)
477        *psigngam = 1;
478        
479    if ( x < 0.0 && (g == g))
480    {
481        double y1 = trunc ( -x );
482        hexdouble OldEnvironment;
483        
484        FEGETENVD( OldEnvironment.d );               // save environment, set default
485        FESETENVD( 0.0 );
486        
487        if ( y1 == trunc ( y1 * 0.5 ) * 2.0 && psigngam) 
488            *psigngam = -1;
489            
490        FESETENVD( OldEnvironment.d );
491    }
492    
493    return g;
494}