src/libm/Source/ARM/lgamma.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / lgamma.c
at fixPythonPipStalling 305 lines 9.5 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
  2/*
  3 *	lgamma.c
  4 *
  5 *		by Ian Ollmann based heavily on the work of Ali Sazegari, Steve Peters and Jeff Kidder
  6 *
  7 *	Copyright (c) 2007, Apple Inc. All Rights Reserved. 
  8 */
  9 
 10#include <math.h>
 11#include <stdint.h>
 12
 13#ifdef ARMLIBM_SET_FLAGS
 14#include "required_arithmetic.h"
 15#endif
 16
 17static const double d1 = -5.772156649015328605195174e-1;
 18
 19static const double p1[8] = { 4.945235359296727046734888e+0,
 20                        2.018112620856775083915565e+2,
 21                        2.290838373831346393026739e+3,
 22                        1.131967205903380828685045e+4,
 23                        2.855724635671635335736389e+4,
 24                        3.848496228443793359990269e+4,
 25                        2.637748787624195437963534e+4,
 26                        7.225813979700288197698961e+3 };
 27						
 28static const double q1[8] = { 6.748212550303777196073036e+1,
 29                        1.113332393857199323513008e+3,
 30                        7.738757056935398733233834e+3,
 31                        2.763987074403340708898585e+4,
 32                        5.499310206226157329794414e+4,
 33                        6.161122180066002127833352e+4,
 34                        3.635127591501940507276287e+4,
 35                        8.785536302431013170870835e+3 };
 36
 37static const double d2 = 4.227843350984671393993777e-1;
 38
 39static const double p2[8] = { 4.974607845568932035012064e+0,
 40                        5.424138599891070494101986e+2,
 41                        1.550693864978364947665077e+4,
 42                        1.847932904445632425417223e+5,
 43                        1.088204769468828767498470e+6,
 44                        3.338152967987029735917223e+6,
 45                        5.106661678927352456275255e+6,
 46                        3.074109054850539556250927e+6 };
 47
 48static const double q2[8] = { 1.830328399370592604055942e+2,
 49                        7.765049321445005871323047e+3,
 50                        1.331903827966074194402448e+5,
 51                        1.136705821321969608938755e+6,
 52                        5.267964117437946917577538e+6,
 53                        1.346701454311101692290052e+7,
 54                        1.782736530353274213975932e+7,
 55                        9.533095591844353613395747e+6 };
 56
 57static const double d4 = 1.791759469228055000094023e+0;
 58
 59static const double p4[8] = { 1.474502166059939948905062e+04,
 60                        2.426813369486704502836312e+06,
 61                        1.214755574045093227939592e+08,
 62                        2.663432449630976949898078e+09,
 63                        2.940378956634553899906876e+10,
 64                        1.702665737765398868392998e+11,
 65                        4.926125793377430887588120e+11,
 66                        5.606251856223951465078242e+11 };
 67
 68static const double q4[8] = { 2.690530175870899333379843e+03,
 69                        6.393885654300092398984238e+05,
 70                        4.135599930241388052042842e+07,
 71                        1.120872109616147941376570e+09,
 72                        1.488613728678813811542398e+10,
 73                        1.016803586272438228077304e+11,
 74                        3.417476345507377132798597e+11,
 75                        4.463158187419713286462081e+11 };
 76						
 77static const double cc[7] = { -1.910444077728e-03,
 78                        8.4171387781295e-04,
 79                       -5.952379913043012e-04,
 80                        7.93650793500350248e-04,
 81                       -2.777777777777681622553e-03,
 82                        8.333333333333333331554247e-02,
 83                        5.7083835261e-03 };
 84
 85
 86static const double Root4xbig  = 2.25e+76;
 87static const double pnt68      = 0.6796875e+0;
 88static const double xbigger       = 2.55e+305;
 89static const double pi         = 3.1415926535897932384626434e+0;
 90static const double eps        = 2.22e-16;
 91static const double LogSqrt2pi = 0.9189385332046727417803297e+0;
 92
 93static inline double lgammaApprox ( double x, int *psigngam );
 94static inline double lgammaApprox ( double x, int *psigngam )
 95{
 96	union{ double d; uint64_t u; }u = {x};
 97
 98	// deal with 0, NaN, Inf
 99	if( (u.u & 0x7fffffffffffffffULL) -1ULL >= 0x7ff0000000000000ULL -1ULL )
100	{
101		// 0
102		if( x == 0.0 ) {
103#ifdef ARMLIBM_SET_FLAGS
104			return required_divide_double( 1.0, __builtin_fabs(x) );		//set div/0 return inf
105#else
106			return __builtin_inf();
107#endif
108		}
109
110		//NaN
111		if( x != x )
112			return x + x;
113			
114		//Inf
115		return __builtin_fabs( x );
116	}
117
118      register int i;
119      register double y, result, numerator, denominator, ysquared, 
120                      corrector, xMinus1, xMinus2, xMinus4; 
121      
122	  *psigngam = 1; 
123      
124      
125/*******************************************************************************
126 *      For negative x, since (G is gamma function)
127 *		-x*G(-x)*G(x) = pi/sin(pi*x),
128 * 	we have
129 * 		G(x) = pi/(sin(pi*x)*(-x)*G(-x))
130 *	since G(-x) is positive, sign(G(x)) = sign(sin(pi*x)) for x<0
131 *	Hence, for x<0, signgam = sign(sin(pi*x)) and
132 *		lgamma(x) = log(|Gamma(x)|)
133 *			  = log(pi/(|x*sin(pi*x)|)) - lgamma(-x);
134 *******************************************************************************/
135
136	if ( x < 0.0 )
137	{
138		int dummy = 1; 	
139		register double a, y1, fraction;
140            
141		if ( x <= -0x1.0p52 ) // big negative integer?
142			return x / -0.0;
143                
144		y = - x;
145		y1 = y;
146
147		// y1 = trunc ( y );
148		int64_t ll = 0;
149		if( y < 0x1.0p63 )
150		{
151			ll = (int64_t) y;
152			y1 = (double) ll;
153		}
154		fraction = y - y1; // excess over the boundary
155            
156		if ( fraction == 0.0 ) { // negative integer?
157#ifdef ARMLIBM_SET_FLAGS
158			return required_divide_double( x, -0.0 );
159#else
160			return __builtin_copysign( __builtin_inf(), -x);
161#endif
162		}
163		
164		else 
165		{
166			a = sin ( pi * fraction );
167			if ( 0ULL == (ll & 1ULL) ) // 0, 2, 4, ...
168				*psigngam = -1; /* Gamma(x) < 0 */ 
169		}
170			
171		return log ( pi / __builtin_fabs ( a * x ) ) - lgammaApprox ( -x, &dummy );
172	}
173      
174/*******************************************************************************
175*     The argument is positive, if it is bigger than xbigger = 2.55e+305 then     *
176*     overflow.                                                                *
177*******************************************************************************/
178
179	if ( x > xbigger ) {
180#ifdef ARMLIBM_SET_FLAGS
181		return required_multiply_double( x, 0x1.0p1023 );	//return inf, set overflow
182#else
183		return __builtin_inf();
184#endif
185	}
186
187	y = x;
188
189/*******************************************************************************
190*     x <= eps then return the approximation log(x).                           *
191*******************************************************************************/
192
193	if ( y <= eps )
194		return ( - log ( y ) );
195
196/*******************************************************************************
197*     x is in (eps,1.5] then use d1, p1 and q1 coefficients.                   *
198*******************************************************************************/
199
200	else if ( y <= 1.5 )
201	{
202		if ( y < pnt68 )
203		{
204			corrector = - log ( y );
205			xMinus1 = y;
206		}
207		else
208	    {
209			corrector = 0.0;
210			xMinus1 = ( y - 0.5 ) - 0.5;
211		}
212		
213		if ( ( y <= 0.5 ) || ( y >= pnt68 ) )
214		{
215			if( 0.0 == xMinus1 )	//early out for exact zero so we get inexact flag right
216				return corrector;
217
218			denominator = 1.0;
219			numerator = 0.0;
220			for ( i = 0; i < 8; i++ )
221			{
222				numerator = numerator * xMinus1 + p1[i];
223				denominator = denominator * xMinus1 + q1[i];
224			}
225			result = corrector + ( xMinus1 * ( d1 + xMinus1 * ( numerator / denominator ) ) );
226		}
227		else
228		{
229			xMinus2 = ( y - 0.5 ) - 0.5;
230			denominator = 1.0;
231			numerator = 0.0;
232			for ( i = 0; i < 8; i++ )
233			{
234				numerator = numerator * xMinus2 + p2[i];
235				denominator = denominator * xMinus2 + q2[i];
236			}
237			result = corrector + ( xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) ) );
238		}
239	}
240
241/*******************************************************************************
242*     x is in (1.5,4.0] then use d2, p2 and q2 coefficients.                   *
243*******************************************************************************/
244
245	else if ( y <= 4.0 )
246	{
247		xMinus2 = y - 2.0;
248		if( 0.0 == xMinus2 )
249			return 0.0;
250
251		denominator = 1.0;
252		numerator = 0.0;
253		for ( i = 0; i < 8; i++ )
254		{
255			numerator = numerator * xMinus2 + p2[i];
256			denominator = denominator * xMinus2 + q2[i];
257		}
258		result = xMinus2 * ( d2 + xMinus2 * ( numerator / denominator ) );
259	}
260            
261/*******************************************************************************
262*     x is in (4.0,12.0] then use d4, p4 and q4 coefficients.                  *
263*******************************************************************************/
264
265	else if ( y <= 12.0 )
266	{
267		xMinus4 = y - 4.0;
268		denominator = - 1.0;
269		numerator = 0.0;
270		for ( i = 0; i < 8; i++ )
271		{
272			numerator = numerator * xMinus4 + p4[i];
273			denominator = denominator * xMinus4 + q4[i];
274		}
275		result = d4 + xMinus4 * ( numerator / denominator );
276	}
277	else  /* ( y >= 12.0 ) */
278	{
279		result = 0.0;
280		if ( y <= Root4xbig )
281		{
282			result = cc[6];
283			ysquared = y * y;
284			for ( i = 0; i < 6; i++ )
285				result = result / ysquared + cc[i];
286		}
287		result /= y;
288		corrector = log ( y );
289		result += LogSqrt2pi - 0.5 * corrector;
290		result += y * ( corrector - 1.0 );
291	}
292      
293	x = rint ( x ); // INEXACT set as a side effect for non integer x  
294	return result;
295}
296
297double lgamma( double x )
298{
299    return lgammaApprox ( x, &signgam );
300}
301
302double lgamma_r( double, int *);
303double lgamma_r( double x, int *psigngam ) {
304	return lgammaApprox(x, psigngam);
305}