src/libm/Source/ARM/fmodf.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / fmodf.c
at fixPythonPipStalling 265 lines 7.1 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 *  fmodf.c
  3 *  cLibm
  4 *
  5 *     Written by Jon Okada, started on December 7th, 1992.                      
  6 *     Modified by Paul Finlayson (PAF) for MathLib v2.                          
  7 *     Modified by A. Sazegari (ali) for MathLib v3.                             
  8 *     Modified and ported by Robert A. Murley (ram) for Mac OS X.               
  9 *	   Modified for cLibm, fixed a few edge cases, rewrote local_ funcs by Ian Ollmann.
 10 *
 11 *  Copyright 2007 Apple Inc. All rights reserved.
 12 *
 13 */
 14 
 15 #include <math.h>
 16 #include <float.h>
 17 #include <stdint.h>
 18
 19#ifdef ARMLIBM_SET_FLAGS
 20 #include "required_arithmetic.h"
 21#endif
 22
 23static inline int local_ilogbf( float x ) __attribute__ ((always_inline));
 24static inline int local_ilogbf( float x )
 25{
 26	union{ float f; uint32_t u;}u = {x};
 27	
 28	u.u &= 0x7fffffff;
 29	int32_t exp = u.u >> 23;
 30	
 31	if( __builtin_expect( (uint32_t) exp - 1U >= 254, 0 ) )
 32	{ // +-denorm, other possibilities: +-0, +-inf, NaN are screened out by caller
 33	
 34		u.u |= 0x3f800000U;
 35		u.f -= 1.0f;
 36		exp = u.u >> 23;
 37		
 38		return exp - (127+126);
 39	}
 40
 41	return exp - 127;
 42}
 43
 44static inline float local_scalbnf( float x, int n ) __attribute__ ((always_inline));
 45static inline float local_scalbnf( float x, int n )
 46{
 47	//create 2**n in double
 48	union{ uint64_t u; double d;} u = {  ( (int64_t) n + 1023) << 52 };
 49	double d = x;
 50 
 51	//scale by appropriate power of 2
 52	d *= u.d;
 53 
 54	//return result
 55	return (float) d;
 56}
 57
 58
 59float fmodf( float x, float y )
 60{
 61	union{ float f; uint32_t u; }ux = {x};
 62	union{ float f; uint32_t u; }uy = {y};
 63
 64	uint32_t absx = ux.u & 0x7fffffffU;
 65	uint32_t absy = uy.u & 0x7fffffffU;
 66	if( absx - 1U >= 0x7f7fffffU || absy - 1U >= 0x7f7fffffU )
 67	{
 68		float fabsx = __builtin_fabsf(x);	
 69		float fabsy = __builtin_fabsf(y);
 70
 71		// deal with NaN
 72		if( x != x || y != y )
 73			return x + y;
 74
 75		//x = Inf or y = 0, return Invalid per IEEE-754
 76		if( fabsx == __builtin_inff() || 0.0f == y )
 77		{
 78#ifdef ARMLIBM_SET_FLAGS
 79			return required_add_float( __builtin_inff(), -__builtin_inff() );	//set invalid
 80#else
 81			return __builtin_nan("");
 82#endif
 83		}
 84
 85		//handle trivial case
 86		if( fabsy == __builtin_inff() || 0.0f == x )
 87		{
 88#ifdef ARMLIBM_SET_FLAGS
 89			required_add_float( fabsx, 0.0f );	// signal underflow (that is, no flag set,
 90												// but exception occurs if unmasked) if x is denorm
 91#endif
 92			return x;
 93		}
 94	}
 95 
 96	if( absy >= absx )
 97	{
 98		if( absy == absx )
 99		{
100			ux.u &= 0x80000000;
101			return ux.f;
102		}
103		return x;
104	}
105 
106	//extract exponent, mantissa
107	int32_t expx = absx >> 23;
108	int32_t expy = absy >> 23;
109	int32_t sx = absx & 0x007fffff;			// x significand
110	int32_t sy = absy & 0x007fffff;			// y significand
111	if( 0 == expx )		//denormal x
112	{
113		//calculate shift to move leading bit to exponents field
114		uint32_t shift = __builtin_clzl( absx ) - (8*sizeof(long) - 24);
115		sx <<= shift;	//do the shift
116		expx = 1-shift;	//adjust the biased exponent accordingly, -1 here for the implicit shift to make implicit denorm leading bit explicit
117	}
118	if( 0 == expy )		//denormal y
119	{
120		//calculate shift to move leading bit to exponents field
121		uint32_t shift = __builtin_clzl( absy ) - (8*sizeof(long) - 24);
122		sy <<= shift;	//do the shift
123		expy = 1-shift;	//adjust the biased exponent accordingly, -1 here for the implicit shift to make implicit denorm leading bit explicit
124	}
125	//make implicit bits explicit
126	sx |= 0x00800000;
127	sy |= 0x00800000;
128	
129		
130	// The idea here is to iterate until the exponent of x is the same as y
131	// Calculate the number of bits we can safely shave off before we reach parity
132	int32_t idiff = expx - expy;	
133	int32_t shift = 0;
134	int32_t mask;	
135
136	//since idiff is always >= 0 here, it is safe to enter
137	//We always shift by shift+1 here, so in the first iteration, the worst that can happen
138	do
139	{
140		// move the leading bit of x to the 23rd bit
141		sx <<= shift;
142
143		//Keep track that we did that
144		idiff += ~shift;			// idiff -= shift + 1, +1 for the shift below
145	
146		//The two values both have the 23rd bit set as the leading bit  (24 bit unsigned number)
147		//subtract one from the other. This gives us a signed 23 bit number in the range { -0x00ffffff ... 0x00ffffff }
148		sx -= sy;		
149
150		//record the sign
151		mask = sx >> 31;
152
153		//shift x left 1 to restore a 24 bit quantity
154		sx += sx;		//this is potentially 1 shift too far, which we patch up later
155
156		//if negative, we add back sy to restore to postiveness. This is the same as x - y + 0.5y = - 0.5y 
157		// instead of x-y. We've effectively hoisted the subtract that would have appeared in the next loop
158		// iteration here, and saved a test+branch in exchange for a shift and and. (The add happens either way.)
159		sx += sy & mask;	
160
161		//figure out how far we need to shift sx to get the leading bit into the 23rd position
162		shift = __builtin_clzl( sx ) - (8*sizeof(long) - 24);
163	}
164	while( idiff >= shift && sx != 0);
165	
166	//We may have gone too far 
167	if( idiff < 0 )	
168	{
169		//If so, rewind a little.
170		// if sx - sy succeeded, it was the right thing to do, the only thing we did wrong was the shift
171		// if sx - sy yielded a negative number, then we shouldn't have done that either
172		sx += sy & mask;
173		sx >>= 1;
174//debug code to make sure we didn't do something dumb here
175		idiff = 0;
176	}
177
178	//We may have some bits left over to shift in. 
179	sx <<= idiff;
180	
181//convert back to float
182	if( 0 == sx )
183	{
184		ux.u &= 0x80000000;
185		return ux.f;
186	}
187			
188	//figure out how far we need to shift in order to move leading bit into exponent field
189	shift = __builtin_clzl( sx ) - (8*sizeof(long) - 24);
190	sx <<= shift;				// move leading bit into exponent field
191	expy -= shift;				// account for the shift in the exponent
192	sx &= 0x007fffff;			// remove leading bit
193	sx |= ux.u & 0x80000000;	//or in sign
194	if( expy > 0 )
195	{
196		ux.u = sx | (expy << 23);
197		return ux.f;
198	}
199	
200	//denormal
201	expy += 126;
202	ux.u = sx | (expy << 23);
203	return ux.f * 0x1.0p-126f;
204}
205
206//old slower floating point based one.
207/*{
208	union{ float f; uint32_t u; }ux = {x};
209	union{ float f; uint32_t u; }uy = {y};
210
211	float fabsx = __builtin_fabsf(x);	
212	float fabsy = __builtin_fabsf(y);
213	if( (ux.u & 0x7fffffffU) - 1U >= 0x7f7fffffU || (uy.u & 0x7fffffffU) - 1U >= 0x7f7fffffU )
214	{
215		// deal with NaN
216		if( x != x || y != y )
217			return x + y;
218
219		//x = Inf or y = 0, return Invalid per IEEE-754
220		if( fabsx == __builtin_inff() || 0.0f == y )
221		{
222			return required_add_float( __builtin_inff(), -__builtin_inff() );	//set invalid
223		}
224
225		//handle trivial case
226		if( fabsy == __builtin_inff() || 0.0f == x )
227		{
228			required_add_float( fabsx, 0.0f );			//signal underflow (that is, no flag set, but exception occurs if unmasked) if x is denorm
229			return x;
230		}
231	}
232 
233	if( fabsy > fabsx )
234		return x;
235 
236	//we have to work
237	int32_t iscx = local_ilogbf(fabsx);
238	int32_t iscy = local_ilogbf(fabsy);
239	int32_t idiff = iscx - iscy;
240	float x1, y1;
241	x1 = fabsx;
242	y1 = fabsy;
243	int32_t i;
244		
245	if( idiff )
246	{
247		y1 = local_scalbnf( y1, -iscy );
248		x1 = local_scalbnf( x1, -iscx );
249		for( i = idiff; i != 0; i-- )
250		{
251			if( x1 >= y1 )
252				x1 -= y1;
253			x1 += x1;
254		}
255		x1 = local_scalbnf( x1, iscy );
256	}
257	if( x1 >= fabsy )
258		x1 -= fabsy;
259	 
260	if( x < 0 )
261		x1 = -x1;
262	
263	return x1;
264}
265*/