this repo has no description
at fixPythonPipStalling 265 lines 7.1 kB view raw
1/* 2 * fmodf.c 3 * cLibm 4 * 5 * Written by Jon Okada, started on December 7th, 1992. 6 * Modified by Paul Finlayson (PAF) for MathLib v2. 7 * Modified by A. Sazegari (ali) for MathLib v3. 8 * Modified and ported by Robert A. Murley (ram) for Mac OS X. 9 * Modified for cLibm, fixed a few edge cases, rewrote local_ funcs by Ian Ollmann. 10 * 11 * Copyright 2007 Apple Inc. All rights reserved. 12 * 13 */ 14 15 #include <math.h> 16 #include <float.h> 17 #include <stdint.h> 18 19#ifdef ARMLIBM_SET_FLAGS 20 #include "required_arithmetic.h" 21#endif 22 23static inline int local_ilogbf( float x ) __attribute__ ((always_inline)); 24static inline int local_ilogbf( float x ) 25{ 26 union{ float f; uint32_t u;}u = {x}; 27 28 u.u &= 0x7fffffff; 29 int32_t exp = u.u >> 23; 30 31 if( __builtin_expect( (uint32_t) exp - 1U >= 254, 0 ) ) 32 { // +-denorm, other possibilities: +-0, +-inf, NaN are screened out by caller 33 34 u.u |= 0x3f800000U; 35 u.f -= 1.0f; 36 exp = u.u >> 23; 37 38 return exp - (127+126); 39 } 40 41 return exp - 127; 42} 43 44static inline float local_scalbnf( float x, int n ) __attribute__ ((always_inline)); 45static inline float local_scalbnf( float x, int n ) 46{ 47 //create 2**n in double 48 union{ uint64_t u; double d;} u = { ( (int64_t) n + 1023) << 52 }; 49 double d = x; 50 51 //scale by appropriate power of 2 52 d *= u.d; 53 54 //return result 55 return (float) d; 56} 57 58 59float fmodf( float x, float y ) 60{ 61 union{ float f; uint32_t u; }ux = {x}; 62 union{ float f; uint32_t u; }uy = {y}; 63 64 uint32_t absx = ux.u & 0x7fffffffU; 65 uint32_t absy = uy.u & 0x7fffffffU; 66 if( absx - 1U >= 0x7f7fffffU || absy - 1U >= 0x7f7fffffU ) 67 { 68 float fabsx = __builtin_fabsf(x); 69 float fabsy = __builtin_fabsf(y); 70 71 // deal with NaN 72 if( x != x || y != y ) 73 return x + y; 74 75 //x = Inf or y = 0, return Invalid per IEEE-754 76 if( fabsx == __builtin_inff() || 0.0f == y ) 77 { 78#ifdef ARMLIBM_SET_FLAGS 79 return required_add_float( __builtin_inff(), -__builtin_inff() ); //set invalid 80#else 81 return __builtin_nan(""); 82#endif 83 } 84 85 //handle trivial case 86 if( fabsy == __builtin_inff() || 0.0f == x ) 87 { 88#ifdef ARMLIBM_SET_FLAGS 89 required_add_float( fabsx, 0.0f ); // signal underflow (that is, no flag set, 90 // but exception occurs if unmasked) if x is denorm 91#endif 92 return x; 93 } 94 } 95 96 if( absy >= absx ) 97 { 98 if( absy == absx ) 99 { 100 ux.u &= 0x80000000; 101 return ux.f; 102 } 103 return x; 104 } 105 106 //extract exponent, mantissa 107 int32_t expx = absx >> 23; 108 int32_t expy = absy >> 23; 109 int32_t sx = absx & 0x007fffff; // x significand 110 int32_t sy = absy & 0x007fffff; // y significand 111 if( 0 == expx ) //denormal x 112 { 113 //calculate shift to move leading bit to exponents field 114 uint32_t shift = __builtin_clzl( absx ) - (8*sizeof(long) - 24); 115 sx <<= shift; //do the shift 116 expx = 1-shift; //adjust the biased exponent accordingly, -1 here for the implicit shift to make implicit denorm leading bit explicit 117 } 118 if( 0 == expy ) //denormal y 119 { 120 //calculate shift to move leading bit to exponents field 121 uint32_t shift = __builtin_clzl( absy ) - (8*sizeof(long) - 24); 122 sy <<= shift; //do the shift 123 expy = 1-shift; //adjust the biased exponent accordingly, -1 here for the implicit shift to make implicit denorm leading bit explicit 124 } 125 //make implicit bits explicit 126 sx |= 0x00800000; 127 sy |= 0x00800000; 128 129 130 // The idea here is to iterate until the exponent of x is the same as y 131 // Calculate the number of bits we can safely shave off before we reach parity 132 int32_t idiff = expx - expy; 133 int32_t shift = 0; 134 int32_t mask; 135 136 //since idiff is always >= 0 here, it is safe to enter 137 //We always shift by shift+1 here, so in the first iteration, the worst that can happen 138 do 139 { 140 // move the leading bit of x to the 23rd bit 141 sx <<= shift; 142 143 //Keep track that we did that 144 idiff += ~shift; // idiff -= shift + 1, +1 for the shift below 145 146 //The two values both have the 23rd bit set as the leading bit (24 bit unsigned number) 147 //subtract one from the other. This gives us a signed 23 bit number in the range { -0x00ffffff ... 0x00ffffff } 148 sx -= sy; 149 150 //record the sign 151 mask = sx >> 31; 152 153 //shift x left 1 to restore a 24 bit quantity 154 sx += sx; //this is potentially 1 shift too far, which we patch up later 155 156 //if negative, we add back sy to restore to postiveness. This is the same as x - y + 0.5y = - 0.5y 157 // instead of x-y. We've effectively hoisted the subtract that would have appeared in the next loop 158 // iteration here, and saved a test+branch in exchange for a shift and and. (The add happens either way.) 159 sx += sy & mask; 160 161 //figure out how far we need to shift sx to get the leading bit into the 23rd position 162 shift = __builtin_clzl( sx ) - (8*sizeof(long) - 24); 163 } 164 while( idiff >= shift && sx != 0); 165 166 //We may have gone too far 167 if( idiff < 0 ) 168 { 169 //If so, rewind a little. 170 // if sx - sy succeeded, it was the right thing to do, the only thing we did wrong was the shift 171 // if sx - sy yielded a negative number, then we shouldn't have done that either 172 sx += sy & mask; 173 sx >>= 1; 174//debug code to make sure we didn't do something dumb here 175 idiff = 0; 176 } 177 178 //We may have some bits left over to shift in. 179 sx <<= idiff; 180 181//convert back to float 182 if( 0 == sx ) 183 { 184 ux.u &= 0x80000000; 185 return ux.f; 186 } 187 188 //figure out how far we need to shift in order to move leading bit into exponent field 189 shift = __builtin_clzl( sx ) - (8*sizeof(long) - 24); 190 sx <<= shift; // move leading bit into exponent field 191 expy -= shift; // account for the shift in the exponent 192 sx &= 0x007fffff; // remove leading bit 193 sx |= ux.u & 0x80000000; //or in sign 194 if( expy > 0 ) 195 { 196 ux.u = sx | (expy << 23); 197 return ux.f; 198 } 199 200 //denormal 201 expy += 126; 202 ux.u = sx | (expy << 23); 203 return ux.f * 0x1.0p-126f; 204} 205 206//old slower floating point based one. 207/*{ 208 union{ float f; uint32_t u; }ux = {x}; 209 union{ float f; uint32_t u; }uy = {y}; 210 211 float fabsx = __builtin_fabsf(x); 212 float fabsy = __builtin_fabsf(y); 213 if( (ux.u & 0x7fffffffU) - 1U >= 0x7f7fffffU || (uy.u & 0x7fffffffU) - 1U >= 0x7f7fffffU ) 214 { 215 // deal with NaN 216 if( x != x || y != y ) 217 return x + y; 218 219 //x = Inf or y = 0, return Invalid per IEEE-754 220 if( fabsx == __builtin_inff() || 0.0f == y ) 221 { 222 return required_add_float( __builtin_inff(), -__builtin_inff() ); //set invalid 223 } 224 225 //handle trivial case 226 if( fabsy == __builtin_inff() || 0.0f == x ) 227 { 228 required_add_float( fabsx, 0.0f ); //signal underflow (that is, no flag set, but exception occurs if unmasked) if x is denorm 229 return x; 230 } 231 } 232 233 if( fabsy > fabsx ) 234 return x; 235 236 //we have to work 237 int32_t iscx = local_ilogbf(fabsx); 238 int32_t iscy = local_ilogbf(fabsy); 239 int32_t idiff = iscx - iscy; 240 float x1, y1; 241 x1 = fabsx; 242 y1 = fabsy; 243 int32_t i; 244 245 if( idiff ) 246 { 247 y1 = local_scalbnf( y1, -iscy ); 248 x1 = local_scalbnf( x1, -iscx ); 249 for( i = idiff; i != 0; i-- ) 250 { 251 if( x1 >= y1 ) 252 x1 -= y1; 253 x1 += x1; 254 } 255 x1 = local_scalbnf( x1, iscy ); 256 } 257 if( x1 >= fabsy ) 258 x1 -= fabsy; 259 260 if( x < 0 ) 261 x1 = -x1; 262 263 return x1; 264} 265*/