this repo has no description
at fixPythonPipStalling 131 lines 4.8 kB view raw
1// float sqrtf(float) 2// 3// Portable implementation of sqrtf( ) with correct IEEE-754 default rounding. 4// 5// Assumes that integer and float have the same endianness on the target 6// platform. 7// 8// Stephen Canon, July 2010 9 10#include <math.h> 11 12#if defined __SOFTFP__ 13#include <stdint.h> 14#include <limits.h> 15 16typedef float fp_t; 17typedef uint32_t rep_t; 18static const int significandBits = 23; 19#define REP_C UINT32_C 20 21static inline int rep_clz(rep_t a) { 22 return __builtin_clz(a); 23} 24 25static inline rep_t toRep(fp_t x) { 26 const union { rep_t i; fp_t f; } rep = { .f = x }; 27 return rep.i; 28} 29 30static inline fp_t fromRep(rep_t x) { 31 const union { rep_t i; fp_t f; } rep = { .i = x }; 32 return rep.f; 33} 34 35static inline uint32_t mulhi(uint32_t a, uint32_t b) { 36 return (uint64_t)a*b >> 32; 37} 38 39fp_t sqrtf(fp_t x) { 40 41 // Various constants parametrized by the type of x: 42 static const int typeWidth = sizeof(rep_t) * CHAR_BIT; 43 static const int exponentBits = typeWidth - significandBits - 1; 44 static const int exponentBias = (1 << (exponentBits - 1)) - 1; 45 static const rep_t minNormal = REP_C(1) << significandBits; 46 static const rep_t significandMask = minNormal - 1; 47 static const rep_t signBit = REP_C(1) << (typeWidth - 1); 48 static const rep_t absMask = signBit - 1; 49 static const rep_t infRep = absMask ^ significandMask; 50 static const rep_t qnan = infRep | REP_C(1) << (significandBits - 1); 51 52 // Extract the various important bits of x 53 const rep_t xRep = toRep(x); 54 rep_t significand = xRep & significandMask; 55 int exponent = (xRep >> significandBits) - exponentBias; 56 57 // Using an unsigned integer compare, we can detect all of the special 58 // cases with a single branch: zero, denormal, negative, infinity, or NaN. 59 if (xRep - minNormal >= infRep - minNormal) { 60 const rep_t xAbs = xRep & absMask; 61 // sqrt(+/- 0) = +/- 0 62 if (xAbs == 0) return x; 63 // sqrt(NaN) = qNaN 64 if (xAbs > infRep) return fromRep(qnan | xRep); 65 // sqrt(negative) = qNaN 66 if (xRep > signBit) return fromRep(qnan); 67 // sqrt(infinity) = infinity 68 if (xRep == infRep) return x; 69 70 // normalize denormals and fall back into the mainline 71 const int shift = rep_clz(significand) - rep_clz(minNormal); 72 significand <<= shift; 73 exponent += 1 - shift; 74 } 75 76 // Insert the implicit bit of the significand. If x was denormal, then 77 // this bit was already set by the normalization process, but it won't hurt 78 // to set it twice. 79 significand |= minNormal; 80 81 // Halve the exponent to get the exponent of the result, and transform the 82 // significand into a Q30 fixed-point xQ30 in the range [1,4) -- if the 83 // exponent of x is odd, then xQ30 is in [2,4); if it is even, then xQ30 84 // is in [1,2). 85 const int resultExponent = exponent >> 1; 86 uint32_t xQ30 = significand << (7 + (exponent & 1)); 87 88 // Q32 linear approximation to the reciprocal square root of xQ30. This 89 // approximation is good to a bit more than 3.5 bits: 90 // 91 // 1/sqrt(a) ~ 1.1033542890963095 - a/6 92 const uint32_t oneSixthQ34 = UINT32_C(0xaaaaaaaa); 93 uint32_t recipQ32 = UINT32_C(0x1a756d3b) - mulhi(oneSixthQ34, xQ30); 94 95 // Newton-Raphson iterations to improve our reciprocal: 96 const uint32_t threeQ30 = UINT32_C(0xc0000000); 97 uint32_t residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32)); 98 recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1; 99 residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32)); 100 recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1; 101 residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32)); 102 recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1; 103 residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32)); 104 recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1; 105 106 // recipQ32 now holds an approximate 1/sqrt(x). Multiply by x to get an 107 // initial sqrt(x) in Q23. From the construction of this estimate, we know 108 // that it is either the correctly rounded significand of the result or one 109 // less than the correctly rounded significand (the -2 guarantees that we 110 // fall on the correct side of the actual square root). 111 rep_t result = (mulhi(recipQ32, xQ30) - 2) >> 7; 112 113 // Compute the residual x - result*result to decide if the result needs to 114 // be rounded up. 115 rep_t residual = (xQ30 << 16) - result*result; 116 result += residual > result; 117 118 // Clear the implicit bit of result: 119 result &= significandMask; 120 // Insert the exponent: 121 result |= (rep_t)(resultExponent + exponentBias) << significandBits; 122 return fromRep(result); 123} 124 125#else // __SOFTFP__ 126 127float sqrtf(float x) { 128 return __builtin_sqrtf(x); 129} 130 131#endif // __SOFTFP__