src/libm/Source/ARM/sqrtf.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / sqrtf.c
at fixPythonPipStalling 131 lines 4.8 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1// float sqrtf(float)
  2//
  3// Portable implementation of sqrtf( ) with correct IEEE-754 default rounding.
  4//
  5// Assumes that integer and float have the same endianness on the target
  6// platform.
  7//
  8// Stephen Canon, July 2010
  9
 10#include <math.h>
 11
 12#if defined __SOFTFP__
 13#include <stdint.h>
 14#include <limits.h>
 15
 16typedef float fp_t;
 17typedef uint32_t rep_t;
 18static const int significandBits = 23;
 19#define REP_C UINT32_C
 20
 21static inline int rep_clz(rep_t a) {
 22    return __builtin_clz(a);
 23}
 24
 25static inline rep_t toRep(fp_t x) {
 26    const union { rep_t i; fp_t f; } rep = { .f = x };
 27    return rep.i;
 28}
 29
 30static inline fp_t fromRep(rep_t x) {
 31    const union { rep_t i; fp_t f; } rep = { .i = x };
 32    return rep.f;
 33}
 34
 35static inline uint32_t mulhi(uint32_t a, uint32_t b) {
 36    return (uint64_t)a*b >> 32;
 37}
 38
 39fp_t sqrtf(fp_t x) {
 40    
 41    // Various constants parametrized by the type of x:
 42    static const int typeWidth = sizeof(rep_t) * CHAR_BIT;
 43    static const int exponentBits = typeWidth - significandBits - 1;
 44    static const int exponentBias = (1 << (exponentBits - 1)) - 1;
 45    static const rep_t minNormal = REP_C(1) << significandBits;
 46    static const rep_t significandMask = minNormal - 1;
 47    static const rep_t signBit = REP_C(1) << (typeWidth - 1);
 48    static const rep_t absMask = signBit - 1;
 49    static const rep_t infRep = absMask ^ significandMask;
 50    static const rep_t qnan = infRep | REP_C(1) << (significandBits - 1);
 51    
 52    // Extract the various important bits of x
 53    const rep_t xRep = toRep(x);
 54    rep_t significand = xRep & significandMask;
 55    int exponent = (xRep >> significandBits) - exponentBias;
 56    
 57    // Using an unsigned integer compare, we can detect all of the special
 58    // cases with a single branch: zero, denormal, negative, infinity, or NaN.
 59    if (xRep - minNormal >= infRep - minNormal) {
 60        const rep_t xAbs = xRep & absMask;
 61        // sqrt(+/- 0) = +/- 0
 62        if (xAbs == 0) return x;
 63        // sqrt(NaN) = qNaN
 64        if (xAbs > infRep) return fromRep(qnan | xRep);
 65        // sqrt(negative) = qNaN
 66        if (xRep > signBit) return fromRep(qnan);
 67        // sqrt(infinity) = infinity
 68        if (xRep == infRep) return x;
 69        
 70        // normalize denormals and fall back into the mainline
 71        const int shift = rep_clz(significand) - rep_clz(minNormal);
 72        significand <<= shift;
 73        exponent += 1 - shift;
 74    }
 75    
 76    // Insert the implicit bit of the significand.  If x was denormal, then
 77    // this bit was already set by the normalization process, but it won't hurt
 78    // to set it twice.
 79    significand |= minNormal;
 80    
 81    // Halve the exponent to get the exponent of the result, and transform the
 82    // significand into a Q30 fixed-point xQ30 in the range [1,4) -- if the
 83    // exponent of x is odd, then xQ30 is in [2,4); if it is even, then xQ30
 84    // is in [1,2).
 85    const int resultExponent = exponent >> 1;
 86    uint32_t xQ30 = significand << (7 + (exponent & 1));
 87    
 88    // Q32 linear approximation to the reciprocal square root of xQ30.  This
 89    // approximation is good to a bit more than 3.5 bits:
 90    //
 91    //     1/sqrt(a) ~ 1.1033542890963095 - a/6
 92    const uint32_t oneSixthQ34 = UINT32_C(0xaaaaaaaa);
 93    uint32_t recipQ32 = UINT32_C(0x1a756d3b) - mulhi(oneSixthQ34, xQ30);
 94    
 95    // Newton-Raphson iterations to improve our reciprocal:
 96    const uint32_t threeQ30 = UINT32_C(0xc0000000);
 97    uint32_t residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32));
 98    recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1;
 99    residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32));
100    recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1;
101    residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32));
102    recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1;
103    residualQ30 = mulhi(xQ30, mulhi(recipQ32, recipQ32));
104    recipQ32 = mulhi(recipQ32, threeQ30 - residualQ30) << 1;
105    
106    // recipQ32 now holds an approximate 1/sqrt(x).  Multiply by x to get an
107    // initial sqrt(x) in Q23.  From the construction of this estimate, we know
108    // that it is either the correctly rounded significand of the result or one
109    // less than the correctly rounded significand (the -2 guarantees that we
110    // fall on the correct side of the actual square root).
111    rep_t result = (mulhi(recipQ32, xQ30) - 2) >> 7;
112    
113    // Compute the residual x - result*result to decide if the result needs to
114    // be rounded up.
115    rep_t residual = (xQ30 << 16) - result*result;
116    result += residual > result;
117    
118    // Clear the implicit bit of result:
119    result &= significandMask;
120    // Insert the exponent:
121    result |= (rep_t)(resultExponent + exponentBias) << significandBits;
122    return fromRep(result);    
123}
124
125#else // __SOFTFP__
126
127float sqrtf(float x) {
128	return __builtin_sqrtf(x);
129}
130
131#endif // __SOFTFP__