src/libm/Source/Intel/fmaf.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / fmaf.c
at fixPythonPipStalling 124 lines 5.4 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1// float fmaf(float a, float b, float c)
  2//
  3// The fmaf function returns a*b + c computed without intermediate rounding, and
  4// sets the flags appropriately.  IEEE-754 leaves undefined whether or not an
  5// implementation should set the invalid flag for fmaf(0,inf,NaN).  We choose
  6// not to do so in this implementation.
  7//
  8// This implementation is intended to be fully IEEE-754 compliant.  It is
  9// designed to produce the correctly rounded result in all rounding modes,
 10// and sets all flags appropriately.  It is intended to be relatively easily
 11// ported across architectures and compilers; the one issue to be aware of
 12// is the use of a union for type-punning double and uint64_t.  Some compilers
 13// will produce incorrect codegen for this due to overambitious type-based
 14// aliasing optimizations.  Make sure to check the generated code with your
 15// compiler.
 16//
 17// Stephen Canon, January 2010
 18
 19#include <stdint.h>
 20#include "math.h"
 21
 22float fmaf(float a, float b, float c) {
 23    
 24    // Detect if c is NaN to prevent fmaf(0, inf, NaN) from setting invalid.
 25    // If a or b is NaN, falling through the default path generates the correct
 26    // qNaN result.
 27    
 28    if (isnan(c)) return c + c;
 29    
 30    // Naïve computation of a*b + c in double.  Rounding this back to single
 31    // gives the correct result in all but one-in-a-billion cases.
 32    
 33    double product = (double)a*b;
 34    double resultRoundedToDouble = product + c;
 35    
 36    // Interlude -- possible incorrect double-rounding scenarios when an
 37    // infinitely precise result is rounded first to double, then to single:
 38    //
 39    // In round to nearest:
 40    // --------------------
 41    //                        bits 0 - 23   bits 24 - 52   bits 53 -
 42    //                result     .....0      1000...00        ...1...
 43    //        (double)result     .....0      1000...00
 44    // (float)(double)result     .....0
 45    //         (float)result     .....1
 46    //
 47    //                        bits 0 - 23   bits 24 - 52   bits 53 -
 48    //                result     .....1      0111...11        1...1..
 49    //        (double)result     .....1      1000...00
 50    // (float)(double)result     .....0
 51    //         (float)result     .....1
 52    //
 53    // In the directed rounding modes, double rounding does not occur.
 54    //
 55    // In order to protect against double rounding, we check if
 56    // resultRoundedToDouble is an exact halfway value for rounding to single.
 57    // If it isn't, then rounding a second time (to single) gives the correct
 58    // result.
 59    
 60    union {double d; uint64_t x;} bitPattern = {.d = resultRoundedToDouble};
 61    uint32_t roundingBits = (uint32_t)bitPattern.x & UINT32_C(0x1fffffff);
 62    
 63    if (roundingBits != UINT32_C(0x10000000))
 64        return (float)resultRoundedToDouble;
 65    
 66    // Here we know that we were in an exact halfway case, so we need to find
 67    // the low part of the sum and massage its information into the low part
 68    // of the double precision result.
 69    //
 70    // We calculate the low part of the sum as follows: let "big" be the larger
 71    // (in magnitude) of "product" and "c", and let "small" be the smaller.
 72    // We already know resultRoundedToDouble, big, and small in the following
 73    // equation:
 74    //
 75    //     resultRoundedToDouble + lowPart = big + small
 76    //     
 77    // so we can compute "lowPart" as:
 78    //
 79    //      lowPart = (big - resultRoundedToDouble) + small
 80    //
 81    // This cannot generate spurious NaN/invalid because if resultRoundedToDouble
 82    // were inf, we wouldn't have matched as a halfway case, and wouldn't be on
 83    // this codepath.
 84    //
 85    // Note also that "lowPart" will not be exact in some circumstances in
 86    // directed rounding modes.  This is OK, because we are only going to use
 87    // it to twiddle the low bit of an exact-halfway case for round-to-nearest;
 88    // if we are in a directed rounding mode, this does not effect the final
 89    // result.  It may be an inexact operation, but the inexact flag is already
 90    // raised at this point if that is the case.
 91    
 92    double lowPart;
 93    if (__builtin_fabs(product) >= __builtin_fabs(c))
 94        lowPart = (product - resultRoundedToDouble) + c;
 95    else
 96        lowPart = (c - resultRoundedToDouble) + product;
 97    
 98    // If the lowPart is non-zero, and has the same sign as the result,
 99    // add one to the bit representation of the double-precision result
100    // to act as sticky. This cannot spuriously raise inexact, because
101    // inexact is already (correctly) raised if lowPart is non-zero.  The
102    // product cannot over/underflow because the inputs originally came from
103    // single precision, so the exponents are well controlled.
104    
105    if (lowPart*resultRoundedToDouble > 0.0) {
106        bitPattern.x += 1;
107        resultRoundedToDouble = bitPattern.d;
108    }
109    
110    // If instead the signs are opposite, subtract one.
111    
112    if (lowPart*resultRoundedToDouble < 0.0) {
113        bitPattern.x -= 1;
114        resultRoundedToDouble = bitPattern.d;
115    }
116    
117    // If the low part is exactly zero, then this really is an exact halfway
118    // case for rounding to single, and no action is necessary.
119    
120    // Now that we have massaged in a sticky bit for the low part, we can round
121    // to single-precision and get the correct final result.
122    
123    return (float)resultRoundedToDouble;
124}