this repo has no description
at fixPythonPipStalling 199 lines 5.3 kB view raw
1#include "math.h" 2 3#if !defined(__VFP_FP__) || defined(__SOFTFP__) 4 5#warning VERY SKETCHY FMA 6double fma(double x, double y, double z) { 7 return x*y + z; 8} 9 10#else 11 12/*- 13 * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> 14 * All rights reserved. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 28 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38#define __ISO_C_VISIBLE 1999 39#include <sys/cdefs.h> 40 41#include <fenv.h> 42#include <float.h> 43 44/* 45 * Fused multiply-add: Compute x * y + z with a single rounding error. 46 * 47 * We use scaling to avoid overflow/underflow, along with the 48 * canonical precision-doubling technique adapted from: 49 * 50 * Dekker, T. A Floating-Point Technique for Extending the 51 * Available Precision. Numer. Math. 18, 224-242 (1971). 52 * 53 * This algorithm is sensitive to the rounding precision. FPUs such 54 * as the i387 must be set in double-precision mode if variables are 55 * to be stored in FP registers in order to avoid incorrect results. 56 * This is the default on FreeBSD, but not on many other systems. 57 * 58 * Hardware instructions should be used on architectures that support it, 59 * since this implementation will likely be several times slower. 60 */ 61 62double 63fma(double x, double y, double z) 64{ 65 static const double split = 0x1p27 + 1.0; 66 double xs, ys, zs; 67 double c, cc, hx, hy, p, q, tx, ty; 68 double r, rr, s; 69 int oround; 70 int ex, ey, ez; 71 int spread; 72 73 if (z == 0.0) 74 return (x * y); 75 if (x == 0.0 || y == 0.0) 76 return (x * y + z); 77 78 /* Results of frexp() are undefined for these cases. */ 79 if (!isfinite(x) || !isfinite(y) || !isfinite(z)) 80 return (x * y + z); 81 82 xs = frexp(x, &ex); 83 ys = frexp(y, &ey); 84 zs = frexp(z, &ez); 85 oround = fegetround(); 86 spread = ex + ey - ez; 87 88 /* 89 * If x * y and z are many orders of magnitude apart, the scaling 90 * will overflow, so we handle these cases specially. Rounding 91 * modes other than FE_TONEAREST are painful. 92 */ 93 if (spread > DBL_MANT_DIG * 2) { 94 fenv_t env; 95 feraiseexcept(FE_INEXACT); 96 switch(oround) { 97 case FE_TONEAREST: 98 return (x * y); 99 case FE_TOWARDZERO: 100 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 101 return (x * y); 102 feholdexcept(&env); 103 r = x * y; 104 if (!fetestexcept(FE_INEXACT)) 105 r = nextafter(r, 0); 106 feupdateenv(&env); 107 return (r); 108 case FE_DOWNWARD: 109 if (z > 0.0) 110 return (x * y); 111 feholdexcept(&env); 112 r = x * y; 113 if (!fetestexcept(FE_INEXACT)) 114 r = nextafter(r, -INFINITY); 115 feupdateenv(&env); 116 return (r); 117 default: /* FE_UPWARD */ 118 if (z < 0.0) 119 return (x * y); 120 feholdexcept(&env); 121 r = x * y; 122 if (!fetestexcept(FE_INEXACT)) 123 r = nextafter(r, INFINITY); 124 feupdateenv(&env); 125 return (r); 126 } 127 } 128 if (spread < -DBL_MANT_DIG) { 129 feraiseexcept(FE_INEXACT); 130 if (!isnormal(z)) 131 feraiseexcept(FE_UNDERFLOW); 132 switch (oround) { 133 case FE_TONEAREST: 134 return (z); 135 case FE_TOWARDZERO: 136 if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 137 return (z); 138 else 139 return (nextafter(z, 0)); 140 case FE_DOWNWARD: 141 if (x > 0.0 ^ y < 0.0) 142 return (z); 143 else 144 return (nextafter(z, -INFINITY)); 145 default: /* FE_UPWARD */ 146 if (x > 0.0 ^ y < 0.0) 147 return (nextafter(z, INFINITY)); 148 else 149 return (z); 150 } 151 } 152 153 /* 154 * Use Dekker's algorithm to perform the multiplication and 155 * subsequent addition in twice the machine precision. 156 * Arrange so that x * y = c + cc, and x * y + z = r + rr. 157 */ 158 fesetround(FE_TONEAREST); 159 160 p = xs * split; 161 hx = xs - p; 162 hx += p; 163 tx = xs - hx; 164 165 p = ys * split; 166 hy = ys - p; 167 hy += p; 168 ty = ys - hy; 169 170 p = hx * hy; 171 q = hx * ty + tx * hy; 172 c = p + q; 173 cc = p - c + q + tx * ty; 174 175 zs = ldexp(zs, -spread); 176 r = c + zs; 177 s = r - c; 178 rr = (c - (r - s)) + (zs - s) + cc; 179 180 spread = ex + ey; 181 if (spread + ilogb(r) > -1023) { 182 fesetround(oround); 183 r = r + rr; 184 } else { 185 /* 186 * The result is subnormal, so we round before scaling to 187 * avoid double rounding. 188 */ 189 p = ldexp(copysign(0x1p-1022, r), -spread); 190 c = r + p; 191 s = c - r; 192 cc = (r - (c - s)) + (p - s) + rr; 193 fesetround(oround); 194 r = (c + cc) - p; 195 } 196 return (ldexp(r, spread)); 197} 198 199#endif