this repo has no description
1#include "math.h"
2
3#if !defined(__VFP_FP__) || defined(__SOFTFP__)
4
5#warning VERY SKETCHY FMA
6double fma(double x, double y, double z) {
7 return x*y + z;
8}
9
10#else
11
12/*-
13 * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
14 * All rights reserved.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38#define __ISO_C_VISIBLE 1999
39#include <sys/cdefs.h>
40
41#include <fenv.h>
42#include <float.h>
43
44/*
45 * Fused multiply-add: Compute x * y + z with a single rounding error.
46 *
47 * We use scaling to avoid overflow/underflow, along with the
48 * canonical precision-doubling technique adapted from:
49 *
50 * Dekker, T. A Floating-Point Technique for Extending the
51 * Available Precision. Numer. Math. 18, 224-242 (1971).
52 *
53 * This algorithm is sensitive to the rounding precision. FPUs such
54 * as the i387 must be set in double-precision mode if variables are
55 * to be stored in FP registers in order to avoid incorrect results.
56 * This is the default on FreeBSD, but not on many other systems.
57 *
58 * Hardware instructions should be used on architectures that support it,
59 * since this implementation will likely be several times slower.
60 */
61
62double
63fma(double x, double y, double z)
64{
65 static const double split = 0x1p27 + 1.0;
66 double xs, ys, zs;
67 double c, cc, hx, hy, p, q, tx, ty;
68 double r, rr, s;
69 int oround;
70 int ex, ey, ez;
71 int spread;
72
73 if (z == 0.0)
74 return (x * y);
75 if (x == 0.0 || y == 0.0)
76 return (x * y + z);
77
78 /* Results of frexp() are undefined for these cases. */
79 if (!isfinite(x) || !isfinite(y) || !isfinite(z))
80 return (x * y + z);
81
82 xs = frexp(x, &ex);
83 ys = frexp(y, &ey);
84 zs = frexp(z, &ez);
85 oround = fegetround();
86 spread = ex + ey - ez;
87
88 /*
89 * If x * y and z are many orders of magnitude apart, the scaling
90 * will overflow, so we handle these cases specially. Rounding
91 * modes other than FE_TONEAREST are painful.
92 */
93 if (spread > DBL_MANT_DIG * 2) {
94 fenv_t env;
95 feraiseexcept(FE_INEXACT);
96 switch(oround) {
97 case FE_TONEAREST:
98 return (x * y);
99 case FE_TOWARDZERO:
100 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
101 return (x * y);
102 feholdexcept(&env);
103 r = x * y;
104 if (!fetestexcept(FE_INEXACT))
105 r = nextafter(r, 0);
106 feupdateenv(&env);
107 return (r);
108 case FE_DOWNWARD:
109 if (z > 0.0)
110 return (x * y);
111 feholdexcept(&env);
112 r = x * y;
113 if (!fetestexcept(FE_INEXACT))
114 r = nextafter(r, -INFINITY);
115 feupdateenv(&env);
116 return (r);
117 default: /* FE_UPWARD */
118 if (z < 0.0)
119 return (x * y);
120 feholdexcept(&env);
121 r = x * y;
122 if (!fetestexcept(FE_INEXACT))
123 r = nextafter(r, INFINITY);
124 feupdateenv(&env);
125 return (r);
126 }
127 }
128 if (spread < -DBL_MANT_DIG) {
129 feraiseexcept(FE_INEXACT);
130 if (!isnormal(z))
131 feraiseexcept(FE_UNDERFLOW);
132 switch (oround) {
133 case FE_TONEAREST:
134 return (z);
135 case FE_TOWARDZERO:
136 if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
137 return (z);
138 else
139 return (nextafter(z, 0));
140 case FE_DOWNWARD:
141 if (x > 0.0 ^ y < 0.0)
142 return (z);
143 else
144 return (nextafter(z, -INFINITY));
145 default: /* FE_UPWARD */
146 if (x > 0.0 ^ y < 0.0)
147 return (nextafter(z, INFINITY));
148 else
149 return (z);
150 }
151 }
152
153 /*
154 * Use Dekker's algorithm to perform the multiplication and
155 * subsequent addition in twice the machine precision.
156 * Arrange so that x * y = c + cc, and x * y + z = r + rr.
157 */
158 fesetround(FE_TONEAREST);
159
160 p = xs * split;
161 hx = xs - p;
162 hx += p;
163 tx = xs - hx;
164
165 p = ys * split;
166 hy = ys - p;
167 hy += p;
168 ty = ys - hy;
169
170 p = hx * hy;
171 q = hx * ty + tx * hy;
172 c = p + q;
173 cc = p - c + q + tx * ty;
174
175 zs = ldexp(zs, -spread);
176 r = c + zs;
177 s = r - c;
178 rr = (c - (r - s)) + (zs - s) + cc;
179
180 spread = ex + ey;
181 if (spread + ilogb(r) > -1023) {
182 fesetround(oround);
183 r = r + rr;
184 } else {
185 /*
186 * The result is subnormal, so we round before scaling to
187 * avoid double rounding.
188 */
189 p = ldexp(copysign(0x1p-1022, r), -spread);
190 c = r + p;
191 s = c - r;
192 cc = (r - (c - s)) + (p - s) + rr;
193 fesetround(oround);
194 r = (c + cc) - p;
195 }
196 return (ldexp(r, spread));
197}
198
199#endif