/* this repo has no description */
1/*
2 * xmm_fma.c
3 * xmmLibm
4 *
5 * Created by Ian Ollmann on 8/8/05.
6 * Copyright 2005 Apple Computer Inc. All rights reserved.
7 *
8 */
9
10
11#include "xmmLibm_prefix.h"
12
13#include <math.h>
14
15//For any rounding mode, we can calculate A + B exactly as a head to tail result as:
16//
17// Rhi = A + B (A has a larger magnitude than B)
18// Rlo = B - (Rhi - A);
19//
20// Rhi is rounded to the current rounding mode
21// Rlo represents the next 53+ bits of precision
22
23//returns carry bits that don't fit into A
24static inline long double extended_accum( long double *A, long double B ) ALWAYS_INLINE;
25static inline long double extended_accum( long double *A, long double B )
26{
27 // assume B >~ *A, B needs to be larger or in the same binade.
28 long double r = *A + B;
29 long double carry = *A - ( r - B );
30 *A = r;
31 return carry;
32}
33
// Set the LSB of a long double encoding. Turns +/- inf to nan, and +/- 0 to Dmin.
// Purpose: "jam" a sticky bit into an already-rounded head value so that the
// final long double -> double conversion still sees that nonzero bits were
// discarded below it.
static inline long double jam_stickyl(long double x) ALWAYS_INLINE;
static inline long double jam_stickyl(long double x) {
    union{
        long double ld;
        xUInt64 xu;      // the 80-bit encoding viewed as a 128-bit vector;
                         // low 64-bit lane holds the explicit significand
        struct{ uint64_t mantissa; int16_t sexp; } parts;
    } ux = {x}, uy, mask = {0.};
    mask.parts.mantissa = 0x1ULL;             // bit 0 of the significand only
    uy.xu = _mm_or_si128(ux.xu, mask.xu);     // OR the sticky bit into the encoding
    return uy.ld;
}
46
47
// Compute next down unless value is already odd.
// This is useful for reflecting the contribution of "negative sticky":
// when the discarded tail opposes the head's sign, stepping an even
// significand down one ulp lets the final rounding account for it.
static inline long double jam_nstickyl(long double x) ALWAYS_INLINE;
static inline long double jam_nstickyl(long double x) {
    union{
        long double ld;
        xUInt64 xu;      // 80-bit encoding as a vector: low lane = significand,
                         // high lane = sign/exponent halfword
        struct{ uint64_t mantissa; int16_t sexp; } parts;
    } ux = {x}, uy, mask = {0.}, exp_borrow = {0x1p-16382L};
    // exp_borrow.parts = {0x8000000000000000ULL, 0x0001}
    mask.parts.mantissa = 0x1ULL;
    const xUInt64 is_odd = _mm_and_si128(ux.xu, mask.xu);
    // Subtract 1 from the significand iff it is even: (is_odd XOR 1) is 1 for
    // even inputs and 0 for odd ones, so odd values pass through unchanged.
    xUInt64 new_sig = _mm_sub_epi64(ux.xu, _mm_xor_si128(is_odd, mask.xu));

    //Deal with exact powers of 2.
    // _mm_movemask_pd bit 0 samples bit 63 of the low lane, i.e. the explicit
    // integer bit of the 80-bit significand after the decrement.
    if(0x1 & _mm_movemask_pd((xDouble)new_sig)) {
        /* Integer bit still set: the fraction field was nonzero, so the
           decrement stayed within the significand and no exponent borrow
           is needed.
         */
        ;
    } else {
        /* The significand was exactly 0x8000000000000000 (a power of two):
           the decrement cleared the explicit integer bit, so we must borrow
           one from the exponent and restore a full significand.
           Subtracting the encoding of 0x1p-16382 ({0x8000000000000000, 0x0001})
           lane-wise turns the low lane into 0xFFFFFFFFFFFFFFFF (integer bit
           and all fraction bits set) and decrements the exponent halfword.
         */
        new_sig = _mm_sub_epi64(new_sig, exp_borrow.xu);
    }
    uy.xu = new_sig;

    return uy.ld;
}
80
/*
 * fma( a, b, c ) -- compute a*b + c with a single rounding.
 *
 * Strategy: after screening NaN/inf/zero, split a and b into 26-bit-high /
 * 27-bit-low halves, form the four partial products exactly in 80-bit
 * extended precision, accumulate them (plus c) into a head/tail pair
 * (a0, a1) of roughly 128 bits, then fold the tail into the head's LSB
 * ("sticky jamming") so the one conversion back to double rounds correctly
 * in the current rounding mode.
 */
double fma( double a, double b, double c )
{
    // Bit images of the operands for sign/exponent inspection.
    union{ double ld; uint64_t u;}ua = {a};
    union{ double ld; uint64_t u;}ub = {b};
    union{ double ld; uint64_t u;}uc = {c};

    int32_t sign = (ua.u ^ ub.u ^ uc.u)>>63; // 0 for true addition, 1 for true subtraction
    int32_t aexp = (ua.u>>52) & 0x7ff;       // biased exponent fields
    int32_t bexp = (ub.u>>52) & 0x7ff;
    int32_t cexp = (uc.u>>52) & 0x7ff;
    int32_t pexp = aexp + bexp - 0x3ff;      // biased exponent estimate of a*b
                                             // NOTE(review): underestimates when a or b is
                                             // denormal (exponent field 0 but value nonzero);
                                             // confirm the cexp+120 test below tolerates that.

    if((aexp == 0x7ff)||(bexp == 0x7ff)||(cexp == 0x7ff)) {
        //deal with NaN
        if( a != a ) return a;
        if( b != b ) return b;
        if( c != c ) return c;
        //Have at least one inf and no NaNs at this point.
        //Need to deal with invalid cases for INF - INF.
        //Also, should avoid inadvertent overflow for big*big - INF.
        if(cexp < 0x7ff) { // If c is finite, then the product is the result.
            return a*b; // One of a or b is INF, so this will produce correct exceptions exactly
        }
        // c is infinite. Replace any finite normal a or b by copysign(2.0, .):
        // only the sign and infiniteness of the product matter now, and this
        // prevents a spurious overflow in the multiply below.
        if((aexp < 0x7ff) && (0 < aexp)) {
            ua.u = (ua.u & 0x8000000000000000ULL) | 0x4000000000000000ULL; // copysign(2.0,a)
        }
        if((bexp < 0x7ff) && (0 < bexp)) {
            ub.u = (ub.u & 0x8000000000000000ULL) | 0x4000000000000000ULL; // copysign(2.0,b)
        }
        return (ua.ld*ub.ld)+c;
    }
    // Having ruled out INF and NaN, do a cheap test to see if any input might be zero
    if((aexp == 0x000)||(bexp == 0x000)||(cexp == 0x000)) {
        if(a==0.0 || b==0.0) {
            // If at least one input is +-0 we can use native arithmetic to compute a*b+c.
            // If a or b is zero, then we have a signed zero product that we add to c.
            // Note that in this case, c may also be zero, but the addition will produce the correctly signed zero.
            return ((a*b)+c);
        }
        if(c==0.0) {
            // If c is zero and the infinitely precise product is non-zero,
            // we do not add c.
            // The floating point multiply will produce the correct result including overflow and underflow.
            // Note that if we tried to do (a*b)+c for this case relying on c being zero
            // we would get the wrong sign for some underflow cases, e.g., tiny * -tiny + 0.0 should be -0.0.
            return (a*b);
        }
    }

    static const xUInt64 mask26 = { 0xFFFFFFFFFC000000ULL, 0 };
    double ahi, bhi;

    //break a and b into high and low components
    //The high components have 26 bits of precision
    //The low components have 27 bits of precision
    xDouble xa = DOUBLE_2_XDOUBLE(a);
    xDouble xb = DOUBLE_2_XDOUBLE(b);

    xa = _mm_and_pd( xa, (xDouble) mask26 );
    xb = _mm_and_pd( xb, (xDouble) mask26 );

    ahi = XDOUBLE_2_DOUBLE( xa );
    bhi = XDOUBLE_2_DOUBLE( xb );

    //double precision doesn't have enough precision for the next part,
    //so we abandon it and go to extended.
    //
    //The problem is that the intermediate multiplication product needs to be infinitely
    //precise. While we can fix the mantissa part of the infinite precision problem,
    //we can't deal with the case where the product is outside the range of the
    //representable double precision values. Extended precision allows us to solve
    //that problem, since all double values and squares of double values are normalized
    //numbers in extended precision
    long double Ahi = ahi;
    long double Bhi = bhi;
    long double Alo = (long double) a - Ahi;   // exact: a == Ahi + Alo
    long double Blo = (long double) b - Bhi;   // exact: b == Bhi + Blo
    long double C = c;

    //The result of A * B is now exactly:
    //
    //      Ahi*Bhi + Alo*Bhi + Ahi*Blo + Alo*Blo
    //
    // all of these intermediate terms have either 52 or 53 bits of precision and are exact
    long double AhiBhi = Ahi * Bhi; //52 bits
    long double AloBhi = Alo * Bhi; //53 bits
    long double AhiBlo = Ahi * Blo; //53 bits
    long double AloBlo = Alo * Blo; //54 bits

    //accumulate the results into two head/tail buffers. This is infinitely precise.
    //no effort is taken to make sure that a0 and a1 are actually head to tail
    long double a0, a1;
    a0 = AloBlo;
    a1 = extended_accum( &a0, (AhiBlo + AloBhi) );
    a1 += extended_accum( &a0, AhiBhi );

    //Add C. If C has greater magnitude than a0, we need to swap them
    //(extended_accum requires its second argument to be the larger one).
    if( __builtin_fabsl( C ) > __builtin_fabsl( a0 ) )
    {
        long double temp = C;
        C = a0;
        a0 = temp;
    }

    if(cexp + 120 < pexp) {
        // Check to see if c < ulp(a*b).
        // If it is then c only contributes as a sticky bit.

        /* Since we know that the product A*B fits in 106 bits
           and a0+a1 can accommodate 128 bits,
           we just need to add something of the correct sign and
           less than ULP(A*B) but not too small.
           We construct this by multiplying a0 by +/- 2^-120.
         */
        long double ulp_prod_est = a0 * (sign? -0x1p-120L: 0x1p-120L);

        a1 += ulp_prod_est;
    } else {

        //this will probably overflow, but we have 128 bits of precision here, which should mean we are covered.
        long double a2 = C;
        if(__builtin_fabsl(C) < __builtin_fabsl(a0)) {
            a2 = a0;
            a0 = C;
        }
        a1 += extended_accum( &a0, a2 );
    }

    //push overflow in a1 back into a0. This should give us the correctly rounded result:
    //after this, a1 is the rounded head and a0 the residual tail.
    a0 = extended_accum( &a1, a0 );
    // Jam a nonzero tail into the head: same sign -> set the sticky LSB,
    // opposite sign -> step an even head down one ulp, so the final
    // long double -> double rounding sees the discarded bits.
    if(a0 > 0.) a1 = (a1>0.)?jam_stickyl(a1):jam_nstickyl(a1);
    if(a0 < 0.) a1 = (a1<0.)?jam_stickyl(a1):jam_nstickyl(a1);
    return a1;
}
215
/*
 * fmal( a, b, c ) -- compute a*b + c with a single rounding for 80-bit
 * extended long double.
 *
 * NOTE(review): the core computation assumes round-to-nearest (stated in
 * the comment block below); directed rounding modes are not handled.
 */
long double fmal( long double a, long double b, long double c )
{
    /*****************
     Edge cases, from Ian's code.
     *****************/

    // View of the 80-bit encoding: 64-bit significand (with explicit integer
    // bit) plus a sign/exponent halfword.
    union{ long double ld; struct{ uint64_t mantissa; int16_t sexp; }parts; }ua = {a};
    union{ long double ld; struct{ uint64_t mantissa; int16_t sexp; }parts; }ub = {b};

    int16_t sign = (ua.parts.sexp ^ ub.parts.sexp) & 0x8000;  // sign of the product a*b
    int32_t aexp = (ua.parts.sexp & 0x7fff);
    int32_t bexp = (ub.parts.sexp & 0x7fff);
    int32_t exp = aexp + bexp - 16383;   // biased exponent estimate of a*b

    //deal with NaN
    if( a != a ) return a;
    if( b != b ) return b;
    if( c != c ) return c;

    // a = +-inf  |  b = +-inf
    if ((aexp == 0x7fff) || (bexp == 0x7fff)) { // We've already dealt with NaN, so this is only true
                                                // if one of a and b is an inf.
        if (( b == 0.0L ) || ( a == 0.0L)) return a*b; // Return NaN if a = +-inf, b = 0 (or a = 0, b = +-inf)
        if ( __builtin_fabsl(c) == __builtin_infl() ) {
            if ( sign & 0x8000 ) {  // product is -inf
                if ( c > 0 )
                    return c - __builtin_infl(); // Return NaN (invalid): -inf + +inf
                else
                    return c; // Return -inf: -inf + -inf
            } else {                // product is +inf
                if ( c < 0 )
                    return c + __builtin_infl(); // Return NaN (invalid): +inf + -inf
                else
                    return c; // Return +inf: +inf + +inf
            }
            // NOTE(review): the two returns below are unreachable -- both
            // branches above return unconditionally.
            if ( sign & 0x8000 ) return -__builtin_infl();
            else return __builtin_infl();
        }
    }

    // c = +-inf with a,b finite at this point: c is the result.
    // NOTE(review): uses __builtin_inf() (double) instead of __builtin_infl();
    // harmless, since it promotes to the same long double infinity.
    if ( __builtin_fabsl(c) == __builtin_inf() ) return c; // a,b at this point are finite, c is infinite.


    if( exp < -16382 - 63 ) //product is below even the sub-denormal range; only c survives
        return c;

    /*****************
     Computation of a*b + c. scanon, Jan 2007

     The whole game we're playing here only works in round-to-nearest.
     *****************/

    long double ahi, alo, bhi, blo, phi, plo, xhi, xlo, yhi, ylo, tmp;

    // split a,b into high and low parts (top 32 significand bits / the rest);
    // each split is exact: a == ahi + alo, b == bhi + blo.
    static const uint64_t split32_mask = 0xffffffff00000000ULL;
    ua.parts.mantissa = ua.parts.mantissa & split32_mask;
    ahi = ua.ld; alo = a - ahi;
    ub.parts.mantissa = ub.parts.mantissa & split32_mask;
    bhi = ub.ld; blo = b - bhi;

    // compute the product of a and b as a sum phi + plo. This is exact.
    phi = a * b;

    // In case of overflow, stop here and return phi. This will need to be changed
    // in order to have a fully C99 fmal.
    if (__builtin_fabsl(phi) == __builtin_infl()) {
        return phi; // We know that c != inf or nan, so phi is the correct result.
    }

    // plo = the exact multiplication error a*b - phi, assembled from the four
    // exact partial products (Dekker-style two-product).
    plo = (((ahi * bhi - phi) + ahi*blo) + alo*bhi) + alo*blo;

    // compute plo + c = xhi + xlo where (xhi,xlo) is head-tail.
    xhi = plo + c;
    if (__builtin_fabsl(plo) > __builtin_fabsl(c)) {
        tmp = xhi - plo;
        xlo = c - tmp;
    } else {
        tmp = xhi - c;
        xlo = plo - tmp;
    }

    // Special case: xlo == 0, hence return phi + xhi:
    if (xlo == 0.0L) return phi + xhi;

    // At this point we know that the meaningful bits of phi and xhi are entirely to the
    // left (larger) side of the meaningful bits of xlo, and that our result is
    // round(phi + xhi + xlo).
    yhi = phi + xhi;
    if (__builtin_fabsl(phi) > __builtin_fabsl(xhi)) {
        tmp = yhi - phi;
        ylo = xhi - tmp;
    } else {
        tmp = yhi - xhi;
        ylo = phi - tmp;
    }

    // Handle the special case that one of yhi or ylo is zero.
    // If yhi == 0, then ylo is also zero, so yhi + xlo = xlo is the appropriate result.
    // If ylo == 0, then yhi + xlo is the appropriate result.
    if ((yhi == 0.0L) || (ylo == 0.0L)) return yhi + xlo;

    // Now we have that (in terms of meaningful bits)
    // yhi is strictly bigger than ylo is strictly bigger than xlo.
    // Additionally, our desired result is round(yhi + ylo + xlo).

    // The only way for xlo to affect rounding (in round-to-nearest) is for ylo to
    // be exactly half an ulp of yhi. Test for the value of the mantissa of ylo;
    // this is not the same condition, but getting this wrong can't affect the rounding.
    union{ long double ld; struct{ uint64_t mantissa; int16_t sexp; }parts; } uylo = {ylo};
    if (uylo.parts.mantissa == 0x8000000000000000ULL) {
        // Perturb the half-ulp ylo by a quarter-ulp in the direction of xlo,
        // breaking the tie the way the full-precision sum would.
        return yhi + (ylo + copysignl(0.5L*ylo, xlo));
    }
    // In the other case, xlo has no effect on the final result, so just return yhi + ylo
    else {
        return yhi + ylo;
    }

    /*
     * NOTE(review): a large, unfinished, commented-out integer-based
     * reimplementation ("Code that Ian wrote to do this in integer, never
     * finished") previously sat here as dead text; it has been condensed to
     * this note -- consult revision history if it is ever needed.
     */
}
383