this repo has no description
1/*
2 FPMATHEM.h
3
4 Copyright (C) 2007 John R. Hauser, Stanislav Shwartsman,
5 Ross Martin, Paul C. Pratt
6
7 You can redistribute this file and/or modify it under the terms
8 of version 2 of the GNU General Public License as published by
9 the Free Software Foundation. You should have received a copy
10 of the license along with this file; see the file COPYING.
11
12 This file is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 license for more details.
16*/
17
18/*
19 Floating Point MATH implemented with software EMulation
20
21 Based on the SoftFloat IEC/IEEE Floating-point Arithmetic
22 Package, Release 2b, written by John R. Hauser.
23
24 Also uses extensions to SoftFloat, written for
25 Bochs (x86 achitecture simulator), by Stanislav Shwartsman.
26*/
27
28/*
29 original SoftFloat Copyright notice:
30
31 Written by John R. Hauser. This work was made possible in part by the
32 International Computer Science Institute, located at Suite 600, 1947 Center
33 Street, Berkeley, California 94704. Funding was partially provided by the
34 National Science Foundation under grant MIP-9311980. The original version
35 of this code was written as part of a project to build a fixed-point vector
36 processor in collaboration with the University of California at Berkeley,
37 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
38 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
39 arithmetic/SoftFloat.html'.
40
41 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
42 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
43 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
44 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
45 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
46 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
47 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
48 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
49
50 Derivative works are acceptable, even for commercial purposes, so long as
51 (1) the source code for the derivative work includes prominent notice that
52 the work is derivative, and (2) the source code includes prominent notice with
53 these four paragraphs for those parts of this code that are retained.
54*/
55
56/*
57 original Stanislav Shwartsman Copyright notice:
58
59 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
60 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
61 floating point emulation.
62
63 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
64 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
65 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
66 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
67 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
68 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
69 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
70 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
71
72 Derivative works are acceptable, even for commercial purposes, so long as
73 (1) the source code for the derivative work includes prominent notice that
74 the work is derivative, and (2) the source code includes prominent notice with
75 these four paragraphs for those parts of this code that are retained.
76
77 .*============================================================================
78 * Written for Bochs (x86 achitecture simulator) by
79 * Stanislav Shwartsman [sshwarts at sourceforge net]
80 * ==========================================================================*.
81*/
82
83
84/*
85 ReportAbnormalID unused 0x0204 - 0x02FF
86*/
87
88
89/* soft float stuff */
90
91/*
92 should avoid 64 bit literals.
93*/
94
95typedef ui3r flag; /* 0/1 */
96
97/*
98 To fix: softfloat representation of
99 denormalized extended precision numbers
100 is different than on 68881.
101*/
102
103#define cIncludeFPUUnused cIncludeUnused
104
105/* ----- from original file "softfloat.h" ----- */
106
107/*======================================================================
108
109This C header file is part of the SoftFloat IEC/IEEE Floating-point
110Arithmetic Package, Release 2b.
111
112["original SoftFloat Copyright notice" went here, included near top of
113this file.]
114
115======================================================================*/
116
117/*----------------------------------------------------------------------------
118| The macro `FLOATX80' must be defined to enable the extended double-precision
119| floating-point format `floatx80'. If this macro is not defined, the
120| `floatx80' type will not be defined, and none of the functions that either
121| input or output the `floatx80' type will be defined. The same applies to
122| the `FLOAT128' macro and the quadruple-precision format `float128'.
123*----------------------------------------------------------------------------*/
124#define FLOATX80
125#define FLOAT128
126
127/*----------------------------------------------------------------------------
128| Software IEC/IEEE floating-point types.
129*----------------------------------------------------------------------------*/
130
131typedef struct {
132 ui6b low;
133 unsigned short high;
134} floatx80;
135
136#ifdef FLOAT128
137typedef struct {
138 ui6b low, high;
139} float128;
140#endif
141
142
143/* ----- end from original file "softfloat.h" ----- */
144
145
146/*----------------------------------------------------------------------------
147| Software IEC/IEEE floating-point rounding mode.
148*----------------------------------------------------------------------------*/
149enum {
150 float_round_nearest_even = 0,
151 float_round_down = 1,
152 float_round_up = 2,
153 float_round_to_zero = 3
154};
155
156/*----------------------------------------------------------------------------
157| Floating-point rounding mode, extended double-precision rounding precision,
158| and exception flags.
159*----------------------------------------------------------------------------*/
160
161LOCALVAR si3r float_rounding_mode = float_round_nearest_even;
162
163
164/*----------------------------------------------------------------------------
165| Software IEC/IEEE floating-point exception flags.
166*----------------------------------------------------------------------------*/
167
168enum {
169 float_flag_invalid = 1,
170 float_flag_divbyzero = 4,
171 float_flag_overflow = 8,
172 float_flag_underflow = 16,
173 float_flag_inexact = 32
174};
175LOCALVAR si3r float_exception_flags = 0;
176
177/*----------------------------------------------------------------------------
178| Software IEC/IEEE extended double-precision rounding precision. Valid
179| values are 32, 64, and 80.
180*----------------------------------------------------------------------------*/
181
182LOCALVAR si3r floatx80_rounding_precision = 80;
183
184/*----------------------------------------------------------------------------
185| Primitive arithmetic functions, including multi-word arithmetic, and
186| division and square root approximations. (Can be specialized to target if
187| desired.)
188*----------------------------------------------------------------------------*/
189
190/* ----- from original file "softfloat-macros" ----- */
191
192/*============================================================================
193
194This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
195Arithmetic Package, Release 2b.
196
197["original SoftFloat Copyright notice" went here, included near top of this file.]
198
199=============================================================================*/
200
201/*----------------------------------------------------------------------------
202| Shifts `a' right by the number of bits given in `count'. If any nonzero
203| bits are shifted off, they are ``jammed'' into the least significant bit of
204| the result by setting the least significant bit to 1. The value of `count'
205| can be arbitrarily large; in particular, if `count' is greater than 32, the
206| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
207| The result is stored in the location pointed to by `zPtr'.
208*----------------------------------------------------------------------------*/
209
210LOCALINLINEPROC shift32RightJamming( ui5b a, si4r count, ui5b *zPtr )
211{
212 ui5b z;
213
214 if ( count == 0 ) {
215 z = a;
216 }
217 else if ( count < 32 ) {
218 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
219 }
220 else {
221 z = ( a != 0 );
222 }
223 *zPtr = z;
224
225}
226
227/*----------------------------------------------------------------------------
228| Shifts `a' right by the number of bits given in `count'. If any nonzero
229| bits are shifted off, they are ``jammed'' into the least significant bit of
230| the result by setting the least significant bit to 1. The value of `count'
231| can be arbitrarily large; in particular, if `count' is greater than 64, the
232| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
233| The result is stored in the location pointed to by `zPtr'.
234*----------------------------------------------------------------------------*/
235
236LOCALINLINEPROC shift64RightJamming( ui6b a, si4r count, ui6b *zPtr )
237{
238 ui6b z;
239
240 if ( count == 0 ) {
241 z = a;
242 }
243 else if ( count < 64 ) {
244 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
245 }
246 else {
247 z = ( a != 0 );
248 }
249 *zPtr = z;
250
251}
252
253/*----------------------------------------------------------------------------
254| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
255| _plus_ the number of bits given in `count'. The shifted result is at most
256| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
257| bits shifted off form a second 64-bit result as follows: The _last_ bit
258| shifted off is the most-significant bit of the extra result, and the other
259| 63 bits of the extra result are all zero if and only if _all_but_the_last_
260| bits shifted off were all zero. This extra result is stored in the location
261| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
262| (This routine makes more sense if `a0' and `a1' are considered to form
263| a fixed-point value with binary point between `a0' and `a1'. This fixed-
264| point value is shifted right by the number of bits given in `count', and
265| the integer part of the result is returned at the location pointed to by
266| `z0Ptr'. The fractional part of the result may be slightly corrupted as
267| described above, and is returned at the location pointed to by `z1Ptr'.)
268*----------------------------------------------------------------------------*/
269
270LOCALINLINEPROC shift64ExtraRightJamming(
271 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
272{
273 ui6b z0, z1;
274 si3r negCount = ( - count ) & 63;
275
276 if ( count == 0 ) {
277 z1 = a1;
278 z0 = a0;
279 }
280 else if ( count < 64 ) {
281 z1 = ( a0<<negCount ) | ( a1 != 0 );
282 z0 = a0>>count;
283 }
284 else {
285 if ( count == 64 ) {
286 z1 = a0 | ( a1 != 0 );
287 }
288 else {
289 z1 = ( ( a0 | a1 ) != 0 );
290 }
291 z0 = 0;
292 }
293 *z1Ptr = z1;
294 *z0Ptr = z0;
295
296}
297
298/*----------------------------------------------------------------------------
299| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
300| number of bits given in `count'. Any bits shifted off are lost. The value
301| of `count' can be arbitrarily large; in particular, if `count' is greater
302| than 128, the result will be 0. The result is broken into two 64-bit pieces
303| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
304*----------------------------------------------------------------------------*/
305
306LOCALINLINEPROC shift128Right(
307 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
308{
309 ui6b z0, z1;
310 si3r negCount = ( - count ) & 63;
311
312 if ( count == 0 ) {
313 z1 = a1;
314 z0 = a0;
315 }
316 else if ( count < 64 ) {
317 z1 = ( a0<<negCount ) | ( a1>>count );
318 z0 = a0>>count;
319 }
320 else {
321 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
322 z0 = 0;
323 }
324 *z1Ptr = z1;
325 *z0Ptr = z0;
326
327}
328
329/*----------------------------------------------------------------------------
330| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
331| number of bits given in `count'. If any nonzero bits are shifted off, they
332| are ``jammed'' into the least significant bit of the result by setting the
333| least significant bit to 1. The value of `count' can be arbitrarily large;
334| in particular, if `count' is greater than 128, the result will be either
335| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
336| nonzero. The result is broken into two 64-bit pieces which are stored at
337| the locations pointed to by `z0Ptr' and `z1Ptr'.
338*----------------------------------------------------------------------------*/
339
340LOCALINLINEPROC shift128RightJamming(
341 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
342{
343 ui6b z0, z1;
344 si3r negCount = ( - count ) & 63;
345
346 if ( count == 0 ) {
347 z1 = a1;
348 z0 = a0;
349 }
350 else if ( count < 64 ) {
351 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
352 z0 = a0>>count;
353 }
354 else {
355 if ( count == 64 ) {
356 z1 = a0 | ( a1 != 0 );
357 }
358 else if ( count < 128 ) {
359 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
360 }
361 else {
362 z1 = ( ( a0 | a1 ) != 0 );
363 }
364 z0 = 0;
365 }
366 *z1Ptr = z1;
367 *z0Ptr = z0;
368
369}
370
371/*----------------------------------------------------------------------------
372| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
373| by 64 _plus_ the number of bits given in `count'. The shifted result is
374| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
375| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
376| off form a third 64-bit result as follows: The _last_ bit shifted off is
377| the most-significant bit of the extra result, and the other 63 bits of the
378| extra result are all zero if and only if _all_but_the_last_ bits shifted off
379| were all zero. This extra result is stored in the location pointed to by
380| `z2Ptr'. The value of `count' can be arbitrarily large.
381| (This routine makes more sense if `a0', `a1', and `a2' are considered
382| to form a fixed-point value with binary point between `a1' and `a2'. This
383| fixed-point value is shifted right by the number of bits given in `count',
384| and the integer part of the result is returned at the locations pointed to
385| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
386| corrupted as described above, and is returned at the location pointed to by
387| `z2Ptr'.)
388*----------------------------------------------------------------------------*/
389
390LOCALINLINEPROC shift128ExtraRightJamming(
391 ui6b a0,
392 ui6b a1,
393 ui6b a2,
394 si4r count,
395 ui6b *z0Ptr,
396 ui6b *z1Ptr,
397 ui6b *z2Ptr)
398{
399 ui6b z0, z1, z2;
400 si3r negCount = ( - count ) & 63;
401
402 if ( count == 0 ) {
403 z2 = a2;
404 z1 = a1;
405 z0 = a0;
406 }
407 else {
408 if ( count < 64 ) {
409 z2 = a1<<negCount;
410 z1 = ( a0<<negCount ) | ( a1>>count );
411 z0 = a0>>count;
412 }
413 else {
414 if ( count == 64 ) {
415 z2 = a1;
416 z1 = a0;
417 }
418 else {
419 a2 |= a1;
420 if ( count < 128 ) {
421 z2 = a0<<negCount;
422 z1 = a0>>( count & 63 );
423 }
424 else {
425 z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
426 z1 = 0;
427 }
428 }
429 z0 = 0;
430 }
431 z2 |= ( a2 != 0 );
432 }
433 *z2Ptr = z2;
434 *z1Ptr = z1;
435 *z0Ptr = z0;
436
437}
438
439/*----------------------------------------------------------------------------
440| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
441| number of bits given in `count'. Any bits shifted off are lost. The value
442| of `count' must be less than 64. The result is broken into two 64-bit
443| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
444*----------------------------------------------------------------------------*/
445
446LOCALINLINEPROC shortShift128Left(
447 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
448{
449
450 *z1Ptr = a1<<count;
451 *z0Ptr =
452 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
453
454}
455
456/*----------------------------------------------------------------------------
457| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
458| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
459| any carry out is lost. The result is broken into two 64-bit pieces which
460| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
461*----------------------------------------------------------------------------*/
462
463LOCALINLINEPROC add128(
464 ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr )
465{
466 ui6b z1;
467
468 z1 = a1 + b1;
469 *z1Ptr = z1;
470 *z0Ptr = a0 + b0 + ( z1 < a1 );
471}
472
473/*----------------------------------------------------------------------------
474| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
475| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
476| modulo 2^192, so any carry out is lost. The result is broken into three
477| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
478| `z1Ptr', and `z2Ptr'.
479*----------------------------------------------------------------------------*/
480
481LOCALINLINEPROC add192(
482 ui6b a0,
483 ui6b a1,
484 ui6b a2,
485 ui6b b0,
486 ui6b b1,
487 ui6b b2,
488 ui6b *z0Ptr,
489 ui6b *z1Ptr,
490 ui6b *z2Ptr)
491{
492 ui6b z0, z1, z2;
493 si3r carry0, carry1;
494
495 z2 = a2 + b2;
496 carry1 = ( z2 < a2 );
497 z1 = a1 + b1;
498 carry0 = ( z1 < a1 );
499 z0 = a0 + b0;
500 z1 += carry1;
501 z0 += ( z1 < carry1 );
502 z0 += carry0;
503 *z2Ptr = z2;
504 *z1Ptr = z1;
505 *z0Ptr = z0;
506
507}
508
509/*----------------------------------------------------------------------------
510| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
511| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
512| 2^128, so any borrow out (carry out) is lost. The result is broken into two
513| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
514| `z1Ptr'.
515*----------------------------------------------------------------------------*/
516
517LOCALINLINEPROC
518 sub128(
519 ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr )
520{
521
522 *z1Ptr = a1 - b1;
523 *z0Ptr = a0 - b0 - ( a1 < b1 );
524
525}
526
527/*----------------------------------------------------------------------------
528| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
529| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
530| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
531| result is broken into three 64-bit pieces which are stored at the locations
532| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
533*----------------------------------------------------------------------------*/
534
535LOCALINLINEPROC
536 sub192(
537 ui6b a0,
538 ui6b a1,
539 ui6b a2,
540 ui6b b0,
541 ui6b b1,
542 ui6b b2,
543 ui6b *z0Ptr,
544 ui6b *z1Ptr,
545 ui6b *z2Ptr
546 )
547{
548 ui6b z0, z1, z2;
549 si3r borrow0, borrow1;
550
551 z2 = a2 - b2;
552 borrow1 = ( a2 < b2 );
553 z1 = a1 - b1;
554 borrow0 = ( a1 < b1 );
555 z0 = a0 - b0;
556 z0 -= ( z1 < borrow1 );
557 z1 -= borrow1;
558 z0 -= borrow0;
559 *z2Ptr = z2;
560 *z1Ptr = z1;
561 *z0Ptr = z0;
562
563}
564
565/*----------------------------------------------------------------------------
566| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
567| into two 64-bit pieces which are stored at the locations pointed to by
568| `z0Ptr' and `z1Ptr'.
569*----------------------------------------------------------------------------*/
570
571
572#ifndef HaveUi5to6Mul
573#define HaveUi5to6Mul 1
574#endif
575
576#if HaveUi5to6Mul
577LOCALINLINEPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z)
578{
579 *z = ((ui6b) src1) * src2;
580}
581#else
582
583LOCALINLINEPROC Ui6fromHiLo(ui5b hi, ui5b lo, ui6b *z)
584{
585 *z = (((ui6b)(hi)) << 32) + lo;
586#if 0
587 z->lo = hi;
588 z->hi = lo;
589#endif
590}
591
592LOCALPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z)
593{
594 ui4b src1_lo = ui5b_lo(src1);
595 ui4b src2_lo = ui5b_lo(src2);
596 ui4b src1_hi = ui5b_hi(src1);
597 ui4b src2_hi = ui5b_hi(src2);
598
599 ui5b r0 = ( (ui6b) src1_lo ) * src2_lo;
600 ui5b r1 = ( (ui6b) src1_hi ) * src2_lo;
601 ui5b r2 = ( (ui6b) src1_lo ) * src2_hi;
602 ui5b r3 = ( (ui6b) src1_hi ) * src2_hi;
603
604 ui5b ra1 = ui5b_hi(r0) + ui5b_lo(r1) + ui5b_lo(r2);
605
606 ui5b lo = (ui5b_lo(ra1) << 16) | ui5b_lo(r0);
607 ui5b hi = ui5b_hi(ra1) + ui5b_hi(r1) + ui5b_hi(r2) + r3;
608
609 Ui6fromHiLo(hi, lo, z);
610}
611
612#endif
613
614
615LOCALINLINEPROC mul64To128( ui6b a, ui6b b, ui6b *z0Ptr, ui6b *z1Ptr )
616{
617 ui5b aHigh, aLow, bHigh, bLow;
618 ui6b z0, zMiddleA, zMiddleB, z1;
619
620 aLow = a;
621 aHigh = a>>32;
622 bLow = b;
623 bHigh = b>>32;
624
625 Ui5to6Mul(aLow, bLow, &z1);
626 Ui5to6Mul(aLow, bHigh, &zMiddleA);
627 Ui5to6Mul(aHigh, bLow, &zMiddleB);
628 Ui5to6Mul(aHigh, bHigh, &z0);
629
630 zMiddleA += zMiddleB;
631 z0 += ( ( (ui6b) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
632 zMiddleA <<= 32;
633 z1 += zMiddleA;
634 z0 += ( z1 < zMiddleA );
635 *z1Ptr = z1;
636 *z0Ptr = z0;
637
638}
639
640/*----------------------------------------------------------------------------
641| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
642| `b' to obtain a 192-bit product. The product is broken into three 64-bit
643| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
644| `z2Ptr'.
645*----------------------------------------------------------------------------*/
646
647LOCALINLINEPROC
648 mul128By64To192(
649 ui6b a0,
650 ui6b a1,
651 ui6b b,
652 ui6b *z0Ptr,
653 ui6b *z1Ptr,
654 ui6b *z2Ptr
655 )
656{
657 ui6b z0, z1, z2, more1;
658
659 mul64To128( a1, b, &z1, &z2 );
660 mul64To128( a0, b, &z0, &more1 );
661 add128( z0, more1, 0, z1, &z0, &z1 );
662 *z2Ptr = z2;
663 *z1Ptr = z1;
664 *z0Ptr = z0;
665
666}
667
668/*----------------------------------------------------------------------------
669| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
670| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
671| product. The product is broken into four 64-bit pieces which are stored at
672| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
673*----------------------------------------------------------------------------*/
674
675LOCALINLINEPROC
676 mul128To256(
677 ui6b a0,
678 ui6b a1,
679 ui6b b0,
680 ui6b b1,
681 ui6b *z0Ptr,
682 ui6b *z1Ptr,
683 ui6b *z2Ptr,
684 ui6b *z3Ptr
685 )
686{
687 ui6b z0, z1, z2, z3;
688 ui6b more1, more2;
689
690 mul64To128( a1, b1, &z2, &z3 );
691 mul64To128( a1, b0, &z1, &more2 );
692 add128( z1, more2, 0, z2, &z1, &z2 );
693 mul64To128( a0, b0, &z0, &more1 );
694 add128( z0, more1, 0, z1, &z0, &z1 );
695 mul64To128( a0, b1, &more1, &more2 );
696 add128( more1, more2, 0, z2, &more1, &z2 );
697 add128( z0, z1, 0, more1, &z0, &z1 );
698 *z3Ptr = z3;
699 *z2Ptr = z2;
700 *z1Ptr = z1;
701 *z0Ptr = z0;
702
703}
704
705/*----------------------------------------------------------------------------
706| Returns an approximation to the 64-bit integer quotient obtained by dividing
707| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
708| divisor `b' must be at least 2^63. If q is the exact quotient truncated
709| toward zero, the approximation returned lies between q and q + 2 inclusive.
710| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
711| unsigned integer is returned.
712*----------------------------------------------------------------------------*/
713
714#ifndef HaveUi6Div
715#define HaveUi6Div 0
716#endif
717
718#if HaveUi6Div
719#define Ui6Div(x, y) ((x) / (y))
720#else
721/*
722 Assuming other 64 bit operations available,
723 like compare, subtract, shift.
724*/
725LOCALFUNC ui6b Ui6Div(ui6b num, ui6b den)
726{
727 ui6b bit = 1;
728 ui6b res = 0;
729
730 while ((den < num) && bit && ! (den & (LIT64(1) << 63))) {
731 den <<= 1;
732 bit <<= 1;
733 }
734
735 while (bit) {
736 if (num >= den) {
737 num -= den;
738 res |= bit;
739 }
740 bit >>= 1;
741 den >>= 1;
742 }
743
744 return res;
745}
746#endif
747
748LOCALFUNC ui6b estimateDiv128To64( ui6b a0, ui6b a1, ui6b b )
749{
750 ui6b b0, b1;
751 ui6b rem0, rem1, term0, term1;
752 ui6b z;
753
754 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
755 b0 = b>>32;
756 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : Ui6Div(a0, b0) << 32;
757 mul64To128( b, z, &term0, &term1 );
758 sub128( a0, a1, term0, term1, &rem0, &rem1 );
759 while ( ( (si6b) rem0 ) < 0 ) {
760 z -= LIT64( 0x100000000 );
761 b1 = b<<32;
762 add128( rem0, rem1, b0, b1, &rem0, &rem1 );
763 }
764 rem0 = ( rem0<<32 ) | ( rem1>>32 );
765 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : Ui6Div(rem0, b0);
766 return z;
767
768}
769
770/*----------------------------------------------------------------------------
771| Returns an approximation to the square root of the 32-bit significand given
772| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
773| `aExp' (the least significant bit) is 1, the integer returned approximates
774| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
775| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
776| case, the approximation returned lies strictly within +/-2 of the exact
777| value.
778*----------------------------------------------------------------------------*/
779
780LOCALFUNC ui5b estimateSqrt32( si4r aExp, ui5b a )
781{
782 static const ui4b sqrtOddAdjustments[] = {
783 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
784 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
785 };
786 static const ui4b sqrtEvenAdjustments[] = {
787 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
788 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
789 };
790 si3r index;
791 ui5b z;
792
793 index = ( a>>27 ) & 15;
794 if ( aExp & 1 ) {
795 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
796 z = ( ( a / z )<<14 ) + ( z<<15 );
797 a >>= 1;
798 }
799 else {
800 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
801 z = a / z + z;
802 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
803 if ( z <= a ) return (ui5b) ( ( (si5b) a )>>1 );
804 }
805 return ( (ui5b) Ui6Div( ( ( (ui6b) a )<<31 ), z ) ) + ( z>>1 );
806
807}
808
809/*----------------------------------------------------------------------------
810| Returns the number of leading 0 bits before the most-significant 1 bit of
811| `a'. If `a' is zero, 32 is returned.
812*----------------------------------------------------------------------------*/
813
814LOCALFUNC si3r countLeadingZeros32( ui5b a )
815{
816 static const si3r countLeadingZerosHigh[] = {
817 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
818 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
819 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
820 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
822 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
823 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
824 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
825 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
826 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
827 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
828 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
829 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
830 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
831 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
832 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
833 };
834 si3r shiftCount;
835
836 shiftCount = 0;
837 if ( a < 0x10000 ) {
838 shiftCount += 16;
839 a <<= 16;
840 }
841 if ( a < 0x1000000 ) {
842 shiftCount += 8;
843 a <<= 8;
844 }
845 shiftCount += countLeadingZerosHigh[ a>>24 ];
846 return shiftCount;
847
848}
849
850/*----------------------------------------------------------------------------
851| Returns the number of leading 0 bits before the most-significant 1 bit of
852| `a'. If `a' is zero, 64 is returned.
853*----------------------------------------------------------------------------*/
854
855LOCALFUNC si3r countLeadingZeros64( ui6b a )
856{
857 si3r shiftCount;
858
859 shiftCount = 0;
860 if ( a < ( (ui6b) 1 )<<32 ) {
861 shiftCount += 32;
862 }
863 else {
864 a >>= 32;
865 }
866 shiftCount += countLeadingZeros32( a );
867 return shiftCount;
868
869}
870
871/*----------------------------------------------------------------------------
872| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
873| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
874| Otherwise, returns 0.
875*----------------------------------------------------------------------------*/
876
877LOCALINLINEFUNC flag eq128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
878{
879
880 return ( a0 == b0 ) && ( a1 == b1 );
881
882}
883
884/*----------------------------------------------------------------------------
885| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
886| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
887| Otherwise, returns 0.
888*----------------------------------------------------------------------------*/
889
890LOCALINLINEFUNC flag le128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
891{
892
893 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
894
895}
896
897/*----------------------------------------------------------------------------
898| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
899| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
900| returns 0.
901*----------------------------------------------------------------------------*/
902
903LOCALINLINEFUNC flag lt128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
904{
905
906 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
907
908}
909
910/*----------------------------------------------------------------------------
911| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
912| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
913| Otherwise, returns 0.
914*----------------------------------------------------------------------------*/
915
916#if cIncludeFPUUnused
917LOCALINLINEFUNC flag ne128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
918{
919
920 return ( a0 != b0 ) || ( a1 != b1 );
921
922}
923#endif
924
925/* ----- end from original file "softfloat-macros" ----- */
926
927/*----------------------------------------------------------------------------
928| Functions and definitions to determine: (1) whether tininess for underflow
929| is detected before or after rounding by default, (2) what (if anything)
930| happens when exceptions are raised, (3) how signaling NaNs are distinguished
931| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
932| are propagated from function inputs to output. These details are target-
933| specific.
934*----------------------------------------------------------------------------*/
935
936/* ----- from original file "softfloat-specialize" ----- */
937
938/*============================================================================
939
940This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
941Arithmetic Package, Release 2b.
942
943["original SoftFloat Copyright notice" went here, included near top of this file.]
944
945=============================================================================*/
946
947/*----------------------------------------------------------------------------
948| Software IEC/IEEE floating-point underflow tininess-detection mode.
949*----------------------------------------------------------------------------*/
950enum {
951 float_tininess_after_rounding = 0,
952 float_tininess_before_rounding = 1
953};
954
955/*----------------------------------------------------------------------------
956| Underflow tininess-detection mode, statically initialized to default value.
957| (The declaration in `softfloat.h' must match the `si3r' type here.)
958*----------------------------------------------------------------------------*/
959LOCALVAR si3r float_detect_tininess = float_tininess_after_rounding;
960
961/*----------------------------------------------------------------------------
962| Routine to raise any or all of the software IEC/IEEE floating-point
963| exception flags.
964*----------------------------------------------------------------------------*/
965/*----------------------------------------------------------------------------
966| Raises the exceptions specified by `flags'. Floating-point traps can be
967| defined here if desired. It is currently not possible for such a trap
968| to substitute a result value. If traps are not implemented, this routine
969| should be simply `float_exception_flags |= flags;'.
970*----------------------------------------------------------------------------*/
971
972LOCALFUNC void float_raise( si3r flags )
973{
974
975 float_exception_flags |= flags;
976
977}
978
979/*----------------------------------------------------------------------------
980| Internal canonical NaN format.
981*----------------------------------------------------------------------------*/
982typedef struct {
983 flag sign;
984 ui6b high, low;
985} commonNaNT;
986
987/*----------------------------------------------------------------------------
988| The pattern for a default generated extended double-precision NaN. The
989| `high' and `low' values hold the most- and least-significant bits,
990| respectively.
991*----------------------------------------------------------------------------*/
992#define floatx80_default_nan_high 0xFFFF
993#define floatx80_default_nan_low LIT64( 0xC000000000000000 )
994
995/*----------------------------------------------------------------------------
996| Returns 1 if the extended double-precision floating-point value `a' is a
997| NaN; otherwise returns 0.
998*----------------------------------------------------------------------------*/
999
1000LOCALFUNC flag floatx80_is_nan( floatx80 a )
1001{
1002
1003 return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (ui6b) ( a.low<<1 );
1004
1005}
1006
1007/*----------------------------------------------------------------------------
1008| Returns 1 if the extended double-precision floating-point value `a' is a
1009| signaling NaN; otherwise returns 0.
1010*----------------------------------------------------------------------------*/
1011
1012LOCALFUNC flag floatx80_is_signaling_nan( floatx80 a )
1013{
1014 ui6b aLow;
1015
1016 aLow = a.low & ~ LIT64( 0x4000000000000000 );
1017 return
1018 ( ( a.high & 0x7FFF ) == 0x7FFF )
1019 && (ui6b) ( aLow<<1 )
1020 && ( a.low == aLow );
1021
1022}
1023
1024/*----------------------------------------------------------------------------
1025| Returns the result of converting the extended double-precision floating-
1026| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
1027| invalid exception is raised.
1028*----------------------------------------------------------------------------*/
1029
1030LOCALFUNC commonNaNT floatx80ToCommonNaN( floatx80 a )
1031{
1032 commonNaNT z;
1033
1034 if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
1035 z.sign = a.high>>15;
1036 z.low = 0;
1037 z.high = a.low<<1;
1038 return z;
1039
1040}
1041
1042/*----------------------------------------------------------------------------
1043| Returns the result of converting the canonical NaN `a' to the extended
1044| double-precision floating-point format.
1045*----------------------------------------------------------------------------*/
1046
1047LOCALFUNC floatx80 commonNaNToFloatx80( commonNaNT a )
1048{
1049 floatx80 z;
1050
1051 z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
1052 z.high = ( ( (ui4b) a.sign )<<15 ) | 0x7FFF;
1053 return z;
1054
1055}
1056
1057/*----------------------------------------------------------------------------
1058| Takes two extended double-precision floating-point values `a' and `b', one
1059| of which is a NaN, and returns the appropriate NaN result. If either `a' or
1060| `b' is a signaling NaN, the invalid exception is raised.
1061*----------------------------------------------------------------------------*/
1062
1063LOCALFUNC floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
1064{
1065 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
1066
1067 aIsNaN = floatx80_is_nan( a );
1068 aIsSignalingNaN = floatx80_is_signaling_nan( a );
1069 bIsNaN = floatx80_is_nan( b );
1070 bIsSignalingNaN = floatx80_is_signaling_nan( b );
1071 a.low |= LIT64( 0xC000000000000000 );
1072 b.low |= LIT64( 0xC000000000000000 );
1073 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
1074 if ( aIsSignalingNaN ) {
1075 if ( bIsSignalingNaN ) goto returnLargerSignificand;
1076 return bIsNaN ? b : a;
1077 }
1078 else if ( aIsNaN ) {
1079 if ( bIsSignalingNaN | ! bIsNaN ) return a;
1080 returnLargerSignificand:
1081 if ( a.low < b.low ) return b;
1082 if ( b.low < a.low ) return a;
1083 return ( a.high < b.high ) ? a : b;
1084 }
1085 else {
1086 return b;
1087 }
1088
1089}
1090
1091#ifdef FLOAT128
1092
1093/*----------------------------------------------------------------------------
1094| The pattern for a default generated quadruple-precision NaN. The `high' and
1095| `low' values hold the most- and least-significant bits, respectively.
1096*----------------------------------------------------------------------------*/
1097#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
1098#define float128_default_nan_low LIT64( 0x0000000000000000 )
1099
1100/*----------------------------------------------------------------------------
1101| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
1102| otherwise returns 0.
1103*----------------------------------------------------------------------------*/
1104
1105LOCALFUNC flag float128_is_nan( float128 a )
1106{
1107
1108 return
1109 ( LIT64( 0xFFFE000000000000 ) <= (ui6b) ( a.high<<1 ) )
1110 && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
1111
1112}
1113
1114/*----------------------------------------------------------------------------
1115| Returns 1 if the quadruple-precision floating-point value `a' is a
1116| signaling NaN; otherwise returns 0.
1117*----------------------------------------------------------------------------*/
1118
1119LOCALFUNC flag float128_is_signaling_nan( float128 a )
1120{
1121
1122 return
1123 ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
1124 && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
1125
1126}
1127
1128/*----------------------------------------------------------------------------
1129| Returns the result of converting the quadruple-precision floating-point NaN
1130| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
1131| exception is raised.
1132*----------------------------------------------------------------------------*/
1133
1134LOCALFUNC commonNaNT float128ToCommonNaN( float128 a )
1135{
1136 commonNaNT z;
1137
1138 if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
1139 z.sign = a.high>>63;
1140 shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
1141 return z;
1142
1143}
1144
1145/*----------------------------------------------------------------------------
1146| Returns the result of converting the canonical NaN `a' to the quadruple-
1147| precision floating-point format.
1148*----------------------------------------------------------------------------*/
1149
1150LOCALFUNC float128 commonNaNToFloat128( commonNaNT a )
1151{
1152 float128 z;
1153
1154 shift128Right( a.high, a.low, 16, &z.high, &z.low );
1155 z.high |= ( ( (ui6b) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
1156 return z;
1157
1158}
1159
1160/*----------------------------------------------------------------------------
1161| Takes two quadruple-precision floating-point values `a' and `b', one of
1162| which is a NaN, and returns the appropriate NaN result. If either `a' or
1163| `b' is a signaling NaN, the invalid exception is raised.
1164*----------------------------------------------------------------------------*/
1165
1166LOCALFUNC float128 propagateFloat128NaN( float128 a, float128 b )
1167{
1168 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
1169
1170 aIsNaN = float128_is_nan( a );
1171 aIsSignalingNaN = float128_is_signaling_nan( a );
1172 bIsNaN = float128_is_nan( b );
1173 bIsSignalingNaN = float128_is_signaling_nan( b );
1174 a.high |= LIT64( 0x0000800000000000 );
1175 b.high |= LIT64( 0x0000800000000000 );
1176 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
1177 if ( aIsSignalingNaN ) {
1178 if ( bIsSignalingNaN ) goto returnLargerSignificand;
1179 return bIsNaN ? b : a;
1180 }
1181 else if ( aIsNaN ) {
1182 if ( bIsSignalingNaN | ! bIsNaN ) return a;
1183 returnLargerSignificand:
1184 if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
1185 if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
1186 return ( a.high < b.high ) ? a : b;
1187 }
1188 else {
1189 return b;
1190 }
1191
1192}
1193
1194#endif
1195
1196/* ----- end from original file "softfloat-specialize" ----- */
1197
1198/* ----- from original file "softfloat.c" ----- */
1199
1200
1201/*============================================================================
1202
1203This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
1204Package, Release 2b.
1205
1206["original SoftFloat Copyright notice" went here, included near top of this file.]
1207
1208=============================================================================*/
1209
1210/*----------------------------------------------------------------------------
1211| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
1212| and 7, and returns the properly rounded 32-bit integer corresponding to the
1213| input. If `zSign' is 1, the input is negated before being converted to an
1214| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
1215| is simply rounded to an integer, with the inexact exception raised if the
1216| input cannot be represented exactly as an integer. However, if the fixed-
1217| point input is too large, the invalid exception is raised and the largest
1218| positive or negative integer is returned.
1219*----------------------------------------------------------------------------*/
1220
1221LOCALFUNC si5r roundAndPackInt32( flag zSign, ui6b absZ )
1222{
1223 si3r roundingMode;
1224 flag roundNearestEven;
1225 si3r roundIncrement, roundBits;
1226 si5r z;
1227
1228 roundingMode = float_rounding_mode;
1229 roundNearestEven = ( roundingMode == float_round_nearest_even );
1230 roundIncrement = 0x40;
1231 if ( ! roundNearestEven ) {
1232 if ( roundingMode == float_round_to_zero ) {
1233 roundIncrement = 0;
1234 }
1235 else {
1236 roundIncrement = 0x7F;
1237 if ( zSign ) {
1238 if ( roundingMode == float_round_up ) roundIncrement = 0;
1239 }
1240 else {
1241 if ( roundingMode == float_round_down ) roundIncrement = 0;
1242 }
1243 }
1244 }
1245 roundBits = absZ & 0x7F;
1246 absZ = ( absZ + roundIncrement )>>7;
1247 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
1248 z = absZ;
1249 if ( zSign ) z = - z;
1250 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
1251 float_raise( float_flag_invalid );
1252 return zSign ? (si5b) 0x80000000 : 0x7FFFFFFF;
1253 }
1254 if ( roundBits ) float_exception_flags |= float_flag_inexact;
1255 return z;
1256
1257}
1258
1259/*----------------------------------------------------------------------------
1260| Returns the fraction bits of the extended double-precision floating-point
1261| value `a'.
1262*----------------------------------------------------------------------------*/
1263
1264LOCALINLINEFUNC ui6b extractFloatx80Frac( floatx80 a )
1265{
1266
1267 return a.low;
1268
1269}
1270
1271/*----------------------------------------------------------------------------
1272| Returns the exponent bits of the extended double-precision floating-point
1273| value `a'.
1274*----------------------------------------------------------------------------*/
1275
1276LOCALINLINEFUNC si5r extractFloatx80Exp( floatx80 a )
1277{
1278
1279 return a.high & 0x7FFF;
1280
1281}
1282
1283/*----------------------------------------------------------------------------
1284| Returns the sign bit of the extended double-precision floating-point value
1285| `a'.
1286*----------------------------------------------------------------------------*/
1287
1288LOCALINLINEFUNC flag extractFloatx80Sign( floatx80 a )
1289{
1290
1291 return a.high>>15;
1292
1293}
1294
1295/*----------------------------------------------------------------------------
1296| Normalizes the subnormal extended double-precision floating-point value
1297| represented by the denormalized significand `aSig'. The normalized exponent
1298| and significand are stored at the locations pointed to by `zExpPtr' and
1299| `zSigPtr', respectively.
1300*----------------------------------------------------------------------------*/
1301
1302LOCALPROC
1303 normalizeFloatx80Subnormal( ui6b aSig, si5r *zExpPtr, ui6b *zSigPtr )
1304{
1305 si3r shiftCount;
1306
1307 shiftCount = countLeadingZeros64( aSig );
1308 *zSigPtr = aSig<<shiftCount;
1309 *zExpPtr = 1 - shiftCount;
1310
1311}
1312
1313/*----------------------------------------------------------------------------
1314| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
1315| extended double-precision floating-point value, returning the result.
1316*----------------------------------------------------------------------------*/
1317
1318LOCALINLINEFUNC floatx80 packFloatx80( flag zSign, si5r zExp, ui6b zSig )
1319{
1320 floatx80 z;
1321
1322 z.low = zSig;
1323 z.high = ( ( (ui4b) zSign )<<15 ) + zExp;
1324 return z;
1325
1326}
1327
1328/*----------------------------------------------------------------------------
1329| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1330| and extended significand formed by the concatenation of `zSig0' and `zSig1',
1331| and returns the proper extended double-precision floating-point value
1332| corresponding to the abstract input. Ordinarily, the abstract value is
1333| rounded and packed into the extended double-precision format, with the
1334| inexact exception raised if the abstract input cannot be represented
1335| exactly. However, if the abstract value is too large, the overflow and
1336| inexact exceptions are raised and an infinity or maximal finite value is
1337| returned. If the abstract value is too small, the input value is rounded to
1338| a subnormal number, and the underflow and inexact exceptions are raised if
1339| the abstract input cannot be represented exactly as a subnormal extended
1340| double-precision floating-point number.
1341| If `roundingPrecision' is 32 or 64, the result is rounded to the same
1342| number of bits as single or double precision, respectively. Otherwise, the
1343| result is rounded to the full precision of the extended double-precision
1344| format.
1345| The input significand must be normalized or smaller. If the input
1346| significand is not normalized, `zExp' must be 0; in that case, the result
1347| returned is a subnormal number, and it must not require rounding. The
1348| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
1349| Floating-Point Arithmetic.
1350*----------------------------------------------------------------------------*/
1351
1352LOCALFUNC floatx80
1353 roundAndPackFloatx80(
1354 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1
1355 )
1356{
1357 si3r roundingMode;
1358 flag roundNearestEven, increment, isTiny;
1359 si6r roundIncrement, roundMask, roundBits;
1360
1361 roundingMode = float_rounding_mode;
1362 roundNearestEven = ( roundingMode == float_round_nearest_even );
1363 if ( roundingPrecision == 80 ) goto precision80;
1364 if ( roundingPrecision == 64 ) {
1365 roundIncrement = LIT64( 0x0000000000000400 );
1366 roundMask = LIT64( 0x00000000000007FF );
1367 }
1368 else if ( roundingPrecision == 32 ) {
1369 roundIncrement = LIT64( 0x0000008000000000 );
1370 roundMask = LIT64( 0x000000FFFFFFFFFF );
1371 }
1372 else {
1373 goto precision80;
1374 }
1375 zSig0 |= ( zSig1 != 0 );
1376 if ( ! roundNearestEven ) {
1377 if ( roundingMode == float_round_to_zero ) {
1378 roundIncrement = 0;
1379 }
1380 else {
1381 roundIncrement = roundMask;
1382 if ( zSign ) {
1383 if ( roundingMode == float_round_up ) roundIncrement = 0;
1384 }
1385 else {
1386 if ( roundingMode == float_round_down ) roundIncrement = 0;
1387 }
1388 }
1389 }
1390 roundBits = zSig0 & roundMask;
1391 if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) {
1392 if ( ( 0x7FFE < zExp )
1393 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1394 ) {
1395 goto overflow;
1396 }
1397 if ( zExp <= 0 ) {
1398 isTiny =
1399 ( float_detect_tininess == float_tininess_before_rounding )
1400 || ( zExp < 0 )
1401 || ( zSig0 <= zSig0 + roundIncrement );
1402 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
1403 zExp = 0;
1404 roundBits = zSig0 & roundMask;
1405 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
1406 if ( roundBits ) float_exception_flags |= float_flag_inexact;
1407 zSig0 += roundIncrement;
1408 if ( (si6b) zSig0 < 0 ) zExp = 1;
1409 roundIncrement = roundMask + 1;
1410 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1411 roundMask |= roundIncrement;
1412 }
1413 zSig0 &= ~ roundMask;
1414 return packFloatx80( zSign, zExp, zSig0 );
1415 }
1416 }
1417 if ( roundBits ) float_exception_flags |= float_flag_inexact;
1418 zSig0 += roundIncrement;
1419 if ( zSig0 < roundIncrement ) {
1420 ++zExp;
1421 zSig0 = LIT64( 0x8000000000000000 );
1422 }
1423 roundIncrement = roundMask + 1;
1424 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1425 roundMask |= roundIncrement;
1426 }
1427 zSig0 &= ~ roundMask;
1428 if ( zSig0 == 0 ) zExp = 0;
1429 return packFloatx80( zSign, zExp, zSig0 );
1430 precision80:
1431 increment = ( (si6b) zSig1 < 0 );
1432 if ( ! roundNearestEven ) {
1433 if ( roundingMode == float_round_to_zero ) {
1434 increment = 0;
1435 }
1436 else {
1437 if ( zSign ) {
1438 increment = ( roundingMode == float_round_down ) && zSig1;
1439 }
1440 else {
1441 increment = ( roundingMode == float_round_up ) && zSig1;
1442 }
1443 }
1444 }
1445 if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) {
1446 if ( ( 0x7FFE < zExp )
1447 || ( ( zExp == 0x7FFE )
1448 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
1449 && increment
1450 )
1451 ) {
1452 roundMask = 0;
1453 overflow:
1454 float_raise( float_flag_overflow | float_flag_inexact );
1455 if ( ( roundingMode == float_round_to_zero )
1456 || ( zSign && ( roundingMode == float_round_up ) )
1457 || ( ! zSign && ( roundingMode == float_round_down ) )
1458 ) {
1459 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1460 }
1461 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1462 }
1463 if ( zExp <= 0 ) {
1464 isTiny =
1465 ( float_detect_tininess == float_tininess_before_rounding )
1466 || ( zExp < 0 )
1467 || ! increment
1468 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
1469 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
1470 zExp = 0;
1471 if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
1472 if ( zSig1 ) float_exception_flags |= float_flag_inexact;
1473 if ( roundNearestEven ) {
1474 increment = ( (si6b) zSig1 < 0 );
1475 }
1476 else {
1477 if ( zSign ) {
1478 increment = ( roundingMode == float_round_down ) && zSig1;
1479 }
1480 else {
1481 increment = ( roundingMode == float_round_up ) && zSig1;
1482 }
1483 }
1484 if ( increment ) {
1485 ++zSig0;
1486 zSig0 &=
1487 ~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1488 if ( (si6b) zSig0 < 0 ) zExp = 1;
1489 }
1490 return packFloatx80( zSign, zExp, zSig0 );
1491 }
1492 }
1493 if ( zSig1 ) float_exception_flags |= float_flag_inexact;
1494 if ( increment ) {
1495 ++zSig0;
1496 if ( zSig0 == 0 ) {
1497 ++zExp;
1498 zSig0 = LIT64( 0x8000000000000000 );
1499 }
1500 else {
1501 zSig0 &= ~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1502 }
1503 }
1504 else {
1505 if ( zSig0 == 0 ) zExp = 0;
1506 }
1507 return packFloatx80( zSign, zExp, zSig0 );
1508
1509}
1510
1511/*----------------------------------------------------------------------------
1512| Takes an abstract floating-point value having sign `zSign', exponent
1513| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
1514| and returns the proper extended double-precision floating-point value
1515| corresponding to the abstract input. This routine is just like
1516| `roundAndPackFloatx80' except that the input significand does not have to be
1517| normalized.
1518*----------------------------------------------------------------------------*/
1519
1520LOCALFUNC floatx80
1521 normalizeRoundAndPackFloatx80(
1522 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1
1523 )
1524{
1525 si3r shiftCount;
1526
1527 if ( zSig0 == 0 ) {
1528 zSig0 = zSig1;
1529 zSig1 = 0;
1530 zExp -= 64;
1531 }
1532 shiftCount = countLeadingZeros64( zSig0 );
1533 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1534 zExp -= shiftCount;
1535 return
1536 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
1537
1538}
1539
1540#ifdef FLOAT128
1541
1542/*----------------------------------------------------------------------------
1543| Returns the least-significant 64 fraction bits of the quadruple-precision
1544| floating-point value `a'.
1545*----------------------------------------------------------------------------*/
1546
1547LOCALINLINEFUNC ui6b extractFloat128Frac1( float128 a )
1548{
1549
1550 return a.low;
1551
1552}
1553
1554/*----------------------------------------------------------------------------
1555| Returns the most-significant 48 fraction bits of the quadruple-precision
1556| floating-point value `a'.
1557*----------------------------------------------------------------------------*/
1558
1559LOCALINLINEFUNC ui6b extractFloat128Frac0( float128 a )
1560{
1561
1562 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1563
1564}
1565
1566/*----------------------------------------------------------------------------
1567| Returns the exponent bits of the quadruple-precision floating-point value
1568| `a'.
1569*----------------------------------------------------------------------------*/
1570
1571LOCALINLINEFUNC si5r extractFloat128Exp( float128 a )
1572{
1573
1574 return ( a.high>>48 ) & 0x7FFF;
1575
1576}
1577
1578/*----------------------------------------------------------------------------
1579| Returns the sign bit of the quadruple-precision floating-point value `a'.
1580*----------------------------------------------------------------------------*/
1581
1582LOCALINLINEFUNC flag extractFloat128Sign( float128 a )
1583{
1584
1585 return a.high>>63;
1586
1587}
1588
1589/*----------------------------------------------------------------------------
1590| Normalizes the subnormal quadruple-precision floating-point value
1591| represented by the denormalized significand formed by the concatenation of
1592| `aSig0' and `aSig1'. The normalized exponent is stored at the location
1593| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
1594| significand are stored at the location pointed to by `zSig0Ptr', and the
1595| least significant 64 bits of the normalized significand are stored at the
1596| location pointed to by `zSig1Ptr'.
1597*----------------------------------------------------------------------------*/
1598
1599LOCALPROC
1600 normalizeFloat128Subnormal(
1601 ui6b aSig0,
1602 ui6b aSig1,
1603 si5r *zExpPtr,
1604 ui6b *zSig0Ptr,
1605 ui6b *zSig1Ptr
1606 )
1607{
1608 si3r shiftCount;
1609
1610 if ( aSig0 == 0 ) {
1611 shiftCount = countLeadingZeros64( aSig1 ) - 15;
1612 if ( shiftCount < 0 ) {
1613 *zSig0Ptr = aSig1>>( - shiftCount );
1614 *zSig1Ptr = aSig1<<( shiftCount & 63 );
1615 }
1616 else {
1617 *zSig0Ptr = aSig1<<shiftCount;
1618 *zSig1Ptr = 0;
1619 }
1620 *zExpPtr = - shiftCount - 63;
1621 }
1622 else {
1623 shiftCount = countLeadingZeros64( aSig0 ) - 15;
1624 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1625 *zExpPtr = 1 - shiftCount;
1626 }
1627
1628}
1629
1630/*----------------------------------------------------------------------------
1631| Packs the sign `zSign', the exponent `zExp', and the significand formed
1632| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
1633| floating-point value, returning the result. After being shifted into the
1634| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
1635| added together to form the most significant 32 bits of the result. This
1636| means that any integer portion of `zSig0' will be added into the exponent.
1637| Since a properly normalized significand will have an integer portion equal
1638| to 1, the `zExp' input should be 1 less than the desired result exponent
1639| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1640| significand.
1641*----------------------------------------------------------------------------*/
1642
1643LOCALINLINEFUNC float128
1644 packFloat128( flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 )
1645{
1646 float128 z;
1647
1648 z.low = zSig1;
1649 z.high = ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<48 ) + zSig0;
1650 return z;
1651
1652}
1653
1654/*----------------------------------------------------------------------------
1655| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1656| and extended significand formed by the concatenation of `zSig0', `zSig1',
1657| and `zSig2', and returns the proper quadruple-precision floating-point value
1658| corresponding to the abstract input. Ordinarily, the abstract value is
1659| simply rounded and packed into the quadruple-precision format, with the
1660| inexact exception raised if the abstract input cannot be represented
1661| exactly. However, if the abstract value is too large, the overflow and
1662| inexact exceptions are raised and an infinity or maximal finite value is
1663| returned. If the abstract value is too small, the input value is rounded to
1664| a subnormal number, and the underflow and inexact exceptions are raised if
1665| the abstract input cannot be represented exactly as a subnormal quadruple-
1666| precision floating-point number.
1667| The input significand must be normalized or smaller. If the input
1668| significand is not normalized, `zExp' must be 0; in that case, the result
1669| returned is a subnormal number, and it must not require rounding. In the
1670| usual case that the input significand is normalized, `zExp' must be 1 less
1671| than the ``true'' floating-point exponent. The handling of underflow and
1672| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1673*----------------------------------------------------------------------------*/
1674
1675LOCALFUNC float128
1676 roundAndPackFloat128(
1677 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1, ui6b zSig2 )
1678{
1679 si3r roundingMode;
1680 flag roundNearestEven, increment, isTiny;
1681
1682 roundingMode = float_rounding_mode;
1683 roundNearestEven = ( roundingMode == float_round_nearest_even );
1684 increment = ( (si6b) zSig2 < 0 );
1685 if ( ! roundNearestEven ) {
1686 if ( roundingMode == float_round_to_zero ) {
1687 increment = 0;
1688 }
1689 else {
1690 if ( zSign ) {
1691 increment = ( roundingMode == float_round_down ) && zSig2;
1692 }
1693 else {
1694 increment = ( roundingMode == float_round_up ) && zSig2;
1695 }
1696 }
1697 }
1698 if ( 0x7FFD <= (ui5b) zExp ) {
1699 if ( ( 0x7FFD < zExp )
1700 || ( ( zExp == 0x7FFD )
1701 && eq128(
1702 LIT64( 0x0001FFFFFFFFFFFF ),
1703 LIT64( 0xFFFFFFFFFFFFFFFF ),
1704 zSig0,
1705 zSig1
1706 )
1707 && increment
1708 )
1709 ) {
1710 float_raise( float_flag_overflow | float_flag_inexact );
1711 if ( ( roundingMode == float_round_to_zero )
1712 || ( zSign && ( roundingMode == float_round_up ) )
1713 || ( ! zSign && ( roundingMode == float_round_down ) )
1714 ) {
1715 return
1716 packFloat128(
1717 zSign,
1718 0x7FFE,
1719 LIT64( 0x0000FFFFFFFFFFFF ),
1720 LIT64( 0xFFFFFFFFFFFFFFFF )
1721 );
1722 }
1723 return packFloat128( zSign, 0x7FFF, 0, 0 );
1724 }
1725 if ( zExp < 0 ) {
1726 isTiny =
1727 ( float_detect_tininess == float_tininess_before_rounding )
1728 || ( zExp < -1 )
1729 || ! increment
1730 || lt128(
1731 zSig0,
1732 zSig1,
1733 LIT64( 0x0001FFFFFFFFFFFF ),
1734 LIT64( 0xFFFFFFFFFFFFFFFF )
1735 );
1736 shift128ExtraRightJamming(
1737 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1738 zExp = 0;
1739 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1740 if ( roundNearestEven ) {
1741 increment = ( (si6b) zSig2 < 0 );
1742 }
1743 else {
1744 if ( zSign ) {
1745 increment = ( roundingMode == float_round_down ) && zSig2;
1746 }
1747 else {
1748 increment = ( roundingMode == float_round_up ) && zSig2;
1749 }
1750 }
1751 }
1752 }
1753 if ( zSig2 ) float_exception_flags |= float_flag_inexact;
1754 if ( increment ) {
1755 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1756 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1757 }
1758 else {
1759 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1760 }
1761 return packFloat128( zSign, zExp, zSig0, zSig1 );
1762
1763}
1764
1765/*----------------------------------------------------------------------------
1766| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1767| and significand formed by the concatenation of `zSig0' and `zSig1', and
1768| returns the proper quadruple-precision floating-point value corresponding
1769| to the abstract input. This routine is just like `roundAndPackFloat128'
1770| except that the input significand has fewer bits and does not have to be
1771| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1772| point exponent.
1773*----------------------------------------------------------------------------*/
1774
1775LOCALFUNC float128
1776 normalizeRoundAndPackFloat128(
1777 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 )
1778{
1779 si3r shiftCount;
1780 ui6b zSig2;
1781
1782 if ( zSig0 == 0 ) {
1783 zSig0 = zSig1;
1784 zSig1 = 0;
1785 zExp -= 64;
1786 }
1787 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1788 if ( 0 <= shiftCount ) {
1789 zSig2 = 0;
1790 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1791 }
1792 else {
1793 shift128ExtraRightJamming(
1794 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1795 }
1796 zExp -= shiftCount;
1797 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1798
1799}
1800
1801#endif
1802
1803/*----------------------------------------------------------------------------
1804| Returns the result of converting the 32-bit two's complement integer `a'
1805| to the extended double-precision floating-point format. The conversion
1806| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1807| Arithmetic.
1808*----------------------------------------------------------------------------*/
1809
1810LOCALFUNC floatx80 int32_to_floatx80( si5r a )
1811{
1812 flag zSign;
1813 ui5r absA;
1814 si3r shiftCount;
1815 ui6b zSig;
1816
1817 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1818 zSign = ( a < 0 );
1819 absA = zSign ? - a : a;
1820 shiftCount = countLeadingZeros32( absA ) + 32;
1821 zSig = absA;
1822 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1823
1824}
1825
1826/*----------------------------------------------------------------------------
1827| Returns the result of converting the extended double-precision floating-
1828| point value `a' to the 32-bit two's complement integer format. The
1829| conversion is performed according to the IEC/IEEE Standard for Binary
1830| Floating-Point Arithmetic---which means in particular that the conversion
1831| is rounded according to the current rounding mode. If `a' is a NaN, the
1832| largest positive integer is returned. Otherwise, if the conversion
1833| overflows, the largest integer with the same sign as `a' is returned.
1834*----------------------------------------------------------------------------*/
1835
1836LOCALFUNC si5r floatx80_to_int32( floatx80 a )
1837{
1838 flag aSign;
1839 si5r aExp, shiftCount;
1840 ui6b aSig;
1841
1842 aSig = extractFloatx80Frac( a );
1843 aExp = extractFloatx80Exp( a );
1844 aSign = extractFloatx80Sign( a );
1845 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0;
1846 shiftCount = 0x4037 - aExp;
1847 if ( shiftCount <= 0 ) shiftCount = 1;
1848 shift64RightJamming( aSig, shiftCount, &aSig );
1849 return roundAndPackInt32( aSign, aSig );
1850
1851}
1852
1853/*----------------------------------------------------------------------------
1854| Returns the result of converting the extended double-precision floating-
1855| point value `a' to the 32-bit two's complement integer format. The
1856| conversion is performed according to the IEC/IEEE Standard for Binary
1857| Floating-Point Arithmetic, except that the conversion is always rounded
1858| toward zero. If `a' is a NaN, the largest positive integer is returned.
1859| Otherwise, if the conversion overflows, the largest integer with the same
1860| sign as `a' is returned.
1861*----------------------------------------------------------------------------*/
1862
1863#if cIncludeFPUUnused
1864LOCALFUNC si5r floatx80_to_int32_round_to_zero( floatx80 a )
1865{
1866 flag aSign;
1867 si5r aExp, shiftCount;
1868 ui6b aSig, savedASig;
1869 si5r z;
1870
1871 aSig = extractFloatx80Frac( a );
1872 aExp = extractFloatx80Exp( a );
1873 aSign = extractFloatx80Sign( a );
1874 if ( 0x401E < aExp ) {
1875 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0;
1876 goto invalid;
1877 }
1878 else if ( aExp < 0x3FFF ) {
1879 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
1880 return 0;
1881 }
1882 shiftCount = 0x403E - aExp;
1883 savedASig = aSig;
1884 aSig >>= shiftCount;
1885 z = aSig;
1886 if ( aSign ) z = - z;
1887 if ( ( z < 0 ) ^ aSign ) {
1888 invalid:
1889 float_raise( float_flag_invalid );
1890 return aSign ? (si5b) 0x80000000 : 0x7FFFFFFF;
1891 }
1892 if ( ( aSig<<shiftCount ) != savedASig ) {
1893 float_exception_flags |= float_flag_inexact;
1894 }
1895 return z;
1896
1897}
1898#endif
1899
1900#ifdef FLOAT128
1901
1902/*----------------------------------------------------------------------------
1903| Returns the result of converting the extended double-precision floating-
1904| point value `a' to the quadruple-precision floating-point format. The
1905| conversion is performed according to the IEC/IEEE Standard for Binary
1906| Floating-Point Arithmetic.
1907*----------------------------------------------------------------------------*/
1908
1909LOCALFUNC float128 floatx80_to_float128( floatx80 a )
1910{
1911 flag aSign;
1912 si4r aExp;
1913 ui6b aSig, zSig0, zSig1;
1914
1915 aSig = extractFloatx80Frac( a );
1916 aExp = extractFloatx80Exp( a );
1917 aSign = extractFloatx80Sign( a );
1918 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) {
1919 return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
1920 }
1921 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
1922 return packFloat128( aSign, aExp, zSig0, zSig1 );
1923
1924}
1925
1926#endif
1927
1928/*----------------------------------------------------------------------------
1929| Rounds the extended double-precision floating-point value `a' to an integer,
1930| and returns the result as an extended quadruple-precision floating-point
1931| value. The operation is performed according to the IEC/IEEE Standard for
1932| Binary Floating-Point Arithmetic.
1933*----------------------------------------------------------------------------*/
1934
1935LOCALFUNC floatx80 floatx80_round_to_int( floatx80 a )
1936{
1937 flag aSign;
1938 si5r aExp;
1939 ui6b lastBitMask, roundBitsMask;
1940 si3r roundingMode;
1941 floatx80 z;
1942
1943 aExp = extractFloatx80Exp( a );
1944 if ( 0x403E <= aExp ) {
1945 if ( ( aExp == 0x7FFF ) && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) {
1946 return propagateFloatx80NaN( a, a );
1947 }
1948 return a;
1949 }
1950 if ( aExp < 0x3FFF ) {
1951 if ( ( aExp == 0 )
1952 && ( (ui6b) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
1953 return a;
1954 }
1955 float_exception_flags |= float_flag_inexact;
1956 aSign = extractFloatx80Sign( a );
1957 switch ( float_rounding_mode ) {
1958 case float_round_nearest_even:
1959 if ( ( aExp == 0x3FFE ) && (ui6b) ( extractFloatx80Frac( a )<<1 )
1960 ) {
1961 return
1962 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
1963 }
1964 break;
1965 case float_round_down:
1966 return
1967 aSign ?
1968 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
1969 : packFloatx80( 0, 0, 0 );
1970 case float_round_up:
1971 return
1972 aSign ? packFloatx80( 1, 0, 0 )
1973 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
1974 }
1975 return packFloatx80( aSign, 0, 0 );
1976 }
1977 lastBitMask = 1;
1978 lastBitMask <<= 0x403E - aExp;
1979 roundBitsMask = lastBitMask - 1;
1980 z = a;
1981 roundingMode = float_rounding_mode;
1982 if ( roundingMode == float_round_nearest_even ) {
1983 z.low += lastBitMask>>1;
1984 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1985 }
1986 else if ( roundingMode != float_round_to_zero ) {
1987 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1988 z.low += roundBitsMask;
1989 }
1990 }
1991 z.low &= ~ roundBitsMask;
1992 if ( z.low == 0 ) {
1993 ++z.high;
1994 z.low = LIT64( 0x8000000000000000 );
1995 }
1996 if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
1997 return z;
1998
1999}
2000
2001/*----------------------------------------------------------------------------
2002| Returns the result of adding the absolute values of the extended double-
2003| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
2004| negated before being returned. `zSign' is ignored if the result is a NaN.
2005| The addition is performed according to the IEC/IEEE Standard for Binary
2006| Floating-Point Arithmetic.
2007*----------------------------------------------------------------------------*/
2008
2009LOCALFUNC floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2010{
2011 si5r aExp, bExp, zExp;
2012 ui6b aSig, bSig, zSig0, zSig1;
2013 si5r expDiff;
2014
2015 aSig = extractFloatx80Frac( a );
2016 aExp = extractFloatx80Exp( a );
2017 bSig = extractFloatx80Frac( b );
2018 bExp = extractFloatx80Exp( b );
2019 expDiff = aExp - bExp;
2020 if ( 0 < expDiff ) {
2021 if ( aExp == 0x7FFF ) {
2022 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2023 return a;
2024 }
2025 if ( bExp == 0 ) --expDiff;
2026 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2027 zExp = aExp;
2028 }
2029 else if ( expDiff < 0 ) {
2030 if ( bExp == 0x7FFF ) {
2031 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2032 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2033 }
2034 if ( aExp == 0 ) ++expDiff;
2035 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2036 zExp = bExp;
2037 }
2038 else {
2039 if ( aExp == 0x7FFF ) {
2040 if ( (ui6b) ( ( aSig | bSig )<<1 ) ) {
2041 return propagateFloatx80NaN( a, b );
2042 }
2043 return a;
2044 }
2045 zSig1 = 0;
2046 zSig0 = aSig + bSig;
2047 if ( aExp == 0 ) {
2048 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2049 goto roundAndPack;
2050 }
2051 zExp = aExp;
2052 goto shiftRight1;
2053 }
2054 zSig0 = aSig + bSig;
2055 if ( (si6b) zSig0 < 0 ) goto roundAndPack;
2056 shiftRight1:
2057 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2058 zSig0 |= LIT64( 0x8000000000000000 );
2059 ++zExp;
2060 roundAndPack:
2061 return
2062 roundAndPackFloatx80(
2063 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2064
2065}
2066
2067/*----------------------------------------------------------------------------
2068| Returns the result of subtracting the absolute values of the extended
2069| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
2070| difference is negated before being returned. `zSign' is ignored if the
2071| result is a NaN. The subtraction is performed according to the IEC/IEEE
2072| Standard for Binary Floating-Point Arithmetic.
2073*----------------------------------------------------------------------------*/
2074
2075LOCALFUNC floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2076{
2077 si5r aExp, bExp, zExp;
2078 ui6b aSig, bSig, zSig0, zSig1;
2079 si5r expDiff;
2080 floatx80 z;
2081
2082 aSig = extractFloatx80Frac( a );
2083 aExp = extractFloatx80Exp( a );
2084 bSig = extractFloatx80Frac( b );
2085 bExp = extractFloatx80Exp( b );
2086 expDiff = aExp - bExp;
2087 if ( 0 < expDiff ) goto aExpBigger;
2088 if ( expDiff < 0 ) goto bExpBigger;
2089 if ( aExp == 0x7FFF ) {
2090 if ( (ui6b) ( ( aSig | bSig )<<1 ) ) {
2091 return propagateFloatx80NaN( a, b );
2092 }
2093 float_raise( float_flag_invalid );
2094 z.low = floatx80_default_nan_low;
2095 z.high = floatx80_default_nan_high;
2096 return z;
2097 }
2098 if ( aExp == 0 ) {
2099 aExp = 1;
2100 bExp = 1;
2101 }
2102 zSig1 = 0;
2103 if ( bSig < aSig ) goto aBigger;
2104 if ( aSig < bSig ) goto bBigger;
2105 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
2106 bExpBigger:
2107 if ( bExp == 0x7FFF ) {
2108 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2109 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2110 }
2111 if ( aExp == 0 ) ++expDiff;
2112 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2113 bBigger:
2114 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2115 zExp = bExp;
2116 zSign ^= 1;
2117 goto normalizeRoundAndPack;
2118 aExpBigger:
2119 if ( aExp == 0x7FFF ) {
2120 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2121 return a;
2122 }
2123 if ( bExp == 0 ) --expDiff;
2124 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2125 aBigger:
2126 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2127 zExp = aExp;
2128 normalizeRoundAndPack:
2129 return
2130 normalizeRoundAndPackFloatx80(
2131 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2132
2133}
2134
2135/*----------------------------------------------------------------------------
2136| Returns the result of adding the extended double-precision floating-point
2137| values `a' and `b'. The operation is performed according to the IEC/IEEE
2138| Standard for Binary Floating-Point Arithmetic.
2139*----------------------------------------------------------------------------*/
2140
2141LOCALFUNC floatx80 floatx80_add( floatx80 a, floatx80 b )
2142{
2143 flag aSign, bSign;
2144
2145 aSign = extractFloatx80Sign( a );
2146 bSign = extractFloatx80Sign( b );
2147 if ( aSign == bSign ) {
2148 return addFloatx80Sigs( a, b, aSign );
2149 }
2150 else {
2151 return subFloatx80Sigs( a, b, aSign );
2152 }
2153
2154}
2155
2156/*----------------------------------------------------------------------------
2157| Returns the result of subtracting the extended double-precision floating-
2158| point values `a' and `b'. The operation is performed according to the
2159| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2160*----------------------------------------------------------------------------*/
2161
2162LOCALFUNC floatx80 floatx80_sub( floatx80 a, floatx80 b )
2163{
2164 flag aSign, bSign;
2165
2166 aSign = extractFloatx80Sign( a );
2167 bSign = extractFloatx80Sign( b );
2168 if ( aSign == bSign ) {
2169 return subFloatx80Sigs( a, b, aSign );
2170 }
2171 else {
2172 return addFloatx80Sigs( a, b, aSign );
2173 }
2174
2175}
2176
2177/*----------------------------------------------------------------------------
2178| Returns the result of multiplying the extended double-precision floating-
2179| point values `a' and `b'. The operation is performed according to the
2180| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2181*----------------------------------------------------------------------------*/
2182
2183LOCALFUNC floatx80 floatx80_mul( floatx80 a, floatx80 b )
2184{
2185 flag aSign, bSign, zSign;
2186 si5r aExp, bExp, zExp;
2187 ui6b aSig, bSig, zSig0, zSig1;
2188 floatx80 z;
2189
2190 aSig = extractFloatx80Frac( a );
2191 aExp = extractFloatx80Exp( a );
2192 aSign = extractFloatx80Sign( a );
2193 bSig = extractFloatx80Frac( b );
2194 bExp = extractFloatx80Exp( b );
2195 bSign = extractFloatx80Sign( b );
2196 zSign = aSign ^ bSign;
2197 if ( aExp == 0x7FFF ) {
2198 if ( (ui6b) ( aSig<<1 )
2199 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) {
2200 return propagateFloatx80NaN( a, b );
2201 }
2202 if ( ( bExp | bSig ) == 0 ) goto invalid;
2203 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2204 }
2205 if ( bExp == 0x7FFF ) {
2206 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2207 if ( ( aExp | aSig ) == 0 ) {
2208 invalid:
2209 float_raise( float_flag_invalid );
2210 z.low = floatx80_default_nan_low;
2211 z.high = floatx80_default_nan_high;
2212 return z;
2213 }
2214 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2215 }
2216 if ( aExp == 0 ) {
2217 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2218 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2219 }
2220 if ( bExp == 0 ) {
2221 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2222 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2223 }
2224 zExp = aExp + bExp - 0x3FFE;
2225 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2226 if ( 0 < (si6b) zSig0 ) {
2227 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2228 --zExp;
2229 }
2230 return
2231 roundAndPackFloatx80(
2232 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2233
2234}
2235
2236/*----------------------------------------------------------------------------
2237| Returns the result of dividing the extended double-precision floating-point
2238| value `a' by the corresponding value `b'. The operation is performed
2239| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2240*----------------------------------------------------------------------------*/
2241
2242LOCALFUNC floatx80 floatx80_div( floatx80 a, floatx80 b )
2243{
2244 flag aSign, bSign, zSign;
2245 si5r aExp, bExp, zExp;
2246 ui6b aSig, bSig, zSig0, zSig1;
2247 ui6b rem0, rem1, rem2, term0, term1, term2;
2248 floatx80 z;
2249
2250 aSig = extractFloatx80Frac( a );
2251 aExp = extractFloatx80Exp( a );
2252 aSign = extractFloatx80Sign( a );
2253 bSig = extractFloatx80Frac( b );
2254 bExp = extractFloatx80Exp( b );
2255 bSign = extractFloatx80Sign( b );
2256 zSign = aSign ^ bSign;
2257 if ( aExp == 0x7FFF ) {
2258 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2259 if ( bExp == 0x7FFF ) {
2260 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2261 goto invalid;
2262 }
2263 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2264 }
2265 if ( bExp == 0x7FFF ) {
2266 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2267 return packFloatx80( zSign, 0, 0 );
2268 }
2269 if ( bExp == 0 ) {
2270 if ( bSig == 0 ) {
2271 if ( ( aExp | aSig ) == 0 ) {
2272 invalid:
2273 float_raise( float_flag_invalid );
2274 z.low = floatx80_default_nan_low;
2275 z.high = floatx80_default_nan_high;
2276 return z;
2277 }
2278 float_raise( float_flag_divbyzero );
2279 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2280 }
2281 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2282 }
2283 if ( aExp == 0 ) {
2284 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2285 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2286 }
2287 if ( aSig == 0 ) {
2288 /*
2289 added by pcp. this invalid input seems to
2290 cause hang in estimateDiv128To64. should
2291 validate inputs generally.
2292 */
2293 return packFloatx80( zSign, 0, 0 );
2294 }
2295 if ( (si6b) bSig >= 0 ) {
2296 /*
2297 added by pcp. another check.
2298 invalid input, most significant bit should be set?
2299 */
2300 return packFloatx80( zSign, 0, 0 );
2301 }
2302 zExp = aExp - bExp + 0x3FFE;
2303 rem1 = 0;
2304 if ( bSig <= aSig ) {
2305 shift128Right( aSig, 0, 1, &aSig, &rem1 );
2306 ++zExp;
2307 }
2308 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2309 mul64To128( bSig, zSig0, &term0, &term1 );
2310 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2311 while ( (si6b) rem0 < 0 ) {
2312 --zSig0;
2313 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2314 }
2315 zSig1 = estimateDiv128To64( rem1, 0, bSig );
2316 if ( (ui6b) ( zSig1<<1 ) <= 8 ) {
2317 mul64To128( bSig, zSig1, &term1, &term2 );
2318 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2319 while ( (si6b) rem1 < 0 ) {
2320 --zSig1;
2321 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2322 }
2323 zSig1 |= ( ( rem1 | rem2 ) != 0 );
2324 }
2325 return
2326 roundAndPackFloatx80(
2327 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2328
2329}
2330
2331/*----------------------------------------------------------------------------
2332| Returns the remainder of the extended double-precision floating-point value
2333| `a' with respect to the corresponding value `b'. The operation is performed
2334| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2335*----------------------------------------------------------------------------*/
2336
2337LOCALFUNC floatx80 floatx80_rem( floatx80 a, floatx80 b )
2338{
2339 flag aSign, /* bSign, */ zSign;
2340 si5r aExp, bExp, expDiff;
2341 ui6b aSig0, aSig1, bSig;
2342 ui6b q, term0, term1, alternateASig0, alternateASig1;
2343 floatx80 z;
2344
2345 aSig0 = extractFloatx80Frac( a );
2346 aExp = extractFloatx80Exp( a );
2347 aSign = extractFloatx80Sign( a );
2348 bSig = extractFloatx80Frac( b );
2349 bExp = extractFloatx80Exp( b );
2350 /*
2351 not used. should it be?
2352 bSign = extractFloatx80Sign( b );
2353 */
2354 if ( aExp == 0x7FFF ) {
2355 if ( (ui6b) ( aSig0<<1 )
2356 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) {
2357 return propagateFloatx80NaN( a, b );
2358 }
2359 goto invalid;
2360 }
2361 if ( bExp == 0x7FFF ) {
2362 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2363 return a;
2364 }
2365 if ( bExp == 0 ) {
2366 if ( bSig == 0 ) {
2367 invalid:
2368 float_raise( float_flag_invalid );
2369 z.low = floatx80_default_nan_low;
2370 z.high = floatx80_default_nan_high;
2371 return z;
2372 }
2373 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2374 }
2375 if ( aExp == 0 ) {
2376 if ( (ui6b) ( aSig0<<1 ) == 0 ) return a;
2377 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2378 }
2379 bSig |= LIT64( 0x8000000000000000 );
2380 zSign = aSign;
2381 expDiff = aExp - bExp;
2382 aSig1 = 0;
2383 if ( expDiff < 0 ) {
2384 if ( expDiff < -1 ) return a;
2385 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2386 expDiff = 0;
2387 }
2388 q = ( bSig <= aSig0 );
2389 if ( q ) aSig0 -= bSig;
2390 expDiff -= 64;
2391 while ( 0 < expDiff ) {
2392 q = estimateDiv128To64( aSig0, aSig1, bSig );
2393 q = ( 2 < q ) ? q - 2 : 0;
2394 mul64To128( bSig, q, &term0, &term1 );
2395 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2396 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2397 expDiff -= 62;
2398 }
2399 expDiff += 64;
2400 if ( 0 < expDiff ) {
2401 q = estimateDiv128To64( aSig0, aSig1, bSig );
2402 q = ( 2 < q ) ? q - 2 : 0;
2403 q >>= 64 - expDiff;
2404 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
2405 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2406 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2407 while ( le128( term0, term1, aSig0, aSig1 ) ) {
2408 ++q;
2409 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2410 }
2411 }
2412 else {
2413 term1 = 0;
2414 term0 = bSig;
2415 }
2416 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2417 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2418 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2419 && ( q & 1 ) )
2420 ) {
2421 aSig0 = alternateASig0;
2422 aSig1 = alternateASig1;
2423 zSign = ! zSign;
2424 }
2425 return
2426 normalizeRoundAndPackFloatx80(
2427 80, zSign, bExp + expDiff, aSig0, aSig1 );
2428
2429}
2430
2431/*----------------------------------------------------------------------------
2432| Returns the square root of the extended double-precision floating-point
2433| value `a'. The operation is performed according to the IEC/IEEE Standard
2434| for Binary Floating-Point Arithmetic.
2435*----------------------------------------------------------------------------*/
2436
2437LOCALFUNC floatx80 floatx80_sqrt( floatx80 a )
2438{
2439 flag aSign;
2440 si5r aExp, zExp;
2441 ui6b aSig0, aSig1, zSig0, zSig1, doubleZSig0;
2442 ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2443 floatx80 z;
2444
2445 aSig0 = extractFloatx80Frac( a );
2446 aExp = extractFloatx80Exp( a );
2447 aSign = extractFloatx80Sign( a );
2448 if ( aExp == 0x7FFF ) {
2449 if ( (ui6b) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
2450 if ( ! aSign ) return a;
2451 goto invalid;
2452 }
2453 if ( aSign ) {
2454 if ( ( aExp | aSig0 ) == 0 ) return a;
2455 invalid:
2456 float_raise( float_flag_invalid );
2457 z.low = floatx80_default_nan_low;
2458 z.high = floatx80_default_nan_high;
2459 return z;
2460 }
2461 if ( aExp == 0 ) {
2462 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
2463 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2464 }
2465 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
2466 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
2467 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
2468 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
2469 doubleZSig0 = zSig0<<1;
2470 mul64To128( zSig0, zSig0, &term0, &term1 );
2471 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2472 while ( (si6b) rem0 < 0 ) {
2473 --zSig0;
2474 doubleZSig0 -= 2;
2475 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
2476 }
2477 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
2478 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
2479 if ( zSig1 == 0 ) zSig1 = 1;
2480 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
2481 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2482 mul64To128( zSig1, zSig1, &term2, &term3 );
2483 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2484 while ( (si6b) rem1 < 0 ) {
2485 --zSig1;
2486 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
2487 term3 |= 1;
2488 term2 |= doubleZSig0;
2489 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2490 }
2491 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2492 }
2493 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
2494 zSig0 |= doubleZSig0;
2495 return
2496 roundAndPackFloatx80(
2497 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
2498
2499}
2500
2501/*----------------------------------------------------------------------------
2502| Returns 1 if the extended double-precision floating-point value `a' is
2503| equal to the corresponding value `b', and 0 otherwise. The comparison is
2504| performed according to the IEC/IEEE Standard for Binary Floating-Point
2505| Arithmetic.
2506*----------------------------------------------------------------------------*/
2507
2508#if cIncludeFPUUnused
2509LOCALFUNC flag floatx80_eq( floatx80 a, floatx80 b )
2510{
2511
2512 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2513 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2514 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2515 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2516 ) {
2517 if ( floatx80_is_signaling_nan( a )
2518 || floatx80_is_signaling_nan( b ) ) {
2519 float_raise( float_flag_invalid );
2520 }
2521 return 0;
2522 }
2523 return
2524 ( a.low == b.low )
2525 && ( ( a.high == b.high )
2526 || ( ( a.low == 0 )
2527 && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) )
2528 );
2529
2530}
2531#endif
2532
2533/*----------------------------------------------------------------------------
2534| Returns 1 if the extended double-precision floating-point value `a' is
2535| less than or equal to the corresponding value `b', and 0 otherwise. The
2536| comparison is performed according to the IEC/IEEE Standard for Binary
2537| Floating-Point Arithmetic.
2538*----------------------------------------------------------------------------*/
2539
2540#if cIncludeFPUUnused
2541LOCALFUNC flag floatx80_le( floatx80 a, floatx80 b )
2542{
2543 flag aSign, bSign;
2544
2545 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2546 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2547 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2548 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2549 ) {
2550 float_raise( float_flag_invalid );
2551 return 0;
2552 }
2553 aSign = extractFloatx80Sign( a );
2554 bSign = extractFloatx80Sign( b );
2555 if ( aSign != bSign ) {
2556 return
2557 aSign
2558 || ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2559 == 0 );
2560 }
2561 return
2562 aSign ? le128( b.high, b.low, a.high, a.low )
2563 : le128( a.high, a.low, b.high, b.low );
2564
2565}
2566#endif
2567
2568/*----------------------------------------------------------------------------
2569| Returns 1 if the extended double-precision floating-point value `a' is
2570| less than the corresponding value `b', and 0 otherwise. The comparison
2571| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2572| Arithmetic.
2573*----------------------------------------------------------------------------*/
2574
2575#if cIncludeFPUUnused
2576LOCALFUNC flag floatx80_lt( floatx80 a, floatx80 b )
2577{
2578 flag aSign, bSign;
2579
2580 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2581 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2582 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2583 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2584 ) {
2585 float_raise( float_flag_invalid );
2586 return 0;
2587 }
2588 aSign = extractFloatx80Sign( a );
2589 bSign = extractFloatx80Sign( b );
2590 if ( aSign != bSign ) {
2591 return
2592 aSign
2593 && ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2594 != 0 );
2595 }
2596 return
2597 aSign ? lt128( b.high, b.low, a.high, a.low )
2598 : lt128( a.high, a.low, b.high, b.low );
2599
2600}
2601#endif
2602
2603/*----------------------------------------------------------------------------
2604| Returns 1 if the extended double-precision floating-point value `a' is equal
2605| to the corresponding value `b', and 0 otherwise. The invalid exception is
2606| raised if either operand is a NaN. Otherwise, the comparison is performed
2607| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2608*----------------------------------------------------------------------------*/
2609
2610#if cIncludeFPUUnused
2611LOCALFUNC flag floatx80_eq_signaling( floatx80 a, floatx80 b )
2612{
2613
2614 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2615 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2616 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2617 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2618 ) {
2619 float_raise( float_flag_invalid );
2620 return 0;
2621 }
2622 return
2623 ( a.low == b.low )
2624 && ( ( a.high == b.high )
2625 || ( ( a.low == 0 )
2626 && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) )
2627 );
2628
2629}
2630#endif
2631
2632/*----------------------------------------------------------------------------
2633| Returns 1 if the extended double-precision floating-point value `a' is less
2634| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
2635| do not cause an exception. Otherwise, the comparison is performed according
2636| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2637*----------------------------------------------------------------------------*/
2638
2639#if cIncludeFPUUnused
2640LOCALFUNC flag floatx80_le_quiet( floatx80 a, floatx80 b )
2641{
2642 flag aSign, bSign;
2643
2644 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2645 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2646 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2647 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2648 ) {
2649 if ( floatx80_is_signaling_nan( a )
2650 || floatx80_is_signaling_nan( b ) ) {
2651 float_raise( float_flag_invalid );
2652 }
2653 return 0;
2654 }
2655 aSign = extractFloatx80Sign( a );
2656 bSign = extractFloatx80Sign( b );
2657 if ( aSign != bSign ) {
2658 return
2659 aSign
2660 || ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2661 == 0 );
2662 }
2663 return
2664 aSign ? le128( b.high, b.low, a.high, a.low )
2665 : le128( a.high, a.low, b.high, b.low );
2666
2667}
2668#endif
2669
2670/*----------------------------------------------------------------------------
2671| Returns 1 if the extended double-precision floating-point value `a' is less
2672| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
2673| an exception. Otherwise, the comparison is performed according to the
2674| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2675*----------------------------------------------------------------------------*/
2676
2677#if cIncludeFPUUnused
2678LOCALFUNC flag floatx80_lt_quiet( floatx80 a, floatx80 b )
2679{
2680 flag aSign, bSign;
2681
2682 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
2683 && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2684 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
2685 && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2686 ) {
2687 if ( floatx80_is_signaling_nan( a )
2688 || floatx80_is_signaling_nan( b ) ) {
2689 float_raise( float_flag_invalid );
2690 }
2691 return 0;
2692 }
2693 aSign = extractFloatx80Sign( a );
2694 bSign = extractFloatx80Sign( b );
2695 if ( aSign != bSign ) {
2696 return
2697 aSign
2698 && ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2699 != 0 );
2700 }
2701 return
2702 aSign ? lt128( b.high, b.low, a.high, a.low )
2703 : lt128( a.high, a.low, b.high, b.low );
2704
2705}
2706#endif
2707
2708#ifdef FLOAT128
2709
2710/*----------------------------------------------------------------------------
2711| Returns the result of converting the quadruple-precision floating-point
2712| value `a' to the extended double-precision floating-point format. The
2713| conversion is performed according to the IEC/IEEE Standard for Binary
2714| Floating-Point Arithmetic.
2715*----------------------------------------------------------------------------*/
2716
2717LOCALFUNC floatx80 float128_to_floatx80( float128 a )
2718{
2719 flag aSign;
2720 si5r aExp;
2721 ui6b aSig0, aSig1;
2722
2723 aSig1 = extractFloat128Frac1( a );
2724 aSig0 = extractFloat128Frac0( a );
2725 aExp = extractFloat128Exp( a );
2726 aSign = extractFloat128Sign( a );
2727 if ( aExp == 0x7FFF ) {
2728 if ( aSig0 | aSig1 ) {
2729 return commonNaNToFloatx80( float128ToCommonNaN( a ) );
2730 }
2731 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2732 }
2733 if ( aExp == 0 ) {
2734 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
2735 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2736 }
2737 else {
2738 aSig0 |= LIT64( 0x0001000000000000 );
2739 }
2740 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
2741 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
2742
2743}
2744
2745/*----------------------------------------------------------------------------
2746| Returns the result of adding the absolute values of the quadruple-precision
2747| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
2748| before being returned. `zSign' is ignored if the result is a NaN.
2749| The addition is performed according to the IEC/IEEE Standard for Binary
2750| Floating-Point Arithmetic.
2751*----------------------------------------------------------------------------*/
2752
2753LOCALFUNC float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
2754{
2755 si5r aExp, bExp, zExp;
2756 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
2757 si5r expDiff;
2758
2759 aSig1 = extractFloat128Frac1( a );
2760 aSig0 = extractFloat128Frac0( a );
2761 aExp = extractFloat128Exp( a );
2762 bSig1 = extractFloat128Frac1( b );
2763 bSig0 = extractFloat128Frac0( b );
2764 bExp = extractFloat128Exp( b );
2765 expDiff = aExp - bExp;
2766 if ( 0 < expDiff ) {
2767 if ( aExp == 0x7FFF ) {
2768 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
2769 return a;
2770 }
2771 if ( bExp == 0 ) {
2772 --expDiff;
2773 }
2774 else {
2775 bSig0 |= LIT64( 0x0001000000000000 );
2776 }
2777 shift128ExtraRightJamming(
2778 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
2779 zExp = aExp;
2780 }
2781 else if ( expDiff < 0 ) {
2782 if ( bExp == 0x7FFF ) {
2783 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2784 return packFloat128( zSign, 0x7FFF, 0, 0 );
2785 }
2786 if ( aExp == 0 ) {
2787 ++expDiff;
2788 }
2789 else {
2790 aSig0 |= LIT64( 0x0001000000000000 );
2791 }
2792 shift128ExtraRightJamming(
2793 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
2794 zExp = bExp;
2795 }
2796 else {
2797 if ( aExp == 0x7FFF ) {
2798 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
2799 return propagateFloat128NaN( a, b );
2800 }
2801 return a;
2802 }
2803 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2804 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
2805 zSig2 = 0;
2806 zSig0 |= LIT64( 0x0002000000000000 );
2807 zExp = aExp;
2808 goto shiftRight1;
2809 }
2810 aSig0 |= LIT64( 0x0001000000000000 );
2811 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2812 --zExp;
2813 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
2814 ++zExp;
2815 shiftRight1:
2816 shift128ExtraRightJamming(
2817 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
2818 roundAndPack:
2819 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
2820
2821}
2822
2823/*----------------------------------------------------------------------------
2824| Returns the result of subtracting the absolute values of the quadruple-
2825| precision floating-point values `a' and `b'. If `zSign' is 1, the
2826| difference is negated before being returned. `zSign' is ignored if the
2827| result is a NaN. The subtraction is performed according to the IEC/IEEE
2828| Standard for Binary Floating-Point Arithmetic.
2829*----------------------------------------------------------------------------*/
2830
2831LOCALFUNC float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
2832{
2833 si5r aExp, bExp, zExp;
2834 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
2835 si5r expDiff;
2836 float128 z;
2837
2838 aSig1 = extractFloat128Frac1( a );
2839 aSig0 = extractFloat128Frac0( a );
2840 aExp = extractFloat128Exp( a );
2841 bSig1 = extractFloat128Frac1( b );
2842 bSig0 = extractFloat128Frac0( b );
2843 bExp = extractFloat128Exp( b );
2844 expDiff = aExp - bExp;
2845 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
2846 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
2847 if ( 0 < expDiff ) goto aExpBigger;
2848 if ( expDiff < 0 ) goto bExpBigger;
2849 if ( aExp == 0x7FFF ) {
2850 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
2851 return propagateFloat128NaN( a, b );
2852 }
2853 float_raise( float_flag_invalid );
2854 z.low = float128_default_nan_low;
2855 z.high = float128_default_nan_high;
2856 return z;
2857 }
2858 if ( aExp == 0 ) {
2859 aExp = 1;
2860 bExp = 1;
2861 }
2862 if ( bSig0 < aSig0 ) goto aBigger;
2863 if ( aSig0 < bSig0 ) goto bBigger;
2864 if ( bSig1 < aSig1 ) goto aBigger;
2865 if ( aSig1 < bSig1 ) goto bBigger;
2866 return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
2867 bExpBigger:
2868 if ( bExp == 0x7FFF ) {
2869 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2870 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
2871 }
2872 if ( aExp == 0 ) {
2873 ++expDiff;
2874 }
2875 else {
2876 aSig0 |= LIT64( 0x4000000000000000 );
2877 }
2878 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2879 bSig0 |= LIT64( 0x4000000000000000 );
2880 bBigger:
2881 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
2882 zExp = bExp;
2883 zSign ^= 1;
2884 goto normalizeRoundAndPack;
2885 aExpBigger:
2886 if ( aExp == 0x7FFF ) {
2887 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
2888 return a;
2889 }
2890 if ( bExp == 0 ) {
2891 --expDiff;
2892 }
2893 else {
2894 bSig0 |= LIT64( 0x4000000000000000 );
2895 }
2896 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
2897 aSig0 |= LIT64( 0x4000000000000000 );
2898 aBigger:
2899 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2900 zExp = aExp;
2901 normalizeRoundAndPack:
2902 --zExp;
2903 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
2904
2905}
2906
2907/*----------------------------------------------------------------------------
2908| Returns the result of adding the quadruple-precision floating-point values
2909| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
2910| for Binary Floating-Point Arithmetic.
2911*----------------------------------------------------------------------------*/
2912
2913LOCALFUNC float128 float128_add( float128 a, float128 b )
2914{
2915 flag aSign, bSign;
2916
2917 aSign = extractFloat128Sign( a );
2918 bSign = extractFloat128Sign( b );
2919 if ( aSign == bSign ) {
2920 return addFloat128Sigs( a, b, aSign );
2921 }
2922 else {
2923 return subFloat128Sigs( a, b, aSign );
2924 }
2925
2926}
2927
2928/*----------------------------------------------------------------------------
2929| Returns the result of subtracting the quadruple-precision floating-point
2930| values `a' and `b'. The operation is performed according to the IEC/IEEE
2931| Standard for Binary Floating-Point Arithmetic.
2932*----------------------------------------------------------------------------*/
2933
2934LOCALFUNC float128 float128_sub( float128 a, float128 b )
2935{
2936 flag aSign, bSign;
2937
2938 aSign = extractFloat128Sign( a );
2939 bSign = extractFloat128Sign( b );
2940 if ( aSign == bSign ) {
2941 return subFloat128Sigs( a, b, aSign );
2942 }
2943 else {
2944 return addFloat128Sigs( a, b, aSign );
2945 }
2946
2947}
2948
2949/*----------------------------------------------------------------------------
2950| Returns the result of multiplying the quadruple-precision floating-point
2951| values `a' and `b'. The operation is performed according to the IEC/IEEE
2952| Standard for Binary Floating-Point Arithmetic.
2953*----------------------------------------------------------------------------*/
2954
2955LOCALFUNC float128 float128_mul( float128 a, float128 b )
2956{
2957 flag aSign, bSign, zSign;
2958 si5r aExp, bExp, zExp;
2959 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
2960 float128 z;
2961
2962 aSig1 = extractFloat128Frac1( a );
2963 aSig0 = extractFloat128Frac0( a );
2964 aExp = extractFloat128Exp( a );
2965 aSign = extractFloat128Sign( a );
2966 bSig1 = extractFloat128Frac1( b );
2967 bSig0 = extractFloat128Frac0( b );
2968 bExp = extractFloat128Exp( b );
2969 bSign = extractFloat128Sign( b );
2970 zSign = aSign ^ bSign;
2971 if ( aExp == 0x7FFF ) {
2972 if ( ( aSig0 | aSig1 )
2973 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
2974 return propagateFloat128NaN( a, b );
2975 }
2976 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
2977 return packFloat128( zSign, 0x7FFF, 0, 0 );
2978 }
2979 if ( bExp == 0x7FFF ) {
2980 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2981 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
2982 invalid:
2983 float_raise( float_flag_invalid );
2984 z.low = float128_default_nan_low;
2985 z.high = float128_default_nan_high;
2986 return z;
2987 }
2988 return packFloat128( zSign, 0x7FFF, 0, 0 );
2989 }
2990 if ( aExp == 0 ) {
2991 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
2992 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2993 }
2994 if ( bExp == 0 ) {
2995 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
2996 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2997 }
2998 zExp = aExp + bExp - 0x4000;
2999 aSig0 |= LIT64( 0x0001000000000000 );
3000 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
3001 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
3002 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
3003 zSig2 |= ( zSig3 != 0 );
3004 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
3005 shift128ExtraRightJamming(
3006 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
3007 ++zExp;
3008 }
3009 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
3010
3011}
3012
3013/*----------------------------------------------------------------------------
3014| Returns the result of dividing the quadruple-precision floating-point value
3015| `a' by the corresponding value `b'. The operation is performed according to
3016| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3017*----------------------------------------------------------------------------*/
3018
3019LOCALFUNC float128 float128_div( float128 a, float128 b )
3020{
3021 flag aSign, bSign, zSign;
3022 si5r aExp, bExp, zExp;
3023 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
3024 ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3025 float128 z;
3026
3027 aSig1 = extractFloat128Frac1( a );
3028 aSig0 = extractFloat128Frac0( a );
3029 aExp = extractFloat128Exp( a );
3030 aSign = extractFloat128Sign( a );
3031 bSig1 = extractFloat128Frac1( b );
3032 bSig0 = extractFloat128Frac0( b );
3033 bExp = extractFloat128Exp( b );
3034 bSign = extractFloat128Sign( b );
3035 zSign = aSign ^ bSign;
3036 if ( aExp == 0x7FFF ) {
3037 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
3038 if ( bExp == 0x7FFF ) {
3039 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
3040 goto invalid;
3041 }
3042 return packFloat128( zSign, 0x7FFF, 0, 0 );
3043 }
3044 if ( bExp == 0x7FFF ) {
3045 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
3046 return packFloat128( zSign, 0, 0, 0 );
3047 }
3048 if ( bExp == 0 ) {
3049 if ( ( bSig0 | bSig1 ) == 0 ) {
3050 if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
3051 invalid:
3052 float_raise( float_flag_invalid );
3053 z.low = float128_default_nan_low;
3054 z.high = float128_default_nan_high;
3055 return z;
3056 }
3057 float_raise( float_flag_divbyzero );
3058 return packFloat128( zSign, 0x7FFF, 0, 0 );
3059 }
3060 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
3061 }
3062 if ( aExp == 0 ) {
3063 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
3064 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
3065 }
3066 zExp = aExp - bExp + 0x3FFD;
3067 shortShift128Left(
3068 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
3069 shortShift128Left(
3070 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
3071 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
3072 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
3073 ++zExp;
3074 }
3075 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
3076 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
3077 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
3078 while ( (si6b) rem0 < 0 ) {
3079 --zSig0;
3080 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
3081 }
3082 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
3083 if ( ( zSig1 & 0x3FFF ) <= 4 ) {
3084 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
3085 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
3086 while ( (si6b) rem1 < 0 ) {
3087 --zSig1;
3088 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
3089 }
3090 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3091 }
3092 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
3093 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
3094
3095}
3096
3097#endif
3098
3099typedef unsigned int float32;
3100
3101/*----------------------------------------------------------------------------
3102| Normalizes the subnormal single-precision floating-point value represented
3103| by the denormalized significand `aSig'. The normalized exponent and
3104| significand are stored at the locations pointed to by `zExpPtr' and
3105| `zSigPtr', respectively.
3106*----------------------------------------------------------------------------*/
3107
3108LOCALPROC
3109 normalizeFloat32Subnormal( ui5b aSig, si4r *zExpPtr, ui5b *zSigPtr )
3110{
3111 si3r shiftCount;
3112
3113 shiftCount = countLeadingZeros32( aSig ) - 8;
3114 *zSigPtr = aSig<<shiftCount;
3115 *zExpPtr = 1 - shiftCount;
3116
3117}
3118
3119/*----------------------------------------------------------------------------
3120| Returns the fraction bits of the single-precision floating-point value `a'.
3121*----------------------------------------------------------------------------*/
3122
3123LOCALINLINEFUNC ui5b extractFloat32Frac( float32 a )
3124{
3125
3126 return a & 0x007FFFFF;
3127
3128}
3129
3130/*----------------------------------------------------------------------------
3131| Returns the exponent bits of the single-precision floating-point value `a'.
3132*----------------------------------------------------------------------------*/
3133
3134LOCALINLINEFUNC si4r extractFloat32Exp( float32 a )
3135{
3136
3137 return ( a>>23 ) & 0xFF;
3138
3139}
3140
3141/*----------------------------------------------------------------------------
3142| Returns the sign bit of the single-precision floating-point value `a'.
3143*----------------------------------------------------------------------------*/
3144
3145LOCALINLINEFUNC flag extractFloat32Sign( float32 a )
3146{
3147
3148 return a>>31;
3149
3150}
3151
3152/*----------------------------------------------------------------------------
3153| Returns 1 if the single-precision floating-point value `a' is a signaling
3154| NaN; otherwise returns 0.
3155*----------------------------------------------------------------------------*/
3156
3157LOCALFUNC flag float32_is_signaling_nan( float32 a )
3158{
3159
3160 return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
3161
3162}
3163
3164/*----------------------------------------------------------------------------
3165| Returns the result of converting the single-precision floating-point NaN
3166| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
3167| exception is raised.
3168*----------------------------------------------------------------------------*/
3169
3170LOCALFUNC commonNaNT float32ToCommonNaN( float32 a )
3171{
3172 commonNaNT z;
3173
3174 if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
3175 z.sign = a>>31;
3176 z.low = 0;
3177 z.high = ( (ui6b) a )<<41;
3178 return z;
3179
3180}
3181
3182/*----------------------------------------------------------------------------
3183| Returns the result of converting the single-precision floating-point value
3184| `a' to the extended double-precision floating-point format. The conversion
3185| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3186| Arithmetic.
3187*----------------------------------------------------------------------------*/
3188
3189LOCALFUNC floatx80 float32_to_floatx80( float32 a )
3190{
3191 flag aSign;
3192 si4r aExp;
3193 ui5b aSig;
3194
3195 aSig = extractFloat32Frac( a );
3196 aExp = extractFloat32Exp( a );
3197 aSign = extractFloat32Sign( a );
3198 if ( aExp == 0xFF ) {
3199 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
3200 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3201 }
3202 if ( aExp == 0 ) {
3203 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3204 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3205 }
3206 aSig |= 0x00800000;
3207 return packFloatx80( aSign, aExp + 0x3F80, ( (ui6b) aSig )<<40 );
3208
3209}
3210
3211/*----------------------------------------------------------------------------
3212| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3213| single-precision floating-point value, returning the result. After being
3214| shifted into the proper positions, the three fields are simply added
3215| together to form the result. This means that any integer portion of `zSig'
3216| will be added into the exponent. Since a properly normalized significand
3217| will have an integer portion equal to 1, the `zExp' input should be 1 less
3218| than the desired result exponent whenever `zSig' is a complete, normalized
3219| significand.
3220*----------------------------------------------------------------------------*/
3221
3222LOCALINLINEFUNC float32 packFloat32( flag zSign, si4r zExp, ui5b zSig )
3223{
3224
3225 return ( ( (ui5b) zSign )<<31 ) + ( ( (ui5b) zExp )<<23 ) + zSig;
3226
3227}
3228
3229/*----------------------------------------------------------------------------
3230| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3231| and significand `zSig', and returns the proper single-precision floating-
3232| point value corresponding to the abstract input. Ordinarily, the abstract
3233| value is simply rounded and packed into the single-precision format, with
3234| the inexact exception raised if the abstract input cannot be represented
3235| exactly. However, if the abstract value is too large, the overflow and
3236| inexact exceptions are raised and an infinity or maximal finite value is
3237| returned. If the abstract value is too small, the input value is rounded to
3238| a subnormal number, and the underflow and inexact exceptions are raised if
3239| the abstract input cannot be represented exactly as a subnormal single-
3240| precision floating-point number.
3241| The input significand `zSig' has its binary point between bits 30
3242| and 29, which is 7 bits to the left of the usual location. This shifted
3243| significand must be normalized or smaller. If `zSig' is not normalized,
3244| `zExp' must be 0; in that case, the result returned is a subnormal number,
3245| and it must not require rounding. In the usual case that `zSig' is
3246| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3247| The handling of underflow and overflow follows the IEC/IEEE Standard for
3248| Binary Floating-Point Arithmetic.
3249*----------------------------------------------------------------------------*/
3250
3251LOCALFUNC float32 roundAndPackFloat32( flag zSign, si4r zExp, ui5b zSig )
3252{
3253 si3r roundingMode;
3254 flag roundNearestEven;
3255 si3r roundIncrement, roundBits;
3256 flag isTiny;
3257
3258 roundingMode = float_rounding_mode;
3259 roundNearestEven = ( roundingMode == float_round_nearest_even );
3260 roundIncrement = 0x40;
3261 if ( ! roundNearestEven ) {
3262 if ( roundingMode == float_round_to_zero ) {
3263 roundIncrement = 0;
3264 }
3265 else {
3266 roundIncrement = 0x7F;
3267 if ( zSign ) {
3268 if ( roundingMode == float_round_up ) roundIncrement = 0;
3269 }
3270 else {
3271 if ( roundingMode == float_round_down ) roundIncrement = 0;
3272 }
3273 }
3274 }
3275 roundBits = zSig & 0x7F;
3276 if ( 0xFD <= (ui4b) zExp ) {
3277 if ( ( 0xFD < zExp )
3278 || ( ( zExp == 0xFD )
3279 && ( (si5b) ( zSig + roundIncrement ) < 0 ) )
3280 ) {
3281 float_raise( float_flag_overflow | float_flag_inexact );
3282 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
3283 }
3284 if ( zExp < 0 ) {
3285 isTiny =
3286 ( float_detect_tininess == float_tininess_before_rounding )
3287 || ( zExp < -1 )
3288 || ( zSig + roundIncrement < 0x80000000 );
3289 shift32RightJamming( zSig, - zExp, &zSig );
3290 zExp = 0;
3291 roundBits = zSig & 0x7F;
3292 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
3293 }
3294 }
3295 if ( roundBits ) float_exception_flags |= float_flag_inexact;
3296 zSig = ( zSig + roundIncrement )>>7;
3297 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3298 if ( zSig == 0 ) zExp = 0;
3299 return packFloat32( zSign, zExp, zSig );
3300
3301}
3302
3303/*----------------------------------------------------------------------------
3304| Returns the result of converting the canonical NaN `a' to the single-
3305| precision floating-point format.
3306*----------------------------------------------------------------------------*/
3307
3308LOCALFUNC float32 commonNaNToFloat32( commonNaNT a )
3309{
3310
3311 return ( ( (ui5b) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
3312
3313}
3314
3315/*----------------------------------------------------------------------------
3316| Returns the result of converting the extended double-precision floating-
3317| point value `a' to the single-precision floating-point format. The
3318| conversion is performed according to the IEC/IEEE Standard for Binary
3319| Floating-Point Arithmetic.
3320*----------------------------------------------------------------------------*/
3321
3322LOCALFUNC float32 floatx80_to_float32( floatx80 a )
3323{
3324 flag aSign;
3325 si5r aExp;
3326 ui6b aSig;
3327
3328 aSig = extractFloatx80Frac( a );
3329 aExp = extractFloatx80Exp( a );
3330 aSign = extractFloatx80Sign( a );
3331 if ( aExp == 0x7FFF ) {
3332 if ( (ui6b) ( aSig<<1 ) ) {
3333 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3334 }
3335 return packFloat32( aSign, 0xFF, 0 );
3336 }
3337 shift64RightJamming( aSig, 33, &aSig );
3338 if ( aExp || aSig ) aExp -= 0x3F81;
3339 return roundAndPackFloat32( aSign, aExp, aSig );
3340
3341}
3342
3343
3344typedef ui6b float64;
3345
3346/*----------------------------------------------------------------------------
3347| Returns the fraction bits of the double-precision floating-point value `a'.
3348*----------------------------------------------------------------------------*/
3349
3350LOCALINLINEFUNC ui6b extractFloat64Frac( float64 a )
3351{
3352
3353 return a & LIT64( 0x000FFFFFFFFFFFFF );
3354
3355}
3356
3357/*----------------------------------------------------------------------------
3358| Returns the exponent bits of the double-precision floating-point value `a'.
3359*----------------------------------------------------------------------------*/
3360
3361LOCALINLINEFUNC si4r extractFloat64Exp( float64 a )
3362{
3363
3364 return ( a>>52 ) & 0x7FF;
3365
3366}
3367
3368/*----------------------------------------------------------------------------
3369| Returns the sign bit of the double-precision floating-point value `a'.
3370*----------------------------------------------------------------------------*/
3371
3372LOCALINLINEFUNC flag extractFloat64Sign( float64 a )
3373{
3374
3375 return a>>63;
3376
3377}
3378
3379/*----------------------------------------------------------------------------
3380| Returns 1 if the double-precision floating-point value `a' is a signaling
3381| NaN; otherwise returns 0.
3382*----------------------------------------------------------------------------*/
3383
3384LOCALFUNC flag float64_is_signaling_nan( float64 a )
3385{
3386
3387 return
3388 ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
3389 && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
3390
3391}
3392
3393/*----------------------------------------------------------------------------
3394| Returns the result of converting the double-precision floating-point NaN
3395| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
3396| exception is raised.
3397*----------------------------------------------------------------------------*/
3398
3399LOCALFUNC commonNaNT float64ToCommonNaN( float64 a )
3400{
3401 commonNaNT z;
3402
3403 if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
3404 z.sign = a>>63;
3405 z.low = 0;
3406 z.high = a<<12;
3407 return z;
3408
3409}
3410
3411/*----------------------------------------------------------------------------
3412| Normalizes the subnormal double-precision floating-point value represented
3413| by the denormalized significand `aSig'. The normalized exponent and
3414| significand are stored at the locations pointed to by `zExpPtr' and
3415| `zSigPtr', respectively.
3416*----------------------------------------------------------------------------*/
3417
3418LOCALPROC
3419 normalizeFloat64Subnormal( ui6b aSig, si4r *zExpPtr, ui6b *zSigPtr )
3420{
3421 si3r shiftCount;
3422
3423 shiftCount = countLeadingZeros64( aSig ) - 11;
3424 *zSigPtr = aSig<<shiftCount;
3425 *zExpPtr = 1 - shiftCount;
3426
3427}
3428
3429/*----------------------------------------------------------------------------
3430| Returns the result of converting the double-precision floating-point value
3431| `a' to the extended double-precision floating-point format. The conversion
3432| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3433| Arithmetic.
3434*----------------------------------------------------------------------------*/
3435
3436LOCALFUNC floatx80 float64_to_floatx80( float64 a )
3437{
3438 flag aSign;
3439 si4r aExp;
3440 ui6b aSig;
3441
3442 aSig = extractFloat64Frac( a );
3443 aExp = extractFloat64Exp( a );
3444 aSign = extractFloat64Sign( a );
3445 if ( aExp == 0x7FF ) {
3446 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
3447 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3448 }
3449 if ( aExp == 0 ) {
3450 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3451 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3452 }
3453 return
3454 packFloatx80(
3455 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3456
3457}
3458
3459/*----------------------------------------------------------------------------
3460| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3461| double-precision floating-point value, returning the result. After being
3462| shifted into the proper positions, the three fields are simply added
3463| together to form the result. This means that any integer portion of `zSig'
3464| will be added into the exponent. Since a properly normalized significand
3465| will have an integer portion equal to 1, the `zExp' input should be 1 less
3466| than the desired result exponent whenever `zSig' is a complete, normalized
3467| significand.
3468*----------------------------------------------------------------------------*/
3469
3470LOCALINLINEFUNC float64 packFloat64( flag zSign, si4r zExp, ui6b zSig )
3471{
3472
3473 return ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<52 ) + zSig;
3474
3475}
3476
3477/*----------------------------------------------------------------------------
3478| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3479| and significand `zSig', and returns the proper double-precision floating-
3480| point value corresponding to the abstract input. Ordinarily, the abstract
3481| value is simply rounded and packed into the double-precision format, with
3482| the inexact exception raised if the abstract input cannot be represented
3483| exactly. However, if the abstract value is too large, the overflow and
3484| inexact exceptions are raised and an infinity or maximal finite value is
3485| returned. If the abstract value is too small, the input value is rounded
3486| to a subnormal number, and the underflow and inexact exceptions are raised
3487| if the abstract input cannot be represented exactly as a subnormal double-
3488| precision floating-point number.
3489| The input significand `zSig' has its binary point between bits 62
3490| and 61, which is 10 bits to the left of the usual location. This shifted
3491| significand must be normalized or smaller. If `zSig' is not normalized,
3492| `zExp' must be 0; in that case, the result returned is a subnormal number,
3493| and it must not require rounding. In the usual case that `zSig' is
3494| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3495| The handling of underflow and overflow follows the IEC/IEEE Standard for
3496| Binary Floating-Point Arithmetic.
3497*----------------------------------------------------------------------------*/
3498
3499LOCALFUNC float64 roundAndPackFloat64( flag zSign, si4r zExp, ui6b zSig )
3500{
3501 si3r roundingMode;
3502 flag roundNearestEven;
3503 si4r roundIncrement, roundBits;
3504 flag isTiny;
3505
3506 roundingMode = float_rounding_mode;
3507 roundNearestEven = ( roundingMode == float_round_nearest_even );
3508 roundIncrement = 0x200;
3509 if ( ! roundNearestEven ) {
3510 if ( roundingMode == float_round_to_zero ) {
3511 roundIncrement = 0;
3512 }
3513 else {
3514 roundIncrement = 0x3FF;
3515 if ( zSign ) {
3516 if ( roundingMode == float_round_up ) roundIncrement = 0;
3517 }
3518 else {
3519 if ( roundingMode == float_round_down ) roundIncrement = 0;
3520 }
3521 }
3522 }
3523 roundBits = zSig & 0x3FF;
3524 if ( 0x7FD <= (ui4b) zExp ) {
3525 if ( ( 0x7FD < zExp )
3526 || ( ( zExp == 0x7FD )
3527 && ( (si6b) ( zSig + roundIncrement ) < 0 ) )
3528 ) {
3529 float_raise( float_flag_overflow | float_flag_inexact );
3530 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
3531 }
3532 if ( zExp < 0 ) {
3533 isTiny =
3534 ( float_detect_tininess == float_tininess_before_rounding )
3535 || ( zExp < -1 )
3536 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
3537 shift64RightJamming( zSig, - zExp, &zSig );
3538 zExp = 0;
3539 roundBits = zSig & 0x3FF;
3540 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
3541 }
3542 }
3543 if ( roundBits ) float_exception_flags |= float_flag_inexact;
3544 zSig = ( zSig + roundIncrement )>>10;
3545 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3546 if ( zSig == 0 ) zExp = 0;
3547 return packFloat64( zSign, zExp, zSig );
3548
3549}
3550
3551/*----------------------------------------------------------------------------
3552| Returns the result of converting the canonical NaN `a' to the double-
3553| precision floating-point format.
3554*----------------------------------------------------------------------------*/
3555
3556LOCALFUNC float64 commonNaNToFloat64( commonNaNT a )
3557{
3558
3559 return
3560 ( ( (ui6b) a.sign )<<63 )
3561 | LIT64( 0x7FF8000000000000 )
3562 | ( a.high>>12 );
3563
3564}
3565
3566/*----------------------------------------------------------------------------
3567| Returns the result of converting the extended double-precision floating-
3568| point value `a' to the double-precision floating-point format. The
3569| conversion is performed according to the IEC/IEEE Standard for Binary
3570| Floating-Point Arithmetic.
3571*----------------------------------------------------------------------------*/
3572
3573LOCALFUNC float64 floatx80_to_float64( floatx80 a )
3574{
3575 flag aSign;
3576 si5r aExp;
3577 ui6b aSig, zSig;
3578
3579 aSig = extractFloatx80Frac( a );
3580 aExp = extractFloatx80Exp( a );
3581 aSign = extractFloatx80Sign( a );
3582 if ( aExp == 0x7FFF ) {
3583 if ( (ui6b) ( aSig<<1 ) ) {
3584 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3585 }
3586 return packFloat64( aSign, 0x7FF, 0 );
3587 }
3588 shift64RightJamming( aSig, 1, &zSig );
3589 if ( aExp || aSig ) aExp -= 0x3C01;
3590 return roundAndPackFloat64( aSign, aExp, zSig );
3591
3592}
3593
3594/* ----- end from original file "softfloat.c" ----- */
3595
3596typedef si4r Bit16s;
3597typedef si5r Bit32s;
3598typedef si6r Bit64s;
3599
3600typedef ui5r Bit32u;
3601typedef ui6r Bit64u;
3602
3603#define int16_indefinite 0x8000
3604
3605/*----------------------------------------------------------------------------
3606| Takes extended double-precision floating-point NaN `a' and returns the
3607| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
3608| is raised.
3609*----------------------------------------------------------------------------*/
3610
3611LOCALFUNC floatx80 propagateOneFloatx80NaN(floatx80 *a)
3612{
3613 if (floatx80_is_signaling_nan(*a))
3614 float_raise(float_flag_invalid);
3615
3616 a->low |= LIT64(0xC000000000000000);
3617
3618 return *a;
3619}
3620
3621#define float_flag_denormal 0x02
3622
3623#define floatx80_default_nan_exp 0xFFFF
3624#define floatx80_default_nan_fraction LIT64(0xC000000000000000)
3625
3626#define packFloatx80m(zSign, zExp, zSig) {(zSig), ((zSign) << 15) + (zExp) }
3627
3628/*----------------------------------------------------------------------------
3629| Packs two 64-bit precision integers into into the quadruple-precision
3630| floating-point value, returning the result.
3631*----------------------------------------------------------------------------*/
3632
3633#define packFloat2x128m(zHi, zLo) {(zLo), (zHi)}
3634#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
3635
3636static const floatx80 floatx80_default_nan =
3637 packFloatx80m(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
3638
3639/*----------------------------------------------------------------------------
3640| Software IEC/IEEE floating-point ordering relations
3641*----------------------------------------------------------------------------*/
3642enum {
3643 float_relation_less = -1,
3644 float_relation_equal = 0,
3645 float_relation_greater = 1,
3646 float_relation_unordered = 2
3647};
3648
3649#define EXP_BIAS 0x3FFF
3650
3651
3652/*----------------------------------------------------------------------------
3653| Returns the result of multiplying the extended double-precision floating-
3654| point value `a' and quadruple-precision floating point value `b'. The
3655| operation is performed according to the IEC/IEEE Standard for Binary
3656| Floating-Point Arithmetic.
3657*----------------------------------------------------------------------------*/
3658
3659LOCALFUNC floatx80 floatx80_mul128(floatx80 a, float128 b)
3660{
3661 Bit32s aExp, bExp, zExp;
3662 Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2;
3663 int aSign, bSign, zSign;
3664
3665 // handle unsupported extended double-precision floating encodings
3666 if (0 /* floatx80_is_unsupported(a) */)
3667 {
3668 invalid:
3669 float_raise(float_flag_invalid);
3670 return floatx80_default_nan;
3671 }
3672
3673 aSig = extractFloatx80Frac(a);
3674 aExp = extractFloatx80Exp(a);
3675 aSign = extractFloatx80Sign(a);
3676 bSig0 = extractFloat128Frac0(b);
3677 bSig1 = extractFloat128Frac1(b);
3678 bExp = extractFloat128Exp(b);
3679 bSign = extractFloat128Sign(b);
3680
3681 zSign = aSign ^ bSign;
3682
3683 if (aExp == 0x7FFF) {
3684 if ((Bit64u) (aSig<<1)
3685 || ((bExp == 0x7FFF) && (bSig0 | bSig1)))
3686 {
3687 floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b));
3688 return propagateFloatx80NaN(a, r);
3689 }
3690 if (bExp == 0) {
3691 if ((bSig0 | bSig1) == 0) goto invalid;
3692 float_raise(float_flag_denormal);
3693 }
3694 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
3695 }
3696 if (bExp == 0x7FFF) {
3697 if (bSig0 | bSig1) {
3698 floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b));
3699 return propagateFloatx80NaN(a, r);
3700 }
3701 if (aExp == 0) {
3702 if (aSig == 0) goto invalid;
3703 float_raise(float_flag_denormal);
3704 }
3705 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
3706 }
3707 if (aExp == 0) {
3708 if (aSig == 0) {
3709 if ((bExp == 0) && (bSig0 | bSig1)) float_raise(float_flag_denormal);
3710 return packFloatx80(zSign, 0, 0);
3711 }
3712 float_raise(float_flag_denormal);
3713 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3714 }
3715 if (bExp == 0) {
3716 if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0);
3717 float_raise(float_flag_denormal);
3718 normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
3719 }
3720 else bSig0 |= LIT64(0x0001000000000000);
3721
3722 zExp = aExp + bExp - 0x3FFE;
3723 shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1);
3724 mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2);
3725 if (0 < (Bit64s) zSig0) {
3726 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
3727 --zExp;
3728 }
3729 return
3730 roundAndPackFloatx80(floatx80_rounding_precision,
3731 zSign, zExp, zSig0, zSig1);
3732}
3733
3734/* ----- from original file "softfloatx80.h" ----- */
3735
3736/*
3737 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
3738*/
3739
3740
3741/*----------------------------------------------------------------------------
3742| Software IEC/IEEE floating-point class.
3743*----------------------------------------------------------------------------*/
3744typedef enum {
3745 float_zero,
3746 float_NaN,
3747 float_negative_inf,
3748 float_positive_inf,
3749 float_denormal,
3750 float_normalized
3751} float_class_t;
3752
3753
3754/*-----------------------------------------------------------------------------
3755| Calculates the absolute value of the extended double-precision floating-point
3756| value `a'. The operation is performed according to the IEC/IEEE Standard
3757| for Binary Floating-Point Arithmetic.
3758*----------------------------------------------------------------------------*/
3759
3760#if 0
3761inline floatx80 floatx80_abs(floatx80 *reg)
3762{
3763 reg->high &= 0x7FFF;
3764 return *reg;
3765}
3766#endif
3767
3768/*-----------------------------------------------------------------------------
3769| Changes the sign of the extended double-precision floating-point value 'a'.
3770| The operation is performed according to the IEC/IEEE Standard for Binary
3771| Floating-Point Arithmetic.
3772*----------------------------------------------------------------------------*/
3773
3774LOCALFUNC floatx80 floatx80_chs(floatx80 *x)
3775{
3776 x->high ^= 0x8000;
3777 return *x;
3778}
3779
3780/* ----- end from original file "softfloatx80.h" ----- */
3781
3782/* ----- from original file "softfloatx80.cc" ----- */
3783
3784/*
3785 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
3786*/
3787
3788/*----------------------------------------------------------------------------
3789| Returns the result of converting the extended double-precision floating-
3790| point value `a' to the 16-bit two's complement integer format. The
3791| conversion is performed according to the IEC/IEEE Standard for Binary
3792| Floating-Point Arithmetic - which means in particular that the conversion
3793| is rounded according to the current rounding mode. If `a' is a NaN or the
3794| conversion overflows, the integer indefinite value is returned.
3795*----------------------------------------------------------------------------*/
3796
3797#if cIncludeFPUUnused
3798LOCALFUNC Bit16s floatx80_to_int16(floatx80 a)
3799{
3800#if 0
3801 if (floatx80_is_unsupported(a))
3802 {
3803 float_raise(float_flag_invalid);
3804 return int16_indefinite;
3805 }
3806#endif
3807
3808 Bit32s v32 = floatx80_to_int32(a);
3809
3810 if ((v32 > 32767) || (v32 < -32768)) {
3811 float_exception_flags = float_flag_invalid; // throw way other flags
3812 return int16_indefinite;
3813 }
3814
3815 return (Bit16s) v32;
3816}
3817#endif
3818
3819/*----------------------------------------------------------------------------
3820| Returns the result of converting the extended double-precision floating-
3821| point value `a' to the 16-bit two's complement integer format. The
3822| conversion is performed according to the IEC/IEEE Standard for Binary
3823| Floating-Point Arithmetic, except that the conversion is always rounded
3824| toward zero. If `a' is a NaN or the conversion overflows, the integer
3825| indefinite value is returned.
3826*----------------------------------------------------------------------------*/
3827
3828#if cIncludeFPUUnused
3829LOCALFUNC Bit16s floatx80_to_int16_round_to_zero(floatx80 a)
3830{
3831#if 0
3832 if (floatx80_is_unsupported(a))
3833 {
3834 float_raise(float_flag_invalid);
3835 return int16_indefinite;
3836 }
3837#endif
3838
3839 Bit32s v32 = floatx80_to_int32_round_to_zero(a);
3840
3841 if ((v32 > 32767) || (v32 < -32768)) {
3842 float_exception_flags = float_flag_invalid; // throw way other flags
3843 return int16_indefinite;
3844 }
3845
3846 return (Bit16s) v32;
3847}
3848#endif
3849
3850/*----------------------------------------------------------------------------
3851| Separate the source extended double-precision floating point value `a'
3852| into its exponent and significand, store the significant back to the
3853| 'a' and return the exponent. The operation performed is a superset of
3854| the IEC/IEEE recommended logb(x) function.
3855*----------------------------------------------------------------------------*/
3856
3857LOCALFUNC floatx80 floatx80_extract(floatx80 *a)
3858{
3859 Bit64u aSig = extractFloatx80Frac(*a);
3860 Bit32s aExp = extractFloatx80Exp(*a);
3861 int aSign = extractFloatx80Sign(*a);
3862
3863#if 0
3864 if (floatx80_is_unsupported(*a))
3865 {
3866 float_raise(float_flag_invalid);
3867 *a = floatx80_default_nan;
3868 return *a;
3869 }
3870#endif
3871
3872 if (aExp == 0x7FFF) {
3873 if ((Bit64u) (aSig<<1))
3874 {
3875 *a = propagateOneFloatx80NaN(a);
3876 return *a;
3877 }
3878 return packFloatx80(0, 0x7FFF, LIT64(0x8000000000000000));
3879 }
3880 if (aExp == 0)
3881 {
3882 if (aSig == 0) {
3883 float_raise(float_flag_divbyzero);
3884 *a = packFloatx80(aSign, 0, 0);
3885 return packFloatx80(1, 0x7FFF, LIT64(0x8000000000000000));
3886 }
3887 float_raise(float_flag_denormal);
3888 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3889 }
3890
3891 a->high = (aSign << 15) + 0x3FFF;
3892 a->low = aSig;
3893 return int32_to_floatx80(aExp - 0x3FFF);
3894}
3895
3896/*----------------------------------------------------------------------------
3897| Scales extended double-precision floating-point value in operand `a' by
3898| value `b'. The function truncates the value in the second operand 'b' to
3899| an integral value and adds that value to the exponent of the operand 'a'.
3900| The operation performed according to the IEC/IEEE Standard for Binary
3901| Floating-Point Arithmetic.
3902*----------------------------------------------------------------------------*/
3903
3904LOCALFUNC floatx80 floatx80_scale(floatx80 a, floatx80 b)
3905{
3906 int shiftCount;
3907 Bit32s scale;
3908
3909 // handle unsupported extended double-precision floating encodings
3910#if 0
3911 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
3912 {
3913 float_raise(float_flag_invalid);
3914 return floatx80_default_nan;
3915 }
3916#endif
3917
3918 Bit64u aSig = extractFloatx80Frac(a);
3919 Bit32s aExp = extractFloatx80Exp(a);
3920 int aSign = extractFloatx80Sign(a);
3921 Bit64u bSig = extractFloatx80Frac(b);
3922 Bit32s bExp = extractFloatx80Exp(b);
3923 int bSign = extractFloatx80Sign(b);
3924
3925 if (aExp == 0x7FFF) {
3926 if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
3927 {
3928 return propagateFloatx80NaN(a, b);
3929 }
3930 if ((bExp == 0x7FFF) && bSign) {
3931 float_raise(float_flag_invalid);
3932 return floatx80_default_nan;
3933 }
3934 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
3935 return a;
3936 }
3937 if (bExp == 0x7FFF) {
3938 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
3939 if ((aExp | aSig) == 0) {
3940 if (! bSign) {
3941 float_raise(float_flag_invalid);
3942 return floatx80_default_nan;
3943 }
3944 return a;
3945 }
3946 if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
3947 if (bSign) return packFloatx80(aSign, 0, 0);
3948 return packFloatx80(aSign, 0x7FFF, LIT64(0x8000000000000000));
3949 }
3950 if (aExp == 0) {
3951 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
3952 if (aSig == 0) return a;
3953 float_raise(float_flag_denormal);
3954 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3955 if (bExp < 0x3FFF)
3956 return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0);
3957 }
3958 if (bExp == 0) {
3959 if (bSig == 0) return a;
3960 float_raise(float_flag_denormal);
3961 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
3962 }
3963
3964 if (bExp > 0x400E) {
3965 /* generate appropriate overflow/underflow */
3966 return roundAndPackFloatx80(80, aSign,
3967 bSign ? -0x3FFF : 0x7FFF, aSig, 0);
3968 }
3969
3970 if (bExp < 0x3FFF) return a;
3971
3972 shiftCount = 0x403E - bExp;
3973 bSig >>= shiftCount;
3974 scale = (Bit32s) bSig;
3975 if (bSign) scale = -scale; /* -32768..32767 */
3976 return
3977 roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
3978}
3979
3980/*----------------------------------------------------------------------------
3981| Determine extended-precision floating-point number class.
3982*----------------------------------------------------------------------------*/
3983
3984#if cIncludeFPUUnused
3985LOCALFUNC float_class_t floatx80_class(floatx80 a)
3986{
3987 Bit32s aExp = extractFloatx80Exp(a);
3988 Bit64u aSig = extractFloatx80Frac(a);
3989
3990 if(aExp == 0) {
3991 if (aSig == 0)
3992 return float_zero;
3993
3994 /* denormal or pseudo-denormal */
3995 return float_denormal;
3996 }
3997
3998 /* valid numbers have the MS bit set */
3999 if (!(aSig & LIT64(0x8000000000000000)))
4000 return float_NaN; /* report unsupported as NaNs */
4001
4002 if(aExp == 0x7fff) {
4003 int aSign = extractFloatx80Sign(a);
4004
4005 if (((Bit64u) (aSig<< 1)) == 0)
4006 return (aSign) ? float_negative_inf : float_positive_inf;
4007
4008 return float_NaN;
4009 }
4010
4011 return float_normalized;
4012}
4013#endif
4014
4015/*----------------------------------------------------------------------------
4016| Compare between two extended precision floating point numbers. Returns
4017| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
4018| the value 'a' is less than the corresponding value `b',
4019| 'float_relation_greater' if the value 'a' is greater than the corresponding
4020| value `b', or 'float_relation_unordered' otherwise.
4021*----------------------------------------------------------------------------*/
4022
4023#if cIncludeFPUUnused
4024LOCALFUNC int floatx80_compare(floatx80 a, floatx80 b)
4025{
4026 int aSign;
4027 int bSign;
4028 Bit64u aSig;
4029 Bit32s aExp;
4030 Bit64u bSig;
4031 Bit32s bExp;
4032 int less_than;
4033 float_class_t aClass = floatx80_class(a);
4034 float_class_t bClass = floatx80_class(b);
4035
4036 if (aClass == float_NaN || bClass == float_NaN)
4037 {
4038 float_raise(float_flag_invalid);
4039 return float_relation_unordered;
4040 }
4041
4042 if (aClass == float_denormal || bClass == float_denormal)
4043 {
4044 float_raise(float_flag_denormal);
4045 }
4046
4047 aSign = extractFloatx80Sign(a);
4048 bSign = extractFloatx80Sign(b);
4049
4050 if (aClass == float_zero) {
4051 if (bClass == float_zero) return float_relation_equal;
4052 return bSign ? float_relation_greater : float_relation_less;
4053 }
4054
4055 if (bClass == float_zero || aSign != bSign) {
4056 return aSign ? float_relation_less : float_relation_greater;
4057 }
4058
4059 aSig = extractFloatx80Frac(a);
4060 aExp = extractFloatx80Exp(a);
4061 bSig = extractFloatx80Frac(b);
4062 bExp = extractFloatx80Exp(b);
4063
4064 if (aClass == float_denormal)
4065 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4066
4067 if (bClass == float_denormal)
4068 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4069
4070 if (aExp == bExp && aSig == bSig)
4071 return float_relation_equal;
4072
4073 less_than =
4074 aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
4075 : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
4076
4077 if (less_than) return float_relation_less;
4078
4079 return float_relation_greater;
4080}
4081#endif
4082
4083/*----------------------------------------------------------------------------
4084| Compare between two extended precision floating point numbers. Returns
4085| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
4086| the value 'a' is less than the corresponding value `b',
4087| 'float_relation_greater' if the value 'a' is greater than the corresponding
4088| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause
4089| an exception.
4090*----------------------------------------------------------------------------*/
4091
4092#if cIncludeFPUUnused
4093LOCALFUNC int floatx80_compare_quiet(floatx80 a, floatx80 b)
4094{
4095 int aSign;
4096 int bSign;
4097 Bit64u aSig;
4098 Bit32s aExp;
4099 Bit64u bSig;
4100 Bit32s bExp;
4101 int less_than;
4102 float_class_t aClass = floatx80_class(a);
4103 float_class_t bClass = floatx80_class(b);
4104
4105 if (aClass == float_NaN || bClass == float_NaN)
4106 {
4107#if 0
4108 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
4109 float_raise(float_flag_invalid);
4110#endif
4111
4112 if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
4113 float_raise(float_flag_invalid);
4114
4115 return float_relation_unordered;
4116 }
4117
4118 if (aClass == float_denormal || bClass == float_denormal)
4119 {
4120 float_raise(float_flag_denormal);
4121 }
4122
4123 aSign = extractFloatx80Sign(a);
4124 bSign = extractFloatx80Sign(b);
4125
4126 if (aClass == float_zero) {
4127 if (bClass == float_zero) return float_relation_equal;
4128 return bSign ? float_relation_greater : float_relation_less;
4129 }
4130
4131 if (bClass == float_zero || aSign != bSign) {
4132 return aSign ? float_relation_less : float_relation_greater;
4133 }
4134
4135 aSig = extractFloatx80Frac(a);
4136 aExp = extractFloatx80Exp(a);
4137 bSig = extractFloatx80Frac(b);
4138 bExp = extractFloatx80Exp(b);
4139
4140 if (aClass == float_denormal)
4141 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4142
4143 if (bClass == float_denormal)
4144 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4145
4146 if (aExp == bExp && aSig == bSig)
4147 return float_relation_equal;
4148
4149 less_than =
4150 aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
4151 : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
4152
4153 if (less_than) return float_relation_less;
4154 return float_relation_greater;
4155}
4156#endif
4157
4158/* ----- end from original file "softfloatx80.cc" ----- */
4159
4160/* ----- from original file "fprem.cc" ----- */
4161
4162/*
4163 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4164*/
4165
4166#define USE_estimateDiv128To64
4167
4168/* executes single exponent reduction cycle */
4169LOCALFUNC Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
4170{
4171 Bit64u term0, term1;
4172 Bit64u aSig1 = 0;
4173 Bit64u q;
4174
4175 shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
4176 q = estimateDiv128To64(aSig1, aSig0, bSig);
4177 mul64To128(bSig, q, &term0, &term1);
4178 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
4179 while ((Bit64s)(*zSig1) < 0) {
4180 --q;
4181 add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
4182 }
4183 return q;
4184}
4185
4186LOCALFUNC floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u *q, int rounding_mode)
4187{
4188 Bit32s aExp, bExp, zExp, expDiff;
4189 Bit64u aSig0, aSig1, bSig;
4190 int aSign;
4191 *q = 0;
4192
4193#if 0
4194 // handle unsupported extended double-precision floating encodings
4195 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
4196 {
4197 float_raise(float_flag_invalid);
4198 return floatx80_default_nan;
4199 }
4200#endif
4201
4202 aSig0 = extractFloatx80Frac(a);
4203 aExp = extractFloatx80Exp(a);
4204 aSign = extractFloatx80Sign(a);
4205 bSig = extractFloatx80Frac(b);
4206 bExp = extractFloatx80Exp(b);
4207
4208 if (aExp == 0x7FFF) {
4209 if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
4210 return propagateFloatx80NaN(a, b);
4211 }
4212 float_raise(float_flag_invalid);
4213 return floatx80_default_nan;
4214 }
4215 if (bExp == 0x7FFF) {
4216 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
4217 if (aExp == 0 && aSig0) {
4218 float_raise(float_flag_denormal);
4219 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
4220 return (a.low & LIT64(0x8000000000000000)) ?
4221 packFloatx80(aSign, aExp, aSig0) : a;
4222 }
4223 return a;
4224 }
4225 if (bExp == 0) {
4226 if (bSig == 0) {
4227 float_raise(float_flag_invalid);
4228 return floatx80_default_nan;
4229 }
4230 float_raise(float_flag_denormal);
4231 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4232 }
4233 if (aExp == 0) {
4234 if (aSig0 == 0) return a;
4235 float_raise(float_flag_denormal);
4236 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
4237 }
4238 expDiff = aExp - bExp;
4239 aSig1 = 0;
4240
4241 if (expDiff >= 64) {
4242 int n = (expDiff & 0x1f) | 0x20;
4243 remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
4244 zExp = aExp - n;
4245 *q = (Bit64u) -1;
4246 }
4247 else {
4248 zExp = bExp;
4249
4250 if (expDiff < 0) {
4251 if (expDiff < -1)
4252 return (a.low & LIT64(0x8000000000000000)) ?
4253 packFloatx80(aSign, aExp, aSig0) : a;
4254 shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
4255 expDiff = 0;
4256 }
4257
4258 if (expDiff > 0) {
4259 *q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
4260 }
4261 else {
4262 if (bSig <= aSig0) {
4263 aSig0 -= bSig;
4264 *q = 1;
4265 }
4266 }
4267
4268 if (rounding_mode == float_round_nearest_even)
4269 {
4270 Bit64u term0, term1;
4271 shift128Right(bSig, 0, 1, &term0, &term1);
4272
4273 if (! lt128(aSig0, aSig1, term0, term1))
4274 {
4275 int lt = lt128(term0, term1, aSig0, aSig1);
4276 int eq = eq128(aSig0, aSig1, term0, term1);
4277
4278 if ((eq && (*q & 1)) || lt) {
4279 aSign = !aSign;
4280 ++*q;
4281 }
4282 if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
4283 }
4284 }
4285 }
4286
4287 return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1);
4288}
4289
4290/*----------------------------------------------------------------------------
4291| Returns the remainder of the extended double-precision floating-point value
4292| `a' with respect to the corresponding value `b'. The operation is performed
4293| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4294*----------------------------------------------------------------------------*/
4295
4296#if cIncludeFPUUnused
4297LOCALFUNC floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u *q)
4298{
4299 return do_fprem(a, b, q, float_round_nearest_even);
4300}
4301#endif
4302
4303/*----------------------------------------------------------------------------
4304| Returns the remainder of the extended double-precision floating-point value
4305| `a' with respect to the corresponding value `b'. Unlike previous function
4306| the function does not compute the remainder specified in the IEC/IEEE
4307| Standard for Binary Floating-Point Arithmetic. This function operates
4308| differently from the previous function in the way that it rounds the
4309| quotient of 'a' divided by 'b' to an integer.
4310*----------------------------------------------------------------------------*/
4311
4312LOCALFUNC floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u *q)
4313{
4314 return do_fprem(a, b, q, float_round_to_zero);
4315}
4316
4317/* ----- end from original file "fprem.cc" ----- */
4318
4319/* ----- from original file "fpu_constant.h" ----- */
4320
4321/*
4322 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4323*/
4324
4325// Pentium CPU uses only 68-bit precision M_PI approximation
4326// #define BETTER_THAN_PENTIUM
4327
4328//////////////////////////////
4329// PI, PI/2, PI/4 constants
4330//////////////////////////////
4331
4332#define FLOATX80_PI_EXP (0x4000)
4333
4334// 128-bit PI fraction
4335#ifdef BETTER_THAN_PENTIUM
4336#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234))
4337#define FLOAT_PI_LO (LIT64(0xc4c6628b80dc1cd1))
4338#else
4339#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234))
4340#define FLOAT_PI_LO (LIT64(0xC000000000000000))
4341#endif
4342
4343#define FLOATX80_PI2_EXP (0x3FFF)
4344#define FLOATX80_PI4_EXP (0x3FFE)
4345
4346//////////////////////////////
4347// 3PI/4 constant
4348//////////////////////////////
4349
4350#define FLOATX80_3PI4_EXP (0x4000)
4351
4352// 128-bit 3PI/4 fraction
4353#ifdef BETTER_THAN_PENTIUM
4354#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7))
4355#define FLOAT_3PI4_LO (LIT64(0x9394c9e8a0a5159c))
4356#else
4357#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7))
4358#define FLOAT_3PI4_LO (LIT64(0x9000000000000000))
4359#endif
4360
4361//////////////////////////////
4362// 1/LN2 constant
4363//////////////////////////////
4364
4365#define FLOAT_LN2INV_EXP (0x3FFF)
4366
4367// 128-bit 1/LN2 fraction
4368#ifdef BETTER_THAN_PENTIUM
4369#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb))
4370#define FLOAT_LN2INV_LO (LIT64(0xbe87fed0691d3e89))
4371#else
4372#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb))
4373#define FLOAT_LN2INV_LO (LIT64(0xC000000000000000))
4374#endif
4375
4376/* ----- end from original file "fpu_constant.h" ----- */
4377
4378/* ----- from original file "poly.cc" ----- */
4379
4380/*
4381 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4382*/
4383
4384// 2 3 4 n
4385// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4386// 0 1 2 3 4 n
4387//
4388// -- 2k -- 2k+1
4389// p(x) = > C * x q(x) = > C * x
4390// -- 2k -- 2k+1
4391//
4392// f(x) ~ [ p(x) + x * q(x) ]
4393//
4394
4395LOCALFUNC float128 EvalPoly(float128 x, float128 *arr, unsigned n)
4396{
4397 float128 r2;
4398 float128 x2 = float128_mul(x, x);
4399 unsigned i;
4400
4401#if 0
4402 assert(n > 1);
4403#endif
4404
4405 float128 r1 = arr[--n];
4406 i = n;
4407 while(i >= 2) {
4408 r1 = float128_mul(r1, x2);
4409 i -= 2;
4410 r1 = float128_add(r1, arr[i]);
4411 }
4412 if (i) r1 = float128_mul(r1, x);
4413
4414 r2 = arr[--n];
4415 i = n;
4416 while(i >= 2) {
4417 r2 = float128_mul(r2, x2);
4418 i -= 2;
4419 r2 = float128_add(r2, arr[i]);
4420 }
4421 if (i) r2 = float128_mul(r2, x);
4422
4423 return float128_add(r1, r2);
4424}
4425
4426// 2 4 6 8 2n
4427// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4428// 0 1 2 3 4 n
4429//
4430// -- 4k -- 4k+2
4431// p(x) = > C * x q(x) = > C * x
4432// -- 2k -- 2k+1
4433//
4434// 2
4435// f(x) ~ [ p(x) + x * q(x) ]
4436//
4437
4438LOCALFUNC float128 EvenPoly(float128 x, float128 *arr, unsigned n)
4439{
4440 return EvalPoly(float128_mul(x, x), arr, n);
4441}
4442
4443// 3 5 7 9 2n+1
4444// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4445// 0 1 2 3 4 n
4446// 2 4 6 8 2n
4447// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4448// 0 1 2 3 4 n
4449//
4450// -- 4k -- 4k+2
4451// p(x) = > C * x q(x) = > C * x
4452// -- 2k -- 2k+1
4453//
4454// 2
4455// f(x) ~ x * [ p(x) + x * q(x) ]
4456//
4457
4458LOCALFUNC float128 OddPoly(float128 x, float128 *arr, unsigned n)
4459{
4460 return float128_mul(x, EvenPoly(x, arr, n));
4461}
4462
4463/* ----- end from original file "poly.cc" ----- */
4464
4465/* ----- from original file "fyl2x.cc" ----- */
4466
4467/*
4468 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4469*/
4470
4471static const floatx80 floatx80_one =
4472 packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000));
4473
4474static const float128 float128_one =
4475 packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000));
4476static const float128 float128_two =
4477 packFloat2x128m(LIT64(0x4000000000000000), LIT64(0x0000000000000000));
4478
4479static const float128 float128_ln2inv2 =
4480 packFloat2x128m(LIT64(0x400071547652b82f), LIT64(0xe1777d0ffda0d23a));
4481
4482#define SQRT2_HALF_SIG LIT64(0xb504f333f9de6484)
4483
4484#define L2_ARR_SIZE 9
4485
4486static float128 ln_arr[L2_ARR_SIZE] =
4487{
4488 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
4489 PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */
4490 PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
4491 PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */
4492 PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
4493 PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
4494 PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
4495 PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
4496 PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */
4497};
4498
4499LOCALFUNC float128 poly_ln(float128 x1)
4500{
4501/*
4502 //
4503 // 3 5 7 9 11 13 15
4504 // 1+u u u u u u u u
4505 // 1/2 ln --- ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
4506 // 1-u 3 5 7 9 11 13 15
4507 //
4508 // 2 4 6 8 10 12 14
4509 // u u u u u u u
4510 // = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
4511 // 3 5 7 9 11 13 15
4512 //
4513 // 3 3
4514 // -- 4k -- 4k+2
4515 // p(u) = > C * u q(u) = > C * u
4516 // -- 2k -- 2k+1
4517 // k=0 k=0
4518 //
4519 // 1+u 2
4520 // 1/2 ln --- ~ u * [ p(u) + u * q(u) ]
4521 // 1-u
4522 //
4523*/
4524 return OddPoly(x1, ln_arr, L2_ARR_SIZE);
4525}
4526
4527/* required sqrt(2)/2 < x < sqrt(2) */
4528LOCALFUNC float128 poly_l2(float128 x)
4529{
4530 /* using float128 for approximation */
4531 float128 x_p1 = float128_add(x, float128_one);
4532 float128 x_m1 = float128_sub(x, float128_one);
4533 x = float128_div(x_m1, x_p1);
4534 x = poly_ln(x);
4535 x = float128_mul(x, float128_ln2inv2);
4536 return x;
4537}
4538
4539LOCALFUNC float128 poly_l2p1(float128 x)
4540{
4541 /* using float128 for approximation */
4542 float128 x_p2 = float128_add(x, float128_two);
4543 x = float128_div(x, x_p2);
4544 x = poly_ln(x);
4545 x = float128_mul(x, float128_ln2inv2);
4546 return x;
4547}
4548
4549// =================================================
4550// FYL2X Compute y * log (x)
4551// 2
4552// =================================================
4553
4554//
4555// Uses the following identities:
4556//
4557// 1. ----------------------------------------------------------
4558// ln(x)
4559// log (x) = -------, ln (x*y) = ln(x) + ln(y)
4560// 2 ln(2)
4561//
4562// 2. ----------------------------------------------------------
4563// 1+u x-1
4564// ln (x) = ln -----, when u = -----
4565// 1-u x+1
4566//
4567// 3. ----------------------------------------------------------
4568// 3 5 7 2n+1
4569// 1+u u u u u
4570// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
4571// 1-u 3 5 7 2n+1
4572//
4573
4574LOCALFUNC floatx80 fyl2x(floatx80 a, floatx80 b)
4575{
4576 int aSign;
4577 int bSign;
4578 Bit64u aSig;
4579 Bit32s aExp;
4580 Bit64u bSig;
4581 Bit32s bExp;
4582 int zSign;
4583 int ExpDiff;
4584 Bit64u zSig0, zSig1;
4585 float128 x;
4586
4587 // handle unsupported extended double-precision floating encodings
4588 if (0 /* floatx80_is_unsupported(a) */) {
4589invalid:
4590 float_raise(float_flag_invalid);
4591 return floatx80_default_nan;
4592 }
4593
4594 aSig = extractFloatx80Frac(a);
4595 aExp = extractFloatx80Exp(a);
4596 aSign = extractFloatx80Sign(a);
4597 bSig = extractFloatx80Frac(b);
4598 bExp = extractFloatx80Exp(b);
4599 bSign = extractFloatx80Sign(b);
4600
4601 zSign = bSign ^ 1;
4602
4603 if (aExp == 0x7FFF) {
4604 if ((Bit64u) (aSig<<1)
4605 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
4606 {
4607 return propagateFloatx80NaN(a, b);
4608 }
4609 if (aSign) goto invalid;
4610 else {
4611 if (bExp == 0) {
4612 if (bSig == 0) goto invalid;
4613 float_raise(float_flag_denormal);
4614 }
4615 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4616 }
4617 }
4618 if (bExp == 0x7FFF)
4619 {
4620 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
4621 if (aSign && (Bit64u)(aExp | aSig)) goto invalid;
4622 if (aSig && (aExp == 0))
4623 float_raise(float_flag_denormal);
4624 if (aExp < 0x3FFF) {
4625 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4626 }
4627 if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;
4628 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4629 }
4630 if (aExp == 0) {
4631 if (aSig == 0) {
4632 if ((bExp | bSig) == 0) goto invalid;
4633 float_raise(float_flag_divbyzero);
4634 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4635 }
4636 if (aSign) goto invalid;
4637 float_raise(float_flag_denormal);
4638 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4639 }
4640 if (aSign) goto invalid;
4641 if (bExp == 0) {
4642 if (bSig == 0) {
4643 if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
4644 return packFloatx80(bSign, 0, 0);
4645 }
4646 float_raise(float_flag_denormal);
4647 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4648 }
4649 if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0))
4650 return packFloatx80(bSign, 0, 0);
4651
4652 float_raise(float_flag_inexact);
4653
4654 ExpDiff = aExp - 0x3FFF;
4655 aExp = 0;
4656 if (aSig >= SQRT2_HALF_SIG) {
4657 ExpDiff++;
4658 aExp--;
4659 }
4660
4661 /* ******************************** */
4662 /* using float128 for approximation */
4663 /* ******************************** */
4664
4665 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
4666 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
4667 x = poly_l2(x);
4668 x = float128_add(x, floatx80_to_float128(int32_to_floatx80(ExpDiff)));
4669 return floatx80_mul128(b, x);
4670}
4671
4672// =================================================
4673// FYL2XP1 Compute y * log (x + 1)
4674// 2
4675// =================================================
4676
4677//
4678// Uses the following identities:
4679//
4680// 1. ----------------------------------------------------------
4681// ln(x)
4682// log (x) = -------
4683// 2 ln(2)
4684//
4685// 2. ----------------------------------------------------------
4686// 1+u x
4687// ln (x+1) = ln -----, when u = -----
4688// 1-u x+2
4689//
4690// 3. ----------------------------------------------------------
4691// 3 5 7 2n+1
4692// 1+u u u u u
4693// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
4694// 1-u 3 5 7 2n+1
4695//
4696
4697LOCALFUNC floatx80 fyl2xp1(floatx80 a, floatx80 b)
4698{
4699 Bit32s aExp, bExp;
4700 Bit64u aSig, bSig, zSig0, zSig1, zSig2;
4701 int aSign, bSign;
4702 int zSign;
4703 float128 x;
4704
4705 // handle unsupported extended double-precision floating encodings
4706 if (0 /* floatx80_is_unsupported(a) */) {
4707invalid:
4708 float_raise(float_flag_invalid);
4709 return floatx80_default_nan;
4710 }
4711
4712 aSig = extractFloatx80Frac(a);
4713 aExp = extractFloatx80Exp(a);
4714 aSign = extractFloatx80Sign(a);
4715 bSig = extractFloatx80Frac(b);
4716 bExp = extractFloatx80Exp(b);
4717 bSign = extractFloatx80Sign(b);
4718 zSign = aSign ^ bSign;
4719
4720 if (aExp == 0x7FFF) {
4721 if ((Bit64u) (aSig<<1)
4722 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
4723 {
4724 return propagateFloatx80NaN(a, b);
4725 }
4726 if (aSign) goto invalid;
4727 else {
4728 if (bExp == 0) {
4729 if (bSig == 0) goto invalid;
4730 float_raise(float_flag_denormal);
4731 }
4732 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4733 }
4734 }
4735 if (bExp == 0x7FFF)
4736 {
4737 if ((Bit64u) (bSig<<1))
4738 return propagateFloatx80NaN(a, b);
4739
4740 if (aExp == 0) {
4741 if (aSig == 0) goto invalid;
4742 float_raise(float_flag_denormal);
4743 }
4744
4745 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4746 }
4747 if (aExp == 0) {
4748 if (aSig == 0) {
4749 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
4750 return packFloatx80(zSign, 0, 0);
4751 }
4752 float_raise(float_flag_denormal);
4753 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4754 }
4755 if (bExp == 0) {
4756 if (bSig == 0) return packFloatx80(zSign, 0, 0);
4757 float_raise(float_flag_denormal);
4758 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4759 }
4760
4761 float_raise(float_flag_inexact);
4762
4763 if (aSign && aExp >= 0x3FFF)
4764 return a;
4765
4766 if (aExp >= 0x3FFC) // big argument
4767 {
4768 return fyl2x(floatx80_add(a, floatx80_one), b);
4769 }
4770
4771 // handle tiny argument
4772 if (aExp < EXP_BIAS-70)
4773 {
4774 // first order approximation, return (a*b)/ln(2)
4775 Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
4776
4777 mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
4778 if (0 < (Bit64s) zSig0) {
4779 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4780 --zExp;
4781 }
4782
4783 zExp = zExp + bExp - 0x3FFE;
4784 mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
4785 if (0 < (Bit64s) zSig0) {
4786 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4787 --zExp;
4788 }
4789
4790 return
4791 roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
4792 }
4793
4794 /* ******************************** */
4795 /* using float128 for approximation */
4796 /* ******************************** */
4797
4798 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
4799 x = packFloat128(aSign, aExp, zSig0, zSig1);
4800 x = poly_l2p1(x);
4801 return floatx80_mul128(b, x);
4802}
4803
4804/* ----- end from original file "fyl2x.cc" ----- */
4805
4806/* ----- from original file "f2xm1.cc" ----- */
4807
4808/*
4809 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4810*/
4811
4812static const floatx80 floatx80_negone = packFloatx80m(1, 0x3fff, LIT64(0x8000000000000000));
4813static const floatx80 floatx80_neghalf = packFloatx80m(1, 0x3ffe, LIT64(0x8000000000000000));
4814static const float128 float128_ln2 =
4815 packFloat2x128m(LIT64(0x3ffe62e42fefa39e), LIT64(0xf35793c7673007e6));
4816
4817#define LN2_SIG LIT64(0xb17217f7d1cf79ac)
4818
4819#define EXP_ARR_SIZE 15
4820
4821static float128 exp_arr[EXP_ARR_SIZE] =
4822{
4823 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
4824 PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /* 2 */
4825 PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /* 3 */
4826 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
4827 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
4828 PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
4829 PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
4830 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
4831 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
4832 PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
4833 PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
4834 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
4835 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
4836 PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
4837 PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0) /* 15 */
4838};
4839
4840/* required -1 < x < 1 */
4841LOCALFUNC float128 poly_exp(float128 x)
4842{
4843/*
4844 // 2 3 4 5 6 7 8 9
4845 // x x x x x x x x x
4846 // e - 1 ~ x + --- + --- + --- + --- + --- + --- + --- + --- + ...
4847 // 2! 3! 4! 5! 6! 7! 8! 9!
4848 //
4849 // 2 3 4 5 6 7 8
4850 // x x x x x x x x
4851 // = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ]
4852 // 2! 3! 4! 5! 6! 7! 8! 9!
4853 //
4854 // 8 8
4855 // -- 2k -- 2k+1
4856 // p(x) = > C * x q(x) = > C * x
4857 // -- 2k -- 2k+1
4858 // k=0 k=0
4859 //
4860 // x
4861 // e - 1 ~ x * [ p(x) + x * q(x) ]
4862 //
4863*/
4864 float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE);
4865 return float128_mul(t, x);
4866}
4867
4868// =================================================
4869// x
4870// FX2P1 Compute 2 - 1
4871// =================================================
4872
4873//
4874// Uses the following identities:
4875//
4876// 1. ----------------------------------------------------------
4877// x x*ln(2)
4878// 2 = e
4879//
4880// 2. ----------------------------------------------------------
4881// 2 3 4 5 n
4882// x x x x x x x
4883// e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4884// 1! 2! 3! 4! 5! n!
4885//
4886
4887LOCALFUNC floatx80 f2xm1(floatx80 a)
4888{
4889 Bit64u zSig0, zSig1;
4890 float128 x;
4891
4892#if 0
4893 // handle unsupported extended double-precision floating encodings
4894 if (floatx80_is_unsupported(a))
4895 {
4896 float_raise(float_flag_invalid);
4897 return floatx80_default_nan;
4898 }
4899#endif
4900
4901 Bit64u aSig = extractFloatx80Frac(a);
4902 Bit32s aExp = extractFloatx80Exp(a);
4903 int aSign = extractFloatx80Sign(a);
4904
4905 if (aExp == 0x7FFF) {
4906 if ((Bit64u) (aSig<<1))
4907 return propagateOneFloatx80NaN(&a);
4908
4909 return (aSign) ? floatx80_negone : a;
4910 }
4911
4912 if (aExp == 0) {
4913 if (aSig == 0) return a;
4914 float_raise(float_flag_denormal | float_flag_inexact);
4915 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4916
4917 tiny_argument:
4918 mul64To128(aSig, LN2_SIG, &zSig0, &zSig1);
4919 if (0 < (Bit64s) zSig0) {
4920 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4921 --aExp;
4922 }
4923 return
4924 roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1);
4925 }
4926
4927 float_raise(float_flag_inexact);
4928
4929 if (aExp < 0x3FFF)
4930 {
4931 if (aExp < EXP_BIAS-68)
4932 goto tiny_argument;
4933
4934 /* ******************************** */
4935 /* using float128 for approximation */
4936 /* ******************************** */
4937
4938 x = floatx80_to_float128(a);
4939 x = float128_mul(x, float128_ln2);
4940 x = poly_exp(x);
4941 return float128_to_floatx80(x);
4942 }
4943 else
4944 {
4945 if ((a.high == 0xBFFF) && (! (aSig<<1)))
4946 return floatx80_neghalf;
4947
4948 return a;
4949 }
4950}
4951
4952/* ----- end from original file "f2xm1.cc" ----- */
4953
4954/* ----- from original file "fsincos.cc" ----- */
4955
4956/*
4957 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4958*/
4959
4960#define USE_estimateDiv128To64
4961
4962#if 0
4963static const floatx80 floatx80_one = packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000));
4964#endif
4965
4966/* reduce trigonometric function argument using 128-bit precision
4967 M_PI approximation */
4968LOCALFUNC Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
4969{
4970 Bit64u term0, term1, term2;
4971 Bit64u aSig1 = 0;
4972 Bit64u q;
4973
4974 shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
4975 q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
4976 mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
4977 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
4978 while ((Bit64s)(*zSig1) < 0) {
4979 --q;
4980 add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
4981 }
4982 *zSig1 = term2;
4983 return q;
4984}
4985
4986LOCALFUNC int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1)
4987{
4988 Bit64u term0, term1, q = 0;
4989
4990 if (expDiff < 0) {
4991 shift128Right(*aSig0, 0, 1, aSig0, aSig1);
4992 expDiff = 0;
4993 }
4994 if (expDiff > 0) {
4995 q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
4996 }
4997 else {
4998 if (FLOAT_PI_HI <= *aSig0) {
4999 *aSig0 -= FLOAT_PI_HI;
5000 q = 1;
5001 }
5002 }
5003
5004 shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
5005 if (! lt128(*aSig0, *aSig1, term0, term1))
5006 {
5007 int lt = lt128(term0, term1, *aSig0, *aSig1);
5008 int eq = eq128(*aSig0, *aSig1, term0, term1);
5009
5010 if ((eq && (q & 1)) || lt) {
5011 *zSign = !*zSign;
5012 ++q;
5013 }
5014 if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);
5015 }
5016
5017 return (int)(q & 3);
5018}
5019
5020#define SIN_ARR_SIZE 9
5021#define COS_ARR_SIZE 9
5022
5023static float128 sin_arr[SIN_ARR_SIZE] =
5024{
5025 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
5026 PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */
5027 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
5028 PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
5029 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
5030 PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
5031 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
5032 PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
5033 PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00) /* 17 */
5034};
5035
5036static float128 cos_arr[COS_ARR_SIZE] =
5037{
5038 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */
5039 PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */
5040 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
5041 PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
5042 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
5043 PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
5044 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
5045 PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
5046 PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0) /* 16 */
5047};
5048
5049/* 0 <= x <= pi/4 */
5050LOCALINLINEFUNC float128 poly_sin(float128 x)
5051{
5052 // 3 5 7 9 11 13 15
5053 // x x x x x x x
5054 // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
5055 // 3! 5! 7! 9! 11! 13! 15!
5056 //
5057 // 2 4 6 8 10 12 14
5058 // x x x x x x x
5059 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
5060 // 3! 5! 7! 9! 11! 13! 15!
5061 //
5062 // 3 3
5063 // -- 4k -- 4k+2
5064 // p(x) = > C * x > 0 q(x) = > C * x < 0
5065 // -- 2k -- 2k+1
5066 // k=0 k=0
5067 //
5068 // 2
5069 // sin(x) ~ x * [ p(x) + x * q(x) ]
5070 //
5071
5072 return OddPoly(x, sin_arr, SIN_ARR_SIZE);
5073}
5074
5075/* 0 <= x <= pi/4 */
5076LOCALINLINEFUNC float128 poly_cos(float128 x)
5077{
5078 // 2 4 6 8 10 12 14
5079 // x x x x x x x
5080 // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
5081 // 2! 4! 6! 8! 10! 12! 14!
5082 //
5083 // 3 3
5084 // -- 4k -- 4k+2
5085 // p(x) = > C * x > 0 q(x) = > C * x < 0
5086 // -- 2k -- 2k+1
5087 // k=0 k=0
5088 //
5089 // 2
5090 // cos(x) ~ [ p(x) + x * q(x) ]
5091 //
5092
5093 return EvenPoly(x, cos_arr, COS_ARR_SIZE);
5094}
5095
5096LOCALINLINEPROC sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
5097{
5098 if (sin_a) *sin_a = a;
5099 if (cos_a) *cos_a = a;
5100}
5101
5102LOCALINLINEPROC sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
5103{
5104 if (sin_a) *sin_a = a;
5105 if (cos_a) *cos_a = floatx80_one;
5106}
5107
5108LOCALFUNC floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient)
5109{
5110 floatx80 result;
5111
5112 if (quotient & 0x1) {
5113 r = poly_cos(r);
5114 neg = 0;
5115 } else {
5116 r = poly_sin(r);
5117 }
5118
5119 result = float128_to_floatx80(r);
5120 if (quotient & 0x2)
5121 neg = ! neg;
5122
5123 if (neg)
5124 floatx80_chs(&result);
5125
5126 return result;
5127}
5128
5129// =================================================
5130// FSINCOS Compute sin(x) and cos(x)
5131// =================================================
5132
5133//
5134// Uses the following identities:
5135// ----------------------------------------------------------
5136//
5137// sin(-x) = -sin(x)
5138// cos(-x) = cos(x)
5139//
5140// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
5141// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
5142//
5143// sin(x+ pi/2) = cos(x)
5144// sin(x+ pi) = -sin(x)
5145// sin(x+3pi/2) = -cos(x)
5146// sin(x+2pi) = sin(x)
5147//
5148
5149LOCALFUNC int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
5150{
5151 float128 r;
5152 Bit64u aSig0, aSig1 = 0;
5153 Bit32s aExp, zExp, expDiff;
5154 int aSign, zSign;
5155 int q = 0;
5156
5157#if 0
5158 // handle unsupported extended double-precision floating encodings
5159 if (floatx80_is_unsupported(a))
5160 {
5161 goto invalid;
5162 }
5163#endif
5164
5165 aSig0 = extractFloatx80Frac(a);
5166 aExp = extractFloatx80Exp(a);
5167 aSign = extractFloatx80Sign(a);
5168
5169 /* invalid argument */
5170 if (aExp == 0x7FFF) {
5171 if ((Bit64u) (aSig0<<1)) {
5172 sincos_invalid(sin_a, cos_a, propagateOneFloatx80NaN(&a));
5173 return 0;
5174 }
5175
5176#if 0
5177 invalid:
5178#endif
5179 float_raise(float_flag_invalid);
5180 sincos_invalid(sin_a, cos_a, floatx80_default_nan);
5181 return 0;
5182 }
5183
5184 if (aExp == 0) {
5185 if (aSig0 == 0) {
5186 sincos_tiny_argument(sin_a, cos_a, a);
5187 return 0;
5188 }
5189
5190 float_raise(float_flag_denormal);
5191
5192 /* handle pseudo denormals */
5193 if (! (aSig0 & LIT64(0x8000000000000000)))
5194 {
5195 float_raise(float_flag_inexact);
5196 if (sin_a)
5197 float_raise(float_flag_underflow);
5198 sincos_tiny_argument(sin_a, cos_a, a);
5199 return 0;
5200 }
5201
5202 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
5203 }
5204
5205 zSign = aSign;
5206 zExp = EXP_BIAS;
5207 expDiff = aExp - zExp;
5208
5209 /* argument is out-of-range */
5210 if (expDiff >= 63)
5211 return -1;
5212
5213 float_raise(float_flag_inexact);
5214
5215 if (expDiff < -1) { // doesn't require reduction
5216 if (expDiff <= -68) {
5217 a = packFloatx80(aSign, aExp, aSig0);
5218 sincos_tiny_argument(sin_a, cos_a, a);
5219 return 0;
5220 }
5221 zExp = aExp;
5222 }
5223 else {
5224 q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
5225 }
5226
5227 /* **************************** */
5228 /* argument reduction completed */
5229 /* **************************** */
5230
5231 /* using float128 for approximation */
5232 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
5233
5234 if (aSign) q = -q;
5235 if (sin_a) *sin_a = sincos_approximation(zSign, r, q);
5236 if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
5237
5238 return 0;
5239}
5240
5241// =================================================
5242// FPTAN Compute tan(x)
5243// =================================================
5244
5245//
5246// Uses the following identities:
5247//
5248// 1. ----------------------------------------------------------
5249//
5250// sin(-x) = -sin(x)
5251// cos(-x) = cos(x)
5252//
5253// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
5254// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
5255//
5256// sin(x+ pi/2) = cos(x)
5257// sin(x+ pi) = -sin(x)
5258// sin(x+3pi/2) = -cos(x)
5259// sin(x+2pi) = sin(x)
5260//
5261// 2. ----------------------------------------------------------
5262//
5263// sin(x)
5264// tan(x) = ------
5265// cos(x)
5266//
5267
5268LOCALFUNC int ftan(floatx80 *a)
5269{
5270 float128 r;
5271 float128 sin_r;
5272 float128 cos_r;
5273 Bit64u aSig0, aSig1 = 0;
5274 Bit32s aExp, zExp, expDiff;
5275 int aSign, zSign;
5276 int q = 0;
5277
5278#if 0
5279 // handle unsupported extended double-precision floating encodings
5280 if (floatx80_is_unsupported(*a))
5281 {
5282 goto invalid;
5283 }
5284#endif
5285
5286 aSig0 = extractFloatx80Frac(*a);
5287 aExp = extractFloatx80Exp(*a);
5288 aSign = extractFloatx80Sign(*a);
5289
5290 /* invalid argument */
5291 if (aExp == 0x7FFF) {
5292 if ((Bit64u) (aSig0<<1))
5293 {
5294 *a = propagateOneFloatx80NaN(a);
5295 return 0;
5296 }
5297
5298#if 0
5299 invalid:
5300#endif
5301 float_raise(float_flag_invalid);
5302 *a = floatx80_default_nan;
5303 return 0;
5304 }
5305
5306 if (aExp == 0) {
5307 if (aSig0 == 0) return 0;
5308 float_raise(float_flag_denormal);
5309 /* handle pseudo denormals */
5310 if (! (aSig0 & LIT64(0x8000000000000000)))
5311 {
5312 float_raise(float_flag_inexact | float_flag_underflow);
5313 return 0;
5314 }
5315 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
5316 }
5317
5318 zSign = aSign;
5319 zExp = EXP_BIAS;
5320 expDiff = aExp - zExp;
5321
5322 /* argument is out-of-range */
5323 if (expDiff >= 63)
5324 return -1;
5325
5326 float_raise(float_flag_inexact);
5327
5328 if (expDiff < -1) { // doesn't require reduction
5329 if (expDiff <= -68) {
5330 *a = packFloatx80(aSign, aExp, aSig0);
5331 return 0;
5332 }
5333 zExp = aExp;
5334 }
5335 else {
5336 q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
5337 }
5338
5339 /* **************************** */
5340 /* argument reduction completed */
5341 /* **************************** */
5342
5343 /* using float128 for approximation */
5344 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
5345
5346 sin_r = poly_sin(r);
5347 cos_r = poly_cos(r);
5348
5349 if (q & 0x1) {
5350 r = float128_div(cos_r, sin_r);
5351 zSign = ! zSign;
5352 } else {
5353 r = float128_div(sin_r, cos_r);
5354 }
5355
5356 *a = float128_to_floatx80(r);
5357 if (zSign)
5358 floatx80_chs(a);
5359
5360 return 0;
5361}
5362
5363/* ----- end from original file "fsincos.cc" ----- */
5364
5365/* ----- from original file "fpatan.cc" ----- */
5366
5367/*
5368 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
5369*/
5370
5371#define FPATAN_ARR_SIZE 11
5372
5373#if 0
5374static const float128 float128_one =
5375 packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000));
5376#endif
5377static const float128 float128_sqrt3 =
5378 packFloat2x128m(LIT64(0x3fffbb67ae8584ca), LIT64(0xa73b25742d7078b8));
5379static const floatx80 floatx80_pi =
5380 packFloatx80m(0, 0x4000, LIT64(0xc90fdaa22168c235));
5381
5382static const float128 float128_pi2 =
5383 packFloat2x128m(LIT64(0x3fff921fb54442d1), LIT64(0x8469898CC5170416));
5384static const float128 float128_pi4 =
5385 packFloat2x128m(LIT64(0x3ffe921fb54442d1), LIT64(0x8469898CC5170416));
5386static const float128 float128_pi6 =
5387 packFloat2x128m(LIT64(0x3ffe0c152382d736), LIT64(0x58465BB32E0F580F));
5388
5389static float128 atan_arr[FPATAN_ARR_SIZE] =
5390{
5391 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
5392 PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /* 3 */
5393 PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
5394 PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /* 7 */
5395 PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
5396 PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
5397 PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
5398 PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
5399 PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
5400 PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
5401 PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618) /* 21 */
5402};
5403
5404/* |x| < 1/4 */
5405LOCALFUNC float128 poly_atan(float128 x1)
5406{
5407/*
5408 // 3 5 7 9 11 13 15 17
5409 // x x x x x x x x
5410 // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
5411 // 3 5 7 9 11 13 15 17
5412 //
5413 // 2 4 6 8 10 12 14 16
5414 // x x x x x x x x
5415 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
5416 // 3 5 7 9 11 13 15 17
5417 //
5418 // 5 5
5419 // -- 4k -- 4k+2
5420 // p(x) = > C * x q(x) = > C * x
5421 // -- 2k -- 2k+1
5422 // k=0 k=0
5423 //
5424 // 2
5425 // atan(x) ~ x * [ p(x) + x * q(x) ]
5426 //
5427*/
5428 return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE);
5429}
5430
5431// =================================================
5432// FPATAN Compute y * log (x)
5433// 2
5434// =================================================
5435
5436//
5437// Uses the following identities:
5438//
5439// 1. ----------------------------------------------------------
5440//
5441// atan(-x) = -atan(x)
5442//
5443// 2. ----------------------------------------------------------
5444//
5445// x + y
5446// atan(x) + atan(y) = atan -------, xy < 1
5447// 1-xy
5448//
5449// x + y
5450// atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
5451// 1-xy
5452//
5453// x + y
5454// atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
5455// 1-xy
5456//
5457// 3. ----------------------------------------------------------
5458//
5459// atan(x) = atan(INF) + atan(- 1/x)
5460//
5461// x-1
5462// atan(x) = PI/4 + atan( ----- )
5463// x+1
5464//
5465// x * sqrt(3) - 1
5466// atan(x) = PI/6 + atan( ----------------- )
5467// x + sqrt(3)
5468//
5469// 4. ----------------------------------------------------------
5470// 3 5 7 9 2n+1
5471// x x x x n x
5472// atan(x) = x - --- + --- - --- + --- - ... + (-1) ------ + ...
5473// 3 5 7 9 2n+1
5474//
5475
5476LOCALFUNC floatx80 fpatan(floatx80 a, floatx80 b)
5477{
5478 float128 a128;
5479 float128 b128;
5480 float128 x;
5481 int swap, add_pi6, add_pi4;
5482 Bit32s xExp;
5483 floatx80 result;
5484 int rSign;
5485
5486 // handle unsupported extended double-precision floating encodings
5487#if 0
5488 if (floatx80_is_unsupported(a)) {
5489 float_raise(float_flag_invalid);
5490 return floatx80_default_nan;
5491 }
5492#endif
5493
5494 Bit64u aSig = extractFloatx80Frac(a);
5495 Bit32s aExp = extractFloatx80Exp(a);
5496 int aSign = extractFloatx80Sign(a);
5497 Bit64u bSig = extractFloatx80Frac(b);
5498 Bit32s bExp = extractFloatx80Exp(b);
5499 int bSign = extractFloatx80Sign(b);
5500
5501 int zSign = aSign ^ bSign;
5502
5503 if (bExp == 0x7FFF)
5504 {
5505 if ((Bit64u) (bSig<<1))
5506 return propagateFloatx80NaN(a, b);
5507
5508 if (aExp == 0x7FFF) {
5509 if ((Bit64u) (aSig<<1))
5510 return propagateFloatx80NaN(a, b);
5511
5512 if (aSign) { /* return 3PI/4 */
5513 return roundAndPackFloatx80(80, bSign,
5514 FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO);
5515 }
5516 else { /* return PI/4 */
5517 return roundAndPackFloatx80(80, bSign,
5518 FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5519 }
5520 }
5521
5522 if (aSig && (aExp == 0))
5523 float_raise(float_flag_denormal);
5524
5525 /* return PI/2 */
5526 return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5527 }
5528 if (aExp == 0x7FFF)
5529 {
5530 if ((Bit64u) (aSig<<1))
5531 return propagateFloatx80NaN(a, b);
5532
5533 if (bSig && (bExp == 0))
5534 float_raise(float_flag_denormal);
5535
5536return_PI_or_ZERO:
5537
5538 if (aSign) { /* return PI */
5539 return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5540 } else { /* return 0 */
5541 return packFloatx80(bSign, 0, 0);
5542 }
5543 }
5544 if (bExp == 0)
5545 {
5546 if (bSig == 0) {
5547 if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
5548 goto return_PI_or_ZERO;
5549 }
5550
5551 float_raise(float_flag_denormal);
5552 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
5553 }
5554 if (aExp == 0)
5555 {
5556 if (aSig == 0) /* return PI/2 */
5557 return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5558
5559 float_raise(float_flag_denormal);
5560 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
5561 }
5562
5563 float_raise(float_flag_inexact);
5564
5565 /* |a| = |b| ==> return PI/4 */
5566 if (aSig == bSig && aExp == bExp)
5567 return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5568
5569 /* ******************************** */
5570 /* using float128 for approximation */
5571 /* ******************************** */
5572
5573 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0);
5574 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0);
5575 swap = 0;
5576 add_pi6 = 0;
5577 add_pi4 = 0;
5578
5579 if (aExp > bExp || (aExp == bExp && aSig > bSig))
5580 {
5581 x = float128_div(b128, a128);
5582 }
5583 else {
5584 x = float128_div(a128, b128);
5585 swap = 1;
5586 }
5587
5588 xExp = extractFloat128Exp(x);
5589
5590 if (xExp <= EXP_BIAS-40)
5591 goto approximation_completed;
5592
5593 if (x.high >= LIT64(0x3ffe800000000000)) // 3/4 < x < 1
5594 {
5595 /*
5596 arctan(x) = arctan((x-1)/(x+1)) + pi/4
5597 */
5598 float128 t1 = float128_sub(x, float128_one);
5599 float128 t2 = float128_add(x, float128_one);
5600 x = float128_div(t1, t2);
5601 add_pi4 = 1;
5602 }
5603 else
5604 {
5605 /* argument correction */
5606 if (xExp >= 0x3FFD) // 1/4 < x < 3/4
5607 {
5608 /*
5609 arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
5610 */
5611 float128 t1 = float128_mul(x, float128_sqrt3);
5612 float128 t2 = float128_add(x, float128_sqrt3);
5613 x = float128_sub(t1, float128_one);
5614 x = float128_div(x, t2);
5615 add_pi6 = 1;
5616 }
5617 }
5618
5619 x = poly_atan(x);
5620 if (add_pi6) x = float128_add(x, float128_pi6);
5621 if (add_pi4) x = float128_add(x, float128_pi4);
5622
5623approximation_completed:
5624 if (swap) x = float128_sub(float128_pi2, x);
5625 result = float128_to_floatx80(x);
5626 if (zSign) floatx80_chs(&result);
5627 rSign = extractFloatx80Sign(result);
5628 if (!bSign && rSign)
5629 return floatx80_add(result, floatx80_pi);
5630 if (bSign && !rSign)
5631 return floatx80_sub(result, floatx80_pi);
5632 return result;
5633}
5634
5635/* ----- end from original file "fpatan.cc" ----- */
5636
5637/* end soft float stuff */
5638
5639typedef floatx80 myfpr;
5640
5641LOCALPROC myfp_FromExtendedFormat(myfpr *r, ui4r v2, ui5r v1, ui5r v0)
5642{
5643 r->high = v2;
5644 r->low = (((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF);
5645}
5646
5647LOCALPROC myfp_ToExtendedFormat(myfpr *dd, ui4r *v2, ui5r *v1, ui5r *v0)
5648{
5649 *v0 = ((ui6b) dd->low) & 0xFFFFFFFF;
5650 *v1 = (((ui6b) dd->low) >> 32) & 0xFFFFFFFF;
5651 *v2 = dd->high;
5652}
5653
5654LOCALPROC myfp_FromDoubleFormat(myfpr *r, ui5r v1, ui5r v0)
5655{
5656 float64 t = (float64)((((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF));
5657
5658 *r = float64_to_floatx80(t);
5659}
5660
5661LOCALPROC myfp_ToDoubleFormat(myfpr *dd, ui5r *v1, ui5r *v0)
5662{
5663 float64 t = floatx80_to_float64(*dd);
5664
5665 *v0 = ((ui6b) t) & 0xFFFFFFFF;
5666 *v1 = (((ui6b) t) >> 32) & 0xFFFFFFFF;
5667}
5668
5669LOCALPROC myfp_FromSingleFormat(myfpr *r, ui5r x)
5670{
5671 *r = float32_to_floatx80(x);
5672}
5673
5674LOCALFUNC ui5r myfp_ToSingleFormat(myfpr *ff)
5675{
5676 return floatx80_to_float32(*ff);
5677}
5678
5679LOCALPROC myfp_FromLong(myfpr *r, ui5r x)
5680{
5681 *r = int32_to_floatx80( x );
5682}
5683
5684LOCALFUNC ui5r myfp_ToLong(myfpr *x)
5685{
5686 return floatx80_to_int32( *x );
5687}
5688
5689LOCALFUNC blnr myfp_IsNan(myfpr *x)
5690{
5691 return floatx80_is_nan(*x);
5692}
5693
5694LOCALFUNC blnr myfp_IsInf(myfpr *x)
5695{
5696 return ( ( x->high & 0x7FFF ) == 0x7FFF ) && (0 == ((ui6b) ( x->low<<1 )));
5697}
5698
5699LOCALFUNC blnr myfp_IsZero(myfpr *x)
5700{
5701 return ( ( x->high & 0x7FFF ) == 0x0000 ) && (0 == ((ui6b) ( x->low<<1 )));
5702}
5703
5704LOCALFUNC blnr myfp_IsNeg(myfpr *x)
5705{
5706 return ( ( x->high & 0x8000 ) != 0x0000 );
5707}
5708
5709LOCALPROC myfp_Add(myfpr *r, const myfpr *a, const myfpr *b)
5710{
5711 *r = floatx80_add(*a, *b);
5712}
5713
5714LOCALPROC myfp_Sub(myfpr *r, const myfpr *a, const myfpr *b)
5715{
5716 *r = floatx80_sub(*a, *b);
5717}
5718
5719LOCALPROC myfp_Mul(myfpr *r, const myfpr *a, const myfpr *b)
5720{
5721 *r = floatx80_mul(*a, *b);
5722}
5723
5724LOCALPROC myfp_Div(myfpr *r, const myfpr *a, const myfpr *b)
5725{
5726 *r = floatx80_div(*a, *b);
5727}
5728
5729LOCALPROC myfp_Rem(myfpr *r, const myfpr *a, const myfpr *b)
5730{
5731 *r = floatx80_rem(*a, *b);
5732}
5733
5734LOCALPROC myfp_Sqrt(myfpr *r, myfpr *x)
5735{
5736 *r = floatx80_sqrt(*x);
5737}
5738
5739LOCALPROC myfp_Mod(myfpr *r, myfpr *a, myfpr *b)
5740{
5741 Bit64u q;
5742 *r = floatx80_remainder(*a, *b, &q);
5743 /* should save low byte of q */
5744}
5745
5746LOCALPROC myfp_Scale(myfpr *r, myfpr *a, myfpr *b)
5747{
5748 *r = floatx80_scale(*a, *b);
5749}
5750
5751LOCALPROC myfp_GetMan(myfpr *r, myfpr *x)
5752{
5753 *r = *x;
5754 (void) floatx80_extract(r);
5755}
5756
5757LOCALPROC myfp_GetExp(myfpr *r, myfpr *x)
5758{
5759 floatx80 t0 = *x;
5760 *r = floatx80_extract(&t0);
5761}
5762
5763LOCALPROC myfp_floor(myfpr *r, myfpr *x)
5764{
5765 si3r SaveRoundingMode = float_rounding_mode;
5766
5767 float_rounding_mode = float_round_down;
5768 *r = floatx80_round_to_int(*x);
5769 float_rounding_mode = SaveRoundingMode;
5770}
5771
5772LOCALPROC myfp_IntRZ(myfpr *r, myfpr *x)
5773{
5774 si3r SaveRoundingMode = float_rounding_mode;
5775
5776 float_rounding_mode = float_round_to_zero;
5777 *r = floatx80_round_to_int(*x);
5778 float_rounding_mode = SaveRoundingMode;
5779}
5780
5781LOCALPROC myfp_Int(myfpr *r, myfpr *x)
5782{
5783 *r = floatx80_round_to_int(*x);
5784}
5785
5786LOCALPROC myfp_RoundToSingle(myfpr *r, myfpr *x)
5787{
5788 float32 t0 = floatx80_to_float32(*x);
5789
5790 *r = float32_to_floatx80(t0);
5791}
5792
5793LOCALPROC myfp_RoundToDouble(myfpr *r, myfpr *x)
5794{
5795 float64 t0 = floatx80_to_float64(*x);
5796
5797 *r = float64_to_floatx80(t0);
5798}
5799
5800LOCALPROC myfp_Abs(myfpr *r, myfpr *x)
5801{
5802 *r = *x;
5803 r->high &= 0x7FFF;
5804}
5805
5806LOCALPROC myfp_Neg(myfpr *r, myfpr *x)
5807{
5808 *r = *x;
5809 r->high ^= 0x8000;
5810}
5811
5812LOCALPROC myfp_TwoToX(myfpr *r, myfpr *x)
5813{
5814 floatx80 t2;
5815 floatx80 t3;
5816 floatx80 t4;
5817 floatx80 t5;
5818 myfp_floor(&t2, x);
5819 t3 = floatx80_sub(*x, t2);
5820 t4 = f2xm1(t3);
5821 t5 = floatx80_add(t4, floatx80_one);
5822 *r = floatx80_scale(t5, t2);
5823}
5824
5825LOCALPROC myfp_TenToX(myfpr *r, myfpr *x)
5826{
5827 floatx80 t1;
5828 const floatx80 t = /* 1.0 / log(2.0) */
5829 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5830 const floatx80 t2 = /* log(10.0) */
5831 packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17));
5832 t1 = floatx80_mul(floatx80_mul(*x, t), t2);
5833 myfp_TwoToX(r, &t1);
5834}
5835
5836LOCALPROC myfp_EToX(myfpr *r, myfpr *x)
5837{
5838 floatx80 t1;
5839 const floatx80 t = /* 1.0 / log(2.0) */
5840 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5841 t1 = floatx80_mul(*x, t);
5842 myfp_TwoToX(r, &t1);
5843}
5844
5845LOCALPROC myfp_EToXM1(myfpr *r, myfpr *x)
5846{
5847 floatx80 t1;
5848 floatx80 t2;
5849 floatx80 t3;
5850 floatx80 t4;
5851 floatx80 t5;
5852 floatx80 t6;
5853 const floatx80 t = /* 1.0 / log(2.0) */
5854 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5855 t1 = floatx80_mul(*x, t);
5856 myfp_floor(&t2, &t1);
5857 t3 = floatx80_sub(t1, t2);
5858 t4 = f2xm1(t3);
5859 if (myfp_IsZero(&t2)) {
5860 *r = t4;
5861 } else {
5862 t5 = floatx80_add(t4, floatx80_one);
5863 t6 = floatx80_scale(t5, t2);
5864 *r = floatx80_sub(t6, floatx80_one);
5865 }
5866}
5867
5868LOCALPROC myfp_Log2(myfpr *r, myfpr *x)
5869{
5870 *r = fyl2x(*x, floatx80_one);
5871}
5872
5873LOCALPROC myfp_LogN(myfpr *r, myfpr *x)
5874{
5875 const floatx80 t = /* log(2.0) */
5876 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
5877 *r = fyl2x(*x, t);
5878}
5879
5880LOCALPROC myfp_Log10(myfpr *r, myfpr *x)
5881{
5882 const floatx80 t = /* log10(2.0) = ln(2) / ln(10), unknown accuracy */
5883 packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798));
5884 *r = fyl2x(*x, t);
5885}
5886
5887LOCALPROC myfp_LogNP1(myfpr *r, myfpr *x)
5888{
5889 const floatx80 t = /* log(2.0) */
5890 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
5891 *r = fyl2xp1(*x, t);
5892}
5893
5894LOCALPROC myfp_Sin(myfpr *r, myfpr *x)
5895{
5896 (void) fsincos(*x, r, 0);
5897}
5898
5899LOCALPROC myfp_Cos(myfpr *r, myfpr *x)
5900{
5901 (void) fsincos(*x, 0, r);
5902}
5903
5904LOCALPROC myfp_Tan(myfpr *r, myfpr *x)
5905{
5906 *r = *x;
5907 (void) ftan(r);
5908}
5909
5910LOCALPROC myfp_ATan(myfpr *r, myfpr *x)
5911{
5912 *r = fpatan(floatx80_one, *x);
5913}
5914
5915LOCALPROC myfp_ASin(myfpr *r, myfpr *x)
5916{
5917 floatx80 x2 = floatx80_mul(*x, *x);
5918 floatx80 mx2 = floatx80_sub(floatx80_one, x2);
5919 floatx80 cx = floatx80_sqrt(mx2);
5920
5921 *r = fpatan(cx, *x);
5922}
5923
5924LOCALPROC myfp_ACos(myfpr *r, myfpr *x)
5925{
5926 floatx80 x2 = floatx80_mul(*x, *x);
5927 floatx80 mx2 = floatx80_sub(floatx80_one, x2);
5928 floatx80 cx = floatx80_sqrt(mx2);
5929
5930 *r = fpatan(*x, cx);
5931}
5932
5933static const floatx80 floatx80_zero =
5934 packFloatx80m(0, 0x0000, LIT64(0x0000000000000000));
5935
5936static const floatx80 floatx80_Two =
5937 packFloatx80m(0, 0x4000, LIT64(0x8000000000000000));
5938
5939static const floatx80 floatx80_Ten =
5940 packFloatx80m(0, 0x4002, LIT64(0xa000000000000000));
5941
5942LOCALPROC myfp_Sinh(myfpr *r, myfpr *x)
5943{
5944 myfpr ex;
5945 myfpr nx;
5946 myfpr enx;
5947 myfpr t1;
5948
5949 myfp_EToX(&ex, x);
5950 myfp_Neg(&nx, x);
5951 myfp_EToX(&enx, &nx);
5952 myfp_Sub(&t1, &ex, &enx);
5953 myfp_Div(r, &t1, &floatx80_Two);
5954}
5955
5956LOCALPROC myfp_Cosh(myfpr *r, myfpr *x)
5957{
5958 myfpr ex;
5959 myfpr nx;
5960 myfpr enx;
5961 myfpr t1;
5962
5963 myfp_EToX(&ex, x);
5964 myfp_Neg(&nx, x);
5965 myfp_EToX(&enx, &nx);
5966 myfp_Add(&t1, &ex, &enx);
5967 myfp_Div(r, &t1, &floatx80_Two);
5968}
5969
5970LOCALPROC myfp_Tanh(myfpr *r, myfpr *x)
5971{
5972 myfpr x2;
5973 myfpr ex2;
5974 myfpr ex2m1;
5975 myfpr ex2p1;
5976
5977 myfp_Mul(&x2, x, &floatx80_Two);
5978 myfp_EToX(&ex2, &x2);
5979 myfp_Sub(&ex2m1, &ex2, &floatx80_one);
5980 myfp_Add(&ex2p1, &ex2, &floatx80_one);
5981 myfp_Div(r, &ex2m1, &ex2p1);
5982}
5983
5984LOCALPROC myfp_ATanh(myfpr *r, myfpr *x)
5985{
5986 myfpr onepx;
5987 myfpr onemx;
5988 myfpr dv;
5989 myfpr ldv;
5990
5991 myfp_Add(&onepx, x, &floatx80_one);
5992 myfp_Sub(&onemx, x, &floatx80_one);
5993 myfp_Div(&dv, &onepx, &onemx);
5994 myfp_LogN(&ldv, &dv);
5995 myfp_Div(r, &ldv, &floatx80_Two);
5996}
5997
5998LOCALPROC myfp_SinCos(myfpr *r_sin, myfpr *r_cos, myfpr *source)
5999{
6000 (void) fsincos(*source, r_sin, r_cos);
6001}
6002
6003LOCALFUNC blnr myfp_getCR(myfpr *r, ui4b opmode)
6004{
6005 switch (opmode) {
6006 case 0x00:
6007 *r = floatx80_pi; /* M_PI */
6008 break;
6009 case 0x0B:
6010 {
6011 const floatx80 t =
6012 packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798));
6013 *r = t; /* log10(2.0) = ln(2) / ln(10), unknown accuracy */
6014 }
6015 break;
6016 case 0x0C:
6017 {
6018 const floatx80 t =
6019 packFloatx80m(0, 0x4000, LIT64(0xadf85458a2bb4a9b));
6020 *r = t; /* exp(1.0), unknown accuracy */
6021 }
6022 break;
6023 case 0x0D:
6024 {
6025 const floatx80 t =
6026 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
6027 *r = t; /* 1.0 / log(2.0) */
6028 }
6029 break;
6030 case 0x0E:
6031 {
6032 const floatx80 t =
6033 packFloatx80m(0, 0x3ffd, LIT64(0xde5bd8a937287195));
6034 *r = t; /* 1.0 / log(10.0), unknown accuracy */
6035 }
6036 break;
6037 case 0x0F:
6038 *r = floatx80_zero; /* 0.0 */
6039 break;
6040 case 0x30:
6041 {
6042 const floatx80 t =
6043 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
6044 *r = t; /* log(2.0) */
6045 }
6046 break;
6047 case 0x31:
6048 {
6049 const floatx80 t =
6050 packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17));
6051 *r = t; /* log(10.0) */
6052 }
6053 break;
6054 case 0x32:
6055 *r = floatx80_one; /* 1.0 */
6056 break;
6057 case 0x33:
6058 *r = floatx80_Ten; /* 10.0 */
6059 break;
6060 case 0x34:
6061 {
6062 const floatx80 t =
6063 packFloatx80m(0, 0x4005, LIT64(0xc800000000000000));
6064 *r = t; /* 100.0 */
6065 }
6066 break;
6067 case 0x35:
6068 {
6069 const floatx80 t =
6070 packFloatx80m(0, 0x400c, LIT64(0x9c40000000000000));
6071 *r = t; /* 10000.0 */
6072 }
6073 break;
6074 case 0x36:
6075 {
6076 const floatx80 t =
6077 packFloatx80m(0, 0x4019, LIT64(0xbebc200000000000));
6078 *r = t; /* 1.0e8 */
6079 }
6080 break;
6081 case 0x37:
6082 {
6083 const floatx80 t =
6084 packFloatx80m(0, 0x4034, LIT64(0x8e1bc9bf04000000));
6085 *r = t; /* 1.0e16 */
6086 }
6087 break;
6088 case 0x38:
6089 {
6090 const floatx80 t =
6091 packFloatx80m(0, 0x4069, LIT64(0x9dc5ada82b70b59e));
6092 *r = t; /* 1.0e32 */
6093 }
6094 break;
6095 case 0x39:
6096 {
6097 const floatx80 t =
6098 packFloatx80m(0, 0x40d3, LIT64(0xc2781f49ffcfa6d5));
6099 *r = t; /* 1.0e64 */
6100 }
6101 break;
6102 case 0x3A:
6103 {
6104 const floatx80 t =
6105 packFloatx80m(0, 0x41a8, LIT64(0x93ba47c980e98ce0));
6106 *r = t; /* 1.0e128 */
6107 }
6108 break;
6109 case 0x3B:
6110 {
6111 const floatx80 t =
6112 packFloatx80m(0, 0x4351, LIT64(0xaa7eebfb9df9de8e));
6113 *r = t; /* 1.0e256 */
6114 }
6115 break;
6116 case 0x3C:
6117 {
6118 const floatx80 t =
6119 packFloatx80m(0, 0x46a3, LIT64(0xe319a0aea60e91c7));
6120 *r = t; /* 1.0e512 */
6121 }
6122 break;
6123 case 0x3D:
6124 {
6125 const floatx80 t =
6126 packFloatx80m(0, 0x4d48, LIT64(0xc976758681750c17));
6127 *r = t; /* 1.0e1024 */
6128 }
6129 break;
6130 case 0x3E:
6131 {
6132 const floatx80 t =
6133 packFloatx80m(0, 0x5a92, LIT64(0x9e8b3b5dc53d5de5));
6134 *r = t; /* 1.0e2048 */
6135 }
6136 break;
6137 case 0x3F:
6138 {
6139 const floatx80 t =
6140 packFloatx80m(0, 0x7525, LIT64(0xc46052028a20979b));
6141 *r = t; /* 1.0e4096 */
6142 }
6143 break;
6144 default:
6145 return falseblnr;
6146 }
6147 return trueblnr;
6148}
6149
6150/* Floating point control register */
6151
6152LOCALPROC myfp_SetFPCR(ui5r v)
6153{
6154 switch ((v >> 4) & 0x03) {
6155 case 0:
6156 float_rounding_mode = float_round_nearest_even;
6157 break;
6158 case 1:
6159 float_rounding_mode = float_round_to_zero;
6160 break;
6161 case 2:
6162 float_rounding_mode = float_round_down;
6163 break;
6164 case 3:
6165 float_rounding_mode = float_round_up;
6166 break;
6167 }
6168 switch ((v >> 6) & 0x03) {
6169 case 0:
6170 floatx80_rounding_precision = 80;
6171 break;
6172 case 1:
6173 floatx80_rounding_precision = 32;
6174 break;
6175 case 2:
6176 floatx80_rounding_precision = 64;
6177 break;
6178 case 3:
6179 ReportAbnormalID(0x0201,
6180 "Bad rounding precision in myfp_SetFPCR");
6181 floatx80_rounding_precision = 80;
6182 break;
6183 }
6184 if (0 != (v & 0xF)) {
6185 ReportAbnormalID(0x0202,
6186 "Reserved bits not zero in myfp_SetFPCR");
6187 }
6188}
6189
6190LOCALFUNC ui5r myfp_GetFPCR(void)
6191{
6192 ui5r v = 0;
6193
6194 switch (float_rounding_mode) {
6195 case float_round_nearest_even:
6196 /* v |= (0 << 4); */
6197 break;
6198 case float_round_to_zero:
6199 v |= (1 << 4);
6200 break;
6201 case float_round_down:
6202 v |= (2 << 4);
6203 break;
6204 case float_round_up:
6205 v |= (3 << 4);
6206 break;
6207 }
6208
6209 if (80 == floatx80_rounding_precision) {
6210 /* v |= (0 << 6); */
6211 } else if (32 == floatx80_rounding_precision) {
6212 v |= (1 << 6);
6213 } else if (64 == floatx80_rounding_precision) {
6214 v |= (2 << 6);
6215 } else {
6216 ReportAbnormalID(0x0203,
6217 "Bad rounding precision in myfp_GetFPCR");
6218 }
6219
6220 return v;
6221}
6222
6223LOCALVAR struct myfp_envStruct
6224{
6225 ui5r FPSR; /* Floating point status register */
6226} myfp_env;
6227
6228LOCALPROC myfp_SetFPSR(ui5r v)
6229{
6230 myfp_env.FPSR = v;
6231}
6232
6233LOCALFUNC ui5r myfp_GetFPSR(void)
6234{
6235 return myfp_env.FPSR;
6236}
6237
6238LOCALFUNC ui3r myfp_GetConditionCodeByte(void)
6239{
6240 return (myfp_env.FPSR >> 24) & 0x0F;
6241}
6242
6243LOCALPROC myfp_SetConditionCodeByte(ui3r v)
6244{
6245 myfp_env.FPSR = ((myfp_env.FPSR & 0x00FFFFFF)
6246 | (v << 24));
6247}
6248
6249LOCALPROC myfp_SetConditionCodeByteFromResult(myfpr *result)
6250{
6251 /* Set condition codes here based on result */
6252
6253 int c_nan = myfp_IsNan(result) ? 1 : 0;
6254 int c_inf = myfp_IsInf(result) ? 1 : 0;
6255 int c_zero = myfp_IsZero(result) ? 1 : 0;
6256 int c_neg = myfp_IsNeg(result) ? 1 : 0;
6257
6258 myfp_SetConditionCodeByte(c_nan
6259 | (c_inf << 1)
6260 | (c_zero << 2)
6261 | (c_neg << 3));
6262}