this repo has no description
at main 6262 lines 187 kB view raw
1/* 2 FPMATHEM.h 3 4 Copyright (C) 2007 John R. Hauser, Stanislav Shwartsman, 5 Ross Martin, Paul C. Pratt 6 7 You can redistribute this file and/or modify it under the terms 8 of version 2 of the GNU General Public License as published by 9 the Free Software Foundation. You should have received a copy 10 of the license along with this file; see the file COPYING. 11 12 This file is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 license for more details. 16*/ 17 18/* 19 Floating Point MATH implemented with software EMulation 20 21 Based on the SoftFloat IEC/IEEE Floating-point Arithmetic 22 Package, Release 2b, written by John R. Hauser. 23 24 Also uses extensions to SoftFloat, written for 25 Bochs (x86 achitecture simulator), by Stanislav Shwartsman. 26*/ 27 28/* 29 original SoftFloat Copyright notice: 30 31 Written by John R. Hauser. This work was made possible in part by the 32 International Computer Science Institute, located at Suite 600, 1947 Center 33 Street, Berkeley, California 94704. Funding was partially provided by the 34 National Science Foundation under grant MIP-9311980. The original version 35 of this code was written as part of a project to build a fixed-point vector 36 processor in collaboration with the University of California at Berkeley, 37 overseen by Profs. Nelson Morgan and John Wawrzynek. More information 38 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ 39 arithmetic/SoftFloat.html'. 40 41 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has 42 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES 43 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS 44 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, 45 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE 46 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE 47 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR 48 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 49 50 Derivative works are acceptable, even for commercial purposes, so long as 51 (1) the source code for the derivative work includes prominent notice that 52 the work is derivative, and (2) the source code includes prominent notice with 53 these four paragraphs for those parts of this code that are retained. 54*/ 55 56/* 57 original Stanislav Shwartsman Copyright notice: 58 59 This source file is an extension to the SoftFloat IEC/IEEE Floating-point 60 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) 61 floating point emulation. 62 63 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has 64 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES 65 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS 66 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, 67 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE 68 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE 69 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR 70 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. 71 72 Derivative works are acceptable, even for commercial purposes, so long as 73 (1) the source code for the derivative work includes prominent notice that 74 the work is derivative, and (2) the source code includes prominent notice with 75 these four paragraphs for those parts of this code that are retained. 76 77 .*============================================================================ 78 * Written for Bochs (x86 achitecture simulator) by 79 * Stanislav Shwartsman [sshwarts at sourceforge net] 80 * ==========================================================================*. 81*/ 82 83 84/* 85 ReportAbnormalID unused 0x0204 - 0x02FF 86*/ 87 88 89/* soft float stuff */ 90 91/* 92 should avoid 64 bit literals. 93*/ 94 95typedef ui3r flag; /* 0/1 */ 96 97/* 98 To fix: softfloat representation of 99 denormalized extended precision numbers 100 is different than on 68881. 101*/ 102 103#define cIncludeFPUUnused cIncludeUnused 104 105/* ----- from original file "softfloat.h" ----- */ 106 107/*====================================================================== 108 109This C header file is part of the SoftFloat IEC/IEEE Floating-point 110Arithmetic Package, Release 2b. 111 112["original SoftFloat Copyright notice" went here, included near top of 113this file.] 114 115======================================================================*/ 116 117/*---------------------------------------------------------------------------- 118| The macro `FLOATX80' must be defined to enable the extended double-precision 119| floating-point format `floatx80'. If this macro is not defined, the 120| `floatx80' type will not be defined, and none of the functions that either 121| input or output the `floatx80' type will be defined. The same applies to 122| the `FLOAT128' macro and the quadruple-precision format `float128'. 123*----------------------------------------------------------------------------*/ 124#define FLOATX80 125#define FLOAT128 126 127/*---------------------------------------------------------------------------- 128| Software IEC/IEEE floating-point types. 129*----------------------------------------------------------------------------*/ 130 131typedef struct { 132 ui6b low; 133 unsigned short high; 134} floatx80; 135 136#ifdef FLOAT128 137typedef struct { 138 ui6b low, high; 139} float128; 140#endif 141 142 143/* ----- end from original file "softfloat.h" ----- */ 144 145 146/*---------------------------------------------------------------------------- 147| Software IEC/IEEE floating-point rounding mode. 148*----------------------------------------------------------------------------*/ 149enum { 150 float_round_nearest_even = 0, 151 float_round_down = 1, 152 float_round_up = 2, 153 float_round_to_zero = 3 154}; 155 156/*---------------------------------------------------------------------------- 157| Floating-point rounding mode, extended double-precision rounding precision, 158| and exception flags. 159*----------------------------------------------------------------------------*/ 160 161LOCALVAR si3r float_rounding_mode = float_round_nearest_even; 162 163 164/*---------------------------------------------------------------------------- 165| Software IEC/IEEE floating-point exception flags. 166*----------------------------------------------------------------------------*/ 167 168enum { 169 float_flag_invalid = 1, 170 float_flag_divbyzero = 4, 171 float_flag_overflow = 8, 172 float_flag_underflow = 16, 173 float_flag_inexact = 32 174}; 175LOCALVAR si3r float_exception_flags = 0; 176 177/*---------------------------------------------------------------------------- 178| Software IEC/IEEE extended double-precision rounding precision. Valid 179| values are 32, 64, and 80. 180*----------------------------------------------------------------------------*/ 181 182LOCALVAR si3r floatx80_rounding_precision = 80; 183 184/*---------------------------------------------------------------------------- 185| Primitive arithmetic functions, including multi-word arithmetic, and 186| division and square root approximations. (Can be specialized to target if 187| desired.) 188*----------------------------------------------------------------------------*/ 189 190/* ----- from original file "softfloat-macros" ----- */ 191 192/*============================================================================ 193 194This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 195Arithmetic Package, Release 2b. 196 197["original SoftFloat Copyright notice" went here, included near top of this file.] 198 199=============================================================================*/ 200 201/*---------------------------------------------------------------------------- 202| Shifts `a' right by the number of bits given in `count'. If any nonzero 203| bits are shifted off, they are ``jammed'' into the least significant bit of 204| the result by setting the least significant bit to 1. The value of `count' 205| can be arbitrarily large; in particular, if `count' is greater than 32, the 206| result will be either 0 or 1, depending on whether `a' is zero or nonzero. 207| The result is stored in the location pointed to by `zPtr'. 208*----------------------------------------------------------------------------*/ 209 210LOCALINLINEPROC shift32RightJamming( ui5b a, si4r count, ui5b *zPtr ) 211{ 212 ui5b z; 213 214 if ( count == 0 ) { 215 z = a; 216 } 217 else if ( count < 32 ) { 218 z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); 219 } 220 else { 221 z = ( a != 0 ); 222 } 223 *zPtr = z; 224 225} 226 227/*---------------------------------------------------------------------------- 228| Shifts `a' right by the number of bits given in `count'. If any nonzero 229| bits are shifted off, they are ``jammed'' into the least significant bit of 230| the result by setting the least significant bit to 1. The value of `count' 231| can be arbitrarily large; in particular, if `count' is greater than 64, the 232| result will be either 0 or 1, depending on whether `a' is zero or nonzero. 233| The result is stored in the location pointed to by `zPtr'. 234*----------------------------------------------------------------------------*/ 235 236LOCALINLINEPROC shift64RightJamming( ui6b a, si4r count, ui6b *zPtr ) 237{ 238 ui6b z; 239 240 if ( count == 0 ) { 241 z = a; 242 } 243 else if ( count < 64 ) { 244 z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); 245 } 246 else { 247 z = ( a != 0 ); 248 } 249 *zPtr = z; 250 251} 252 253/*---------------------------------------------------------------------------- 254| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 255| _plus_ the number of bits given in `count'. The shifted result is at most 256| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The 257| bits shifted off form a second 64-bit result as follows: The _last_ bit 258| shifted off is the most-significant bit of the extra result, and the other 259| 63 bits of the extra result are all zero if and only if _all_but_the_last_ 260| bits shifted off were all zero. This extra result is stored in the location 261| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. 262| (This routine makes more sense if `a0' and `a1' are considered to form 263| a fixed-point value with binary point between `a0' and `a1'. This fixed- 264| point value is shifted right by the number of bits given in `count', and 265| the integer part of the result is returned at the location pointed to by 266| `z0Ptr'. The fractional part of the result may be slightly corrupted as 267| described above, and is returned at the location pointed to by `z1Ptr'.) 268*----------------------------------------------------------------------------*/ 269 270LOCALINLINEPROC shift64ExtraRightJamming( 271 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr ) 272{ 273 ui6b z0, z1; 274 si3r negCount = ( - count ) & 63; 275 276 if ( count == 0 ) { 277 z1 = a1; 278 z0 = a0; 279 } 280 else if ( count < 64 ) { 281 z1 = ( a0<<negCount ) | ( a1 != 0 ); 282 z0 = a0>>count; 283 } 284 else { 285 if ( count == 64 ) { 286 z1 = a0 | ( a1 != 0 ); 287 } 288 else { 289 z1 = ( ( a0 | a1 ) != 0 ); 290 } 291 z0 = 0; 292 } 293 *z1Ptr = z1; 294 *z0Ptr = z0; 295 296} 297 298/*---------------------------------------------------------------------------- 299| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 300| number of bits given in `count'. Any bits shifted off are lost. The value 301| of `count' can be arbitrarily large; in particular, if `count' is greater 302| than 128, the result will be 0. The result is broken into two 64-bit pieces 303| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 304*----------------------------------------------------------------------------*/ 305 306LOCALINLINEPROC shift128Right( 307 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr ) 308{ 309 ui6b z0, z1; 310 si3r negCount = ( - count ) & 63; 311 312 if ( count == 0 ) { 313 z1 = a1; 314 z0 = a0; 315 } 316 else if ( count < 64 ) { 317 z1 = ( a0<<negCount ) | ( a1>>count ); 318 z0 = a0>>count; 319 } 320 else { 321 z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; 322 z0 = 0; 323 } 324 *z1Ptr = z1; 325 *z0Ptr = z0; 326 327} 328 329/*---------------------------------------------------------------------------- 330| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the 331| number of bits given in `count'. If any nonzero bits are shifted off, they 332| are ``jammed'' into the least significant bit of the result by setting the 333| least significant bit to 1. The value of `count' can be arbitrarily large; 334| in particular, if `count' is greater than 128, the result will be either 335| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or 336| nonzero. The result is broken into two 64-bit pieces which are stored at 337| the locations pointed to by `z0Ptr' and `z1Ptr'. 338*----------------------------------------------------------------------------*/ 339 340LOCALINLINEPROC shift128RightJamming( 341 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr ) 342{ 343 ui6b z0, z1; 344 si3r negCount = ( - count ) & 63; 345 346 if ( count == 0 ) { 347 z1 = a1; 348 z0 = a0; 349 } 350 else if ( count < 64 ) { 351 z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); 352 z0 = a0>>count; 353 } 354 else { 355 if ( count == 64 ) { 356 z1 = a0 | ( a1 != 0 ); 357 } 358 else if ( count < 128 ) { 359 z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); 360 } 361 else { 362 z1 = ( ( a0 | a1 ) != 0 ); 363 } 364 z0 = 0; 365 } 366 *z1Ptr = z1; 367 *z0Ptr = z0; 368 369} 370 371/*---------------------------------------------------------------------------- 372| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right 373| by 64 _plus_ the number of bits given in `count'. The shifted result is 374| at most 128 nonzero bits; these are broken into two 64-bit pieces which are 375| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted 376| off form a third 64-bit result as follows: The _last_ bit shifted off is 377| the most-significant bit of the extra result, and the other 63 bits of the 378| extra result are all zero if and only if _all_but_the_last_ bits shifted off 379| were all zero. This extra result is stored in the location pointed to by 380| `z2Ptr'. The value of `count' can be arbitrarily large. 381| (This routine makes more sense if `a0', `a1', and `a2' are considered 382| to form a fixed-point value with binary point between `a1' and `a2'. This 383| fixed-point value is shifted right by the number of bits given in `count', 384| and the integer part of the result is returned at the locations pointed to 385| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly 386| corrupted as described above, and is returned at the location pointed to by 387| `z2Ptr'.) 388*----------------------------------------------------------------------------*/ 389 390LOCALINLINEPROC shift128ExtraRightJamming( 391 ui6b a0, 392 ui6b a1, 393 ui6b a2, 394 si4r count, 395 ui6b *z0Ptr, 396 ui6b *z1Ptr, 397 ui6b *z2Ptr) 398{ 399 ui6b z0, z1, z2; 400 si3r negCount = ( - count ) & 63; 401 402 if ( count == 0 ) { 403 z2 = a2; 404 z1 = a1; 405 z0 = a0; 406 } 407 else { 408 if ( count < 64 ) { 409 z2 = a1<<negCount; 410 z1 = ( a0<<negCount ) | ( a1>>count ); 411 z0 = a0>>count; 412 } 413 else { 414 if ( count == 64 ) { 415 z2 = a1; 416 z1 = a0; 417 } 418 else { 419 a2 |= a1; 420 if ( count < 128 ) { 421 z2 = a0<<negCount; 422 z1 = a0>>( count & 63 ); 423 } 424 else { 425 z2 = ( count == 128 ) ? a0 : ( a0 != 0 ); 426 z1 = 0; 427 } 428 } 429 z0 = 0; 430 } 431 z2 |= ( a2 != 0 ); 432 } 433 *z2Ptr = z2; 434 *z1Ptr = z1; 435 *z0Ptr = z0; 436 437} 438 439/*---------------------------------------------------------------------------- 440| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the 441| number of bits given in `count'. Any bits shifted off are lost. The value 442| of `count' must be less than 64. The result is broken into two 64-bit 443| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 444*----------------------------------------------------------------------------*/ 445 446LOCALINLINEPROC shortShift128Left( 447 ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr ) 448{ 449 450 *z1Ptr = a1<<count; 451 *z0Ptr = 452 ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); 453 454} 455 456/*---------------------------------------------------------------------------- 457| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit 458| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so 459| any carry out is lost. The result is broken into two 64-bit pieces which 460| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. 461*----------------------------------------------------------------------------*/ 462 463LOCALINLINEPROC add128( 464 ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr ) 465{ 466 ui6b z1; 467 468 z1 = a1 + b1; 469 *z1Ptr = z1; 470 *z0Ptr = a0 + b0 + ( z1 < a1 ); 471} 472 473/*---------------------------------------------------------------------------- 474| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the 475| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is 476| modulo 2^192, so any carry out is lost. The result is broken into three 477| 64-bit pieces which are stored at the locations pointed to by `z0Ptr', 478| `z1Ptr', and `z2Ptr'. 479*----------------------------------------------------------------------------*/ 480 481LOCALINLINEPROC add192( 482 ui6b a0, 483 ui6b a1, 484 ui6b a2, 485 ui6b b0, 486 ui6b b1, 487 ui6b b2, 488 ui6b *z0Ptr, 489 ui6b *z1Ptr, 490 ui6b *z2Ptr) 491{ 492 ui6b z0, z1, z2; 493 si3r carry0, carry1; 494 495 z2 = a2 + b2; 496 carry1 = ( z2 < a2 ); 497 z1 = a1 + b1; 498 carry0 = ( z1 < a1 ); 499 z0 = a0 + b0; 500 z1 += carry1; 501 z0 += ( z1 < carry1 ); 502 z0 += carry0; 503 *z2Ptr = z2; 504 *z1Ptr = z1; 505 *z0Ptr = z0; 506 507} 508 509/*---------------------------------------------------------------------------- 510| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the 511| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo 512| 2^128, so any borrow out (carry out) is lost. The result is broken into two 513| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and 514| `z1Ptr'. 515*----------------------------------------------------------------------------*/ 516 517LOCALINLINEPROC 518 sub128( 519 ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr ) 520{ 521 522 *z1Ptr = a1 - b1; 523 *z0Ptr = a0 - b0 - ( a1 < b1 ); 524 525} 526 527/*---------------------------------------------------------------------------- 528| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' 529| from the 192-bit value formed by concatenating `a0', `a1', and `a2'. 530| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The 531| result is broken into three 64-bit pieces which are stored at the locations 532| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. 533*----------------------------------------------------------------------------*/ 534 535LOCALINLINEPROC 536 sub192( 537 ui6b a0, 538 ui6b a1, 539 ui6b a2, 540 ui6b b0, 541 ui6b b1, 542 ui6b b2, 543 ui6b *z0Ptr, 544 ui6b *z1Ptr, 545 ui6b *z2Ptr 546 ) 547{ 548 ui6b z0, z1, z2; 549 si3r borrow0, borrow1; 550 551 z2 = a2 - b2; 552 borrow1 = ( a2 < b2 ); 553 z1 = a1 - b1; 554 borrow0 = ( a1 < b1 ); 555 z0 = a0 - b0; 556 z0 -= ( z1 < borrow1 ); 557 z1 -= borrow1; 558 z0 -= borrow0; 559 *z2Ptr = z2; 560 *z1Ptr = z1; 561 *z0Ptr = z0; 562 563} 564 565/*---------------------------------------------------------------------------- 566| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken 567| into two 64-bit pieces which are stored at the locations pointed to by 568| `z0Ptr' and `z1Ptr'. 569*----------------------------------------------------------------------------*/ 570 571 572#ifndef HaveUi5to6Mul 573#define HaveUi5to6Mul 1 574#endif 575 576#if HaveUi5to6Mul 577LOCALINLINEPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z) 578{ 579 *z = ((ui6b) src1) * src2; 580} 581#else 582 583LOCALINLINEPROC Ui6fromHiLo(ui5b hi, ui5b lo, ui6b *z) 584{ 585 *z = (((ui6b)(hi)) << 32) + lo; 586#if 0 587 z->lo = hi; 588 z->hi = lo; 589#endif 590} 591 592LOCALPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z) 593{ 594 ui4b src1_lo = ui5b_lo(src1); 595 ui4b src2_lo = ui5b_lo(src2); 596 ui4b src1_hi = ui5b_hi(src1); 597 ui4b src2_hi = ui5b_hi(src2); 598 599 ui5b r0 = ( (ui6b) src1_lo ) * src2_lo; 600 ui5b r1 = ( (ui6b) src1_hi ) * src2_lo; 601 ui5b r2 = ( (ui6b) src1_lo ) * src2_hi; 602 ui5b r3 = ( (ui6b) src1_hi ) * src2_hi; 603 604 ui5b ra1 = ui5b_hi(r0) + ui5b_lo(r1) + ui5b_lo(r2); 605 606 ui5b lo = (ui5b_lo(ra1) << 16) | ui5b_lo(r0); 607 ui5b hi = ui5b_hi(ra1) + ui5b_hi(r1) + ui5b_hi(r2) + r3; 608 609 Ui6fromHiLo(hi, lo, z); 610} 611 612#endif 613 614 615LOCALINLINEPROC mul64To128( ui6b a, ui6b b, ui6b *z0Ptr, ui6b *z1Ptr ) 616{ 617 ui5b aHigh, aLow, bHigh, bLow; 618 ui6b z0, zMiddleA, zMiddleB, z1; 619 620 aLow = a; 621 aHigh = a>>32; 622 bLow = b; 623 bHigh = b>>32; 624 625 Ui5to6Mul(aLow, bLow, &z1); 626 Ui5to6Mul(aLow, bHigh, &zMiddleA); 627 Ui5to6Mul(aHigh, bLow, &zMiddleB); 628 Ui5to6Mul(aHigh, bHigh, &z0); 629 630 zMiddleA += zMiddleB; 631 z0 += ( ( (ui6b) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); 632 zMiddleA <<= 32; 633 z1 += zMiddleA; 634 z0 += ( z1 < zMiddleA ); 635 *z1Ptr = z1; 636 *z0Ptr = z0; 637 638} 639 640/*---------------------------------------------------------------------------- 641| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by 642| `b' to obtain a 192-bit product. The product is broken into three 64-bit 643| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and 644| `z2Ptr'. 645*----------------------------------------------------------------------------*/ 646 647LOCALINLINEPROC 648 mul128By64To192( 649 ui6b a0, 650 ui6b a1, 651 ui6b b, 652 ui6b *z0Ptr, 653 ui6b *z1Ptr, 654 ui6b *z2Ptr 655 ) 656{ 657 ui6b z0, z1, z2, more1; 658 659 mul64To128( a1, b, &z1, &z2 ); 660 mul64To128( a0, b, &z0, &more1 ); 661 add128( z0, more1, 0, z1, &z0, &z1 ); 662 *z2Ptr = z2; 663 *z1Ptr = z1; 664 *z0Ptr = z0; 665 666} 667 668/*---------------------------------------------------------------------------- 669| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the 670| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit 671| product. The product is broken into four 64-bit pieces which are stored at 672| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. 673*----------------------------------------------------------------------------*/ 674 675LOCALINLINEPROC 676 mul128To256( 677 ui6b a0, 678 ui6b a1, 679 ui6b b0, 680 ui6b b1, 681 ui6b *z0Ptr, 682 ui6b *z1Ptr, 683 ui6b *z2Ptr, 684 ui6b *z3Ptr 685 ) 686{ 687 ui6b z0, z1, z2, z3; 688 ui6b more1, more2; 689 690 mul64To128( a1, b1, &z2, &z3 ); 691 mul64To128( a1, b0, &z1, &more2 ); 692 add128( z1, more2, 0, z2, &z1, &z2 ); 693 mul64To128( a0, b0, &z0, &more1 ); 694 add128( z0, more1, 0, z1, &z0, &z1 ); 695 mul64To128( a0, b1, &more1, &more2 ); 696 add128( more1, more2, 0, z2, &more1, &z2 ); 697 add128( z0, z1, 0, more1, &z0, &z1 ); 698 *z3Ptr = z3; 699 *z2Ptr = z2; 700 *z1Ptr = z1; 701 *z0Ptr = z0; 702 703} 704 705/*---------------------------------------------------------------------------- 706| Returns an approximation to the 64-bit integer quotient obtained by dividing 707| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The 708| divisor `b' must be at least 2^63. If q is the exact quotient truncated 709| toward zero, the approximation returned lies between q and q + 2 inclusive. 710| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit 711| unsigned integer is returned. 712*----------------------------------------------------------------------------*/ 713 714#ifndef HaveUi6Div 715#define HaveUi6Div 0 716#endif 717 718#if HaveUi6Div 719#define Ui6Div(x, y) ((x) / (y)) 720#else 721/* 722 Assuming other 64 bit operations available, 723 like compare, subtract, shift. 724*/ 725LOCALFUNC ui6b Ui6Div(ui6b num, ui6b den) 726{ 727 ui6b bit = 1; 728 ui6b res = 0; 729 730 while ((den < num) && bit && ! (den & (LIT64(1) << 63))) { 731 den <<= 1; 732 bit <<= 1; 733 } 734 735 while (bit) { 736 if (num >= den) { 737 num -= den; 738 res |= bit; 739 } 740 bit >>= 1; 741 den >>= 1; 742 } 743 744 return res; 745} 746#endif 747 748LOCALFUNC ui6b estimateDiv128To64( ui6b a0, ui6b a1, ui6b b ) 749{ 750 ui6b b0, b1; 751 ui6b rem0, rem1, term0, term1; 752 ui6b z; 753 754 if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); 755 b0 = b>>32; 756 z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : Ui6Div(a0, b0) << 32; 757 mul64To128( b, z, &term0, &term1 ); 758 sub128( a0, a1, term0, term1, &rem0, &rem1 ); 759 while ( ( (si6b) rem0 ) < 0 ) { 760 z -= LIT64( 0x100000000 ); 761 b1 = b<<32; 762 add128( rem0, rem1, b0, b1, &rem0, &rem1 ); 763 } 764 rem0 = ( rem0<<32 ) | ( rem1>>32 ); 765 z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : Ui6Div(rem0, b0); 766 return z; 767 768} 769 770/*---------------------------------------------------------------------------- 771| Returns an approximation to the square root of the 32-bit significand given 772| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of 773| `aExp' (the least significant bit) is 1, the integer returned approximates 774| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' 775| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either 776| case, the approximation returned lies strictly within +/-2 of the exact 777| value. 778*----------------------------------------------------------------------------*/ 779 780LOCALFUNC ui5b estimateSqrt32( si4r aExp, ui5b a ) 781{ 782 static const ui4b sqrtOddAdjustments[] = { 783 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 784 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 785 }; 786 static const ui4b sqrtEvenAdjustments[] = { 787 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 788 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 789 }; 790 si3r index; 791 ui5b z; 792 793 index = ( a>>27 ) & 15; 794 if ( aExp & 1 ) { 795 z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; 796 z = ( ( a / z )<<14 ) + ( z<<15 ); 797 a >>= 1; 798 } 799 else { 800 z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; 801 z = a / z + z; 802 z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); 803 if ( z <= a ) return (ui5b) ( ( (si5b) a )>>1 ); 804 } 805 return ( (ui5b) Ui6Div( ( ( (ui6b) a )<<31 ), z ) ) + ( z>>1 ); 806 807} 808 809/*---------------------------------------------------------------------------- 810| Returns the number of leading 0 bits before the most-significant 1 bit of 811| `a'. If `a' is zero, 32 is returned. 812*----------------------------------------------------------------------------*/ 813 814LOCALFUNC si3r countLeadingZeros32( ui5b a ) 815{ 816 static const si3r countLeadingZerosHigh[] = { 817 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 818 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 819 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 820 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 821 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 822 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 823 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 824 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 825 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 826 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 827 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 828 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 829 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 830 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 831 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 832 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 833 }; 834 si3r shiftCount; 835 836 shiftCount = 0; 837 if ( a < 0x10000 ) { 838 shiftCount += 16; 839 a <<= 16; 840 } 841 if ( a < 0x1000000 ) { 842 shiftCount += 8; 843 a <<= 8; 844 } 845 shiftCount += countLeadingZerosHigh[ a>>24 ]; 846 return shiftCount; 847 848} 849 850/*---------------------------------------------------------------------------- 851| Returns the number of leading 0 bits before the most-significant 1 bit of 852| `a'. If `a' is zero, 64 is returned. 853*----------------------------------------------------------------------------*/ 854 855LOCALFUNC si3r countLeadingZeros64( ui6b a ) 856{ 857 si3r shiftCount; 858 859 shiftCount = 0; 860 if ( a < ( (ui6b) 1 )<<32 ) { 861 shiftCount += 32; 862 } 863 else { 864 a >>= 32; 865 } 866 shiftCount += countLeadingZeros32( a ); 867 return shiftCount; 868 869} 870 871/*---------------------------------------------------------------------------- 872| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' 873| is equal to the 128-bit value formed by concatenating `b0' and `b1'. 874| Otherwise, returns 0. 875*----------------------------------------------------------------------------*/ 876 877LOCALINLINEFUNC flag eq128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 ) 878{ 879 880 return ( a0 == b0 ) && ( a1 == b1 ); 881 882} 883 884/*---------------------------------------------------------------------------- 885| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 886| than or equal to the 128-bit value formed by concatenating `b0' and `b1'. 887| Otherwise, returns 0. 888*----------------------------------------------------------------------------*/ 889 890LOCALINLINEFUNC flag le128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 ) 891{ 892 893 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); 894 895} 896 897/*---------------------------------------------------------------------------- 898| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less 899| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, 900| returns 0. 901*----------------------------------------------------------------------------*/ 902 903LOCALINLINEFUNC flag lt128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 ) 904{ 905 906 return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); 907 908} 909 910/*---------------------------------------------------------------------------- 911| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is 912| not equal to the 128-bit value formed by concatenating `b0' and `b1'. 913| Otherwise, returns 0. 914*----------------------------------------------------------------------------*/ 915 916#if cIncludeFPUUnused 917LOCALINLINEFUNC flag ne128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 ) 918{ 919 920 return ( a0 != b0 ) || ( a1 != b1 ); 921 922} 923#endif 924 925/* ----- end from original file "softfloat-macros" ----- */ 926 927/*---------------------------------------------------------------------------- 928| Functions and definitions to determine: (1) whether tininess for underflow 929| is detected before or after rounding by default, (2) what (if anything) 930| happens when exceptions are raised, (3) how signaling NaNs are distinguished 931| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 932| are propagated from function inputs to output. These details are target- 933| specific. 934*----------------------------------------------------------------------------*/ 935 936/* ----- from original file "softfloat-specialize" ----- */ 937 938/*============================================================================ 939 940This C source fragment is part of the SoftFloat IEC/IEEE Floating-point 941Arithmetic Package, Release 2b. 942 943["original SoftFloat Copyright notice" went here, included near top of this file.] 944 945=============================================================================*/ 946 947/*---------------------------------------------------------------------------- 948| Software IEC/IEEE floating-point underflow tininess-detection mode. 949*----------------------------------------------------------------------------*/ 950enum { 951 float_tininess_after_rounding = 0, 952 float_tininess_before_rounding = 1 953}; 954 955/*---------------------------------------------------------------------------- 956| Underflow tininess-detection mode, statically initialized to default value. 957| (The declaration in `softfloat.h' must match the `si3r' type here.) 958*----------------------------------------------------------------------------*/ 959LOCALVAR si3r float_detect_tininess = float_tininess_after_rounding; 960 961/*---------------------------------------------------------------------------- 962| Routine to raise any or all of the software IEC/IEEE floating-point 963| exception flags. 964*----------------------------------------------------------------------------*/ 965/*---------------------------------------------------------------------------- 966| Raises the exceptions specified by `flags'. Floating-point traps can be 967| defined here if desired. It is currently not possible for such a trap 968| to substitute a result value. If traps are not implemented, this routine 969| should be simply `float_exception_flags |= flags;'. 970*----------------------------------------------------------------------------*/ 971 972LOCALFUNC void float_raise( si3r flags ) 973{ 974 975 float_exception_flags |= flags; 976 977} 978 979/*---------------------------------------------------------------------------- 980| Internal canonical NaN format. 981*----------------------------------------------------------------------------*/ 982typedef struct { 983 flag sign; 984 ui6b high, low; 985} commonNaNT; 986 987/*---------------------------------------------------------------------------- 988| The pattern for a default generated extended double-precision NaN. The 989| `high' and `low' values hold the most- and least-significant bits, 990| respectively. 991*----------------------------------------------------------------------------*/ 992#define floatx80_default_nan_high 0xFFFF 993#define floatx80_default_nan_low LIT64( 0xC000000000000000 ) 994 995/*---------------------------------------------------------------------------- 996| Returns 1 if the extended double-precision floating-point value `a' is a 997| NaN; otherwise returns 0. 998*----------------------------------------------------------------------------*/ 999 1000LOCALFUNC flag floatx80_is_nan( floatx80 a ) 1001{ 1002 1003 return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (ui6b) ( a.low<<1 ); 1004 1005} 1006 1007/*---------------------------------------------------------------------------- 1008| Returns 1 if the extended double-precision floating-point value `a' is a 1009| signaling NaN; otherwise returns 0. 1010*----------------------------------------------------------------------------*/ 1011 1012LOCALFUNC flag floatx80_is_signaling_nan( floatx80 a ) 1013{ 1014 ui6b aLow; 1015 1016 aLow = a.low & ~ LIT64( 0x4000000000000000 ); 1017 return 1018 ( ( a.high & 0x7FFF ) == 0x7FFF ) 1019 && (ui6b) ( aLow<<1 ) 1020 && ( a.low == aLow ); 1021 1022} 1023 1024/*---------------------------------------------------------------------------- 1025| Returns the result of converting the extended double-precision floating- 1026| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the 1027| invalid exception is raised. 1028*----------------------------------------------------------------------------*/ 1029 1030LOCALFUNC commonNaNT floatx80ToCommonNaN( floatx80 a ) 1031{ 1032 commonNaNT z; 1033 1034 if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 1035 z.sign = a.high>>15; 1036 z.low = 0; 1037 z.high = a.low<<1; 1038 return z; 1039 1040} 1041 1042/*---------------------------------------------------------------------------- 1043| Returns the result of converting the canonical NaN `a' to the extended 1044| double-precision floating-point format. 1045*----------------------------------------------------------------------------*/ 1046 1047LOCALFUNC floatx80 commonNaNToFloatx80( commonNaNT a ) 1048{ 1049 floatx80 z; 1050 1051 z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); 1052 z.high = ( ( (ui4b) a.sign )<<15 ) | 0x7FFF; 1053 return z; 1054 1055} 1056 1057/*---------------------------------------------------------------------------- 1058| Takes two extended double-precision floating-point values `a' and `b', one 1059| of which is a NaN, and returns the appropriate NaN result. If either `a' or 1060| `b' is a signaling NaN, the invalid exception is raised. 1061*----------------------------------------------------------------------------*/ 1062 1063LOCALFUNC floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) 1064{ 1065 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 1066 1067 aIsNaN = floatx80_is_nan( a ); 1068 aIsSignalingNaN = floatx80_is_signaling_nan( a ); 1069 bIsNaN = floatx80_is_nan( b ); 1070 bIsSignalingNaN = floatx80_is_signaling_nan( b ); 1071 a.low |= LIT64( 0xC000000000000000 ); 1072 b.low |= LIT64( 0xC000000000000000 ); 1073 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 1074 if ( aIsSignalingNaN ) { 1075 if ( bIsSignalingNaN ) goto returnLargerSignificand; 1076 return bIsNaN ? b : a; 1077 } 1078 else if ( aIsNaN ) { 1079 if ( bIsSignalingNaN | ! bIsNaN ) return a; 1080 returnLargerSignificand: 1081 if ( a.low < b.low ) return b; 1082 if ( b.low < a.low ) return a; 1083 return ( a.high < b.high ) ? a : b; 1084 } 1085 else { 1086 return b; 1087 } 1088 1089} 1090 1091#ifdef FLOAT128 1092 1093/*---------------------------------------------------------------------------- 1094| The pattern for a default generated quadruple-precision NaN. The `high' and 1095| `low' values hold the most- and least-significant bits, respectively. 1096*----------------------------------------------------------------------------*/ 1097#define float128_default_nan_high LIT64( 0xFFFF800000000000 ) 1098#define float128_default_nan_low LIT64( 0x0000000000000000 ) 1099 1100/*---------------------------------------------------------------------------- 1101| Returns 1 if the quadruple-precision floating-point value `a' is a NaN; 1102| otherwise returns 0. 1103*----------------------------------------------------------------------------*/ 1104 1105LOCALFUNC flag float128_is_nan( float128 a ) 1106{ 1107 1108 return 1109 ( LIT64( 0xFFFE000000000000 ) <= (ui6b) ( a.high<<1 ) ) 1110 && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); 1111 1112} 1113 1114/*---------------------------------------------------------------------------- 1115| Returns 1 if the quadruple-precision floating-point value `a' is a 1116| signaling NaN; otherwise returns 0. 1117*----------------------------------------------------------------------------*/ 1118 1119LOCALFUNC flag float128_is_signaling_nan( float128 a ) 1120{ 1121 1122 return 1123 ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) 1124 && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); 1125 1126} 1127 1128/*---------------------------------------------------------------------------- 1129| Returns the result of converting the quadruple-precision floating-point NaN 1130| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid 1131| exception is raised. 1132*----------------------------------------------------------------------------*/ 1133 1134LOCALFUNC commonNaNT float128ToCommonNaN( float128 a ) 1135{ 1136 commonNaNT z; 1137 1138 if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 1139 z.sign = a.high>>63; 1140 shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); 1141 return z; 1142 1143} 1144 1145/*---------------------------------------------------------------------------- 1146| Returns the result of converting the canonical NaN `a' to the quadruple- 1147| precision floating-point format. 1148*----------------------------------------------------------------------------*/ 1149 1150LOCALFUNC float128 commonNaNToFloat128( commonNaNT a ) 1151{ 1152 float128 z; 1153 1154 shift128Right( a.high, a.low, 16, &z.high, &z.low ); 1155 z.high |= ( ( (ui6b) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); 1156 return z; 1157 1158} 1159 1160/*---------------------------------------------------------------------------- 1161| Takes two quadruple-precision floating-point values `a' and `b', one of 1162| which is a NaN, and returns the appropriate NaN result. If either `a' or 1163| `b' is a signaling NaN, the invalid exception is raised. 1164*----------------------------------------------------------------------------*/ 1165 1166LOCALFUNC float128 propagateFloat128NaN( float128 a, float128 b ) 1167{ 1168 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; 1169 1170 aIsNaN = float128_is_nan( a ); 1171 aIsSignalingNaN = float128_is_signaling_nan( a ); 1172 bIsNaN = float128_is_nan( b ); 1173 bIsSignalingNaN = float128_is_signaling_nan( b ); 1174 a.high |= LIT64( 0x0000800000000000 ); 1175 b.high |= LIT64( 0x0000800000000000 ); 1176 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); 1177 if ( aIsSignalingNaN ) { 1178 if ( bIsSignalingNaN ) goto returnLargerSignificand; 1179 return bIsNaN ? b : a; 1180 } 1181 else if ( aIsNaN ) { 1182 if ( bIsSignalingNaN | ! bIsNaN ) return a; 1183 returnLargerSignificand: 1184 if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b; 1185 if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a; 1186 return ( a.high < b.high ) ? a : b; 1187 } 1188 else { 1189 return b; 1190 } 1191 1192} 1193 1194#endif 1195 1196/* ----- end from original file "softfloat-specialize" ----- */ 1197 1198/* ----- from original file "softfloat.c" ----- */ 1199 1200 1201/*============================================================================ 1202 1203This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic 1204Package, Release 2b. 1205 1206["original SoftFloat Copyright notice" went here, included near top of this file.] 1207 1208=============================================================================*/ 1209 1210/*---------------------------------------------------------------------------- 1211| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 1212| and 7, and returns the properly rounded 32-bit integer corresponding to the 1213| input. If `zSign' is 1, the input is negated before being converted to an 1214| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 1215| is simply rounded to an integer, with the inexact exception raised if the 1216| input cannot be represented exactly as an integer. However, if the fixed- 1217| point input is too large, the invalid exception is raised and the largest 1218| positive or negative integer is returned. 1219*----------------------------------------------------------------------------*/ 1220 1221LOCALFUNC si5r roundAndPackInt32( flag zSign, ui6b absZ ) 1222{ 1223 si3r roundingMode; 1224 flag roundNearestEven; 1225 si3r roundIncrement, roundBits; 1226 si5r z; 1227 1228 roundingMode = float_rounding_mode; 1229 roundNearestEven = ( roundingMode == float_round_nearest_even ); 1230 roundIncrement = 0x40; 1231 if ( ! roundNearestEven ) { 1232 if ( roundingMode == float_round_to_zero ) { 1233 roundIncrement = 0; 1234 } 1235 else { 1236 roundIncrement = 0x7F; 1237 if ( zSign ) { 1238 if ( roundingMode == float_round_up ) roundIncrement = 0; 1239 } 1240 else { 1241 if ( roundingMode == float_round_down ) roundIncrement = 0; 1242 } 1243 } 1244 } 1245 roundBits = absZ & 0x7F; 1246 absZ = ( absZ + roundIncrement )>>7; 1247 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 1248 z = absZ; 1249 if ( zSign ) z = - z; 1250 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 1251 float_raise( float_flag_invalid ); 1252 return zSign ? (si5b) 0x80000000 : 0x7FFFFFFF; 1253 } 1254 if ( roundBits ) float_exception_flags |= float_flag_inexact; 1255 return z; 1256 1257} 1258 1259/*---------------------------------------------------------------------------- 1260| Returns the fraction bits of the extended double-precision floating-point 1261| value `a'. 1262*----------------------------------------------------------------------------*/ 1263 1264LOCALINLINEFUNC ui6b extractFloatx80Frac( floatx80 a ) 1265{ 1266 1267 return a.low; 1268 1269} 1270 1271/*---------------------------------------------------------------------------- 1272| Returns the exponent bits of the extended double-precision floating-point 1273| value `a'. 1274*----------------------------------------------------------------------------*/ 1275 1276LOCALINLINEFUNC si5r extractFloatx80Exp( floatx80 a ) 1277{ 1278 1279 return a.high & 0x7FFF; 1280 1281} 1282 1283/*---------------------------------------------------------------------------- 1284| Returns the sign bit of the extended double-precision floating-point value 1285| `a'. 1286*----------------------------------------------------------------------------*/ 1287 1288LOCALINLINEFUNC flag extractFloatx80Sign( floatx80 a ) 1289{ 1290 1291 return a.high>>15; 1292 1293} 1294 1295/*---------------------------------------------------------------------------- 1296| Normalizes the subnormal extended double-precision floating-point value 1297| represented by the denormalized significand `aSig'. The normalized exponent 1298| and significand are stored at the locations pointed to by `zExpPtr' and 1299| `zSigPtr', respectively. 1300*----------------------------------------------------------------------------*/ 1301 1302LOCALPROC 1303 normalizeFloatx80Subnormal( ui6b aSig, si5r *zExpPtr, ui6b *zSigPtr ) 1304{ 1305 si3r shiftCount; 1306 1307 shiftCount = countLeadingZeros64( aSig ); 1308 *zSigPtr = aSig<<shiftCount; 1309 *zExpPtr = 1 - shiftCount; 1310 1311} 1312 1313/*---------------------------------------------------------------------------- 1314| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an 1315| extended double-precision floating-point value, returning the result. 1316*----------------------------------------------------------------------------*/ 1317 1318LOCALINLINEFUNC floatx80 packFloatx80( flag zSign, si5r zExp, ui6b zSig ) 1319{ 1320 floatx80 z; 1321 1322 z.low = zSig; 1323 z.high = ( ( (ui4b) zSign )<<15 ) + zExp; 1324 return z; 1325 1326} 1327 1328/*---------------------------------------------------------------------------- 1329| Takes an abstract floating-point value having sign `zSign', exponent `zExp', 1330| and extended significand formed by the concatenation of `zSig0' and `zSig1', 1331| and returns the proper extended double-precision floating-point value 1332| corresponding to the abstract input. Ordinarily, the abstract value is 1333| rounded and packed into the extended double-precision format, with the 1334| inexact exception raised if the abstract input cannot be represented 1335| exactly. However, if the abstract value is too large, the overflow and 1336| inexact exceptions are raised and an infinity or maximal finite value is 1337| returned. If the abstract value is too small, the input value is rounded to 1338| a subnormal number, and the underflow and inexact exceptions are raised if 1339| the abstract input cannot be represented exactly as a subnormal extended 1340| double-precision floating-point number. 1341| If `roundingPrecision' is 32 or 64, the result is rounded to the same 1342| number of bits as single or double precision, respectively. Otherwise, the 1343| result is rounded to the full precision of the extended double-precision 1344| format. 1345| The input significand must be normalized or smaller. If the input 1346| significand is not normalized, `zExp' must be 0; in that case, the result 1347| returned is a subnormal number, and it must not require rounding. The 1348| handling of underflow and overflow follows the IEC/IEEE Standard for Binary 1349| Floating-Point Arithmetic. 1350*----------------------------------------------------------------------------*/ 1351 1352LOCALFUNC floatx80 1353 roundAndPackFloatx80( 1354 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 1355 ) 1356{ 1357 si3r roundingMode; 1358 flag roundNearestEven, increment, isTiny; 1359 si6r roundIncrement, roundMask, roundBits; 1360 1361 roundingMode = float_rounding_mode; 1362 roundNearestEven = ( roundingMode == float_round_nearest_even ); 1363 if ( roundingPrecision == 80 ) goto precision80; 1364 if ( roundingPrecision == 64 ) { 1365 roundIncrement = LIT64( 0x0000000000000400 ); 1366 roundMask = LIT64( 0x00000000000007FF ); 1367 } 1368 else if ( roundingPrecision == 32 ) { 1369 roundIncrement = LIT64( 0x0000008000000000 ); 1370 roundMask = LIT64( 0x000000FFFFFFFFFF ); 1371 } 1372 else { 1373 goto precision80; 1374 } 1375 zSig0 |= ( zSig1 != 0 ); 1376 if ( ! roundNearestEven ) { 1377 if ( roundingMode == float_round_to_zero ) { 1378 roundIncrement = 0; 1379 } 1380 else { 1381 roundIncrement = roundMask; 1382 if ( zSign ) { 1383 if ( roundingMode == float_round_up ) roundIncrement = 0; 1384 } 1385 else { 1386 if ( roundingMode == float_round_down ) roundIncrement = 0; 1387 } 1388 } 1389 } 1390 roundBits = zSig0 & roundMask; 1391 if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) { 1392 if ( ( 0x7FFE < zExp ) 1393 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 1394 ) { 1395 goto overflow; 1396 } 1397 if ( zExp <= 0 ) { 1398 isTiny = 1399 ( float_detect_tininess == float_tininess_before_rounding ) 1400 || ( zExp < 0 ) 1401 || ( zSig0 <= zSig0 + roundIncrement ); 1402 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 1403 zExp = 0; 1404 roundBits = zSig0 & roundMask; 1405 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 1406 if ( roundBits ) float_exception_flags |= float_flag_inexact; 1407 zSig0 += roundIncrement; 1408 if ( (si6b) zSig0 < 0 ) zExp = 1; 1409 roundIncrement = roundMask + 1; 1410 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 1411 roundMask |= roundIncrement; 1412 } 1413 zSig0 &= ~ roundMask; 1414 return packFloatx80( zSign, zExp, zSig0 ); 1415 } 1416 } 1417 if ( roundBits ) float_exception_flags |= float_flag_inexact; 1418 zSig0 += roundIncrement; 1419 if ( zSig0 < roundIncrement ) { 1420 ++zExp; 1421 zSig0 = LIT64( 0x8000000000000000 ); 1422 } 1423 roundIncrement = roundMask + 1; 1424 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 1425 roundMask |= roundIncrement; 1426 } 1427 zSig0 &= ~ roundMask; 1428 if ( zSig0 == 0 ) zExp = 0; 1429 return packFloatx80( zSign, zExp, zSig0 ); 1430 precision80: 1431 increment = ( (si6b) zSig1 < 0 ); 1432 if ( ! roundNearestEven ) { 1433 if ( roundingMode == float_round_to_zero ) { 1434 increment = 0; 1435 } 1436 else { 1437 if ( zSign ) { 1438 increment = ( roundingMode == float_round_down ) && zSig1; 1439 } 1440 else { 1441 increment = ( roundingMode == float_round_up ) && zSig1; 1442 } 1443 } 1444 } 1445 if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) { 1446 if ( ( 0x7FFE < zExp ) 1447 || ( ( zExp == 0x7FFE ) 1448 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 1449 && increment 1450 ) 1451 ) { 1452 roundMask = 0; 1453 overflow: 1454 float_raise( float_flag_overflow | float_flag_inexact ); 1455 if ( ( roundingMode == float_round_to_zero ) 1456 || ( zSign && ( roundingMode == float_round_up ) ) 1457 || ( ! zSign && ( roundingMode == float_round_down ) ) 1458 ) { 1459 return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 1460 } 1461 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 1462 } 1463 if ( zExp <= 0 ) { 1464 isTiny = 1465 ( float_detect_tininess == float_tininess_before_rounding ) 1466 || ( zExp < 0 ) 1467 || ! increment 1468 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 1469 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 1470 zExp = 0; 1471 if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 1472 if ( zSig1 ) float_exception_flags |= float_flag_inexact; 1473 if ( roundNearestEven ) { 1474 increment = ( (si6b) zSig1 < 0 ); 1475 } 1476 else { 1477 if ( zSign ) { 1478 increment = ( roundingMode == float_round_down ) && zSig1; 1479 } 1480 else { 1481 increment = ( roundingMode == float_round_up ) && zSig1; 1482 } 1483 } 1484 if ( increment ) { 1485 ++zSig0; 1486 zSig0 &= 1487 ~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 1488 if ( (si6b) zSig0 < 0 ) zExp = 1; 1489 } 1490 return packFloatx80( zSign, zExp, zSig0 ); 1491 } 1492 } 1493 if ( zSig1 ) float_exception_flags |= float_flag_inexact; 1494 if ( increment ) { 1495 ++zSig0; 1496 if ( zSig0 == 0 ) { 1497 ++zExp; 1498 zSig0 = LIT64( 0x8000000000000000 ); 1499 } 1500 else { 1501 zSig0 &= ~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 1502 } 1503 } 1504 else { 1505 if ( zSig0 == 0 ) zExp = 0; 1506 } 1507 return packFloatx80( zSign, zExp, zSig0 ); 1508 1509} 1510 1511/*---------------------------------------------------------------------------- 1512| Takes an abstract floating-point value having sign `zSign', exponent 1513| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 1514| and returns the proper extended double-precision floating-point value 1515| corresponding to the abstract input. This routine is just like 1516| `roundAndPackFloatx80' except that the input significand does not have to be 1517| normalized. 1518*----------------------------------------------------------------------------*/ 1519 1520LOCALFUNC floatx80 1521 normalizeRoundAndPackFloatx80( 1522 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 1523 ) 1524{ 1525 si3r shiftCount; 1526 1527 if ( zSig0 == 0 ) { 1528 zSig0 = zSig1; 1529 zSig1 = 0; 1530 zExp -= 64; 1531 } 1532 shiftCount = countLeadingZeros64( zSig0 ); 1533 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1534 zExp -= shiftCount; 1535 return 1536 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 1537 1538} 1539 1540#ifdef FLOAT128 1541 1542/*---------------------------------------------------------------------------- 1543| Returns the least-significant 64 fraction bits of the quadruple-precision 1544| floating-point value `a'. 1545*----------------------------------------------------------------------------*/ 1546 1547LOCALINLINEFUNC ui6b extractFloat128Frac1( float128 a ) 1548{ 1549 1550 return a.low; 1551 1552} 1553 1554/*---------------------------------------------------------------------------- 1555| Returns the most-significant 48 fraction bits of the quadruple-precision 1556| floating-point value `a'. 1557*----------------------------------------------------------------------------*/ 1558 1559LOCALINLINEFUNC ui6b extractFloat128Frac0( float128 a ) 1560{ 1561 1562 return a.high & LIT64( 0x0000FFFFFFFFFFFF ); 1563 1564} 1565 1566/*---------------------------------------------------------------------------- 1567| Returns the exponent bits of the quadruple-precision floating-point value 1568| `a'. 1569*----------------------------------------------------------------------------*/ 1570 1571LOCALINLINEFUNC si5r extractFloat128Exp( float128 a ) 1572{ 1573 1574 return ( a.high>>48 ) & 0x7FFF; 1575 1576} 1577 1578/*---------------------------------------------------------------------------- 1579| Returns the sign bit of the quadruple-precision floating-point value `a'. 1580*----------------------------------------------------------------------------*/ 1581 1582LOCALINLINEFUNC flag extractFloat128Sign( float128 a ) 1583{ 1584 1585 return a.high>>63; 1586 1587} 1588 1589/*---------------------------------------------------------------------------- 1590| Normalizes the subnormal quadruple-precision floating-point value 1591| represented by the denormalized significand formed by the concatenation of 1592| `aSig0' and `aSig1'. The normalized exponent is stored at the location 1593| pointed to by `zExpPtr'. The most significant 49 bits of the normalized 1594| significand are stored at the location pointed to by `zSig0Ptr', and the 1595| least significant 64 bits of the normalized significand are stored at the 1596| location pointed to by `zSig1Ptr'. 1597*----------------------------------------------------------------------------*/ 1598 1599LOCALPROC 1600 normalizeFloat128Subnormal( 1601 ui6b aSig0, 1602 ui6b aSig1, 1603 si5r *zExpPtr, 1604 ui6b *zSig0Ptr, 1605 ui6b *zSig1Ptr 1606 ) 1607{ 1608 si3r shiftCount; 1609 1610 if ( aSig0 == 0 ) { 1611 shiftCount = countLeadingZeros64( aSig1 ) - 15; 1612 if ( shiftCount < 0 ) { 1613 *zSig0Ptr = aSig1>>( - shiftCount ); 1614 *zSig1Ptr = aSig1<<( shiftCount & 63 ); 1615 } 1616 else { 1617 *zSig0Ptr = aSig1<<shiftCount; 1618 *zSig1Ptr = 0; 1619 } 1620 *zExpPtr = - shiftCount - 63; 1621 } 1622 else { 1623 shiftCount = countLeadingZeros64( aSig0 ) - 15; 1624 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 1625 *zExpPtr = 1 - shiftCount; 1626 } 1627 1628} 1629 1630/*---------------------------------------------------------------------------- 1631| Packs the sign `zSign', the exponent `zExp', and the significand formed 1632| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision 1633| floating-point value, returning the result. After being shifted into the 1634| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply 1635| added together to form the most significant 32 bits of the result. This 1636| means that any integer portion of `zSig0' will be added into the exponent. 1637| Since a properly normalized significand will have an integer portion equal 1638| to 1, the `zExp' input should be 1 less than the desired result exponent 1639| whenever `zSig0' and `zSig1' concatenated form a complete, normalized 1640| significand. 1641*----------------------------------------------------------------------------*/ 1642 1643LOCALINLINEFUNC float128 1644 packFloat128( flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 ) 1645{ 1646 float128 z; 1647 1648 z.low = zSig1; 1649 z.high = ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<48 ) + zSig0; 1650 return z; 1651 1652} 1653 1654/*---------------------------------------------------------------------------- 1655| Takes an abstract floating-point value having sign `zSign', exponent `zExp', 1656| and extended significand formed by the concatenation of `zSig0', `zSig1', 1657| and `zSig2', and returns the proper quadruple-precision floating-point value 1658| corresponding to the abstract input. Ordinarily, the abstract value is 1659| simply rounded and packed into the quadruple-precision format, with the 1660| inexact exception raised if the abstract input cannot be represented 1661| exactly. However, if the abstract value is too large, the overflow and 1662| inexact exceptions are raised and an infinity or maximal finite value is 1663| returned. If the abstract value is too small, the input value is rounded to 1664| a subnormal number, and the underflow and inexact exceptions are raised if 1665| the abstract input cannot be represented exactly as a subnormal quadruple- 1666| precision floating-point number. 1667| The input significand must be normalized or smaller. If the input 1668| significand is not normalized, `zExp' must be 0; in that case, the result 1669| returned is a subnormal number, and it must not require rounding. In the 1670| usual case that the input significand is normalized, `zExp' must be 1 less 1671| than the ``true'' floating-point exponent. The handling of underflow and 1672| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1673*----------------------------------------------------------------------------*/ 1674 1675LOCALFUNC float128 1676 roundAndPackFloat128( 1677 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1, ui6b zSig2 ) 1678{ 1679 si3r roundingMode; 1680 flag roundNearestEven, increment, isTiny; 1681 1682 roundingMode = float_rounding_mode; 1683 roundNearestEven = ( roundingMode == float_round_nearest_even ); 1684 increment = ( (si6b) zSig2 < 0 ); 1685 if ( ! roundNearestEven ) { 1686 if ( roundingMode == float_round_to_zero ) { 1687 increment = 0; 1688 } 1689 else { 1690 if ( zSign ) { 1691 increment = ( roundingMode == float_round_down ) && zSig2; 1692 } 1693 else { 1694 increment = ( roundingMode == float_round_up ) && zSig2; 1695 } 1696 } 1697 } 1698 if ( 0x7FFD <= (ui5b) zExp ) { 1699 if ( ( 0x7FFD < zExp ) 1700 || ( ( zExp == 0x7FFD ) 1701 && eq128( 1702 LIT64( 0x0001FFFFFFFFFFFF ), 1703 LIT64( 0xFFFFFFFFFFFFFFFF ), 1704 zSig0, 1705 zSig1 1706 ) 1707 && increment 1708 ) 1709 ) { 1710 float_raise( float_flag_overflow | float_flag_inexact ); 1711 if ( ( roundingMode == float_round_to_zero ) 1712 || ( zSign && ( roundingMode == float_round_up ) ) 1713 || ( ! zSign && ( roundingMode == float_round_down ) ) 1714 ) { 1715 return 1716 packFloat128( 1717 zSign, 1718 0x7FFE, 1719 LIT64( 0x0000FFFFFFFFFFFF ), 1720 LIT64( 0xFFFFFFFFFFFFFFFF ) 1721 ); 1722 } 1723 return packFloat128( zSign, 0x7FFF, 0, 0 ); 1724 } 1725 if ( zExp < 0 ) { 1726 isTiny = 1727 ( float_detect_tininess == float_tininess_before_rounding ) 1728 || ( zExp < -1 ) 1729 || ! increment 1730 || lt128( 1731 zSig0, 1732 zSig1, 1733 LIT64( 0x0001FFFFFFFFFFFF ), 1734 LIT64( 0xFFFFFFFFFFFFFFFF ) 1735 ); 1736 shift128ExtraRightJamming( 1737 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 1738 zExp = 0; 1739 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 1740 if ( roundNearestEven ) { 1741 increment = ( (si6b) zSig2 < 0 ); 1742 } 1743 else { 1744 if ( zSign ) { 1745 increment = ( roundingMode == float_round_down ) && zSig2; 1746 } 1747 else { 1748 increment = ( roundingMode == float_round_up ) && zSig2; 1749 } 1750 } 1751 } 1752 } 1753 if ( zSig2 ) float_exception_flags |= float_flag_inexact; 1754 if ( increment ) { 1755 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 1756 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 1757 } 1758 else { 1759 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 1760 } 1761 return packFloat128( zSign, zExp, zSig0, zSig1 ); 1762 1763} 1764 1765/*---------------------------------------------------------------------------- 1766| Takes an abstract floating-point value having sign `zSign', exponent `zExp', 1767| and significand formed by the concatenation of `zSig0' and `zSig1', and 1768| returns the proper quadruple-precision floating-point value corresponding 1769| to the abstract input. This routine is just like `roundAndPackFloat128' 1770| except that the input significand has fewer bits and does not have to be 1771| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 1772| point exponent. 1773*----------------------------------------------------------------------------*/ 1774 1775LOCALFUNC float128 1776 normalizeRoundAndPackFloat128( 1777 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 ) 1778{ 1779 si3r shiftCount; 1780 ui6b zSig2; 1781 1782 if ( zSig0 == 0 ) { 1783 zSig0 = zSig1; 1784 zSig1 = 0; 1785 zExp -= 64; 1786 } 1787 shiftCount = countLeadingZeros64( zSig0 ) - 15; 1788 if ( 0 <= shiftCount ) { 1789 zSig2 = 0; 1790 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1791 } 1792 else { 1793 shift128ExtraRightJamming( 1794 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 1795 } 1796 zExp -= shiftCount; 1797 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 1798 1799} 1800 1801#endif 1802 1803/*---------------------------------------------------------------------------- 1804| Returns the result of converting the 32-bit two's complement integer `a' 1805| to the extended double-precision floating-point format. The conversion 1806| is performed according to the IEC/IEEE Standard for Binary Floating-Point 1807| Arithmetic. 1808*----------------------------------------------------------------------------*/ 1809 1810LOCALFUNC floatx80 int32_to_floatx80( si5r a ) 1811{ 1812 flag zSign; 1813 ui5r absA; 1814 si3r shiftCount; 1815 ui6b zSig; 1816 1817 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1818 zSign = ( a < 0 ); 1819 absA = zSign ? - a : a; 1820 shiftCount = countLeadingZeros32( absA ) + 32; 1821 zSig = absA; 1822 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 1823 1824} 1825 1826/*---------------------------------------------------------------------------- 1827| Returns the result of converting the extended double-precision floating- 1828| point value `a' to the 32-bit two's complement integer format. The 1829| conversion is performed according to the IEC/IEEE Standard for Binary 1830| Floating-Point Arithmetic---which means in particular that the conversion 1831| is rounded according to the current rounding mode. If `a' is a NaN, the 1832| largest positive integer is returned. Otherwise, if the conversion 1833| overflows, the largest integer with the same sign as `a' is returned. 1834*----------------------------------------------------------------------------*/ 1835 1836LOCALFUNC si5r floatx80_to_int32( floatx80 a ) 1837{ 1838 flag aSign; 1839 si5r aExp, shiftCount; 1840 ui6b aSig; 1841 1842 aSig = extractFloatx80Frac( a ); 1843 aExp = extractFloatx80Exp( a ); 1844 aSign = extractFloatx80Sign( a ); 1845 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0; 1846 shiftCount = 0x4037 - aExp; 1847 if ( shiftCount <= 0 ) shiftCount = 1; 1848 shift64RightJamming( aSig, shiftCount, &aSig ); 1849 return roundAndPackInt32( aSign, aSig ); 1850 1851} 1852 1853/*---------------------------------------------------------------------------- 1854| Returns the result of converting the extended double-precision floating- 1855| point value `a' to the 32-bit two's complement integer format. The 1856| conversion is performed according to the IEC/IEEE Standard for Binary 1857| Floating-Point Arithmetic, except that the conversion is always rounded 1858| toward zero. If `a' is a NaN, the largest positive integer is returned. 1859| Otherwise, if the conversion overflows, the largest integer with the same 1860| sign as `a' is returned. 1861*----------------------------------------------------------------------------*/ 1862 1863#if cIncludeFPUUnused 1864LOCALFUNC si5r floatx80_to_int32_round_to_zero( floatx80 a ) 1865{ 1866 flag aSign; 1867 si5r aExp, shiftCount; 1868 ui6b aSig, savedASig; 1869 si5r z; 1870 1871 aSig = extractFloatx80Frac( a ); 1872 aExp = extractFloatx80Exp( a ); 1873 aSign = extractFloatx80Sign( a ); 1874 if ( 0x401E < aExp ) { 1875 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0; 1876 goto invalid; 1877 } 1878 else if ( aExp < 0x3FFF ) { 1879 if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 1880 return 0; 1881 } 1882 shiftCount = 0x403E - aExp; 1883 savedASig = aSig; 1884 aSig >>= shiftCount; 1885 z = aSig; 1886 if ( aSign ) z = - z; 1887 if ( ( z < 0 ) ^ aSign ) { 1888 invalid: 1889 float_raise( float_flag_invalid ); 1890 return aSign ? (si5b) 0x80000000 : 0x7FFFFFFF; 1891 } 1892 if ( ( aSig<<shiftCount ) != savedASig ) { 1893 float_exception_flags |= float_flag_inexact; 1894 } 1895 return z; 1896 1897} 1898#endif 1899 1900#ifdef FLOAT128 1901 1902/*---------------------------------------------------------------------------- 1903| Returns the result of converting the extended double-precision floating- 1904| point value `a' to the quadruple-precision floating-point format. The 1905| conversion is performed according to the IEC/IEEE Standard for Binary 1906| Floating-Point Arithmetic. 1907*----------------------------------------------------------------------------*/ 1908 1909LOCALFUNC float128 floatx80_to_float128( floatx80 a ) 1910{ 1911 flag aSign; 1912 si4r aExp; 1913 ui6b aSig, zSig0, zSig1; 1914 1915 aSig = extractFloatx80Frac( a ); 1916 aExp = extractFloatx80Exp( a ); 1917 aSign = extractFloatx80Sign( a ); 1918 if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) { 1919 return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); 1920 } 1921 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 1922 return packFloat128( aSign, aExp, zSig0, zSig1 ); 1923 1924} 1925 1926#endif 1927 1928/*---------------------------------------------------------------------------- 1929| Rounds the extended double-precision floating-point value `a' to an integer, 1930| and returns the result as an extended quadruple-precision floating-point 1931| value. The operation is performed according to the IEC/IEEE Standard for 1932| Binary Floating-Point Arithmetic. 1933*----------------------------------------------------------------------------*/ 1934 1935LOCALFUNC floatx80 floatx80_round_to_int( floatx80 a ) 1936{ 1937 flag aSign; 1938 si5r aExp; 1939 ui6b lastBitMask, roundBitsMask; 1940 si3r roundingMode; 1941 floatx80 z; 1942 1943 aExp = extractFloatx80Exp( a ); 1944 if ( 0x403E <= aExp ) { 1945 if ( ( aExp == 0x7FFF ) && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) { 1946 return propagateFloatx80NaN( a, a ); 1947 } 1948 return a; 1949 } 1950 if ( aExp < 0x3FFF ) { 1951 if ( ( aExp == 0 ) 1952 && ( (ui6b) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 1953 return a; 1954 } 1955 float_exception_flags |= float_flag_inexact; 1956 aSign = extractFloatx80Sign( a ); 1957 switch ( float_rounding_mode ) { 1958 case float_round_nearest_even: 1959 if ( ( aExp == 0x3FFE ) && (ui6b) ( extractFloatx80Frac( a )<<1 ) 1960 ) { 1961 return 1962 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 1963 } 1964 break; 1965 case float_round_down: 1966 return 1967 aSign ? 1968 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 1969 : packFloatx80( 0, 0, 0 ); 1970 case float_round_up: 1971 return 1972 aSign ? packFloatx80( 1, 0, 0 ) 1973 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 1974 } 1975 return packFloatx80( aSign, 0, 0 ); 1976 } 1977 lastBitMask = 1; 1978 lastBitMask <<= 0x403E - aExp; 1979 roundBitsMask = lastBitMask - 1; 1980 z = a; 1981 roundingMode = float_rounding_mode; 1982 if ( roundingMode == float_round_nearest_even ) { 1983 z.low += lastBitMask>>1; 1984 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 1985 } 1986 else if ( roundingMode != float_round_to_zero ) { 1987 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 1988 z.low += roundBitsMask; 1989 } 1990 } 1991 z.low &= ~ roundBitsMask; 1992 if ( z.low == 0 ) { 1993 ++z.high; 1994 z.low = LIT64( 0x8000000000000000 ); 1995 } 1996 if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; 1997 return z; 1998 1999} 2000 2001/*---------------------------------------------------------------------------- 2002| Returns the result of adding the absolute values of the extended double- 2003| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is 2004| negated before being returned. `zSign' is ignored if the result is a NaN. 2005| The addition is performed according to the IEC/IEEE Standard for Binary 2006| Floating-Point Arithmetic. 2007*----------------------------------------------------------------------------*/ 2008 2009LOCALFUNC floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 2010{ 2011 si5r aExp, bExp, zExp; 2012 ui6b aSig, bSig, zSig0, zSig1; 2013 si5r expDiff; 2014 2015 aSig = extractFloatx80Frac( a ); 2016 aExp = extractFloatx80Exp( a ); 2017 bSig = extractFloatx80Frac( b ); 2018 bExp = extractFloatx80Exp( b ); 2019 expDiff = aExp - bExp; 2020 if ( 0 < expDiff ) { 2021 if ( aExp == 0x7FFF ) { 2022 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2023 return a; 2024 } 2025 if ( bExp == 0 ) --expDiff; 2026 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 2027 zExp = aExp; 2028 } 2029 else if ( expDiff < 0 ) { 2030 if ( bExp == 0x7FFF ) { 2031 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2032 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2033 } 2034 if ( aExp == 0 ) ++expDiff; 2035 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 2036 zExp = bExp; 2037 } 2038 else { 2039 if ( aExp == 0x7FFF ) { 2040 if ( (ui6b) ( ( aSig | bSig )<<1 ) ) { 2041 return propagateFloatx80NaN( a, b ); 2042 } 2043 return a; 2044 } 2045 zSig1 = 0; 2046 zSig0 = aSig + bSig; 2047 if ( aExp == 0 ) { 2048 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 2049 goto roundAndPack; 2050 } 2051 zExp = aExp; 2052 goto shiftRight1; 2053 } 2054 zSig0 = aSig + bSig; 2055 if ( (si6b) zSig0 < 0 ) goto roundAndPack; 2056 shiftRight1: 2057 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 2058 zSig0 |= LIT64( 0x8000000000000000 ); 2059 ++zExp; 2060 roundAndPack: 2061 return 2062 roundAndPackFloatx80( 2063 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2064 2065} 2066 2067/*---------------------------------------------------------------------------- 2068| Returns the result of subtracting the absolute values of the extended 2069| double-precision floating-point values `a' and `b'. If `zSign' is 1, the 2070| difference is negated before being returned. `zSign' is ignored if the 2071| result is a NaN. The subtraction is performed according to the IEC/IEEE 2072| Standard for Binary Floating-Point Arithmetic. 2073*----------------------------------------------------------------------------*/ 2074 2075LOCALFUNC floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 2076{ 2077 si5r aExp, bExp, zExp; 2078 ui6b aSig, bSig, zSig0, zSig1; 2079 si5r expDiff; 2080 floatx80 z; 2081 2082 aSig = extractFloatx80Frac( a ); 2083 aExp = extractFloatx80Exp( a ); 2084 bSig = extractFloatx80Frac( b ); 2085 bExp = extractFloatx80Exp( b ); 2086 expDiff = aExp - bExp; 2087 if ( 0 < expDiff ) goto aExpBigger; 2088 if ( expDiff < 0 ) goto bExpBigger; 2089 if ( aExp == 0x7FFF ) { 2090 if ( (ui6b) ( ( aSig | bSig )<<1 ) ) { 2091 return propagateFloatx80NaN( a, b ); 2092 } 2093 float_raise( float_flag_invalid ); 2094 z.low = floatx80_default_nan_low; 2095 z.high = floatx80_default_nan_high; 2096 return z; 2097 } 2098 if ( aExp == 0 ) { 2099 aExp = 1; 2100 bExp = 1; 2101 } 2102 zSig1 = 0; 2103 if ( bSig < aSig ) goto aBigger; 2104 if ( aSig < bSig ) goto bBigger; 2105 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 2106 bExpBigger: 2107 if ( bExp == 0x7FFF ) { 2108 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2109 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2110 } 2111 if ( aExp == 0 ) ++expDiff; 2112 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 2113 bBigger: 2114 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 2115 zExp = bExp; 2116 zSign ^= 1; 2117 goto normalizeRoundAndPack; 2118 aExpBigger: 2119 if ( aExp == 0x7FFF ) { 2120 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2121 return a; 2122 } 2123 if ( bExp == 0 ) --expDiff; 2124 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 2125 aBigger: 2126 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 2127 zExp = aExp; 2128 normalizeRoundAndPack: 2129 return 2130 normalizeRoundAndPackFloatx80( 2131 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2132 2133} 2134 2135/*---------------------------------------------------------------------------- 2136| Returns the result of adding the extended double-precision floating-point 2137| values `a' and `b'. The operation is performed according to the IEC/IEEE 2138| Standard for Binary Floating-Point Arithmetic. 2139*----------------------------------------------------------------------------*/ 2140 2141LOCALFUNC floatx80 floatx80_add( floatx80 a, floatx80 b ) 2142{ 2143 flag aSign, bSign; 2144 2145 aSign = extractFloatx80Sign( a ); 2146 bSign = extractFloatx80Sign( b ); 2147 if ( aSign == bSign ) { 2148 return addFloatx80Sigs( a, b, aSign ); 2149 } 2150 else { 2151 return subFloatx80Sigs( a, b, aSign ); 2152 } 2153 2154} 2155 2156/*---------------------------------------------------------------------------- 2157| Returns the result of subtracting the extended double-precision floating- 2158| point values `a' and `b'. The operation is performed according to the 2159| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2160*----------------------------------------------------------------------------*/ 2161 2162LOCALFUNC floatx80 floatx80_sub( floatx80 a, floatx80 b ) 2163{ 2164 flag aSign, bSign; 2165 2166 aSign = extractFloatx80Sign( a ); 2167 bSign = extractFloatx80Sign( b ); 2168 if ( aSign == bSign ) { 2169 return subFloatx80Sigs( a, b, aSign ); 2170 } 2171 else { 2172 return addFloatx80Sigs( a, b, aSign ); 2173 } 2174 2175} 2176 2177/*---------------------------------------------------------------------------- 2178| Returns the result of multiplying the extended double-precision floating- 2179| point values `a' and `b'. The operation is performed according to the 2180| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2181*----------------------------------------------------------------------------*/ 2182 2183LOCALFUNC floatx80 floatx80_mul( floatx80 a, floatx80 b ) 2184{ 2185 flag aSign, bSign, zSign; 2186 si5r aExp, bExp, zExp; 2187 ui6b aSig, bSig, zSig0, zSig1; 2188 floatx80 z; 2189 2190 aSig = extractFloatx80Frac( a ); 2191 aExp = extractFloatx80Exp( a ); 2192 aSign = extractFloatx80Sign( a ); 2193 bSig = extractFloatx80Frac( b ); 2194 bExp = extractFloatx80Exp( b ); 2195 bSign = extractFloatx80Sign( b ); 2196 zSign = aSign ^ bSign; 2197 if ( aExp == 0x7FFF ) { 2198 if ( (ui6b) ( aSig<<1 ) 2199 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) { 2200 return propagateFloatx80NaN( a, b ); 2201 } 2202 if ( ( bExp | bSig ) == 0 ) goto invalid; 2203 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2204 } 2205 if ( bExp == 0x7FFF ) { 2206 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2207 if ( ( aExp | aSig ) == 0 ) { 2208 invalid: 2209 float_raise( float_flag_invalid ); 2210 z.low = floatx80_default_nan_low; 2211 z.high = floatx80_default_nan_high; 2212 return z; 2213 } 2214 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2215 } 2216 if ( aExp == 0 ) { 2217 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 2218 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 2219 } 2220 if ( bExp == 0 ) { 2221 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 2222 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 2223 } 2224 zExp = aExp + bExp - 0x3FFE; 2225 mul64To128( aSig, bSig, &zSig0, &zSig1 ); 2226 if ( 0 < (si6b) zSig0 ) { 2227 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 2228 --zExp; 2229 } 2230 return 2231 roundAndPackFloatx80( 2232 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2233 2234} 2235 2236/*---------------------------------------------------------------------------- 2237| Returns the result of dividing the extended double-precision floating-point 2238| value `a' by the corresponding value `b'. The operation is performed 2239| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2240*----------------------------------------------------------------------------*/ 2241 2242LOCALFUNC floatx80 floatx80_div( floatx80 a, floatx80 b ) 2243{ 2244 flag aSign, bSign, zSign; 2245 si5r aExp, bExp, zExp; 2246 ui6b aSig, bSig, zSig0, zSig1; 2247 ui6b rem0, rem1, rem2, term0, term1, term2; 2248 floatx80 z; 2249 2250 aSig = extractFloatx80Frac( a ); 2251 aExp = extractFloatx80Exp( a ); 2252 aSign = extractFloatx80Sign( a ); 2253 bSig = extractFloatx80Frac( b ); 2254 bExp = extractFloatx80Exp( b ); 2255 bSign = extractFloatx80Sign( b ); 2256 zSign = aSign ^ bSign; 2257 if ( aExp == 0x7FFF ) { 2258 if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2259 if ( bExp == 0x7FFF ) { 2260 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2261 goto invalid; 2262 } 2263 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2264 } 2265 if ( bExp == 0x7FFF ) { 2266 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2267 return packFloatx80( zSign, 0, 0 ); 2268 } 2269 if ( bExp == 0 ) { 2270 if ( bSig == 0 ) { 2271 if ( ( aExp | aSig ) == 0 ) { 2272 invalid: 2273 float_raise( float_flag_invalid ); 2274 z.low = floatx80_default_nan_low; 2275 z.high = floatx80_default_nan_high; 2276 return z; 2277 } 2278 float_raise( float_flag_divbyzero ); 2279 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2280 } 2281 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 2282 } 2283 if ( aExp == 0 ) { 2284 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 2285 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 2286 } 2287 if ( aSig == 0 ) { 2288 /* 2289 added by pcp. this invalid input seems to 2290 cause hang in estimateDiv128To64. should 2291 validate inputs generally. 2292 */ 2293 return packFloatx80( zSign, 0, 0 ); 2294 } 2295 if ( (si6b) bSig >= 0 ) { 2296 /* 2297 added by pcp. another check. 2298 invalid input, most significant bit should be set? 2299 */ 2300 return packFloatx80( zSign, 0, 0 ); 2301 } 2302 zExp = aExp - bExp + 0x3FFE; 2303 rem1 = 0; 2304 if ( bSig <= aSig ) { 2305 shift128Right( aSig, 0, 1, &aSig, &rem1 ); 2306 ++zExp; 2307 } 2308 zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 2309 mul64To128( bSig, zSig0, &term0, &term1 ); 2310 sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 2311 while ( (si6b) rem0 < 0 ) { 2312 --zSig0; 2313 add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 2314 } 2315 zSig1 = estimateDiv128To64( rem1, 0, bSig ); 2316 if ( (ui6b) ( zSig1<<1 ) <= 8 ) { 2317 mul64To128( bSig, zSig1, &term1, &term2 ); 2318 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 2319 while ( (si6b) rem1 < 0 ) { 2320 --zSig1; 2321 add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 2322 } 2323 zSig1 |= ( ( rem1 | rem2 ) != 0 ); 2324 } 2325 return 2326 roundAndPackFloatx80( 2327 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 2328 2329} 2330 2331/*---------------------------------------------------------------------------- 2332| Returns the remainder of the extended double-precision floating-point value 2333| `a' with respect to the corresponding value `b'. The operation is performed 2334| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2335*----------------------------------------------------------------------------*/ 2336 2337LOCALFUNC floatx80 floatx80_rem( floatx80 a, floatx80 b ) 2338{ 2339 flag aSign, /* bSign, */ zSign; 2340 si5r aExp, bExp, expDiff; 2341 ui6b aSig0, aSig1, bSig; 2342 ui6b q, term0, term1, alternateASig0, alternateASig1; 2343 floatx80 z; 2344 2345 aSig0 = extractFloatx80Frac( a ); 2346 aExp = extractFloatx80Exp( a ); 2347 aSign = extractFloatx80Sign( a ); 2348 bSig = extractFloatx80Frac( b ); 2349 bExp = extractFloatx80Exp( b ); 2350 /* 2351 not used. should it be? 2352 bSign = extractFloatx80Sign( b ); 2353 */ 2354 if ( aExp == 0x7FFF ) { 2355 if ( (ui6b) ( aSig0<<1 ) 2356 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) { 2357 return propagateFloatx80NaN( a, b ); 2358 } 2359 goto invalid; 2360 } 2361 if ( bExp == 0x7FFF ) { 2362 if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 2363 return a; 2364 } 2365 if ( bExp == 0 ) { 2366 if ( bSig == 0 ) { 2367 invalid: 2368 float_raise( float_flag_invalid ); 2369 z.low = floatx80_default_nan_low; 2370 z.high = floatx80_default_nan_high; 2371 return z; 2372 } 2373 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 2374 } 2375 if ( aExp == 0 ) { 2376 if ( (ui6b) ( aSig0<<1 ) == 0 ) return a; 2377 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 2378 } 2379 bSig |= LIT64( 0x8000000000000000 ); 2380 zSign = aSign; 2381 expDiff = aExp - bExp; 2382 aSig1 = 0; 2383 if ( expDiff < 0 ) { 2384 if ( expDiff < -1 ) return a; 2385 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 2386 expDiff = 0; 2387 } 2388 q = ( bSig <= aSig0 ); 2389 if ( q ) aSig0 -= bSig; 2390 expDiff -= 64; 2391 while ( 0 < expDiff ) { 2392 q = estimateDiv128To64( aSig0, aSig1, bSig ); 2393 q = ( 2 < q ) ? q - 2 : 0; 2394 mul64To128( bSig, q, &term0, &term1 ); 2395 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 2396 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 2397 expDiff -= 62; 2398 } 2399 expDiff += 64; 2400 if ( 0 < expDiff ) { 2401 q = estimateDiv128To64( aSig0, aSig1, bSig ); 2402 q = ( 2 < q ) ? q - 2 : 0; 2403 q >>= 64 - expDiff; 2404 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 2405 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 2406 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 2407 while ( le128( term0, term1, aSig0, aSig1 ) ) { 2408 ++q; 2409 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 2410 } 2411 } 2412 else { 2413 term1 = 0; 2414 term0 = bSig; 2415 } 2416 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 2417 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 2418 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 2419 && ( q & 1 ) ) 2420 ) { 2421 aSig0 = alternateASig0; 2422 aSig1 = alternateASig1; 2423 zSign = ! zSign; 2424 } 2425 return 2426 normalizeRoundAndPackFloatx80( 2427 80, zSign, bExp + expDiff, aSig0, aSig1 ); 2428 2429} 2430 2431/*---------------------------------------------------------------------------- 2432| Returns the square root of the extended double-precision floating-point 2433| value `a'. The operation is performed according to the IEC/IEEE Standard 2434| for Binary Floating-Point Arithmetic. 2435*----------------------------------------------------------------------------*/ 2436 2437LOCALFUNC floatx80 floatx80_sqrt( floatx80 a ) 2438{ 2439 flag aSign; 2440 si5r aExp, zExp; 2441 ui6b aSig0, aSig1, zSig0, zSig1, doubleZSig0; 2442 ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3; 2443 floatx80 z; 2444 2445 aSig0 = extractFloatx80Frac( a ); 2446 aExp = extractFloatx80Exp( a ); 2447 aSign = extractFloatx80Sign( a ); 2448 if ( aExp == 0x7FFF ) { 2449 if ( (ui6b) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 2450 if ( ! aSign ) return a; 2451 goto invalid; 2452 } 2453 if ( aSign ) { 2454 if ( ( aExp | aSig0 ) == 0 ) return a; 2455 invalid: 2456 float_raise( float_flag_invalid ); 2457 z.low = floatx80_default_nan_low; 2458 z.high = floatx80_default_nan_high; 2459 return z; 2460 } 2461 if ( aExp == 0 ) { 2462 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 2463 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 2464 } 2465 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 2466 zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 2467 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); 2468 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 2469 doubleZSig0 = zSig0<<1; 2470 mul64To128( zSig0, zSig0, &term0, &term1 ); 2471 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 2472 while ( (si6b) rem0 < 0 ) { 2473 --zSig0; 2474 doubleZSig0 -= 2; 2475 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 2476 } 2477 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 2478 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { 2479 if ( zSig1 == 0 ) zSig1 = 1; 2480 mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 2481 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 2482 mul64To128( zSig1, zSig1, &term2, &term3 ); 2483 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 2484 while ( (si6b) rem1 < 0 ) { 2485 --zSig1; 2486 shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 2487 term3 |= 1; 2488 term2 |= doubleZSig0; 2489 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 2490 } 2491 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 2492 } 2493 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); 2494 zSig0 |= doubleZSig0; 2495 return 2496 roundAndPackFloatx80( 2497 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 2498 2499} 2500 2501/*---------------------------------------------------------------------------- 2502| Returns 1 if the extended double-precision floating-point value `a' is 2503| equal to the corresponding value `b', and 0 otherwise. The comparison is 2504| performed according to the IEC/IEEE Standard for Binary Floating-Point 2505| Arithmetic. 2506*----------------------------------------------------------------------------*/ 2507 2508#if cIncludeFPUUnused 2509LOCALFUNC flag floatx80_eq( floatx80 a, floatx80 b ) 2510{ 2511 2512 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2513 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2514 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2515 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2516 ) { 2517 if ( floatx80_is_signaling_nan( a ) 2518 || floatx80_is_signaling_nan( b ) ) { 2519 float_raise( float_flag_invalid ); 2520 } 2521 return 0; 2522 } 2523 return 2524 ( a.low == b.low ) 2525 && ( ( a.high == b.high ) 2526 || ( ( a.low == 0 ) 2527 && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) ) 2528 ); 2529 2530} 2531#endif 2532 2533/*---------------------------------------------------------------------------- 2534| Returns 1 if the extended double-precision floating-point value `a' is 2535| less than or equal to the corresponding value `b', and 0 otherwise. The 2536| comparison is performed according to the IEC/IEEE Standard for Binary 2537| Floating-Point Arithmetic. 2538*----------------------------------------------------------------------------*/ 2539 2540#if cIncludeFPUUnused 2541LOCALFUNC flag floatx80_le( floatx80 a, floatx80 b ) 2542{ 2543 flag aSign, bSign; 2544 2545 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2546 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2547 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2548 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2549 ) { 2550 float_raise( float_flag_invalid ); 2551 return 0; 2552 } 2553 aSign = extractFloatx80Sign( a ); 2554 bSign = extractFloatx80Sign( b ); 2555 if ( aSign != bSign ) { 2556 return 2557 aSign 2558 || ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 2559 == 0 ); 2560 } 2561 return 2562 aSign ? le128( b.high, b.low, a.high, a.low ) 2563 : le128( a.high, a.low, b.high, b.low ); 2564 2565} 2566#endif 2567 2568/*---------------------------------------------------------------------------- 2569| Returns 1 if the extended double-precision floating-point value `a' is 2570| less than the corresponding value `b', and 0 otherwise. The comparison 2571| is performed according to the IEC/IEEE Standard for Binary Floating-Point 2572| Arithmetic. 2573*----------------------------------------------------------------------------*/ 2574 2575#if cIncludeFPUUnused 2576LOCALFUNC flag floatx80_lt( floatx80 a, floatx80 b ) 2577{ 2578 flag aSign, bSign; 2579 2580 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2581 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2582 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2583 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2584 ) { 2585 float_raise( float_flag_invalid ); 2586 return 0; 2587 } 2588 aSign = extractFloatx80Sign( a ); 2589 bSign = extractFloatx80Sign( b ); 2590 if ( aSign != bSign ) { 2591 return 2592 aSign 2593 && ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 2594 != 0 ); 2595 } 2596 return 2597 aSign ? lt128( b.high, b.low, a.high, a.low ) 2598 : lt128( a.high, a.low, b.high, b.low ); 2599 2600} 2601#endif 2602 2603/*---------------------------------------------------------------------------- 2604| Returns 1 if the extended double-precision floating-point value `a' is equal 2605| to the corresponding value `b', and 0 otherwise. The invalid exception is 2606| raised if either operand is a NaN. Otherwise, the comparison is performed 2607| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2608*----------------------------------------------------------------------------*/ 2609 2610#if cIncludeFPUUnused 2611LOCALFUNC flag floatx80_eq_signaling( floatx80 a, floatx80 b ) 2612{ 2613 2614 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2615 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2616 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2617 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2618 ) { 2619 float_raise( float_flag_invalid ); 2620 return 0; 2621 } 2622 return 2623 ( a.low == b.low ) 2624 && ( ( a.high == b.high ) 2625 || ( ( a.low == 0 ) 2626 && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) ) 2627 ); 2628 2629} 2630#endif 2631 2632/*---------------------------------------------------------------------------- 2633| Returns 1 if the extended double-precision floating-point value `a' is less 2634| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 2635| do not cause an exception. Otherwise, the comparison is performed according 2636| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2637*----------------------------------------------------------------------------*/ 2638 2639#if cIncludeFPUUnused 2640LOCALFUNC flag floatx80_le_quiet( floatx80 a, floatx80 b ) 2641{ 2642 flag aSign, bSign; 2643 2644 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2645 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2646 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2647 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2648 ) { 2649 if ( floatx80_is_signaling_nan( a ) 2650 || floatx80_is_signaling_nan( b ) ) { 2651 float_raise( float_flag_invalid ); 2652 } 2653 return 0; 2654 } 2655 aSign = extractFloatx80Sign( a ); 2656 bSign = extractFloatx80Sign( b ); 2657 if ( aSign != bSign ) { 2658 return 2659 aSign 2660 || ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 2661 == 0 ); 2662 } 2663 return 2664 aSign ? le128( b.high, b.low, a.high, a.low ) 2665 : le128( a.high, a.low, b.high, b.low ); 2666 2667} 2668#endif 2669 2670/*---------------------------------------------------------------------------- 2671| Returns 1 if the extended double-precision floating-point value `a' is less 2672| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 2673| an exception. Otherwise, the comparison is performed according to the 2674| IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2675*----------------------------------------------------------------------------*/ 2676 2677#if cIncludeFPUUnused 2678LOCALFUNC flag floatx80_lt_quiet( floatx80 a, floatx80 b ) 2679{ 2680 flag aSign, bSign; 2681 2682 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 2683 && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) 2684 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 2685 && (ui6b) ( extractFloatx80Frac( b )<<1 ) ) 2686 ) { 2687 if ( floatx80_is_signaling_nan( a ) 2688 || floatx80_is_signaling_nan( b ) ) { 2689 float_raise( float_flag_invalid ); 2690 } 2691 return 0; 2692 } 2693 aSign = extractFloatx80Sign( a ); 2694 bSign = extractFloatx80Sign( b ); 2695 if ( aSign != bSign ) { 2696 return 2697 aSign 2698 && ( ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 2699 != 0 ); 2700 } 2701 return 2702 aSign ? lt128( b.high, b.low, a.high, a.low ) 2703 : lt128( a.high, a.low, b.high, b.low ); 2704 2705} 2706#endif 2707 2708#ifdef FLOAT128 2709 2710/*---------------------------------------------------------------------------- 2711| Returns the result of converting the quadruple-precision floating-point 2712| value `a' to the extended double-precision floating-point format. The 2713| conversion is performed according to the IEC/IEEE Standard for Binary 2714| Floating-Point Arithmetic. 2715*----------------------------------------------------------------------------*/ 2716 2717LOCALFUNC floatx80 float128_to_floatx80( float128 a ) 2718{ 2719 flag aSign; 2720 si5r aExp; 2721 ui6b aSig0, aSig1; 2722 2723 aSig1 = extractFloat128Frac1( a ); 2724 aSig0 = extractFloat128Frac0( a ); 2725 aExp = extractFloat128Exp( a ); 2726 aSign = extractFloat128Sign( a ); 2727 if ( aExp == 0x7FFF ) { 2728 if ( aSig0 | aSig1 ) { 2729 return commonNaNToFloatx80( float128ToCommonNaN( a ) ); 2730 } 2731 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2732 } 2733 if ( aExp == 0 ) { 2734 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 2735 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2736 } 2737 else { 2738 aSig0 |= LIT64( 0x0001000000000000 ); 2739 } 2740 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 2741 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); 2742 2743} 2744 2745/*---------------------------------------------------------------------------- 2746| Returns the result of adding the absolute values of the quadruple-precision 2747| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 2748| before being returned. `zSign' is ignored if the result is a NaN. 2749| The addition is performed according to the IEC/IEEE Standard for Binary 2750| Floating-Point Arithmetic. 2751*----------------------------------------------------------------------------*/ 2752 2753LOCALFUNC float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 2754{ 2755 si5r aExp, bExp, zExp; 2756 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 2757 si5r expDiff; 2758 2759 aSig1 = extractFloat128Frac1( a ); 2760 aSig0 = extractFloat128Frac0( a ); 2761 aExp = extractFloat128Exp( a ); 2762 bSig1 = extractFloat128Frac1( b ); 2763 bSig0 = extractFloat128Frac0( b ); 2764 bExp = extractFloat128Exp( b ); 2765 expDiff = aExp - bExp; 2766 if ( 0 < expDiff ) { 2767 if ( aExp == 0x7FFF ) { 2768 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 2769 return a; 2770 } 2771 if ( bExp == 0 ) { 2772 --expDiff; 2773 } 2774 else { 2775 bSig0 |= LIT64( 0x0001000000000000 ); 2776 } 2777 shift128ExtraRightJamming( 2778 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 2779 zExp = aExp; 2780 } 2781 else if ( expDiff < 0 ) { 2782 if ( bExp == 0x7FFF ) { 2783 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 2784 return packFloat128( zSign, 0x7FFF, 0, 0 ); 2785 } 2786 if ( aExp == 0 ) { 2787 ++expDiff; 2788 } 2789 else { 2790 aSig0 |= LIT64( 0x0001000000000000 ); 2791 } 2792 shift128ExtraRightJamming( 2793 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 2794 zExp = bExp; 2795 } 2796 else { 2797 if ( aExp == 0x7FFF ) { 2798 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 2799 return propagateFloat128NaN( a, b ); 2800 } 2801 return a; 2802 } 2803 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 2804 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); 2805 zSig2 = 0; 2806 zSig0 |= LIT64( 0x0002000000000000 ); 2807 zExp = aExp; 2808 goto shiftRight1; 2809 } 2810 aSig0 |= LIT64( 0x0001000000000000 ); 2811 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 2812 --zExp; 2813 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; 2814 ++zExp; 2815 shiftRight1: 2816 shift128ExtraRightJamming( 2817 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 2818 roundAndPack: 2819 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 2820 2821} 2822 2823/*---------------------------------------------------------------------------- 2824| Returns the result of subtracting the absolute values of the quadruple- 2825| precision floating-point values `a' and `b'. If `zSign' is 1, the 2826| difference is negated before being returned. `zSign' is ignored if the 2827| result is a NaN. The subtraction is performed according to the IEC/IEEE 2828| Standard for Binary Floating-Point Arithmetic. 2829*----------------------------------------------------------------------------*/ 2830 2831LOCALFUNC float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) 2832{ 2833 si5r aExp, bExp, zExp; 2834 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 2835 si5r expDiff; 2836 float128 z; 2837 2838 aSig1 = extractFloat128Frac1( a ); 2839 aSig0 = extractFloat128Frac0( a ); 2840 aExp = extractFloat128Exp( a ); 2841 bSig1 = extractFloat128Frac1( b ); 2842 bSig0 = extractFloat128Frac0( b ); 2843 bExp = extractFloat128Exp( b ); 2844 expDiff = aExp - bExp; 2845 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 2846 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); 2847 if ( 0 < expDiff ) goto aExpBigger; 2848 if ( expDiff < 0 ) goto bExpBigger; 2849 if ( aExp == 0x7FFF ) { 2850 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 2851 return propagateFloat128NaN( a, b ); 2852 } 2853 float_raise( float_flag_invalid ); 2854 z.low = float128_default_nan_low; 2855 z.high = float128_default_nan_high; 2856 return z; 2857 } 2858 if ( aExp == 0 ) { 2859 aExp = 1; 2860 bExp = 1; 2861 } 2862 if ( bSig0 < aSig0 ) goto aBigger; 2863 if ( aSig0 < bSig0 ) goto bBigger; 2864 if ( bSig1 < aSig1 ) goto aBigger; 2865 if ( aSig1 < bSig1 ) goto bBigger; 2866 return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); 2867 bExpBigger: 2868 if ( bExp == 0x7FFF ) { 2869 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 2870 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); 2871 } 2872 if ( aExp == 0 ) { 2873 ++expDiff; 2874 } 2875 else { 2876 aSig0 |= LIT64( 0x4000000000000000 ); 2877 } 2878 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 2879 bSig0 |= LIT64( 0x4000000000000000 ); 2880 bBigger: 2881 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 2882 zExp = bExp; 2883 zSign ^= 1; 2884 goto normalizeRoundAndPack; 2885 aExpBigger: 2886 if ( aExp == 0x7FFF ) { 2887 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 2888 return a; 2889 } 2890 if ( bExp == 0 ) { 2891 --expDiff; 2892 } 2893 else { 2894 bSig0 |= LIT64( 0x4000000000000000 ); 2895 } 2896 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 2897 aSig0 |= LIT64( 0x4000000000000000 ); 2898 aBigger: 2899 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 2900 zExp = aExp; 2901 normalizeRoundAndPack: 2902 --zExp; 2903 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); 2904 2905} 2906 2907/*---------------------------------------------------------------------------- 2908| Returns the result of adding the quadruple-precision floating-point values 2909| `a' and `b'. The operation is performed according to the IEC/IEEE Standard 2910| for Binary Floating-Point Arithmetic. 2911*----------------------------------------------------------------------------*/ 2912 2913LOCALFUNC float128 float128_add( float128 a, float128 b ) 2914{ 2915 flag aSign, bSign; 2916 2917 aSign = extractFloat128Sign( a ); 2918 bSign = extractFloat128Sign( b ); 2919 if ( aSign == bSign ) { 2920 return addFloat128Sigs( a, b, aSign ); 2921 } 2922 else { 2923 return subFloat128Sigs( a, b, aSign ); 2924 } 2925 2926} 2927 2928/*---------------------------------------------------------------------------- 2929| Returns the result of subtracting the quadruple-precision floating-point 2930| values `a' and `b'. The operation is performed according to the IEC/IEEE 2931| Standard for Binary Floating-Point Arithmetic. 2932*----------------------------------------------------------------------------*/ 2933 2934LOCALFUNC float128 float128_sub( float128 a, float128 b ) 2935{ 2936 flag aSign, bSign; 2937 2938 aSign = extractFloat128Sign( a ); 2939 bSign = extractFloat128Sign( b ); 2940 if ( aSign == bSign ) { 2941 return subFloat128Sigs( a, b, aSign ); 2942 } 2943 else { 2944 return addFloat128Sigs( a, b, aSign ); 2945 } 2946 2947} 2948 2949/*---------------------------------------------------------------------------- 2950| Returns the result of multiplying the quadruple-precision floating-point 2951| values `a' and `b'. The operation is performed according to the IEC/IEEE 2952| Standard for Binary Floating-Point Arithmetic. 2953*----------------------------------------------------------------------------*/ 2954 2955LOCALFUNC float128 float128_mul( float128 a, float128 b ) 2956{ 2957 flag aSign, bSign, zSign; 2958 si5r aExp, bExp, zExp; 2959 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 2960 float128 z; 2961 2962 aSig1 = extractFloat128Frac1( a ); 2963 aSig0 = extractFloat128Frac0( a ); 2964 aExp = extractFloat128Exp( a ); 2965 aSign = extractFloat128Sign( a ); 2966 bSig1 = extractFloat128Frac1( b ); 2967 bSig0 = extractFloat128Frac0( b ); 2968 bExp = extractFloat128Exp( b ); 2969 bSign = extractFloat128Sign( b ); 2970 zSign = aSign ^ bSign; 2971 if ( aExp == 0x7FFF ) { 2972 if ( ( aSig0 | aSig1 ) 2973 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 2974 return propagateFloat128NaN( a, b ); 2975 } 2976 if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 2977 return packFloat128( zSign, 0x7FFF, 0, 0 ); 2978 } 2979 if ( bExp == 0x7FFF ) { 2980 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 2981 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 2982 invalid: 2983 float_raise( float_flag_invalid ); 2984 z.low = float128_default_nan_low; 2985 z.high = float128_default_nan_high; 2986 return z; 2987 } 2988 return packFloat128( zSign, 0x7FFF, 0, 0 ); 2989 } 2990 if ( aExp == 0 ) { 2991 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 2992 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 2993 } 2994 if ( bExp == 0 ) { 2995 if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 2996 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 2997 } 2998 zExp = aExp + bExp - 0x4000; 2999 aSig0 |= LIT64( 0x0001000000000000 ); 3000 shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); 3001 mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 3002 add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 3003 zSig2 |= ( zSig3 != 0 ); 3004 if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { 3005 shift128ExtraRightJamming( 3006 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 3007 ++zExp; 3008 } 3009 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 3010 3011} 3012 3013/*---------------------------------------------------------------------------- 3014| Returns the result of dividing the quadruple-precision floating-point value 3015| `a' by the corresponding value `b'. The operation is performed according to 3016| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3017*----------------------------------------------------------------------------*/ 3018 3019LOCALFUNC float128 float128_div( float128 a, float128 b ) 3020{ 3021 flag aSign, bSign, zSign; 3022 si5r aExp, bExp, zExp; 3023 ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 3024 ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3; 3025 float128 z; 3026 3027 aSig1 = extractFloat128Frac1( a ); 3028 aSig0 = extractFloat128Frac0( a ); 3029 aExp = extractFloat128Exp( a ); 3030 aSign = extractFloat128Sign( a ); 3031 bSig1 = extractFloat128Frac1( b ); 3032 bSig0 = extractFloat128Frac0( b ); 3033 bExp = extractFloat128Exp( b ); 3034 bSign = extractFloat128Sign( b ); 3035 zSign = aSign ^ bSign; 3036 if ( aExp == 0x7FFF ) { 3037 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 3038 if ( bExp == 0x7FFF ) { 3039 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 3040 goto invalid; 3041 } 3042 return packFloat128( zSign, 0x7FFF, 0, 0 ); 3043 } 3044 if ( bExp == 0x7FFF ) { 3045 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 3046 return packFloat128( zSign, 0, 0, 0 ); 3047 } 3048 if ( bExp == 0 ) { 3049 if ( ( bSig0 | bSig1 ) == 0 ) { 3050 if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 3051 invalid: 3052 float_raise( float_flag_invalid ); 3053 z.low = float128_default_nan_low; 3054 z.high = float128_default_nan_high; 3055 return z; 3056 } 3057 float_raise( float_flag_divbyzero ); 3058 return packFloat128( zSign, 0x7FFF, 0, 0 ); 3059 } 3060 normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 3061 } 3062 if ( aExp == 0 ) { 3063 if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 3064 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 3065 } 3066 zExp = aExp - bExp + 0x3FFD; 3067 shortShift128Left( 3068 aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); 3069 shortShift128Left( 3070 bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 3071 if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { 3072 shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 3073 ++zExp; 3074 } 3075 zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); 3076 mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 3077 sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 3078 while ( (si6b) rem0 < 0 ) { 3079 --zSig0; 3080 add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 3081 } 3082 zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); 3083 if ( ( zSig1 & 0x3FFF ) <= 4 ) { 3084 mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 3085 sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 3086 while ( (si6b) rem1 < 0 ) { 3087 --zSig1; 3088 add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 3089 } 3090 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 3091 } 3092 shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); 3093 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 3094 3095} 3096 3097#endif 3098 3099typedef unsigned int float32; 3100 3101/*---------------------------------------------------------------------------- 3102| Normalizes the subnormal single-precision floating-point value represented 3103| by the denormalized significand `aSig'. The normalized exponent and 3104| significand are stored at the locations pointed to by `zExpPtr' and 3105| `zSigPtr', respectively. 3106*----------------------------------------------------------------------------*/ 3107 3108LOCALPROC 3109 normalizeFloat32Subnormal( ui5b aSig, si4r *zExpPtr, ui5b *zSigPtr ) 3110{ 3111 si3r shiftCount; 3112 3113 shiftCount = countLeadingZeros32( aSig ) - 8; 3114 *zSigPtr = aSig<<shiftCount; 3115 *zExpPtr = 1 - shiftCount; 3116 3117} 3118 3119/*---------------------------------------------------------------------------- 3120| Returns the fraction bits of the single-precision floating-point value `a'. 3121*----------------------------------------------------------------------------*/ 3122 3123LOCALINLINEFUNC ui5b extractFloat32Frac( float32 a ) 3124{ 3125 3126 return a & 0x007FFFFF; 3127 3128} 3129 3130/*---------------------------------------------------------------------------- 3131| Returns the exponent bits of the single-precision floating-point value `a'. 3132*----------------------------------------------------------------------------*/ 3133 3134LOCALINLINEFUNC si4r extractFloat32Exp( float32 a ) 3135{ 3136 3137 return ( a>>23 ) & 0xFF; 3138 3139} 3140 3141/*---------------------------------------------------------------------------- 3142| Returns the sign bit of the single-precision floating-point value `a'. 3143*----------------------------------------------------------------------------*/ 3144 3145LOCALINLINEFUNC flag extractFloat32Sign( float32 a ) 3146{ 3147 3148 return a>>31; 3149 3150} 3151 3152/*---------------------------------------------------------------------------- 3153| Returns 1 if the single-precision floating-point value `a' is a signaling 3154| NaN; otherwise returns 0. 3155*----------------------------------------------------------------------------*/ 3156 3157LOCALFUNC flag float32_is_signaling_nan( float32 a ) 3158{ 3159 3160 return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); 3161 3162} 3163 3164/*---------------------------------------------------------------------------- 3165| Returns the result of converting the single-precision floating-point NaN 3166| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid 3167| exception is raised. 3168*----------------------------------------------------------------------------*/ 3169 3170LOCALFUNC commonNaNT float32ToCommonNaN( float32 a ) 3171{ 3172 commonNaNT z; 3173 3174 if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 3175 z.sign = a>>31; 3176 z.low = 0; 3177 z.high = ( (ui6b) a )<<41; 3178 return z; 3179 3180} 3181 3182/*---------------------------------------------------------------------------- 3183| Returns the result of converting the single-precision floating-point value 3184| `a' to the extended double-precision floating-point format. The conversion 3185| is performed according to the IEC/IEEE Standard for Binary Floating-Point 3186| Arithmetic. 3187*----------------------------------------------------------------------------*/ 3188 3189LOCALFUNC floatx80 float32_to_floatx80( float32 a ) 3190{ 3191 flag aSign; 3192 si4r aExp; 3193 ui5b aSig; 3194 3195 aSig = extractFloat32Frac( a ); 3196 aExp = extractFloat32Exp( a ); 3197 aSign = extractFloat32Sign( a ); 3198 if ( aExp == 0xFF ) { 3199 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 3200 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3201 } 3202 if ( aExp == 0 ) { 3203 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 3204 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 3205 } 3206 aSig |= 0x00800000; 3207 return packFloatx80( aSign, aExp + 0x3F80, ( (ui6b) aSig )<<40 ); 3208 3209} 3210 3211/*---------------------------------------------------------------------------- 3212| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 3213| single-precision floating-point value, returning the result. After being 3214| shifted into the proper positions, the three fields are simply added 3215| together to form the result. This means that any integer portion of `zSig' 3216| will be added into the exponent. Since a properly normalized significand 3217| will have an integer portion equal to 1, the `zExp' input should be 1 less 3218| than the desired result exponent whenever `zSig' is a complete, normalized 3219| significand. 3220*----------------------------------------------------------------------------*/ 3221 3222LOCALINLINEFUNC float32 packFloat32( flag zSign, si4r zExp, ui5b zSig ) 3223{ 3224 3225 return ( ( (ui5b) zSign )<<31 ) + ( ( (ui5b) zExp )<<23 ) + zSig; 3226 3227} 3228 3229/*---------------------------------------------------------------------------- 3230| Takes an abstract floating-point value having sign `zSign', exponent `zExp', 3231| and significand `zSig', and returns the proper single-precision floating- 3232| point value corresponding to the abstract input. Ordinarily, the abstract 3233| value is simply rounded and packed into the single-precision format, with 3234| the inexact exception raised if the abstract input cannot be represented 3235| exactly. However, if the abstract value is too large, the overflow and 3236| inexact exceptions are raised and an infinity or maximal finite value is 3237| returned. If the abstract value is too small, the input value is rounded to 3238| a subnormal number, and the underflow and inexact exceptions are raised if 3239| the abstract input cannot be represented exactly as a subnormal single- 3240| precision floating-point number. 3241| The input significand `zSig' has its binary point between bits 30 3242| and 29, which is 7 bits to the left of the usual location. This shifted 3243| significand must be normalized or smaller. If `zSig' is not normalized, 3244| `zExp' must be 0; in that case, the result returned is a subnormal number, 3245| and it must not require rounding. In the usual case that `zSig' is 3246| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 3247| The handling of underflow and overflow follows the IEC/IEEE Standard for 3248| Binary Floating-Point Arithmetic. 3249*----------------------------------------------------------------------------*/ 3250 3251LOCALFUNC float32 roundAndPackFloat32( flag zSign, si4r zExp, ui5b zSig ) 3252{ 3253 si3r roundingMode; 3254 flag roundNearestEven; 3255 si3r roundIncrement, roundBits; 3256 flag isTiny; 3257 3258 roundingMode = float_rounding_mode; 3259 roundNearestEven = ( roundingMode == float_round_nearest_even ); 3260 roundIncrement = 0x40; 3261 if ( ! roundNearestEven ) { 3262 if ( roundingMode == float_round_to_zero ) { 3263 roundIncrement = 0; 3264 } 3265 else { 3266 roundIncrement = 0x7F; 3267 if ( zSign ) { 3268 if ( roundingMode == float_round_up ) roundIncrement = 0; 3269 } 3270 else { 3271 if ( roundingMode == float_round_down ) roundIncrement = 0; 3272 } 3273 } 3274 } 3275 roundBits = zSig & 0x7F; 3276 if ( 0xFD <= (ui4b) zExp ) { 3277 if ( ( 0xFD < zExp ) 3278 || ( ( zExp == 0xFD ) 3279 && ( (si5b) ( zSig + roundIncrement ) < 0 ) ) 3280 ) { 3281 float_raise( float_flag_overflow | float_flag_inexact ); 3282 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 3283 } 3284 if ( zExp < 0 ) { 3285 isTiny = 3286 ( float_detect_tininess == float_tininess_before_rounding ) 3287 || ( zExp < -1 ) 3288 || ( zSig + roundIncrement < 0x80000000 ); 3289 shift32RightJamming( zSig, - zExp, &zSig ); 3290 zExp = 0; 3291 roundBits = zSig & 0x7F; 3292 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 3293 } 3294 } 3295 if ( roundBits ) float_exception_flags |= float_flag_inexact; 3296 zSig = ( zSig + roundIncrement )>>7; 3297 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 3298 if ( zSig == 0 ) zExp = 0; 3299 return packFloat32( zSign, zExp, zSig ); 3300 3301} 3302 3303/*---------------------------------------------------------------------------- 3304| Returns the result of converting the canonical NaN `a' to the single- 3305| precision floating-point format. 3306*----------------------------------------------------------------------------*/ 3307 3308LOCALFUNC float32 commonNaNToFloat32( commonNaNT a ) 3309{ 3310 3311 return ( ( (ui5b) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); 3312 3313} 3314 3315/*---------------------------------------------------------------------------- 3316| Returns the result of converting the extended double-precision floating- 3317| point value `a' to the single-precision floating-point format. The 3318| conversion is performed according to the IEC/IEEE Standard for Binary 3319| Floating-Point Arithmetic. 3320*----------------------------------------------------------------------------*/ 3321 3322LOCALFUNC float32 floatx80_to_float32( floatx80 a ) 3323{ 3324 flag aSign; 3325 si5r aExp; 3326 ui6b aSig; 3327 3328 aSig = extractFloatx80Frac( a ); 3329 aExp = extractFloatx80Exp( a ); 3330 aSign = extractFloatx80Sign( a ); 3331 if ( aExp == 0x7FFF ) { 3332 if ( (ui6b) ( aSig<<1 ) ) { 3333 return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 3334 } 3335 return packFloat32( aSign, 0xFF, 0 ); 3336 } 3337 shift64RightJamming( aSig, 33, &aSig ); 3338 if ( aExp || aSig ) aExp -= 0x3F81; 3339 return roundAndPackFloat32( aSign, aExp, aSig ); 3340 3341} 3342 3343 3344typedef ui6b float64; 3345 3346/*---------------------------------------------------------------------------- 3347| Returns the fraction bits of the double-precision floating-point value `a'. 3348*----------------------------------------------------------------------------*/ 3349 3350LOCALINLINEFUNC ui6b extractFloat64Frac( float64 a ) 3351{ 3352 3353 return a & LIT64( 0x000FFFFFFFFFFFFF ); 3354 3355} 3356 3357/*---------------------------------------------------------------------------- 3358| Returns the exponent bits of the double-precision floating-point value `a'. 3359*----------------------------------------------------------------------------*/ 3360 3361LOCALINLINEFUNC si4r extractFloat64Exp( float64 a ) 3362{ 3363 3364 return ( a>>52 ) & 0x7FF; 3365 3366} 3367 3368/*---------------------------------------------------------------------------- 3369| Returns the sign bit of the double-precision floating-point value `a'. 3370*----------------------------------------------------------------------------*/ 3371 3372LOCALINLINEFUNC flag extractFloat64Sign( float64 a ) 3373{ 3374 3375 return a>>63; 3376 3377} 3378 3379/*---------------------------------------------------------------------------- 3380| Returns 1 if the double-precision floating-point value `a' is a signaling 3381| NaN; otherwise returns 0. 3382*----------------------------------------------------------------------------*/ 3383 3384LOCALFUNC flag float64_is_signaling_nan( float64 a ) 3385{ 3386 3387 return 3388 ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) 3389 && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); 3390 3391} 3392 3393/*---------------------------------------------------------------------------- 3394| Returns the result of converting the double-precision floating-point NaN 3395| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid 3396| exception is raised. 3397*----------------------------------------------------------------------------*/ 3398 3399LOCALFUNC commonNaNT float64ToCommonNaN( float64 a ) 3400{ 3401 commonNaNT z; 3402 3403 if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); 3404 z.sign = a>>63; 3405 z.low = 0; 3406 z.high = a<<12; 3407 return z; 3408 3409} 3410 3411/*---------------------------------------------------------------------------- 3412| Normalizes the subnormal double-precision floating-point value represented 3413| by the denormalized significand `aSig'. The normalized exponent and 3414| significand are stored at the locations pointed to by `zExpPtr' and 3415| `zSigPtr', respectively. 3416*----------------------------------------------------------------------------*/ 3417 3418LOCALPROC 3419 normalizeFloat64Subnormal( ui6b aSig, si4r *zExpPtr, ui6b *zSigPtr ) 3420{ 3421 si3r shiftCount; 3422 3423 shiftCount = countLeadingZeros64( aSig ) - 11; 3424 *zSigPtr = aSig<<shiftCount; 3425 *zExpPtr = 1 - shiftCount; 3426 3427} 3428 3429/*---------------------------------------------------------------------------- 3430| Returns the result of converting the double-precision floating-point value 3431| `a' to the extended double-precision floating-point format. The conversion 3432| is performed according to the IEC/IEEE Standard for Binary Floating-Point 3433| Arithmetic. 3434*----------------------------------------------------------------------------*/ 3435 3436LOCALFUNC floatx80 float64_to_floatx80( float64 a ) 3437{ 3438 flag aSign; 3439 si4r aExp; 3440 ui6b aSig; 3441 3442 aSig = extractFloat64Frac( a ); 3443 aExp = extractFloat64Exp( a ); 3444 aSign = extractFloat64Sign( a ); 3445 if ( aExp == 0x7FF ) { 3446 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); 3447 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3448 } 3449 if ( aExp == 0 ) { 3450 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 3451 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3452 } 3453 return 3454 packFloatx80( 3455 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); 3456 3457} 3458 3459/*---------------------------------------------------------------------------- 3460| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 3461| double-precision floating-point value, returning the result. After being 3462| shifted into the proper positions, the three fields are simply added 3463| together to form the result. This means that any integer portion of `zSig' 3464| will be added into the exponent. Since a properly normalized significand 3465| will have an integer portion equal to 1, the `zExp' input should be 1 less 3466| than the desired result exponent whenever `zSig' is a complete, normalized 3467| significand. 3468*----------------------------------------------------------------------------*/ 3469 3470LOCALINLINEFUNC float64 packFloat64( flag zSign, si4r zExp, ui6b zSig ) 3471{ 3472 3473 return ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<52 ) + zSig; 3474 3475} 3476 3477/*---------------------------------------------------------------------------- 3478| Takes an abstract floating-point value having sign `zSign', exponent `zExp', 3479| and significand `zSig', and returns the proper double-precision floating- 3480| point value corresponding to the abstract input. Ordinarily, the abstract 3481| value is simply rounded and packed into the double-precision format, with 3482| the inexact exception raised if the abstract input cannot be represented 3483| exactly. However, if the abstract value is too large, the overflow and 3484| inexact exceptions are raised and an infinity or maximal finite value is 3485| returned. If the abstract value is too small, the input value is rounded 3486| to a subnormal number, and the underflow and inexact exceptions are raised 3487| if the abstract input cannot be represented exactly as a subnormal double- 3488| precision floating-point number. 3489| The input significand `zSig' has its binary point between bits 62 3490| and 61, which is 10 bits to the left of the usual location. This shifted 3491| significand must be normalized or smaller. If `zSig' is not normalized, 3492| `zExp' must be 0; in that case, the result returned is a subnormal number, 3493| and it must not require rounding. In the usual case that `zSig' is 3494| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 3495| The handling of underflow and overflow follows the IEC/IEEE Standard for 3496| Binary Floating-Point Arithmetic. 3497*----------------------------------------------------------------------------*/ 3498 3499LOCALFUNC float64 roundAndPackFloat64( flag zSign, si4r zExp, ui6b zSig ) 3500{ 3501 si3r roundingMode; 3502 flag roundNearestEven; 3503 si4r roundIncrement, roundBits; 3504 flag isTiny; 3505 3506 roundingMode = float_rounding_mode; 3507 roundNearestEven = ( roundingMode == float_round_nearest_even ); 3508 roundIncrement = 0x200; 3509 if ( ! roundNearestEven ) { 3510 if ( roundingMode == float_round_to_zero ) { 3511 roundIncrement = 0; 3512 } 3513 else { 3514 roundIncrement = 0x3FF; 3515 if ( zSign ) { 3516 if ( roundingMode == float_round_up ) roundIncrement = 0; 3517 } 3518 else { 3519 if ( roundingMode == float_round_down ) roundIncrement = 0; 3520 } 3521 } 3522 } 3523 roundBits = zSig & 0x3FF; 3524 if ( 0x7FD <= (ui4b) zExp ) { 3525 if ( ( 0x7FD < zExp ) 3526 || ( ( zExp == 0x7FD ) 3527 && ( (si6b) ( zSig + roundIncrement ) < 0 ) ) 3528 ) { 3529 float_raise( float_flag_overflow | float_flag_inexact ); 3530 return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); 3531 } 3532 if ( zExp < 0 ) { 3533 isTiny = 3534 ( float_detect_tininess == float_tininess_before_rounding ) 3535 || ( zExp < -1 ) 3536 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); 3537 shift64RightJamming( zSig, - zExp, &zSig ); 3538 zExp = 0; 3539 roundBits = zSig & 0x3FF; 3540 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 3541 } 3542 } 3543 if ( roundBits ) float_exception_flags |= float_flag_inexact; 3544 zSig = ( zSig + roundIncrement )>>10; 3545 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 3546 if ( zSig == 0 ) zExp = 0; 3547 return packFloat64( zSign, zExp, zSig ); 3548 3549} 3550 3551/*---------------------------------------------------------------------------- 3552| Returns the result of converting the canonical NaN `a' to the double- 3553| precision floating-point format. 3554*----------------------------------------------------------------------------*/ 3555 3556LOCALFUNC float64 commonNaNToFloat64( commonNaNT a ) 3557{ 3558 3559 return 3560 ( ( (ui6b) a.sign )<<63 ) 3561 | LIT64( 0x7FF8000000000000 ) 3562 | ( a.high>>12 ); 3563 3564} 3565 3566/*---------------------------------------------------------------------------- 3567| Returns the result of converting the extended double-precision floating- 3568| point value `a' to the double-precision floating-point format. The 3569| conversion is performed according to the IEC/IEEE Standard for Binary 3570| Floating-Point Arithmetic. 3571*----------------------------------------------------------------------------*/ 3572 3573LOCALFUNC float64 floatx80_to_float64( floatx80 a ) 3574{ 3575 flag aSign; 3576 si5r aExp; 3577 ui6b aSig, zSig; 3578 3579 aSig = extractFloatx80Frac( a ); 3580 aExp = extractFloatx80Exp( a ); 3581 aSign = extractFloatx80Sign( a ); 3582 if ( aExp == 0x7FFF ) { 3583 if ( (ui6b) ( aSig<<1 ) ) { 3584 return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 3585 } 3586 return packFloat64( aSign, 0x7FF, 0 ); 3587 } 3588 shift64RightJamming( aSig, 1, &zSig ); 3589 if ( aExp || aSig ) aExp -= 0x3C01; 3590 return roundAndPackFloat64( aSign, aExp, zSig ); 3591 3592} 3593 3594/* ----- end from original file "softfloat.c" ----- */ 3595 3596typedef si4r Bit16s; 3597typedef si5r Bit32s; 3598typedef si6r Bit64s; 3599 3600typedef ui5r Bit32u; 3601typedef ui6r Bit64u; 3602 3603#define int16_indefinite 0x8000 3604 3605/*---------------------------------------------------------------------------- 3606| Takes extended double-precision floating-point NaN `a' and returns the 3607| appropriate NaN result. If `a' is a signaling NaN, the invalid exception 3608| is raised. 3609*----------------------------------------------------------------------------*/ 3610 3611LOCALFUNC floatx80 propagateOneFloatx80NaN(floatx80 *a) 3612{ 3613 if (floatx80_is_signaling_nan(*a)) 3614 float_raise(float_flag_invalid); 3615 3616 a->low |= LIT64(0xC000000000000000); 3617 3618 return *a; 3619} 3620 3621#define float_flag_denormal 0x02 3622 3623#define floatx80_default_nan_exp 0xFFFF 3624#define floatx80_default_nan_fraction LIT64(0xC000000000000000) 3625 3626#define packFloatx80m(zSign, zExp, zSig) {(zSig), ((zSign) << 15) + (zExp) } 3627 3628/*---------------------------------------------------------------------------- 3629| Packs two 64-bit precision integers into into the quadruple-precision 3630| floating-point value, returning the result. 3631*----------------------------------------------------------------------------*/ 3632 3633#define packFloat2x128m(zHi, zLo) {(zLo), (zHi)} 3634#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo)) 3635 3636static const floatx80 floatx80_default_nan = 3637 packFloatx80m(0, floatx80_default_nan_exp, floatx80_default_nan_fraction); 3638 3639/*---------------------------------------------------------------------------- 3640| Software IEC/IEEE floating-point ordering relations 3641*----------------------------------------------------------------------------*/ 3642enum { 3643 float_relation_less = -1, 3644 float_relation_equal = 0, 3645 float_relation_greater = 1, 3646 float_relation_unordered = 2 3647}; 3648 3649#define EXP_BIAS 0x3FFF 3650 3651 3652/*---------------------------------------------------------------------------- 3653| Returns the result of multiplying the extended double-precision floating- 3654| point value `a' and quadruple-precision floating point value `b'. The 3655| operation is performed according to the IEC/IEEE Standard for Binary 3656| Floating-Point Arithmetic. 3657*----------------------------------------------------------------------------*/ 3658 3659LOCALFUNC floatx80 floatx80_mul128(floatx80 a, float128 b) 3660{ 3661 Bit32s aExp, bExp, zExp; 3662 Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2; 3663 int aSign, bSign, zSign; 3664 3665 // handle unsupported extended double-precision floating encodings 3666 if (0 /* floatx80_is_unsupported(a) */) 3667 { 3668 invalid: 3669 float_raise(float_flag_invalid); 3670 return floatx80_default_nan; 3671 } 3672 3673 aSig = extractFloatx80Frac(a); 3674 aExp = extractFloatx80Exp(a); 3675 aSign = extractFloatx80Sign(a); 3676 bSig0 = extractFloat128Frac0(b); 3677 bSig1 = extractFloat128Frac1(b); 3678 bExp = extractFloat128Exp(b); 3679 bSign = extractFloat128Sign(b); 3680 3681 zSign = aSign ^ bSign; 3682 3683 if (aExp == 0x7FFF) { 3684 if ((Bit64u) (aSig<<1) 3685 || ((bExp == 0x7FFF) && (bSig0 | bSig1))) 3686 { 3687 floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b)); 3688 return propagateFloatx80NaN(a, r); 3689 } 3690 if (bExp == 0) { 3691 if ((bSig0 | bSig1) == 0) goto invalid; 3692 float_raise(float_flag_denormal); 3693 } 3694 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000)); 3695 } 3696 if (bExp == 0x7FFF) { 3697 if (bSig0 | bSig1) { 3698 floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b)); 3699 return propagateFloatx80NaN(a, r); 3700 } 3701 if (aExp == 0) { 3702 if (aSig == 0) goto invalid; 3703 float_raise(float_flag_denormal); 3704 } 3705 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000)); 3706 } 3707 if (aExp == 0) { 3708 if (aSig == 0) { 3709 if ((bExp == 0) && (bSig0 | bSig1)) float_raise(float_flag_denormal); 3710 return packFloatx80(zSign, 0, 0); 3711 } 3712 float_raise(float_flag_denormal); 3713 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 3714 } 3715 if (bExp == 0) { 3716 if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0); 3717 float_raise(float_flag_denormal); 3718 normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1); 3719 } 3720 else bSig0 |= LIT64(0x0001000000000000); 3721 3722 zExp = aExp + bExp - 0x3FFE; 3723 shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1); 3724 mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2); 3725 if (0 < (Bit64s) zSig0) { 3726 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); 3727 --zExp; 3728 } 3729 return 3730 roundAndPackFloatx80(floatx80_rounding_precision, 3731 zSign, zExp, zSig0, zSig1); 3732} 3733 3734/* ----- from original file "softfloatx80.h" ----- */ 3735 3736/* 3737 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 3738*/ 3739 3740 3741/*---------------------------------------------------------------------------- 3742| Software IEC/IEEE floating-point class. 3743*----------------------------------------------------------------------------*/ 3744typedef enum { 3745 float_zero, 3746 float_NaN, 3747 float_negative_inf, 3748 float_positive_inf, 3749 float_denormal, 3750 float_normalized 3751} float_class_t; 3752 3753 3754/*----------------------------------------------------------------------------- 3755| Calculates the absolute value of the extended double-precision floating-point 3756| value `a'. The operation is performed according to the IEC/IEEE Standard 3757| for Binary Floating-Point Arithmetic. 3758*----------------------------------------------------------------------------*/ 3759 3760#if 0 3761inline floatx80 floatx80_abs(floatx80 *reg) 3762{ 3763 reg->high &= 0x7FFF; 3764 return *reg; 3765} 3766#endif 3767 3768/*----------------------------------------------------------------------------- 3769| Changes the sign of the extended double-precision floating-point value 'a'. 3770| The operation is performed according to the IEC/IEEE Standard for Binary 3771| Floating-Point Arithmetic. 3772*----------------------------------------------------------------------------*/ 3773 3774LOCALFUNC floatx80 floatx80_chs(floatx80 *x) 3775{ 3776 x->high ^= 0x8000; 3777 return *x; 3778} 3779 3780/* ----- end from original file "softfloatx80.h" ----- */ 3781 3782/* ----- from original file "softfloatx80.cc" ----- */ 3783 3784/* 3785 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 3786*/ 3787 3788/*---------------------------------------------------------------------------- 3789| Returns the result of converting the extended double-precision floating- 3790| point value `a' to the 16-bit two's complement integer format. The 3791| conversion is performed according to the IEC/IEEE Standard for Binary 3792| Floating-Point Arithmetic - which means in particular that the conversion 3793| is rounded according to the current rounding mode. If `a' is a NaN or the 3794| conversion overflows, the integer indefinite value is returned. 3795*----------------------------------------------------------------------------*/ 3796 3797#if cIncludeFPUUnused 3798LOCALFUNC Bit16s floatx80_to_int16(floatx80 a) 3799{ 3800#if 0 3801 if (floatx80_is_unsupported(a)) 3802 { 3803 float_raise(float_flag_invalid); 3804 return int16_indefinite; 3805 } 3806#endif 3807 3808 Bit32s v32 = floatx80_to_int32(a); 3809 3810 if ((v32 > 32767) || (v32 < -32768)) { 3811 float_exception_flags = float_flag_invalid; // throw way other flags 3812 return int16_indefinite; 3813 } 3814 3815 return (Bit16s) v32; 3816} 3817#endif 3818 3819/*---------------------------------------------------------------------------- 3820| Returns the result of converting the extended double-precision floating- 3821| point value `a' to the 16-bit two's complement integer format. The 3822| conversion is performed according to the IEC/IEEE Standard for Binary 3823| Floating-Point Arithmetic, except that the conversion is always rounded 3824| toward zero. If `a' is a NaN or the conversion overflows, the integer 3825| indefinite value is returned. 3826*----------------------------------------------------------------------------*/ 3827 3828#if cIncludeFPUUnused 3829LOCALFUNC Bit16s floatx80_to_int16_round_to_zero(floatx80 a) 3830{ 3831#if 0 3832 if (floatx80_is_unsupported(a)) 3833 { 3834 float_raise(float_flag_invalid); 3835 return int16_indefinite; 3836 } 3837#endif 3838 3839 Bit32s v32 = floatx80_to_int32_round_to_zero(a); 3840 3841 if ((v32 > 32767) || (v32 < -32768)) { 3842 float_exception_flags = float_flag_invalid; // throw way other flags 3843 return int16_indefinite; 3844 } 3845 3846 return (Bit16s) v32; 3847} 3848#endif 3849 3850/*---------------------------------------------------------------------------- 3851| Separate the source extended double-precision floating point value `a' 3852| into its exponent and significand, store the significant back to the 3853| 'a' and return the exponent. The operation performed is a superset of 3854| the IEC/IEEE recommended logb(x) function. 3855*----------------------------------------------------------------------------*/ 3856 3857LOCALFUNC floatx80 floatx80_extract(floatx80 *a) 3858{ 3859 Bit64u aSig = extractFloatx80Frac(*a); 3860 Bit32s aExp = extractFloatx80Exp(*a); 3861 int aSign = extractFloatx80Sign(*a); 3862 3863#if 0 3864 if (floatx80_is_unsupported(*a)) 3865 { 3866 float_raise(float_flag_invalid); 3867 *a = floatx80_default_nan; 3868 return *a; 3869 } 3870#endif 3871 3872 if (aExp == 0x7FFF) { 3873 if ((Bit64u) (aSig<<1)) 3874 { 3875 *a = propagateOneFloatx80NaN(a); 3876 return *a; 3877 } 3878 return packFloatx80(0, 0x7FFF, LIT64(0x8000000000000000)); 3879 } 3880 if (aExp == 0) 3881 { 3882 if (aSig == 0) { 3883 float_raise(float_flag_divbyzero); 3884 *a = packFloatx80(aSign, 0, 0); 3885 return packFloatx80(1, 0x7FFF, LIT64(0x8000000000000000)); 3886 } 3887 float_raise(float_flag_denormal); 3888 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 3889 } 3890 3891 a->high = (aSign << 15) + 0x3FFF; 3892 a->low = aSig; 3893 return int32_to_floatx80(aExp - 0x3FFF); 3894} 3895 3896/*---------------------------------------------------------------------------- 3897| Scales extended double-precision floating-point value in operand `a' by 3898| value `b'. The function truncates the value in the second operand 'b' to 3899| an integral value and adds that value to the exponent of the operand 'a'. 3900| The operation performed according to the IEC/IEEE Standard for Binary 3901| Floating-Point Arithmetic. 3902*----------------------------------------------------------------------------*/ 3903 3904LOCALFUNC floatx80 floatx80_scale(floatx80 a, floatx80 b) 3905{ 3906 int shiftCount; 3907 Bit32s scale; 3908 3909 // handle unsupported extended double-precision floating encodings 3910#if 0 3911 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) 3912 { 3913 float_raise(float_flag_invalid); 3914 return floatx80_default_nan; 3915 } 3916#endif 3917 3918 Bit64u aSig = extractFloatx80Frac(a); 3919 Bit32s aExp = extractFloatx80Exp(a); 3920 int aSign = extractFloatx80Sign(a); 3921 Bit64u bSig = extractFloatx80Frac(b); 3922 Bit32s bExp = extractFloatx80Exp(b); 3923 int bSign = extractFloatx80Sign(b); 3924 3925 if (aExp == 0x7FFF) { 3926 if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 3927 { 3928 return propagateFloatx80NaN(a, b); 3929 } 3930 if ((bExp == 0x7FFF) && bSign) { 3931 float_raise(float_flag_invalid); 3932 return floatx80_default_nan; 3933 } 3934 if (bSig && (bExp == 0)) float_raise(float_flag_denormal); 3935 return a; 3936 } 3937 if (bExp == 0x7FFF) { 3938 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b); 3939 if ((aExp | aSig) == 0) { 3940 if (! bSign) { 3941 float_raise(float_flag_invalid); 3942 return floatx80_default_nan; 3943 } 3944 return a; 3945 } 3946 if (aSig && (aExp == 0)) float_raise(float_flag_denormal); 3947 if (bSign) return packFloatx80(aSign, 0, 0); 3948 return packFloatx80(aSign, 0x7FFF, LIT64(0x8000000000000000)); 3949 } 3950 if (aExp == 0) { 3951 if (bSig && (bExp == 0)) float_raise(float_flag_denormal); 3952 if (aSig == 0) return a; 3953 float_raise(float_flag_denormal); 3954 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 3955 if (bExp < 0x3FFF) 3956 return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0); 3957 } 3958 if (bExp == 0) { 3959 if (bSig == 0) return a; 3960 float_raise(float_flag_denormal); 3961 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 3962 } 3963 3964 if (bExp > 0x400E) { 3965 /* generate appropriate overflow/underflow */ 3966 return roundAndPackFloatx80(80, aSign, 3967 bSign ? -0x3FFF : 0x7FFF, aSig, 0); 3968 } 3969 3970 if (bExp < 0x3FFF) return a; 3971 3972 shiftCount = 0x403E - bExp; 3973 bSig >>= shiftCount; 3974 scale = (Bit32s) bSig; 3975 if (bSign) scale = -scale; /* -32768..32767 */ 3976 return 3977 roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0); 3978} 3979 3980/*---------------------------------------------------------------------------- 3981| Determine extended-precision floating-point number class. 3982*----------------------------------------------------------------------------*/ 3983 3984#if cIncludeFPUUnused 3985LOCALFUNC float_class_t floatx80_class(floatx80 a) 3986{ 3987 Bit32s aExp = extractFloatx80Exp(a); 3988 Bit64u aSig = extractFloatx80Frac(a); 3989 3990 if(aExp == 0) { 3991 if (aSig == 0) 3992 return float_zero; 3993 3994 /* denormal or pseudo-denormal */ 3995 return float_denormal; 3996 } 3997 3998 /* valid numbers have the MS bit set */ 3999 if (!(aSig & LIT64(0x8000000000000000))) 4000 return float_NaN; /* report unsupported as NaNs */ 4001 4002 if(aExp == 0x7fff) { 4003 int aSign = extractFloatx80Sign(a); 4004 4005 if (((Bit64u) (aSig<< 1)) == 0) 4006 return (aSign) ? float_negative_inf : float_positive_inf; 4007 4008 return float_NaN; 4009 } 4010 4011 return float_normalized; 4012} 4013#endif 4014 4015/*---------------------------------------------------------------------------- 4016| Compare between two extended precision floating point numbers. Returns 4017| 'float_relation_equal' if the operands are equal, 'float_relation_less' if 4018| the value 'a' is less than the corresponding value `b', 4019| 'float_relation_greater' if the value 'a' is greater than the corresponding 4020| value `b', or 'float_relation_unordered' otherwise. 4021*----------------------------------------------------------------------------*/ 4022 4023#if cIncludeFPUUnused 4024LOCALFUNC int floatx80_compare(floatx80 a, floatx80 b) 4025{ 4026 int aSign; 4027 int bSign; 4028 Bit64u aSig; 4029 Bit32s aExp; 4030 Bit64u bSig; 4031 Bit32s bExp; 4032 int less_than; 4033 float_class_t aClass = floatx80_class(a); 4034 float_class_t bClass = floatx80_class(b); 4035 4036 if (aClass == float_NaN || bClass == float_NaN) 4037 { 4038 float_raise(float_flag_invalid); 4039 return float_relation_unordered; 4040 } 4041 4042 if (aClass == float_denormal || bClass == float_denormal) 4043 { 4044 float_raise(float_flag_denormal); 4045 } 4046 4047 aSign = extractFloatx80Sign(a); 4048 bSign = extractFloatx80Sign(b); 4049 4050 if (aClass == float_zero) { 4051 if (bClass == float_zero) return float_relation_equal; 4052 return bSign ? float_relation_greater : float_relation_less; 4053 } 4054 4055 if (bClass == float_zero || aSign != bSign) { 4056 return aSign ? float_relation_less : float_relation_greater; 4057 } 4058 4059 aSig = extractFloatx80Frac(a); 4060 aExp = extractFloatx80Exp(a); 4061 bSig = extractFloatx80Frac(b); 4062 bExp = extractFloatx80Exp(b); 4063 4064 if (aClass == float_denormal) 4065 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 4066 4067 if (bClass == float_denormal) 4068 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 4069 4070 if (aExp == bExp && aSig == bSig) 4071 return float_relation_equal; 4072 4073 less_than = 4074 aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig))) 4075 : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig))); 4076 4077 if (less_than) return float_relation_less; 4078 4079 return float_relation_greater; 4080} 4081#endif 4082 4083/*---------------------------------------------------------------------------- 4084| Compare between two extended precision floating point numbers. Returns 4085| 'float_relation_equal' if the operands are equal, 'float_relation_less' if 4086| the value 'a' is less than the corresponding value `b', 4087| 'float_relation_greater' if the value 'a' is greater than the corresponding 4088| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause 4089| an exception. 4090*----------------------------------------------------------------------------*/ 4091 4092#if cIncludeFPUUnused 4093LOCALFUNC int floatx80_compare_quiet(floatx80 a, floatx80 b) 4094{ 4095 int aSign; 4096 int bSign; 4097 Bit64u aSig; 4098 Bit32s aExp; 4099 Bit64u bSig; 4100 Bit32s bExp; 4101 int less_than; 4102 float_class_t aClass = floatx80_class(a); 4103 float_class_t bClass = floatx80_class(b); 4104 4105 if (aClass == float_NaN || bClass == float_NaN) 4106 { 4107#if 0 4108 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) 4109 float_raise(float_flag_invalid); 4110#endif 4111 4112 if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b)) 4113 float_raise(float_flag_invalid); 4114 4115 return float_relation_unordered; 4116 } 4117 4118 if (aClass == float_denormal || bClass == float_denormal) 4119 { 4120 float_raise(float_flag_denormal); 4121 } 4122 4123 aSign = extractFloatx80Sign(a); 4124 bSign = extractFloatx80Sign(b); 4125 4126 if (aClass == float_zero) { 4127 if (bClass == float_zero) return float_relation_equal; 4128 return bSign ? float_relation_greater : float_relation_less; 4129 } 4130 4131 if (bClass == float_zero || aSign != bSign) { 4132 return aSign ? float_relation_less : float_relation_greater; 4133 } 4134 4135 aSig = extractFloatx80Frac(a); 4136 aExp = extractFloatx80Exp(a); 4137 bSig = extractFloatx80Frac(b); 4138 bExp = extractFloatx80Exp(b); 4139 4140 if (aClass == float_denormal) 4141 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 4142 4143 if (bClass == float_denormal) 4144 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 4145 4146 if (aExp == bExp && aSig == bSig) 4147 return float_relation_equal; 4148 4149 less_than = 4150 aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig))) 4151 : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig))); 4152 4153 if (less_than) return float_relation_less; 4154 return float_relation_greater; 4155} 4156#endif 4157 4158/* ----- end from original file "softfloatx80.cc" ----- */ 4159 4160/* ----- from original file "fprem.cc" ----- */ 4161 4162/* 4163 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4164*/ 4165 4166#define USE_estimateDiv128To64 4167 4168/* executes single exponent reduction cycle */ 4169LOCALFUNC Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1) 4170{ 4171 Bit64u term0, term1; 4172 Bit64u aSig1 = 0; 4173 Bit64u q; 4174 4175 shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0); 4176 q = estimateDiv128To64(aSig1, aSig0, bSig); 4177 mul64To128(bSig, q, &term0, &term1); 4178 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); 4179 while ((Bit64s)(*zSig1) < 0) { 4180 --q; 4181 add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0); 4182 } 4183 return q; 4184} 4185 4186LOCALFUNC floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u *q, int rounding_mode) 4187{ 4188 Bit32s aExp, bExp, zExp, expDiff; 4189 Bit64u aSig0, aSig1, bSig; 4190 int aSign; 4191 *q = 0; 4192 4193#if 0 4194 // handle unsupported extended double-precision floating encodings 4195 if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) 4196 { 4197 float_raise(float_flag_invalid); 4198 return floatx80_default_nan; 4199 } 4200#endif 4201 4202 aSig0 = extractFloatx80Frac(a); 4203 aExp = extractFloatx80Exp(a); 4204 aSign = extractFloatx80Sign(a); 4205 bSig = extractFloatx80Frac(b); 4206 bExp = extractFloatx80Exp(b); 4207 4208 if (aExp == 0x7FFF) { 4209 if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) { 4210 return propagateFloatx80NaN(a, b); 4211 } 4212 float_raise(float_flag_invalid); 4213 return floatx80_default_nan; 4214 } 4215 if (bExp == 0x7FFF) { 4216 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b); 4217 if (aExp == 0 && aSig0) { 4218 float_raise(float_flag_denormal); 4219 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); 4220 return (a.low & LIT64(0x8000000000000000)) ? 4221 packFloatx80(aSign, aExp, aSig0) : a; 4222 } 4223 return a; 4224 } 4225 if (bExp == 0) { 4226 if (bSig == 0) { 4227 float_raise(float_flag_invalid); 4228 return floatx80_default_nan; 4229 } 4230 float_raise(float_flag_denormal); 4231 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 4232 } 4233 if (aExp == 0) { 4234 if (aSig0 == 0) return a; 4235 float_raise(float_flag_denormal); 4236 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); 4237 } 4238 expDiff = aExp - bExp; 4239 aSig1 = 0; 4240 4241 if (expDiff >= 64) { 4242 int n = (expDiff & 0x1f) | 0x20; 4243 remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1); 4244 zExp = aExp - n; 4245 *q = (Bit64u) -1; 4246 } 4247 else { 4248 zExp = bExp; 4249 4250 if (expDiff < 0) { 4251 if (expDiff < -1) 4252 return (a.low & LIT64(0x8000000000000000)) ? 4253 packFloatx80(aSign, aExp, aSig0) : a; 4254 shift128Right(aSig0, 0, 1, &aSig0, &aSig1); 4255 expDiff = 0; 4256 } 4257 4258 if (expDiff > 0) { 4259 *q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1); 4260 } 4261 else { 4262 if (bSig <= aSig0) { 4263 aSig0 -= bSig; 4264 *q = 1; 4265 } 4266 } 4267 4268 if (rounding_mode == float_round_nearest_even) 4269 { 4270 Bit64u term0, term1; 4271 shift128Right(bSig, 0, 1, &term0, &term1); 4272 4273 if (! lt128(aSig0, aSig1, term0, term1)) 4274 { 4275 int lt = lt128(term0, term1, aSig0, aSig1); 4276 int eq = eq128(aSig0, aSig1, term0, term1); 4277 4278 if ((eq && (*q & 1)) || lt) { 4279 aSign = !aSign; 4280 ++*q; 4281 } 4282 if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1); 4283 } 4284 } 4285 } 4286 4287 return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1); 4288} 4289 4290/*---------------------------------------------------------------------------- 4291| Returns the remainder of the extended double-precision floating-point value 4292| `a' with respect to the corresponding value `b'. The operation is performed 4293| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4294*----------------------------------------------------------------------------*/ 4295 4296#if cIncludeFPUUnused 4297LOCALFUNC floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u *q) 4298{ 4299 return do_fprem(a, b, q, float_round_nearest_even); 4300} 4301#endif 4302 4303/*---------------------------------------------------------------------------- 4304| Returns the remainder of the extended double-precision floating-point value 4305| `a' with respect to the corresponding value `b'. Unlike previous function 4306| the function does not compute the remainder specified in the IEC/IEEE 4307| Standard for Binary Floating-Point Arithmetic. This function operates 4308| differently from the previous function in the way that it rounds the 4309| quotient of 'a' divided by 'b' to an integer. 4310*----------------------------------------------------------------------------*/ 4311 4312LOCALFUNC floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u *q) 4313{ 4314 return do_fprem(a, b, q, float_round_to_zero); 4315} 4316 4317/* ----- end from original file "fprem.cc" ----- */ 4318 4319/* ----- from original file "fpu_constant.h" ----- */ 4320 4321/* 4322 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4323*/ 4324 4325// Pentium CPU uses only 68-bit precision M_PI approximation 4326// #define BETTER_THAN_PENTIUM 4327 4328////////////////////////////// 4329// PI, PI/2, PI/4 constants 4330////////////////////////////// 4331 4332#define FLOATX80_PI_EXP (0x4000) 4333 4334// 128-bit PI fraction 4335#ifdef BETTER_THAN_PENTIUM 4336#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234)) 4337#define FLOAT_PI_LO (LIT64(0xc4c6628b80dc1cd1)) 4338#else 4339#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234)) 4340#define FLOAT_PI_LO (LIT64(0xC000000000000000)) 4341#endif 4342 4343#define FLOATX80_PI2_EXP (0x3FFF) 4344#define FLOATX80_PI4_EXP (0x3FFE) 4345 4346////////////////////////////// 4347// 3PI/4 constant 4348////////////////////////////// 4349 4350#define FLOATX80_3PI4_EXP (0x4000) 4351 4352// 128-bit 3PI/4 fraction 4353#ifdef BETTER_THAN_PENTIUM 4354#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7)) 4355#define FLOAT_3PI4_LO (LIT64(0x9394c9e8a0a5159c)) 4356#else 4357#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7)) 4358#define FLOAT_3PI4_LO (LIT64(0x9000000000000000)) 4359#endif 4360 4361////////////////////////////// 4362// 1/LN2 constant 4363////////////////////////////// 4364 4365#define FLOAT_LN2INV_EXP (0x3FFF) 4366 4367// 128-bit 1/LN2 fraction 4368#ifdef BETTER_THAN_PENTIUM 4369#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb)) 4370#define FLOAT_LN2INV_LO (LIT64(0xbe87fed0691d3e89)) 4371#else 4372#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb)) 4373#define FLOAT_LN2INV_LO (LIT64(0xC000000000000000)) 4374#endif 4375 4376/* ----- end from original file "fpu_constant.h" ----- */ 4377 4378/* ----- from original file "poly.cc" ----- */ 4379 4380/* 4381 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4382*/ 4383 4384// 2 3 4 n 4385// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) 4386// 0 1 2 3 4 n 4387// 4388// -- 2k -- 2k+1 4389// p(x) = > C * x q(x) = > C * x 4390// -- 2k -- 2k+1 4391// 4392// f(x) ~ [ p(x) + x * q(x) ] 4393// 4394 4395LOCALFUNC float128 EvalPoly(float128 x, float128 *arr, unsigned n) 4396{ 4397 float128 r2; 4398 float128 x2 = float128_mul(x, x); 4399 unsigned i; 4400 4401#if 0 4402 assert(n > 1); 4403#endif 4404 4405 float128 r1 = arr[--n]; 4406 i = n; 4407 while(i >= 2) { 4408 r1 = float128_mul(r1, x2); 4409 i -= 2; 4410 r1 = float128_add(r1, arr[i]); 4411 } 4412 if (i) r1 = float128_mul(r1, x); 4413 4414 r2 = arr[--n]; 4415 i = n; 4416 while(i >= 2) { 4417 r2 = float128_mul(r2, x2); 4418 i -= 2; 4419 r2 = float128_add(r2, arr[i]); 4420 } 4421 if (i) r2 = float128_mul(r2, x); 4422 4423 return float128_add(r1, r2); 4424} 4425 4426// 2 4 6 8 2n 4427// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) 4428// 0 1 2 3 4 n 4429// 4430// -- 4k -- 4k+2 4431// p(x) = > C * x q(x) = > C * x 4432// -- 2k -- 2k+1 4433// 4434// 2 4435// f(x) ~ [ p(x) + x * q(x) ] 4436// 4437 4438LOCALFUNC float128 EvenPoly(float128 x, float128 *arr, unsigned n) 4439{ 4440 return EvalPoly(float128_mul(x, x), arr, n); 4441} 4442 4443// 3 5 7 9 2n+1 4444// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) 4445// 0 1 2 3 4 n 4446// 2 4 6 8 2n 4447// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) 4448// 0 1 2 3 4 n 4449// 4450// -- 4k -- 4k+2 4451// p(x) = > C * x q(x) = > C * x 4452// -- 2k -- 2k+1 4453// 4454// 2 4455// f(x) ~ x * [ p(x) + x * q(x) ] 4456// 4457 4458LOCALFUNC float128 OddPoly(float128 x, float128 *arr, unsigned n) 4459{ 4460 return float128_mul(x, EvenPoly(x, arr, n)); 4461} 4462 4463/* ----- end from original file "poly.cc" ----- */ 4464 4465/* ----- from original file "fyl2x.cc" ----- */ 4466 4467/* 4468 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4469*/ 4470 4471static const floatx80 floatx80_one = 4472 packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000)); 4473 4474static const float128 float128_one = 4475 packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000)); 4476static const float128 float128_two = 4477 packFloat2x128m(LIT64(0x4000000000000000), LIT64(0x0000000000000000)); 4478 4479static const float128 float128_ln2inv2 = 4480 packFloat2x128m(LIT64(0x400071547652b82f), LIT64(0xe1777d0ffda0d23a)); 4481 4482#define SQRT2_HALF_SIG LIT64(0xb504f333f9de6484) 4483 4484#define L2_ARR_SIZE 9 4485 4486static float128 ln_arr[L2_ARR_SIZE] = 4487{ 4488 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ 4489 PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */ 4490 PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */ 4491 PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */ 4492 PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */ 4493 PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */ 4494 PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */ 4495 PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */ 4496 PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */ 4497}; 4498 4499LOCALFUNC float128 poly_ln(float128 x1) 4500{ 4501/* 4502 // 4503 // 3 5 7 9 11 13 15 4504 // 1+u u u u u u u u 4505 // 1/2 ln --- ~ u + --- + --- + --- + --- + ---- + ---- + ---- = 4506 // 1-u 3 5 7 9 11 13 15 4507 // 4508 // 2 4 6 8 10 12 14 4509 // u u u u u u u 4510 // = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] = 4511 // 3 5 7 9 11 13 15 4512 // 4513 // 3 3 4514 // -- 4k -- 4k+2 4515 // p(u) = > C * u q(u) = > C * u 4516 // -- 2k -- 2k+1 4517 // k=0 k=0 4518 // 4519 // 1+u 2 4520 // 1/2 ln --- ~ u * [ p(u) + u * q(u) ] 4521 // 1-u 4522 // 4523*/ 4524 return OddPoly(x1, ln_arr, L2_ARR_SIZE); 4525} 4526 4527/* required sqrt(2)/2 < x < sqrt(2) */ 4528LOCALFUNC float128 poly_l2(float128 x) 4529{ 4530 /* using float128 for approximation */ 4531 float128 x_p1 = float128_add(x, float128_one); 4532 float128 x_m1 = float128_sub(x, float128_one); 4533 x = float128_div(x_m1, x_p1); 4534 x = poly_ln(x); 4535 x = float128_mul(x, float128_ln2inv2); 4536 return x; 4537} 4538 4539LOCALFUNC float128 poly_l2p1(float128 x) 4540{ 4541 /* using float128 for approximation */ 4542 float128 x_p2 = float128_add(x, float128_two); 4543 x = float128_div(x, x_p2); 4544 x = poly_ln(x); 4545 x = float128_mul(x, float128_ln2inv2); 4546 return x; 4547} 4548 4549// ================================================= 4550// FYL2X Compute y * log (x) 4551// 2 4552// ================================================= 4553 4554// 4555// Uses the following identities: 4556// 4557// 1. ---------------------------------------------------------- 4558// ln(x) 4559// log (x) = -------, ln (x*y) = ln(x) + ln(y) 4560// 2 ln(2) 4561// 4562// 2. ---------------------------------------------------------- 4563// 1+u x-1 4564// ln (x) = ln -----, when u = ----- 4565// 1-u x+1 4566// 4567// 3. ---------------------------------------------------------- 4568// 3 5 7 2n+1 4569// 1+u u u u u 4570// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] 4571// 1-u 3 5 7 2n+1 4572// 4573 4574LOCALFUNC floatx80 fyl2x(floatx80 a, floatx80 b) 4575{ 4576 int aSign; 4577 int bSign; 4578 Bit64u aSig; 4579 Bit32s aExp; 4580 Bit64u bSig; 4581 Bit32s bExp; 4582 int zSign; 4583 int ExpDiff; 4584 Bit64u zSig0, zSig1; 4585 float128 x; 4586 4587 // handle unsupported extended double-precision floating encodings 4588 if (0 /* floatx80_is_unsupported(a) */) { 4589invalid: 4590 float_raise(float_flag_invalid); 4591 return floatx80_default_nan; 4592 } 4593 4594 aSig = extractFloatx80Frac(a); 4595 aExp = extractFloatx80Exp(a); 4596 aSign = extractFloatx80Sign(a); 4597 bSig = extractFloatx80Frac(b); 4598 bExp = extractFloatx80Exp(b); 4599 bSign = extractFloatx80Sign(b); 4600 4601 zSign = bSign ^ 1; 4602 4603 if (aExp == 0x7FFF) { 4604 if ((Bit64u) (aSig<<1) 4605 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 4606 { 4607 return propagateFloatx80NaN(a, b); 4608 } 4609 if (aSign) goto invalid; 4610 else { 4611 if (bExp == 0) { 4612 if (bSig == 0) goto invalid; 4613 float_raise(float_flag_denormal); 4614 } 4615 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000)); 4616 } 4617 } 4618 if (bExp == 0x7FFF) 4619 { 4620 if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b); 4621 if (aSign && (Bit64u)(aExp | aSig)) goto invalid; 4622 if (aSig && (aExp == 0)) 4623 float_raise(float_flag_denormal); 4624 if (aExp < 0x3FFF) { 4625 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000)); 4626 } 4627 if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid; 4628 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000)); 4629 } 4630 if (aExp == 0) { 4631 if (aSig == 0) { 4632 if ((bExp | bSig) == 0) goto invalid; 4633 float_raise(float_flag_divbyzero); 4634 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000)); 4635 } 4636 if (aSign) goto invalid; 4637 float_raise(float_flag_denormal); 4638 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 4639 } 4640 if (aSign) goto invalid; 4641 if (bExp == 0) { 4642 if (bSig == 0) { 4643 if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0); 4644 return packFloatx80(bSign, 0, 0); 4645 } 4646 float_raise(float_flag_denormal); 4647 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 4648 } 4649 if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) 4650 return packFloatx80(bSign, 0, 0); 4651 4652 float_raise(float_flag_inexact); 4653 4654 ExpDiff = aExp - 0x3FFF; 4655 aExp = 0; 4656 if (aSig >= SQRT2_HALF_SIG) { 4657 ExpDiff++; 4658 aExp--; 4659 } 4660 4661 /* ******************************** */ 4662 /* using float128 for approximation */ 4663 /* ******************************** */ 4664 4665 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); 4666 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1); 4667 x = poly_l2(x); 4668 x = float128_add(x, floatx80_to_float128(int32_to_floatx80(ExpDiff))); 4669 return floatx80_mul128(b, x); 4670} 4671 4672// ================================================= 4673// FYL2XP1 Compute y * log (x + 1) 4674// 2 4675// ================================================= 4676 4677// 4678// Uses the following identities: 4679// 4680// 1. ---------------------------------------------------------- 4681// ln(x) 4682// log (x) = ------- 4683// 2 ln(2) 4684// 4685// 2. ---------------------------------------------------------- 4686// 1+u x 4687// ln (x+1) = ln -----, when u = ----- 4688// 1-u x+2 4689// 4690// 3. ---------------------------------------------------------- 4691// 3 5 7 2n+1 4692// 1+u u u u u 4693// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] 4694// 1-u 3 5 7 2n+1 4695// 4696 4697LOCALFUNC floatx80 fyl2xp1(floatx80 a, floatx80 b) 4698{ 4699 Bit32s aExp, bExp; 4700 Bit64u aSig, bSig, zSig0, zSig1, zSig2; 4701 int aSign, bSign; 4702 int zSign; 4703 float128 x; 4704 4705 // handle unsupported extended double-precision floating encodings 4706 if (0 /* floatx80_is_unsupported(a) */) { 4707invalid: 4708 float_raise(float_flag_invalid); 4709 return floatx80_default_nan; 4710 } 4711 4712 aSig = extractFloatx80Frac(a); 4713 aExp = extractFloatx80Exp(a); 4714 aSign = extractFloatx80Sign(a); 4715 bSig = extractFloatx80Frac(b); 4716 bExp = extractFloatx80Exp(b); 4717 bSign = extractFloatx80Sign(b); 4718 zSign = aSign ^ bSign; 4719 4720 if (aExp == 0x7FFF) { 4721 if ((Bit64u) (aSig<<1) 4722 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 4723 { 4724 return propagateFloatx80NaN(a, b); 4725 } 4726 if (aSign) goto invalid; 4727 else { 4728 if (bExp == 0) { 4729 if (bSig == 0) goto invalid; 4730 float_raise(float_flag_denormal); 4731 } 4732 return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000)); 4733 } 4734 } 4735 if (bExp == 0x7FFF) 4736 { 4737 if ((Bit64u) (bSig<<1)) 4738 return propagateFloatx80NaN(a, b); 4739 4740 if (aExp == 0) { 4741 if (aSig == 0) goto invalid; 4742 float_raise(float_flag_denormal); 4743 } 4744 4745 return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000)); 4746 } 4747 if (aExp == 0) { 4748 if (aSig == 0) { 4749 if (bSig && (bExp == 0)) float_raise(float_flag_denormal); 4750 return packFloatx80(zSign, 0, 0); 4751 } 4752 float_raise(float_flag_denormal); 4753 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 4754 } 4755 if (bExp == 0) { 4756 if (bSig == 0) return packFloatx80(zSign, 0, 0); 4757 float_raise(float_flag_denormal); 4758 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 4759 } 4760 4761 float_raise(float_flag_inexact); 4762 4763 if (aSign && aExp >= 0x3FFF) 4764 return a; 4765 4766 if (aExp >= 0x3FFC) // big argument 4767 { 4768 return fyl2x(floatx80_add(a, floatx80_one), b); 4769 } 4770 4771 // handle tiny argument 4772 if (aExp < EXP_BIAS-70) 4773 { 4774 // first order approximation, return (a*b)/ln(2) 4775 Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; 4776 4777 mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); 4778 if (0 < (Bit64s) zSig0) { 4779 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); 4780 --zExp; 4781 } 4782 4783 zExp = zExp + bExp - 0x3FFE; 4784 mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); 4785 if (0 < (Bit64s) zSig0) { 4786 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); 4787 --zExp; 4788 } 4789 4790 return 4791 roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1); 4792 } 4793 4794 /* ******************************** */ 4795 /* using float128 for approximation */ 4796 /* ******************************** */ 4797 4798 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); 4799 x = packFloat128(aSign, aExp, zSig0, zSig1); 4800 x = poly_l2p1(x); 4801 return floatx80_mul128(b, x); 4802} 4803 4804/* ----- end from original file "fyl2x.cc" ----- */ 4805 4806/* ----- from original file "f2xm1.cc" ----- */ 4807 4808/* 4809 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4810*/ 4811 4812static const floatx80 floatx80_negone = packFloatx80m(1, 0x3fff, LIT64(0x8000000000000000)); 4813static const floatx80 floatx80_neghalf = packFloatx80m(1, 0x3ffe, LIT64(0x8000000000000000)); 4814static const float128 float128_ln2 = 4815 packFloat2x128m(LIT64(0x3ffe62e42fefa39e), LIT64(0xf35793c7673007e6)); 4816 4817#define LN2_SIG LIT64(0xb17217f7d1cf79ac) 4818 4819#define EXP_ARR_SIZE 15 4820 4821static float128 exp_arr[EXP_ARR_SIZE] = 4822{ 4823 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ 4824 PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /* 2 */ 4825 PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /* 3 */ 4826 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ 4827 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ 4828 PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ 4829 PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ 4830 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ 4831 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ 4832 PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ 4833 PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ 4834 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ 4835 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ 4836 PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */ 4837 PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0) /* 15 */ 4838}; 4839 4840/* required -1 < x < 1 */ 4841LOCALFUNC float128 poly_exp(float128 x) 4842{ 4843/* 4844 // 2 3 4 5 6 7 8 9 4845 // x x x x x x x x x 4846 // e - 1 ~ x + --- + --- + --- + --- + --- + --- + --- + --- + ... 4847 // 2! 3! 4! 5! 6! 7! 8! 9! 4848 // 4849 // 2 3 4 5 6 7 8 4850 // x x x x x x x x 4851 // = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ] 4852 // 2! 3! 4! 5! 6! 7! 8! 9! 4853 // 4854 // 8 8 4855 // -- 2k -- 2k+1 4856 // p(x) = > C * x q(x) = > C * x 4857 // -- 2k -- 2k+1 4858 // k=0 k=0 4859 // 4860 // x 4861 // e - 1 ~ x * [ p(x) + x * q(x) ] 4862 // 4863*/ 4864 float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE); 4865 return float128_mul(t, x); 4866} 4867 4868// ================================================= 4869// x 4870// FX2P1 Compute 2 - 1 4871// ================================================= 4872 4873// 4874// Uses the following identities: 4875// 4876// 1. ---------------------------------------------------------- 4877// x x*ln(2) 4878// 2 = e 4879// 4880// 2. ---------------------------------------------------------- 4881// 2 3 4 5 n 4882// x x x x x x x 4883// e = 1 + --- + --- + --- + --- + --- + ... + --- + ... 4884// 1! 2! 3! 4! 5! n! 4885// 4886 4887LOCALFUNC floatx80 f2xm1(floatx80 a) 4888{ 4889 Bit64u zSig0, zSig1; 4890 float128 x; 4891 4892#if 0 4893 // handle unsupported extended double-precision floating encodings 4894 if (floatx80_is_unsupported(a)) 4895 { 4896 float_raise(float_flag_invalid); 4897 return floatx80_default_nan; 4898 } 4899#endif 4900 4901 Bit64u aSig = extractFloatx80Frac(a); 4902 Bit32s aExp = extractFloatx80Exp(a); 4903 int aSign = extractFloatx80Sign(a); 4904 4905 if (aExp == 0x7FFF) { 4906 if ((Bit64u) (aSig<<1)) 4907 return propagateOneFloatx80NaN(&a); 4908 4909 return (aSign) ? floatx80_negone : a; 4910 } 4911 4912 if (aExp == 0) { 4913 if (aSig == 0) return a; 4914 float_raise(float_flag_denormal | float_flag_inexact); 4915 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 4916 4917 tiny_argument: 4918 mul64To128(aSig, LN2_SIG, &zSig0, &zSig1); 4919 if (0 < (Bit64s) zSig0) { 4920 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); 4921 --aExp; 4922 } 4923 return 4924 roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1); 4925 } 4926 4927 float_raise(float_flag_inexact); 4928 4929 if (aExp < 0x3FFF) 4930 { 4931 if (aExp < EXP_BIAS-68) 4932 goto tiny_argument; 4933 4934 /* ******************************** */ 4935 /* using float128 for approximation */ 4936 /* ******************************** */ 4937 4938 x = floatx80_to_float128(a); 4939 x = float128_mul(x, float128_ln2); 4940 x = poly_exp(x); 4941 return float128_to_floatx80(x); 4942 } 4943 else 4944 { 4945 if ((a.high == 0xBFFF) && (! (aSig<<1))) 4946 return floatx80_neghalf; 4947 4948 return a; 4949 } 4950} 4951 4952/* ----- end from original file "f2xm1.cc" ----- */ 4953 4954/* ----- from original file "fsincos.cc" ----- */ 4955 4956/* 4957 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 4958*/ 4959 4960#define USE_estimateDiv128To64 4961 4962#if 0 4963static const floatx80 floatx80_one = packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000)); 4964#endif 4965 4966/* reduce trigonometric function argument using 128-bit precision 4967 M_PI approximation */ 4968LOCALFUNC Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1) 4969{ 4970 Bit64u term0, term1, term2; 4971 Bit64u aSig1 = 0; 4972 Bit64u q; 4973 4974 shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0); 4975 q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI); 4976 mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2); 4977 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0); 4978 while ((Bit64s)(*zSig1) < 0) { 4979 --q; 4980 add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2); 4981 } 4982 *zSig1 = term2; 4983 return q; 4984} 4985 4986LOCALFUNC int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1) 4987{ 4988 Bit64u term0, term1, q = 0; 4989 4990 if (expDiff < 0) { 4991 shift128Right(*aSig0, 0, 1, aSig0, aSig1); 4992 expDiff = 0; 4993 } 4994 if (expDiff > 0) { 4995 q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1); 4996 } 4997 else { 4998 if (FLOAT_PI_HI <= *aSig0) { 4999 *aSig0 -= FLOAT_PI_HI; 5000 q = 1; 5001 } 5002 } 5003 5004 shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); 5005 if (! lt128(*aSig0, *aSig1, term0, term1)) 5006 { 5007 int lt = lt128(term0, term1, *aSig0, *aSig1); 5008 int eq = eq128(*aSig0, *aSig1, term0, term1); 5009 5010 if ((eq && (q & 1)) || lt) { 5011 *zSign = !*zSign; 5012 ++q; 5013 } 5014 if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1); 5015 } 5016 5017 return (int)(q & 3); 5018} 5019 5020#define SIN_ARR_SIZE 9 5021#define COS_ARR_SIZE 9 5022 5023static float128 sin_arr[SIN_ARR_SIZE] = 5024{ 5025 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ 5026 PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */ 5027 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ 5028 PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ 5029 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ 5030 PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ 5031 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ 5032 PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */ 5033 PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00) /* 17 */ 5034}; 5035 5036static float128 cos_arr[COS_ARR_SIZE] = 5037{ 5038 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */ 5039 PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */ 5040 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ 5041 PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ 5042 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ 5043 PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ 5044 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ 5045 PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */ 5046 PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0) /* 16 */ 5047}; 5048 5049/* 0 <= x <= pi/4 */ 5050LOCALINLINEFUNC float128 poly_sin(float128 x) 5051{ 5052 // 3 5 7 9 11 13 15 5053 // x x x x x x x 5054 // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- = 5055 // 3! 5! 7! 9! 11! 13! 15! 5056 // 5057 // 2 4 6 8 10 12 14 5058 // x x x x x x x 5059 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] = 5060 // 3! 5! 7! 9! 11! 13! 15! 5061 // 5062 // 3 3 5063 // -- 4k -- 4k+2 5064 // p(x) = > C * x > 0 q(x) = > C * x < 0 5065 // -- 2k -- 2k+1 5066 // k=0 k=0 5067 // 5068 // 2 5069 // sin(x) ~ x * [ p(x) + x * q(x) ] 5070 // 5071 5072 return OddPoly(x, sin_arr, SIN_ARR_SIZE); 5073} 5074 5075/* 0 <= x <= pi/4 */ 5076LOCALINLINEFUNC float128 poly_cos(float128 x) 5077{ 5078 // 2 4 6 8 10 12 14 5079 // x x x x x x x 5080 // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ---- 5081 // 2! 4! 6! 8! 10! 12! 14! 5082 // 5083 // 3 3 5084 // -- 4k -- 4k+2 5085 // p(x) = > C * x > 0 q(x) = > C * x < 0 5086 // -- 2k -- 2k+1 5087 // k=0 k=0 5088 // 5089 // 2 5090 // cos(x) ~ [ p(x) + x * q(x) ] 5091 // 5092 5093 return EvenPoly(x, cos_arr, COS_ARR_SIZE); 5094} 5095 5096LOCALINLINEPROC sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) 5097{ 5098 if (sin_a) *sin_a = a; 5099 if (cos_a) *cos_a = a; 5100} 5101 5102LOCALINLINEPROC sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) 5103{ 5104 if (sin_a) *sin_a = a; 5105 if (cos_a) *cos_a = floatx80_one; 5106} 5107 5108LOCALFUNC floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient) 5109{ 5110 floatx80 result; 5111 5112 if (quotient & 0x1) { 5113 r = poly_cos(r); 5114 neg = 0; 5115 } else { 5116 r = poly_sin(r); 5117 } 5118 5119 result = float128_to_floatx80(r); 5120 if (quotient & 0x2) 5121 neg = ! neg; 5122 5123 if (neg) 5124 floatx80_chs(&result); 5125 5126 return result; 5127} 5128 5129// ================================================= 5130// FSINCOS Compute sin(x) and cos(x) 5131// ================================================= 5132 5133// 5134// Uses the following identities: 5135// ---------------------------------------------------------- 5136// 5137// sin(-x) = -sin(x) 5138// cos(-x) = cos(x) 5139// 5140// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) 5141// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) 5142// 5143// sin(x+ pi/2) = cos(x) 5144// sin(x+ pi) = -sin(x) 5145// sin(x+3pi/2) = -cos(x) 5146// sin(x+2pi) = sin(x) 5147// 5148 5149LOCALFUNC int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a) 5150{ 5151 float128 r; 5152 Bit64u aSig0, aSig1 = 0; 5153 Bit32s aExp, zExp, expDiff; 5154 int aSign, zSign; 5155 int q = 0; 5156 5157#if 0 5158 // handle unsupported extended double-precision floating encodings 5159 if (floatx80_is_unsupported(a)) 5160 { 5161 goto invalid; 5162 } 5163#endif 5164 5165 aSig0 = extractFloatx80Frac(a); 5166 aExp = extractFloatx80Exp(a); 5167 aSign = extractFloatx80Sign(a); 5168 5169 /* invalid argument */ 5170 if (aExp == 0x7FFF) { 5171 if ((Bit64u) (aSig0<<1)) { 5172 sincos_invalid(sin_a, cos_a, propagateOneFloatx80NaN(&a)); 5173 return 0; 5174 } 5175 5176#if 0 5177 invalid: 5178#endif 5179 float_raise(float_flag_invalid); 5180 sincos_invalid(sin_a, cos_a, floatx80_default_nan); 5181 return 0; 5182 } 5183 5184 if (aExp == 0) { 5185 if (aSig0 == 0) { 5186 sincos_tiny_argument(sin_a, cos_a, a); 5187 return 0; 5188 } 5189 5190 float_raise(float_flag_denormal); 5191 5192 /* handle pseudo denormals */ 5193 if (! (aSig0 & LIT64(0x8000000000000000))) 5194 { 5195 float_raise(float_flag_inexact); 5196 if (sin_a) 5197 float_raise(float_flag_underflow); 5198 sincos_tiny_argument(sin_a, cos_a, a); 5199 return 0; 5200 } 5201 5202 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); 5203 } 5204 5205 zSign = aSign; 5206 zExp = EXP_BIAS; 5207 expDiff = aExp - zExp; 5208 5209 /* argument is out-of-range */ 5210 if (expDiff >= 63) 5211 return -1; 5212 5213 float_raise(float_flag_inexact); 5214 5215 if (expDiff < -1) { // doesn't require reduction 5216 if (expDiff <= -68) { 5217 a = packFloatx80(aSign, aExp, aSig0); 5218 sincos_tiny_argument(sin_a, cos_a, a); 5219 return 0; 5220 } 5221 zExp = aExp; 5222 } 5223 else { 5224 q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); 5225 } 5226 5227 /* **************************** */ 5228 /* argument reduction completed */ 5229 /* **************************** */ 5230 5231 /* using float128 for approximation */ 5232 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); 5233 5234 if (aSign) q = -q; 5235 if (sin_a) *sin_a = sincos_approximation(zSign, r, q); 5236 if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1); 5237 5238 return 0; 5239} 5240 5241// ================================================= 5242// FPTAN Compute tan(x) 5243// ================================================= 5244 5245// 5246// Uses the following identities: 5247// 5248// 1. ---------------------------------------------------------- 5249// 5250// sin(-x) = -sin(x) 5251// cos(-x) = cos(x) 5252// 5253// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) 5254// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) 5255// 5256// sin(x+ pi/2) = cos(x) 5257// sin(x+ pi) = -sin(x) 5258// sin(x+3pi/2) = -cos(x) 5259// sin(x+2pi) = sin(x) 5260// 5261// 2. ---------------------------------------------------------- 5262// 5263// sin(x) 5264// tan(x) = ------ 5265// cos(x) 5266// 5267 5268LOCALFUNC int ftan(floatx80 *a) 5269{ 5270 float128 r; 5271 float128 sin_r; 5272 float128 cos_r; 5273 Bit64u aSig0, aSig1 = 0; 5274 Bit32s aExp, zExp, expDiff; 5275 int aSign, zSign; 5276 int q = 0; 5277 5278#if 0 5279 // handle unsupported extended double-precision floating encodings 5280 if (floatx80_is_unsupported(*a)) 5281 { 5282 goto invalid; 5283 } 5284#endif 5285 5286 aSig0 = extractFloatx80Frac(*a); 5287 aExp = extractFloatx80Exp(*a); 5288 aSign = extractFloatx80Sign(*a); 5289 5290 /* invalid argument */ 5291 if (aExp == 0x7FFF) { 5292 if ((Bit64u) (aSig0<<1)) 5293 { 5294 *a = propagateOneFloatx80NaN(a); 5295 return 0; 5296 } 5297 5298#if 0 5299 invalid: 5300#endif 5301 float_raise(float_flag_invalid); 5302 *a = floatx80_default_nan; 5303 return 0; 5304 } 5305 5306 if (aExp == 0) { 5307 if (aSig0 == 0) return 0; 5308 float_raise(float_flag_denormal); 5309 /* handle pseudo denormals */ 5310 if (! (aSig0 & LIT64(0x8000000000000000))) 5311 { 5312 float_raise(float_flag_inexact | float_flag_underflow); 5313 return 0; 5314 } 5315 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); 5316 } 5317 5318 zSign = aSign; 5319 zExp = EXP_BIAS; 5320 expDiff = aExp - zExp; 5321 5322 /* argument is out-of-range */ 5323 if (expDiff >= 63) 5324 return -1; 5325 5326 float_raise(float_flag_inexact); 5327 5328 if (expDiff < -1) { // doesn't require reduction 5329 if (expDiff <= -68) { 5330 *a = packFloatx80(aSign, aExp, aSig0); 5331 return 0; 5332 } 5333 zExp = aExp; 5334 } 5335 else { 5336 q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1); 5337 } 5338 5339 /* **************************** */ 5340 /* argument reduction completed */ 5341 /* **************************** */ 5342 5343 /* using float128 for approximation */ 5344 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); 5345 5346 sin_r = poly_sin(r); 5347 cos_r = poly_cos(r); 5348 5349 if (q & 0x1) { 5350 r = float128_div(cos_r, sin_r); 5351 zSign = ! zSign; 5352 } else { 5353 r = float128_div(sin_r, cos_r); 5354 } 5355 5356 *a = float128_to_floatx80(r); 5357 if (zSign) 5358 floatx80_chs(a); 5359 5360 return 0; 5361} 5362 5363/* ----- end from original file "fsincos.cc" ----- */ 5364 5365/* ----- from original file "fpatan.cc" ----- */ 5366 5367/* 5368 ["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.] 5369*/ 5370 5371#define FPATAN_ARR_SIZE 11 5372 5373#if 0 5374static const float128 float128_one = 5375 packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000)); 5376#endif 5377static const float128 float128_sqrt3 = 5378 packFloat2x128m(LIT64(0x3fffbb67ae8584ca), LIT64(0xa73b25742d7078b8)); 5379static const floatx80 floatx80_pi = 5380 packFloatx80m(0, 0x4000, LIT64(0xc90fdaa22168c235)); 5381 5382static const float128 float128_pi2 = 5383 packFloat2x128m(LIT64(0x3fff921fb54442d1), LIT64(0x8469898CC5170416)); 5384static const float128 float128_pi4 = 5385 packFloat2x128m(LIT64(0x3ffe921fb54442d1), LIT64(0x8469898CC5170416)); 5386static const float128 float128_pi6 = 5387 packFloat2x128m(LIT64(0x3ffe0c152382d736), LIT64(0x58465BB32E0F580F)); 5388 5389static float128 atan_arr[FPATAN_ARR_SIZE] = 5390{ 5391 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ 5392 PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /* 3 */ 5393 PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */ 5394 PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /* 7 */ 5395 PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */ 5396 PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */ 5397 PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */ 5398 PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */ 5399 PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */ 5400 PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */ 5401 PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618) /* 21 */ 5402}; 5403 5404/* |x| < 1/4 */ 5405LOCALFUNC float128 poly_atan(float128 x1) 5406{ 5407/* 5408 // 3 5 7 9 11 13 15 17 5409 // x x x x x x x x 5410 // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ---- 5411 // 3 5 7 9 11 13 15 17 5412 // 5413 // 2 4 6 8 10 12 14 16 5414 // x x x x x x x x 5415 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ] 5416 // 3 5 7 9 11 13 15 17 5417 // 5418 // 5 5 5419 // -- 4k -- 4k+2 5420 // p(x) = > C * x q(x) = > C * x 5421 // -- 2k -- 2k+1 5422 // k=0 k=0 5423 // 5424 // 2 5425 // atan(x) ~ x * [ p(x) + x * q(x) ] 5426 // 5427*/ 5428 return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE); 5429} 5430 5431// ================================================= 5432// FPATAN Compute y * log (x) 5433// 2 5434// ================================================= 5435 5436// 5437// Uses the following identities: 5438// 5439// 1. ---------------------------------------------------------- 5440// 5441// atan(-x) = -atan(x) 5442// 5443// 2. ---------------------------------------------------------- 5444// 5445// x + y 5446// atan(x) + atan(y) = atan -------, xy < 1 5447// 1-xy 5448// 5449// x + y 5450// atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1 5451// 1-xy 5452// 5453// x + y 5454// atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1 5455// 1-xy 5456// 5457// 3. ---------------------------------------------------------- 5458// 5459// atan(x) = atan(INF) + atan(- 1/x) 5460// 5461// x-1 5462// atan(x) = PI/4 + atan( ----- ) 5463// x+1 5464// 5465// x * sqrt(3) - 1 5466// atan(x) = PI/6 + atan( ----------------- ) 5467// x + sqrt(3) 5468// 5469// 4. ---------------------------------------------------------- 5470// 3 5 7 9 2n+1 5471// x x x x n x 5472// atan(x) = x - --- + --- - --- + --- - ... + (-1) ------ + ... 5473// 3 5 7 9 2n+1 5474// 5475 5476LOCALFUNC floatx80 fpatan(floatx80 a, floatx80 b) 5477{ 5478 float128 a128; 5479 float128 b128; 5480 float128 x; 5481 int swap, add_pi6, add_pi4; 5482 Bit32s xExp; 5483 floatx80 result; 5484 int rSign; 5485 5486 // handle unsupported extended double-precision floating encodings 5487#if 0 5488 if (floatx80_is_unsupported(a)) { 5489 float_raise(float_flag_invalid); 5490 return floatx80_default_nan; 5491 } 5492#endif 5493 5494 Bit64u aSig = extractFloatx80Frac(a); 5495 Bit32s aExp = extractFloatx80Exp(a); 5496 int aSign = extractFloatx80Sign(a); 5497 Bit64u bSig = extractFloatx80Frac(b); 5498 Bit32s bExp = extractFloatx80Exp(b); 5499 int bSign = extractFloatx80Sign(b); 5500 5501 int zSign = aSign ^ bSign; 5502 5503 if (bExp == 0x7FFF) 5504 { 5505 if ((Bit64u) (bSig<<1)) 5506 return propagateFloatx80NaN(a, b); 5507 5508 if (aExp == 0x7FFF) { 5509 if ((Bit64u) (aSig<<1)) 5510 return propagateFloatx80NaN(a, b); 5511 5512 if (aSign) { /* return 3PI/4 */ 5513 return roundAndPackFloatx80(80, bSign, 5514 FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO); 5515 } 5516 else { /* return PI/4 */ 5517 return roundAndPackFloatx80(80, bSign, 5518 FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO); 5519 } 5520 } 5521 5522 if (aSig && (aExp == 0)) 5523 float_raise(float_flag_denormal); 5524 5525 /* return PI/2 */ 5526 return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO); 5527 } 5528 if (aExp == 0x7FFF) 5529 { 5530 if ((Bit64u) (aSig<<1)) 5531 return propagateFloatx80NaN(a, b); 5532 5533 if (bSig && (bExp == 0)) 5534 float_raise(float_flag_denormal); 5535 5536return_PI_or_ZERO: 5537 5538 if (aSign) { /* return PI */ 5539 return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO); 5540 } else { /* return 0 */ 5541 return packFloatx80(bSign, 0, 0); 5542 } 5543 } 5544 if (bExp == 0) 5545 { 5546 if (bSig == 0) { 5547 if (aSig && (aExp == 0)) float_raise(float_flag_denormal); 5548 goto return_PI_or_ZERO; 5549 } 5550 5551 float_raise(float_flag_denormal); 5552 normalizeFloatx80Subnormal(bSig, &bExp, &bSig); 5553 } 5554 if (aExp == 0) 5555 { 5556 if (aSig == 0) /* return PI/2 */ 5557 return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO); 5558 5559 float_raise(float_flag_denormal); 5560 normalizeFloatx80Subnormal(aSig, &aExp, &aSig); 5561 } 5562 5563 float_raise(float_flag_inexact); 5564 5565 /* |a| = |b| ==> return PI/4 */ 5566 if (aSig == bSig && aExp == bExp) 5567 return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO); 5568 5569 /* ******************************** */ 5570 /* using float128 for approximation */ 5571 /* ******************************** */ 5572 5573 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0); 5574 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0); 5575 swap = 0; 5576 add_pi6 = 0; 5577 add_pi4 = 0; 5578 5579 if (aExp > bExp || (aExp == bExp && aSig > bSig)) 5580 { 5581 x = float128_div(b128, a128); 5582 } 5583 else { 5584 x = float128_div(a128, b128); 5585 swap = 1; 5586 } 5587 5588 xExp = extractFloat128Exp(x); 5589 5590 if (xExp <= EXP_BIAS-40) 5591 goto approximation_completed; 5592 5593 if (x.high >= LIT64(0x3ffe800000000000)) // 3/4 < x < 1 5594 { 5595 /* 5596 arctan(x) = arctan((x-1)/(x+1)) + pi/4 5597 */ 5598 float128 t1 = float128_sub(x, float128_one); 5599 float128 t2 = float128_add(x, float128_one); 5600 x = float128_div(t1, t2); 5601 add_pi4 = 1; 5602 } 5603 else 5604 { 5605 /* argument correction */ 5606 if (xExp >= 0x3FFD) // 1/4 < x < 3/4 5607 { 5608 /* 5609 arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6 5610 */ 5611 float128 t1 = float128_mul(x, float128_sqrt3); 5612 float128 t2 = float128_add(x, float128_sqrt3); 5613 x = float128_sub(t1, float128_one); 5614 x = float128_div(x, t2); 5615 add_pi6 = 1; 5616 } 5617 } 5618 5619 x = poly_atan(x); 5620 if (add_pi6) x = float128_add(x, float128_pi6); 5621 if (add_pi4) x = float128_add(x, float128_pi4); 5622 5623approximation_completed: 5624 if (swap) x = float128_sub(float128_pi2, x); 5625 result = float128_to_floatx80(x); 5626 if (zSign) floatx80_chs(&result); 5627 rSign = extractFloatx80Sign(result); 5628 if (!bSign && rSign) 5629 return floatx80_add(result, floatx80_pi); 5630 if (bSign && !rSign) 5631 return floatx80_sub(result, floatx80_pi); 5632 return result; 5633} 5634 5635/* ----- end from original file "fpatan.cc" ----- */ 5636 5637/* end soft float stuff */ 5638 5639typedef floatx80 myfpr; 5640 5641LOCALPROC myfp_FromExtendedFormat(myfpr *r, ui4r v2, ui5r v1, ui5r v0) 5642{ 5643 r->high = v2; 5644 r->low = (((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF); 5645} 5646 5647LOCALPROC myfp_ToExtendedFormat(myfpr *dd, ui4r *v2, ui5r *v1, ui5r *v0) 5648{ 5649 *v0 = ((ui6b) dd->low) & 0xFFFFFFFF; 5650 *v1 = (((ui6b) dd->low) >> 32) & 0xFFFFFFFF; 5651 *v2 = dd->high; 5652} 5653 5654LOCALPROC myfp_FromDoubleFormat(myfpr *r, ui5r v1, ui5r v0) 5655{ 5656 float64 t = (float64)((((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF)); 5657 5658 *r = float64_to_floatx80(t); 5659} 5660 5661LOCALPROC myfp_ToDoubleFormat(myfpr *dd, ui5r *v1, ui5r *v0) 5662{ 5663 float64 t = floatx80_to_float64(*dd); 5664 5665 *v0 = ((ui6b) t) & 0xFFFFFFFF; 5666 *v1 = (((ui6b) t) >> 32) & 0xFFFFFFFF; 5667} 5668 5669LOCALPROC myfp_FromSingleFormat(myfpr *r, ui5r x) 5670{ 5671 *r = float32_to_floatx80(x); 5672} 5673 5674LOCALFUNC ui5r myfp_ToSingleFormat(myfpr *ff) 5675{ 5676 return floatx80_to_float32(*ff); 5677} 5678 5679LOCALPROC myfp_FromLong(myfpr *r, ui5r x) 5680{ 5681 *r = int32_to_floatx80( x ); 5682} 5683 5684LOCALFUNC ui5r myfp_ToLong(myfpr *x) 5685{ 5686 return floatx80_to_int32( *x ); 5687} 5688 5689LOCALFUNC blnr myfp_IsNan(myfpr *x) 5690{ 5691 return floatx80_is_nan(*x); 5692} 5693 5694LOCALFUNC blnr myfp_IsInf(myfpr *x) 5695{ 5696 return ( ( x->high & 0x7FFF ) == 0x7FFF ) && (0 == ((ui6b) ( x->low<<1 ))); 5697} 5698 5699LOCALFUNC blnr myfp_IsZero(myfpr *x) 5700{ 5701 return ( ( x->high & 0x7FFF ) == 0x0000 ) && (0 == ((ui6b) ( x->low<<1 ))); 5702} 5703 5704LOCALFUNC blnr myfp_IsNeg(myfpr *x) 5705{ 5706 return ( ( x->high & 0x8000 ) != 0x0000 ); 5707} 5708 5709LOCALPROC myfp_Add(myfpr *r, const myfpr *a, const myfpr *b) 5710{ 5711 *r = floatx80_add(*a, *b); 5712} 5713 5714LOCALPROC myfp_Sub(myfpr *r, const myfpr *a, const myfpr *b) 5715{ 5716 *r = floatx80_sub(*a, *b); 5717} 5718 5719LOCALPROC myfp_Mul(myfpr *r, const myfpr *a, const myfpr *b) 5720{ 5721 *r = floatx80_mul(*a, *b); 5722} 5723 5724LOCALPROC myfp_Div(myfpr *r, const myfpr *a, const myfpr *b) 5725{ 5726 *r = floatx80_div(*a, *b); 5727} 5728 5729LOCALPROC myfp_Rem(myfpr *r, const myfpr *a, const myfpr *b) 5730{ 5731 *r = floatx80_rem(*a, *b); 5732} 5733 5734LOCALPROC myfp_Sqrt(myfpr *r, myfpr *x) 5735{ 5736 *r = floatx80_sqrt(*x); 5737} 5738 5739LOCALPROC myfp_Mod(myfpr *r, myfpr *a, myfpr *b) 5740{ 5741 Bit64u q; 5742 *r = floatx80_remainder(*a, *b, &q); 5743 /* should save low byte of q */ 5744} 5745 5746LOCALPROC myfp_Scale(myfpr *r, myfpr *a, myfpr *b) 5747{ 5748 *r = floatx80_scale(*a, *b); 5749} 5750 5751LOCALPROC myfp_GetMan(myfpr *r, myfpr *x) 5752{ 5753 *r = *x; 5754 (void) floatx80_extract(r); 5755} 5756 5757LOCALPROC myfp_GetExp(myfpr *r, myfpr *x) 5758{ 5759 floatx80 t0 = *x; 5760 *r = floatx80_extract(&t0); 5761} 5762 5763LOCALPROC myfp_floor(myfpr *r, myfpr *x) 5764{ 5765 si3r SaveRoundingMode = float_rounding_mode; 5766 5767 float_rounding_mode = float_round_down; 5768 *r = floatx80_round_to_int(*x); 5769 float_rounding_mode = SaveRoundingMode; 5770} 5771 5772LOCALPROC myfp_IntRZ(myfpr *r, myfpr *x) 5773{ 5774 si3r SaveRoundingMode = float_rounding_mode; 5775 5776 float_rounding_mode = float_round_to_zero; 5777 *r = floatx80_round_to_int(*x); 5778 float_rounding_mode = SaveRoundingMode; 5779} 5780 5781LOCALPROC myfp_Int(myfpr *r, myfpr *x) 5782{ 5783 *r = floatx80_round_to_int(*x); 5784} 5785 5786LOCALPROC myfp_RoundToSingle(myfpr *r, myfpr *x) 5787{ 5788 float32 t0 = floatx80_to_float32(*x); 5789 5790 *r = float32_to_floatx80(t0); 5791} 5792 5793LOCALPROC myfp_RoundToDouble(myfpr *r, myfpr *x) 5794{ 5795 float64 t0 = floatx80_to_float64(*x); 5796 5797 *r = float64_to_floatx80(t0); 5798} 5799 5800LOCALPROC myfp_Abs(myfpr *r, myfpr *x) 5801{ 5802 *r = *x; 5803 r->high &= 0x7FFF; 5804} 5805 5806LOCALPROC myfp_Neg(myfpr *r, myfpr *x) 5807{ 5808 *r = *x; 5809 r->high ^= 0x8000; 5810} 5811 5812LOCALPROC myfp_TwoToX(myfpr *r, myfpr *x) 5813{ 5814 floatx80 t2; 5815 floatx80 t3; 5816 floatx80 t4; 5817 floatx80 t5; 5818 myfp_floor(&t2, x); 5819 t3 = floatx80_sub(*x, t2); 5820 t4 = f2xm1(t3); 5821 t5 = floatx80_add(t4, floatx80_one); 5822 *r = floatx80_scale(t5, t2); 5823} 5824 5825LOCALPROC myfp_TenToX(myfpr *r, myfpr *x) 5826{ 5827 floatx80 t1; 5828 const floatx80 t = /* 1.0 / log(2.0) */ 5829 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc)); 5830 const floatx80 t2 = /* log(10.0) */ 5831 packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17)); 5832 t1 = floatx80_mul(floatx80_mul(*x, t), t2); 5833 myfp_TwoToX(r, &t1); 5834} 5835 5836LOCALPROC myfp_EToX(myfpr *r, myfpr *x) 5837{ 5838 floatx80 t1; 5839 const floatx80 t = /* 1.0 / log(2.0) */ 5840 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc)); 5841 t1 = floatx80_mul(*x, t); 5842 myfp_TwoToX(r, &t1); 5843} 5844 5845LOCALPROC myfp_EToXM1(myfpr *r, myfpr *x) 5846{ 5847 floatx80 t1; 5848 floatx80 t2; 5849 floatx80 t3; 5850 floatx80 t4; 5851 floatx80 t5; 5852 floatx80 t6; 5853 const floatx80 t = /* 1.0 / log(2.0) */ 5854 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc)); 5855 t1 = floatx80_mul(*x, t); 5856 myfp_floor(&t2, &t1); 5857 t3 = floatx80_sub(t1, t2); 5858 t4 = f2xm1(t3); 5859 if (myfp_IsZero(&t2)) { 5860 *r = t4; 5861 } else { 5862 t5 = floatx80_add(t4, floatx80_one); 5863 t6 = floatx80_scale(t5, t2); 5864 *r = floatx80_sub(t6, floatx80_one); 5865 } 5866} 5867 5868LOCALPROC myfp_Log2(myfpr *r, myfpr *x) 5869{ 5870 *r = fyl2x(*x, floatx80_one); 5871} 5872 5873LOCALPROC myfp_LogN(myfpr *r, myfpr *x) 5874{ 5875 const floatx80 t = /* log(2.0) */ 5876 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac)); 5877 *r = fyl2x(*x, t); 5878} 5879 5880LOCALPROC myfp_Log10(myfpr *r, myfpr *x) 5881{ 5882 const floatx80 t = /* log10(2.0) = ln(2) / ln(10), unknown accuracy */ 5883 packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798)); 5884 *r = fyl2x(*x, t); 5885} 5886 5887LOCALPROC myfp_LogNP1(myfpr *r, myfpr *x) 5888{ 5889 const floatx80 t = /* log(2.0) */ 5890 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac)); 5891 *r = fyl2xp1(*x, t); 5892} 5893 5894LOCALPROC myfp_Sin(myfpr *r, myfpr *x) 5895{ 5896 (void) fsincos(*x, r, 0); 5897} 5898 5899LOCALPROC myfp_Cos(myfpr *r, myfpr *x) 5900{ 5901 (void) fsincos(*x, 0, r); 5902} 5903 5904LOCALPROC myfp_Tan(myfpr *r, myfpr *x) 5905{ 5906 *r = *x; 5907 (void) ftan(r); 5908} 5909 5910LOCALPROC myfp_ATan(myfpr *r, myfpr *x) 5911{ 5912 *r = fpatan(floatx80_one, *x); 5913} 5914 5915LOCALPROC myfp_ASin(myfpr *r, myfpr *x) 5916{ 5917 floatx80 x2 = floatx80_mul(*x, *x); 5918 floatx80 mx2 = floatx80_sub(floatx80_one, x2); 5919 floatx80 cx = floatx80_sqrt(mx2); 5920 5921 *r = fpatan(cx, *x); 5922} 5923 5924LOCALPROC myfp_ACos(myfpr *r, myfpr *x) 5925{ 5926 floatx80 x2 = floatx80_mul(*x, *x); 5927 floatx80 mx2 = floatx80_sub(floatx80_one, x2); 5928 floatx80 cx = floatx80_sqrt(mx2); 5929 5930 *r = fpatan(*x, cx); 5931} 5932 5933static const floatx80 floatx80_zero = 5934 packFloatx80m(0, 0x0000, LIT64(0x0000000000000000)); 5935 5936static const floatx80 floatx80_Two = 5937 packFloatx80m(0, 0x4000, LIT64(0x8000000000000000)); 5938 5939static const floatx80 floatx80_Ten = 5940 packFloatx80m(0, 0x4002, LIT64(0xa000000000000000)); 5941 5942LOCALPROC myfp_Sinh(myfpr *r, myfpr *x) 5943{ 5944 myfpr ex; 5945 myfpr nx; 5946 myfpr enx; 5947 myfpr t1; 5948 5949 myfp_EToX(&ex, x); 5950 myfp_Neg(&nx, x); 5951 myfp_EToX(&enx, &nx); 5952 myfp_Sub(&t1, &ex, &enx); 5953 myfp_Div(r, &t1, &floatx80_Two); 5954} 5955 5956LOCALPROC myfp_Cosh(myfpr *r, myfpr *x) 5957{ 5958 myfpr ex; 5959 myfpr nx; 5960 myfpr enx; 5961 myfpr t1; 5962 5963 myfp_EToX(&ex, x); 5964 myfp_Neg(&nx, x); 5965 myfp_EToX(&enx, &nx); 5966 myfp_Add(&t1, &ex, &enx); 5967 myfp_Div(r, &t1, &floatx80_Two); 5968} 5969 5970LOCALPROC myfp_Tanh(myfpr *r, myfpr *x) 5971{ 5972 myfpr x2; 5973 myfpr ex2; 5974 myfpr ex2m1; 5975 myfpr ex2p1; 5976 5977 myfp_Mul(&x2, x, &floatx80_Two); 5978 myfp_EToX(&ex2, &x2); 5979 myfp_Sub(&ex2m1, &ex2, &floatx80_one); 5980 myfp_Add(&ex2p1, &ex2, &floatx80_one); 5981 myfp_Div(r, &ex2m1, &ex2p1); 5982} 5983 5984LOCALPROC myfp_ATanh(myfpr *r, myfpr *x) 5985{ 5986 myfpr onepx; 5987 myfpr onemx; 5988 myfpr dv; 5989 myfpr ldv; 5990 5991 myfp_Add(&onepx, x, &floatx80_one); 5992 myfp_Sub(&onemx, x, &floatx80_one); 5993 myfp_Div(&dv, &onepx, &onemx); 5994 myfp_LogN(&ldv, &dv); 5995 myfp_Div(r, &ldv, &floatx80_Two); 5996} 5997 5998LOCALPROC myfp_SinCos(myfpr *r_sin, myfpr *r_cos, myfpr *source) 5999{ 6000 (void) fsincos(*source, r_sin, r_cos); 6001} 6002 6003LOCALFUNC blnr myfp_getCR(myfpr *r, ui4b opmode) 6004{ 6005 switch (opmode) { 6006 case 0x00: 6007 *r = floatx80_pi; /* M_PI */ 6008 break; 6009 case 0x0B: 6010 { 6011 const floatx80 t = 6012 packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798)); 6013 *r = t; /* log10(2.0) = ln(2) / ln(10), unknown accuracy */ 6014 } 6015 break; 6016 case 0x0C: 6017 { 6018 const floatx80 t = 6019 packFloatx80m(0, 0x4000, LIT64(0xadf85458a2bb4a9b)); 6020 *r = t; /* exp(1.0), unknown accuracy */ 6021 } 6022 break; 6023 case 0x0D: 6024 { 6025 const floatx80 t = 6026 packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc)); 6027 *r = t; /* 1.0 / log(2.0) */ 6028 } 6029 break; 6030 case 0x0E: 6031 { 6032 const floatx80 t = 6033 packFloatx80m(0, 0x3ffd, LIT64(0xde5bd8a937287195)); 6034 *r = t; /* 1.0 / log(10.0), unknown accuracy */ 6035 } 6036 break; 6037 case 0x0F: 6038 *r = floatx80_zero; /* 0.0 */ 6039 break; 6040 case 0x30: 6041 { 6042 const floatx80 t = 6043 packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac)); 6044 *r = t; /* log(2.0) */ 6045 } 6046 break; 6047 case 0x31: 6048 { 6049 const floatx80 t = 6050 packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17)); 6051 *r = t; /* log(10.0) */ 6052 } 6053 break; 6054 case 0x32: 6055 *r = floatx80_one; /* 1.0 */ 6056 break; 6057 case 0x33: 6058 *r = floatx80_Ten; /* 10.0 */ 6059 break; 6060 case 0x34: 6061 { 6062 const floatx80 t = 6063 packFloatx80m(0, 0x4005, LIT64(0xc800000000000000)); 6064 *r = t; /* 100.0 */ 6065 } 6066 break; 6067 case 0x35: 6068 { 6069 const floatx80 t = 6070 packFloatx80m(0, 0x400c, LIT64(0x9c40000000000000)); 6071 *r = t; /* 10000.0 */ 6072 } 6073 break; 6074 case 0x36: 6075 { 6076 const floatx80 t = 6077 packFloatx80m(0, 0x4019, LIT64(0xbebc200000000000)); 6078 *r = t; /* 1.0e8 */ 6079 } 6080 break; 6081 case 0x37: 6082 { 6083 const floatx80 t = 6084 packFloatx80m(0, 0x4034, LIT64(0x8e1bc9bf04000000)); 6085 *r = t; /* 1.0e16 */ 6086 } 6087 break; 6088 case 0x38: 6089 { 6090 const floatx80 t = 6091 packFloatx80m(0, 0x4069, LIT64(0x9dc5ada82b70b59e)); 6092 *r = t; /* 1.0e32 */ 6093 } 6094 break; 6095 case 0x39: 6096 { 6097 const floatx80 t = 6098 packFloatx80m(0, 0x40d3, LIT64(0xc2781f49ffcfa6d5)); 6099 *r = t; /* 1.0e64 */ 6100 } 6101 break; 6102 case 0x3A: 6103 { 6104 const floatx80 t = 6105 packFloatx80m(0, 0x41a8, LIT64(0x93ba47c980e98ce0)); 6106 *r = t; /* 1.0e128 */ 6107 } 6108 break; 6109 case 0x3B: 6110 { 6111 const floatx80 t = 6112 packFloatx80m(0, 0x4351, LIT64(0xaa7eebfb9df9de8e)); 6113 *r = t; /* 1.0e256 */ 6114 } 6115 break; 6116 case 0x3C: 6117 { 6118 const floatx80 t = 6119 packFloatx80m(0, 0x46a3, LIT64(0xe319a0aea60e91c7)); 6120 *r = t; /* 1.0e512 */ 6121 } 6122 break; 6123 case 0x3D: 6124 { 6125 const floatx80 t = 6126 packFloatx80m(0, 0x4d48, LIT64(0xc976758681750c17)); 6127 *r = t; /* 1.0e1024 */ 6128 } 6129 break; 6130 case 0x3E: 6131 { 6132 const floatx80 t = 6133 packFloatx80m(0, 0x5a92, LIT64(0x9e8b3b5dc53d5de5)); 6134 *r = t; /* 1.0e2048 */ 6135 } 6136 break; 6137 case 0x3F: 6138 { 6139 const floatx80 t = 6140 packFloatx80m(0, 0x7525, LIT64(0xc46052028a20979b)); 6141 *r = t; /* 1.0e4096 */ 6142 } 6143 break; 6144 default: 6145 return falseblnr; 6146 } 6147 return trueblnr; 6148} 6149 6150/* Floating point control register */ 6151 6152LOCALPROC myfp_SetFPCR(ui5r v) 6153{ 6154 switch ((v >> 4) & 0x03) { 6155 case 0: 6156 float_rounding_mode = float_round_nearest_even; 6157 break; 6158 case 1: 6159 float_rounding_mode = float_round_to_zero; 6160 break; 6161 case 2: 6162 float_rounding_mode = float_round_down; 6163 break; 6164 case 3: 6165 float_rounding_mode = float_round_up; 6166 break; 6167 } 6168 switch ((v >> 6) & 0x03) { 6169 case 0: 6170 floatx80_rounding_precision = 80; 6171 break; 6172 case 1: 6173 floatx80_rounding_precision = 32; 6174 break; 6175 case 2: 6176 floatx80_rounding_precision = 64; 6177 break; 6178 case 3: 6179 ReportAbnormalID(0x0201, 6180 "Bad rounding precision in myfp_SetFPCR"); 6181 floatx80_rounding_precision = 80; 6182 break; 6183 } 6184 if (0 != (v & 0xF)) { 6185 ReportAbnormalID(0x0202, 6186 "Reserved bits not zero in myfp_SetFPCR"); 6187 } 6188} 6189 6190LOCALFUNC ui5r myfp_GetFPCR(void) 6191{ 6192 ui5r v = 0; 6193 6194 switch (float_rounding_mode) { 6195 case float_round_nearest_even: 6196 /* v |= (0 << 4); */ 6197 break; 6198 case float_round_to_zero: 6199 v |= (1 << 4); 6200 break; 6201 case float_round_down: 6202 v |= (2 << 4); 6203 break; 6204 case float_round_up: 6205 v |= (3 << 4); 6206 break; 6207 } 6208 6209 if (80 == floatx80_rounding_precision) { 6210 /* v |= (0 << 6); */ 6211 } else if (32 == floatx80_rounding_precision) { 6212 v |= (1 << 6); 6213 } else if (64 == floatx80_rounding_precision) { 6214 v |= (2 << 6); 6215 } else { 6216 ReportAbnormalID(0x0203, 6217 "Bad rounding precision in myfp_GetFPCR"); 6218 } 6219 6220 return v; 6221} 6222 6223LOCALVAR struct myfp_envStruct 6224{ 6225 ui5r FPSR; /* Floating point status register */ 6226} myfp_env; 6227 6228LOCALPROC myfp_SetFPSR(ui5r v) 6229{ 6230 myfp_env.FPSR = v; 6231} 6232 6233LOCALFUNC ui5r myfp_GetFPSR(void) 6234{ 6235 return myfp_env.FPSR; 6236} 6237 6238LOCALFUNC ui3r myfp_GetConditionCodeByte(void) 6239{ 6240 return (myfp_env.FPSR >> 24) & 0x0F; 6241} 6242 6243LOCALPROC myfp_SetConditionCodeByte(ui3r v) 6244{ 6245 myfp_env.FPSR = ((myfp_env.FPSR & 0x00FFFFFF) 6246 | (v << 24)); 6247} 6248 6249LOCALPROC myfp_SetConditionCodeByteFromResult(myfpr *result) 6250{ 6251 /* Set condition codes here based on result */ 6252 6253 int c_nan = myfp_IsNan(result) ? 1 : 0; 6254 int c_inf = myfp_IsInf(result) ? 1 : 0; 6255 int c_zero = myfp_IsZero(result) ? 1 : 0; 6256 int c_neg = myfp_IsNeg(result) ? 1 : 0; 6257 6258 myfp_SetConditionCodeByte(c_nan 6259 | (c_inf << 1) 6260 | (c_zero << 2) 6261 | (c_neg << 3)); 6262}