src/FPMATHEM.h at main · jcs.org/minivmac

jcs.org / minivmac
fork atom
this repo has no description
fork atom
minivmac / src / FPMATHEM.h
at main 6262 lines 187 kB view raw
wrap content
jcs.org import minivmac-37.03 4y ago
3627906f
   1/*
   2	FPMATHEM.h
   3
   4	Copyright (C) 2007 John R. Hauser, Stanislav Shwartsman,
   5		Ross Martin, Paul C. Pratt
   6
   7	You can redistribute this file and/or modify it under the terms
   8	of version 2 of the GNU General Public License as published by
   9	the Free Software Foundation.  You should have received a copy
  10	of the license along with this file; see the file COPYING.
  11
  12	This file is distributed in the hope that it will be useful,
  13	but WITHOUT ANY WARRANTY; without even the implied warranty of
  14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15	license for more details.
  16*/
  17
  18/*
  19	Floating Point MATH implemented with software EMulation
  20
  21	Based on the SoftFloat IEC/IEEE Floating-point Arithmetic
  22	Package, Release 2b, written by John R. Hauser.
  23
  24	Also uses extensions to SoftFloat, written for
  25	Bochs (x86 achitecture simulator), by Stanislav Shwartsman.
  26*/
  27
  28/*
  29	original SoftFloat Copyright notice:
  30
  31	Written by John R. Hauser.  This work was made possible in part by the
  32	International Computer Science Institute, located at Suite 600, 1947 Center
  33	Street, Berkeley, California 94704.  Funding was partially provided by the
  34	National Science Foundation under grant MIP-9311980.  The original version
  35	of this code was written as part of a project to build a fixed-point vector
  36	processor in collaboration with the University of California at Berkeley,
  37	overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  38	is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  39	arithmetic/SoftFloat.html'.
  40
  41	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  42	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  43	RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  44	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  45	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  46	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  47	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  48	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  49
  50	Derivative works are acceptable, even for commercial purposes, so long as
  51	(1) the source code for the derivative work includes prominent notice that
  52	the work is derivative, and (2) the source code includes prominent notice with
  53	these four paragraphs for those parts of this code that are retained.
  54*/
  55
  56/*
  57	original Stanislav Shwartsman Copyright notice:
  58
  59	This source file is an extension to the SoftFloat IEC/IEEE Floating-point
  60	Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
  61	floating point emulation.
  62
  63	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  64	been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  65	RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  66	AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  67	COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  68	EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  69	INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  70	OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  71
  72	Derivative works are acceptable, even for commercial purposes, so long as
  73	(1) the source code for the derivative work includes prominent notice that
  74	the work is derivative, and (2) the source code includes prominent notice with
  75	these four paragraphs for those parts of this code that are retained.
  76
  77	.*============================================================================
  78	* Written for Bochs (x86 achitecture simulator) by
  79	*            Stanislav Shwartsman [sshwarts at sourceforge net]
  80	* ==========================================================================*.
  81*/
  82
  83
  84/*
  85	ReportAbnormalID unused 0x0204 - 0x02FF
  86*/
  87
  88
  89/* soft float stuff */
  90
  91/*
  92	should avoid 64 bit literals.
  93*/
  94
  95typedef ui3r flag; /* 0/1 */
  96
  97/*
  98	To fix: softfloat representation of
  99	denormalized extended precision numbers
 100	is different than on 68881.
 101*/
 102
 103#define cIncludeFPUUnused cIncludeUnused
 104
 105/* ----- from original file "softfloat.h" ----- */
 106
 107/*======================================================================
 108
 109This C header file is part of the SoftFloat IEC/IEEE Floating-point
 110Arithmetic Package, Release 2b.
 111
 112["original SoftFloat Copyright notice" went here, included near top of
 113this file.]
 114
 115======================================================================*/
 116
 117/*----------------------------------------------------------------------------
 118| The macro `FLOATX80' must be defined to enable the extended double-precision
 119| floating-point format `floatx80'.  If this macro is not defined, the
 120| `floatx80' type will not be defined, and none of the functions that either
 121| input or output the `floatx80' type will be defined.  The same applies to
 122| the `FLOAT128' macro and the quadruple-precision format `float128'.
 123*----------------------------------------------------------------------------*/
 124#define FLOATX80
 125#define FLOAT128
 126
 127/*----------------------------------------------------------------------------
 128| Software IEC/IEEE floating-point types.
 129*----------------------------------------------------------------------------*/
 130
 131typedef struct {
 132	ui6b low;
 133	unsigned short high;
 134} floatx80;
 135
 136#ifdef FLOAT128
 137typedef struct {
 138	ui6b low, high;
 139} float128;
 140#endif
 141
 142
 143/* ----- end from original file "softfloat.h" ----- */
 144
 145
 146/*----------------------------------------------------------------------------
 147| Software IEC/IEEE floating-point rounding mode.
 148*----------------------------------------------------------------------------*/
 149enum {
 150	float_round_nearest_even = 0,
 151	float_round_down         = 1,
 152	float_round_up           = 2,
 153	float_round_to_zero      = 3
 154};
 155
 156/*----------------------------------------------------------------------------
 157| Floating-point rounding mode, extended double-precision rounding precision,
 158| and exception flags.
 159*----------------------------------------------------------------------------*/
 160
 161LOCALVAR si3r float_rounding_mode = float_round_nearest_even;
 162
 163
 164/*----------------------------------------------------------------------------
 165| Software IEC/IEEE floating-point exception flags.
 166*----------------------------------------------------------------------------*/
 167
 168enum {
 169	float_flag_invalid   =  1,
 170	float_flag_divbyzero =  4,
 171	float_flag_overflow  =  8,
 172	float_flag_underflow = 16,
 173	float_flag_inexact   = 32
 174};
 175LOCALVAR si3r float_exception_flags = 0;
 176
 177/*----------------------------------------------------------------------------
 178| Software IEC/IEEE extended double-precision rounding precision.  Valid
 179| values are 32, 64, and 80.
 180*----------------------------------------------------------------------------*/
 181
 182LOCALVAR si3r floatx80_rounding_precision = 80;
 183
 184/*----------------------------------------------------------------------------
 185| Primitive arithmetic functions, including multi-word arithmetic, and
 186| division and square root approximations.  (Can be specialized to target if
 187| desired.)
 188*----------------------------------------------------------------------------*/
 189
 190/* ----- from original file "softfloat-macros" ----- */
 191
 192/*============================================================================
 193
 194This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
 195Arithmetic Package, Release 2b.
 196
 197["original SoftFloat Copyright notice" went here, included near top of this file.]
 198
 199=============================================================================*/
 200
 201/*----------------------------------------------------------------------------
 202| Shifts `a' right by the number of bits given in `count'.  If any nonzero
 203| bits are shifted off, they are ``jammed'' into the least significant bit of
 204| the result by setting the least significant bit to 1.  The value of `count'
 205| can be arbitrarily large; in particular, if `count' is greater than 32, the
 206| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
 207| The result is stored in the location pointed to by `zPtr'.
 208*----------------------------------------------------------------------------*/
 209
 210LOCALINLINEPROC shift32RightJamming( ui5b a, si4r count, ui5b *zPtr )
 211{
 212	ui5b z;
 213
 214	if ( count == 0 ) {
 215		z = a;
 216	}
 217	else if ( count < 32 ) {
 218		z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
 219	}
 220	else {
 221		z = ( a != 0 );
 222	}
 223	*zPtr = z;
 224
 225}
 226
 227/*----------------------------------------------------------------------------
 228| Shifts `a' right by the number of bits given in `count'.  If any nonzero
 229| bits are shifted off, they are ``jammed'' into the least significant bit of
 230| the result by setting the least significant bit to 1.  The value of `count'
 231| can be arbitrarily large; in particular, if `count' is greater than 64, the
 232| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
 233| The result is stored in the location pointed to by `zPtr'.
 234*----------------------------------------------------------------------------*/
 235
 236LOCALINLINEPROC shift64RightJamming( ui6b a, si4r count, ui6b *zPtr )
 237{
 238	ui6b z;
 239
 240	if ( count == 0 ) {
 241		z = a;
 242	}
 243	else if ( count < 64 ) {
 244		z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
 245	}
 246	else {
 247		z = ( a != 0 );
 248	}
 249	*zPtr = z;
 250
 251}
 252
 253/*----------------------------------------------------------------------------
 254| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
 255| _plus_ the number of bits given in `count'.  The shifted result is at most
 256| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
 257| bits shifted off form a second 64-bit result as follows:  The _last_ bit
 258| shifted off is the most-significant bit of the extra result, and the other
 259| 63 bits of the extra result are all zero if and only if _all_but_the_last_
 260| bits shifted off were all zero.  This extra result is stored in the location
 261| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
 262|     (This routine makes more sense if `a0' and `a1' are considered to form
 263| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
 264| point value is shifted right by the number of bits given in `count', and
 265| the integer part of the result is returned at the location pointed to by
 266| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
 267| described above, and is returned at the location pointed to by `z1Ptr'.)
 268*----------------------------------------------------------------------------*/
 269
 270LOCALINLINEPROC shift64ExtraRightJamming(
 271	ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
 272{
 273	ui6b z0, z1;
 274	si3r negCount = ( - count ) & 63;
 275
 276	if ( count == 0 ) {
 277		z1 = a1;
 278		z0 = a0;
 279	}
 280	else if ( count < 64 ) {
 281		z1 = ( a0<<negCount ) | ( a1 != 0 );
 282		z0 = a0>>count;
 283	}
 284	else {
 285		if ( count == 64 ) {
 286			z1 = a0 | ( a1 != 0 );
 287		}
 288		else {
 289			z1 = ( ( a0 | a1 ) != 0 );
 290		}
 291		z0 = 0;
 292	}
 293	*z1Ptr = z1;
 294	*z0Ptr = z0;
 295
 296}
 297
 298/*----------------------------------------------------------------------------
 299| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 300| number of bits given in `count'.  Any bits shifted off are lost.  The value
 301| of `count' can be arbitrarily large; in particular, if `count' is greater
 302| than 128, the result will be 0.  The result is broken into two 64-bit pieces
 303| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 304*----------------------------------------------------------------------------*/
 305
 306LOCALINLINEPROC shift128Right(
 307	ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
 308{
 309	ui6b z0, z1;
 310	si3r negCount = ( - count ) & 63;
 311
 312	if ( count == 0 ) {
 313		z1 = a1;
 314		z0 = a0;
 315	}
 316	else if ( count < 64 ) {
 317		z1 = ( a0<<negCount ) | ( a1>>count );
 318		z0 = a0>>count;
 319	}
 320	else {
 321		z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
 322		z0 = 0;
 323	}
 324	*z1Ptr = z1;
 325	*z0Ptr = z0;
 326
 327}
 328
 329/*----------------------------------------------------------------------------
 330| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
 331| number of bits given in `count'.  If any nonzero bits are shifted off, they
 332| are ``jammed'' into the least significant bit of the result by setting the
 333| least significant bit to 1.  The value of `count' can be arbitrarily large;
 334| in particular, if `count' is greater than 128, the result will be either
 335| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
 336| nonzero.  The result is broken into two 64-bit pieces which are stored at
 337| the locations pointed to by `z0Ptr' and `z1Ptr'.
 338*----------------------------------------------------------------------------*/
 339
 340LOCALINLINEPROC shift128RightJamming(
 341	ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
 342{
 343	ui6b z0, z1;
 344	si3r negCount = ( - count ) & 63;
 345
 346	if ( count == 0 ) {
 347		z1 = a1;
 348		z0 = a0;
 349	}
 350	else if ( count < 64 ) {
 351		z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
 352		z0 = a0>>count;
 353	}
 354	else {
 355		if ( count == 64 ) {
 356			z1 = a0 | ( a1 != 0 );
 357		}
 358		else if ( count < 128 ) {
 359			z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
 360		}
 361		else {
 362			z1 = ( ( a0 | a1 ) != 0 );
 363		}
 364		z0 = 0;
 365	}
 366	*z1Ptr = z1;
 367	*z0Ptr = z0;
 368
 369}
 370
 371/*----------------------------------------------------------------------------
 372| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
 373| by 64 _plus_ the number of bits given in `count'.  The shifted result is
 374| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
 375| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
 376| off form a third 64-bit result as follows:  The _last_ bit shifted off is
 377| the most-significant bit of the extra result, and the other 63 bits of the
 378| extra result are all zero if and only if _all_but_the_last_ bits shifted off
 379| were all zero.  This extra result is stored in the location pointed to by
 380| `z2Ptr'.  The value of `count' can be arbitrarily large.
 381|     (This routine makes more sense if `a0', `a1', and `a2' are considered
 382| to form a fixed-point value with binary point between `a1' and `a2'.  This
 383| fixed-point value is shifted right by the number of bits given in `count',
 384| and the integer part of the result is returned at the locations pointed to
 385| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
 386| corrupted as described above, and is returned at the location pointed to by
 387| `z2Ptr'.)
 388*----------------------------------------------------------------------------*/
 389
 390LOCALINLINEPROC shift128ExtraRightJamming(
 391	ui6b a0,
 392	ui6b a1,
 393	ui6b a2,
 394	si4r count,
 395	ui6b *z0Ptr,
 396	ui6b *z1Ptr,
 397	ui6b *z2Ptr)
 398{
 399	ui6b z0, z1, z2;
 400	si3r negCount = ( - count ) & 63;
 401
 402	if ( count == 0 ) {
 403		z2 = a2;
 404		z1 = a1;
 405		z0 = a0;
 406	}
 407	else {
 408		if ( count < 64 ) {
 409			z2 = a1<<negCount;
 410			z1 = ( a0<<negCount ) | ( a1>>count );
 411			z0 = a0>>count;
 412		}
 413		else {
 414			if ( count == 64 ) {
 415				z2 = a1;
 416				z1 = a0;
 417			}
 418			else {
 419				a2 |= a1;
 420				if ( count < 128 ) {
 421					z2 = a0<<negCount;
 422					z1 = a0>>( count & 63 );
 423				}
 424				else {
 425					z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
 426					z1 = 0;
 427				}
 428			}
 429			z0 = 0;
 430		}
 431		z2 |= ( a2 != 0 );
 432	}
 433	*z2Ptr = z2;
 434	*z1Ptr = z1;
 435	*z0Ptr = z0;
 436
 437}
 438
 439/*----------------------------------------------------------------------------
 440| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 441| number of bits given in `count'.  Any bits shifted off are lost.  The value
 442| of `count' must be less than 64.  The result is broken into two 64-bit
 443| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 444*----------------------------------------------------------------------------*/
 445
 446LOCALINLINEPROC shortShift128Left(
 447	ui6b a0, ui6b a1, si4r count, ui6b *z0Ptr, ui6b *z1Ptr )
 448{
 449
 450	*z1Ptr = a1<<count;
 451	*z0Ptr =
 452		( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
 453
 454}
 455
 456/*----------------------------------------------------------------------------
 457| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
 458| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
 459| any carry out is lost.  The result is broken into two 64-bit pieces which
 460| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
 461*----------------------------------------------------------------------------*/
 462
 463LOCALINLINEPROC add128(
 464	ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr )
 465{
 466	ui6b z1;
 467
 468	z1 = a1 + b1;
 469	*z1Ptr = z1;
 470	*z0Ptr = a0 + b0 + ( z1 < a1 );
 471}
 472
 473/*----------------------------------------------------------------------------
 474| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
 475| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
 476| modulo 2^192, so any carry out is lost.  The result is broken into three
 477| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
 478| `z1Ptr', and `z2Ptr'.
 479*----------------------------------------------------------------------------*/
 480
 481LOCALINLINEPROC add192(
 482	ui6b a0,
 483	ui6b a1,
 484	ui6b a2,
 485	ui6b b0,
 486	ui6b b1,
 487	ui6b b2,
 488	ui6b *z0Ptr,
 489	ui6b *z1Ptr,
 490	ui6b *z2Ptr)
 491{
 492	ui6b z0, z1, z2;
 493	si3r carry0, carry1;
 494
 495	z2 = a2 + b2;
 496	carry1 = ( z2 < a2 );
 497	z1 = a1 + b1;
 498	carry0 = ( z1 < a1 );
 499	z0 = a0 + b0;
 500	z1 += carry1;
 501	z0 += ( z1 < carry1 );
 502	z0 += carry0;
 503	*z2Ptr = z2;
 504	*z1Ptr = z1;
 505	*z0Ptr = z0;
 506
 507}
 508
 509/*----------------------------------------------------------------------------
 510| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
 511| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
 512| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
 513| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
 514| `z1Ptr'.
 515*----------------------------------------------------------------------------*/
 516
 517LOCALINLINEPROC
 518 sub128(
 519	 ui6b a0, ui6b a1, ui6b b0, ui6b b1, ui6b *z0Ptr, ui6b *z1Ptr )
 520{
 521
 522	*z1Ptr = a1 - b1;
 523	*z0Ptr = a0 - b0 - ( a1 < b1 );
 524
 525}
 526
 527/*----------------------------------------------------------------------------
 528| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
 529| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
 530| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
 531| result is broken into three 64-bit pieces which are stored at the locations
 532| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
 533*----------------------------------------------------------------------------*/
 534
 535LOCALINLINEPROC
 536 sub192(
 537	 ui6b a0,
 538	 ui6b a1,
 539	 ui6b a2,
 540	 ui6b b0,
 541	 ui6b b1,
 542	 ui6b b2,
 543	 ui6b *z0Ptr,
 544	 ui6b *z1Ptr,
 545	 ui6b *z2Ptr
 546 )
 547{
 548	ui6b z0, z1, z2;
 549	si3r borrow0, borrow1;
 550
 551	z2 = a2 - b2;
 552	borrow1 = ( a2 < b2 );
 553	z1 = a1 - b1;
 554	borrow0 = ( a1 < b1 );
 555	z0 = a0 - b0;
 556	z0 -= ( z1 < borrow1 );
 557	z1 -= borrow1;
 558	z0 -= borrow0;
 559	*z2Ptr = z2;
 560	*z1Ptr = z1;
 561	*z0Ptr = z0;
 562
 563}
 564
 565/*----------------------------------------------------------------------------
 566| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
 567| into two 64-bit pieces which are stored at the locations pointed to by
 568| `z0Ptr' and `z1Ptr'.
 569*----------------------------------------------------------------------------*/
 570
 571
 572#ifndef HaveUi5to6Mul
 573#define HaveUi5to6Mul 1
 574#endif
 575
 576#if HaveUi5to6Mul
 577LOCALINLINEPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z)
 578{
 579	*z = ((ui6b) src1) * src2;
 580}
 581#else
 582
 583LOCALINLINEPROC Ui6fromHiLo(ui5b hi, ui5b lo, ui6b *z)
 584{
 585	*z = (((ui6b)(hi)) << 32) + lo;
 586#if 0
 587	z->lo = hi;
 588	z->hi = lo;
 589#endif
 590}
 591
 592LOCALPROC Ui5to6Mul( ui5b src1, ui5b src2, ui6b *z)
 593{
 594	ui4b src1_lo = ui5b_lo(src1);
 595	ui4b src2_lo = ui5b_lo(src2);
 596	ui4b src1_hi = ui5b_hi(src1);
 597	ui4b src2_hi = ui5b_hi(src2);
 598
 599	ui5b r0 = ( (ui6b) src1_lo ) * src2_lo;
 600	ui5b r1 = ( (ui6b) src1_hi ) * src2_lo;
 601	ui5b r2 = ( (ui6b) src1_lo ) * src2_hi;
 602	ui5b r3 = ( (ui6b) src1_hi ) * src2_hi;
 603
 604	ui5b ra1 = ui5b_hi(r0) + ui5b_lo(r1) + ui5b_lo(r2);
 605
 606	ui5b lo = (ui5b_lo(ra1) << 16) | ui5b_lo(r0);
 607	ui5b hi = ui5b_hi(ra1) + ui5b_hi(r1) + ui5b_hi(r2) + r3;
 608
 609	Ui6fromHiLo(hi, lo, z);
 610}
 611
 612#endif
 613
 614
 615LOCALINLINEPROC mul64To128( ui6b a, ui6b b, ui6b *z0Ptr, ui6b *z1Ptr )
 616{
 617	ui5b aHigh, aLow, bHigh, bLow;
 618	ui6b z0, zMiddleA, zMiddleB, z1;
 619
 620	aLow = a;
 621	aHigh = a>>32;
 622	bLow = b;
 623	bHigh = b>>32;
 624
 625	Ui5to6Mul(aLow, bLow, &z1);
 626	Ui5to6Mul(aLow, bHigh, &zMiddleA);
 627	Ui5to6Mul(aHigh, bLow, &zMiddleB);
 628	Ui5to6Mul(aHigh, bHigh, &z0);
 629
 630	zMiddleA += zMiddleB;
 631	z0 += ( ( (ui6b) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
 632	zMiddleA <<= 32;
 633	z1 += zMiddleA;
 634	z0 += ( z1 < zMiddleA );
 635	*z1Ptr = z1;
 636	*z0Ptr = z0;
 637
 638}
 639
 640/*----------------------------------------------------------------------------
 641| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
 642| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
 643| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
 644| `z2Ptr'.
 645*----------------------------------------------------------------------------*/
 646
 647LOCALINLINEPROC
 648 mul128By64To192(
 649	 ui6b a0,
 650	 ui6b a1,
 651	 ui6b b,
 652	 ui6b *z0Ptr,
 653	 ui6b *z1Ptr,
 654	 ui6b *z2Ptr
 655 )
 656{
 657	ui6b z0, z1, z2, more1;
 658
 659	mul64To128( a1, b, &z1, &z2 );
 660	mul64To128( a0, b, &z0, &more1 );
 661	add128( z0, more1, 0, z1, &z0, &z1 );
 662	*z2Ptr = z2;
 663	*z1Ptr = z1;
 664	*z0Ptr = z0;
 665
 666}
 667
 668/*----------------------------------------------------------------------------
 669| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
 670| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
 671| product.  The product is broken into four 64-bit pieces which are stored at
 672| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
 673*----------------------------------------------------------------------------*/
 674
 675LOCALINLINEPROC
 676 mul128To256(
 677	 ui6b a0,
 678	 ui6b a1,
 679	 ui6b b0,
 680	 ui6b b1,
 681	 ui6b *z0Ptr,
 682	 ui6b *z1Ptr,
 683	 ui6b *z2Ptr,
 684	 ui6b *z3Ptr
 685 )
 686{
 687	ui6b z0, z1, z2, z3;
 688	ui6b more1, more2;
 689
 690	mul64To128( a1, b1, &z2, &z3 );
 691	mul64To128( a1, b0, &z1, &more2 );
 692	add128( z1, more2, 0, z2, &z1, &z2 );
 693	mul64To128( a0, b0, &z0, &more1 );
 694	add128( z0, more1, 0, z1, &z0, &z1 );
 695	mul64To128( a0, b1, &more1, &more2 );
 696	add128( more1, more2, 0, z2, &more1, &z2 );
 697	add128( z0, z1, 0, more1, &z0, &z1 );
 698	*z3Ptr = z3;
 699	*z2Ptr = z2;
 700	*z1Ptr = z1;
 701	*z0Ptr = z0;
 702
 703}
 704
 705/*----------------------------------------------------------------------------
 706| Returns an approximation to the 64-bit integer quotient obtained by dividing
 707| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
 708| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
 709| toward zero, the approximation returned lies between q and q + 2 inclusive.
 710| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
 711| unsigned integer is returned.
 712*----------------------------------------------------------------------------*/
 713
 714#ifndef HaveUi6Div
 715#define HaveUi6Div 0
 716#endif
 717
 718#if HaveUi6Div
 719#define Ui6Div(x, y) ((x) / (y))
 720#else
 721/*
 722	Assuming other 64 bit operations available,
 723	like compare, subtract, shift.
 724*/
 725LOCALFUNC ui6b Ui6Div(ui6b num, ui6b den)
 726{
 727	ui6b bit = 1;
 728	ui6b res = 0;
 729
 730	while ((den < num) && bit && ! (den & (LIT64(1) << 63))) {
 731		den <<= 1;
 732		bit <<= 1;
 733	}
 734
 735	while (bit) {
 736		if (num >= den) {
 737			num -= den;
 738			res |= bit;
 739		}
 740		bit >>= 1;
 741		den >>= 1;
 742	}
 743
 744	return res;
 745}
 746#endif
 747
 748LOCALFUNC ui6b estimateDiv128To64( ui6b a0, ui6b a1, ui6b b )
 749{
 750	ui6b b0, b1;
 751	ui6b rem0, rem1, term0, term1;
 752	ui6b z;
 753
 754	if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
 755	b0 = b>>32;
 756	z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : Ui6Div(a0, b0) << 32;
 757	mul64To128( b, z, &term0, &term1 );
 758	sub128( a0, a1, term0, term1, &rem0, &rem1 );
 759	while ( ( (si6b) rem0 ) < 0 ) {
 760		z -= LIT64( 0x100000000 );
 761		b1 = b<<32;
 762		add128( rem0, rem1, b0, b1, &rem0, &rem1 );
 763	}
 764	rem0 = ( rem0<<32 ) | ( rem1>>32 );
 765	z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : Ui6Div(rem0, b0);
 766	return z;
 767
 768}
 769
 770/*----------------------------------------------------------------------------
 771| Returns an approximation to the square root of the 32-bit significand given
 772| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
 773| `aExp' (the least significant bit) is 1, the integer returned approximates
 774| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
 775| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
 776| case, the approximation returned lies strictly within +/-2 of the exact
 777| value.
 778*----------------------------------------------------------------------------*/
 779
 780LOCALFUNC ui5b estimateSqrt32( si4r aExp, ui5b a )
 781{
 782	static const ui4b sqrtOddAdjustments[] = {
 783		0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
 784		0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
 785	};
 786	static const ui4b sqrtEvenAdjustments[] = {
 787		0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
 788		0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
 789	};
 790	si3r index;
 791	ui5b z;
 792
 793	index = ( a>>27 ) & 15;
 794	if ( aExp & 1 ) {
 795		z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ];
 796		z = ( ( a / z )<<14 ) + ( z<<15 );
 797		a >>= 1;
 798	}
 799	else {
 800		z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ];
 801		z = a / z + z;
 802		z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
 803		if ( z <= a ) return (ui5b) ( ( (si5b) a )>>1 );
 804	}
 805	return ( (ui5b) Ui6Div( ( ( (ui6b) a )<<31 ), z ) ) + ( z>>1 );
 806
 807}
 808
 809/*----------------------------------------------------------------------------
 810| Returns the number of leading 0 bits before the most-significant 1 bit of
 811| `a'.  If `a' is zero, 32 is returned.
 812*----------------------------------------------------------------------------*/
 813
 814LOCALFUNC si3r countLeadingZeros32( ui5b a )
 815{
 816	static const si3r countLeadingZerosHigh[] = {
 817		8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
 818		3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 819		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 820		2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 821		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 822		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 823		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 824		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 825		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 826		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 827		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 828		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 829		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 830		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 831		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 832		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 833	};
 834	si3r shiftCount;
 835
 836	shiftCount = 0;
 837	if ( a < 0x10000 ) {
 838		shiftCount += 16;
 839		a <<= 16;
 840	}
 841	if ( a < 0x1000000 ) {
 842		shiftCount += 8;
 843		a <<= 8;
 844	}
 845	shiftCount += countLeadingZerosHigh[ a>>24 ];
 846	return shiftCount;
 847
 848}
 849
 850/*----------------------------------------------------------------------------
 851| Returns the number of leading 0 bits before the most-significant 1 bit of
 852| `a'.  If `a' is zero, 64 is returned.
 853*----------------------------------------------------------------------------*/
 854
 855LOCALFUNC si3r countLeadingZeros64( ui6b a )
 856{
 857	si3r shiftCount;
 858
 859	shiftCount = 0;
 860	if ( a < ( (ui6b) 1 )<<32 ) {
 861		shiftCount += 32;
 862	}
 863	else {
 864		a >>= 32;
 865	}
 866	shiftCount += countLeadingZeros32( a );
 867	return shiftCount;
 868
 869}
 870
 871/*----------------------------------------------------------------------------
 872| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
 873| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
 874| Otherwise, returns 0.
 875*----------------------------------------------------------------------------*/
 876
 877LOCALINLINEFUNC flag eq128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
 878{
 879
 880	return ( a0 == b0 ) && ( a1 == b1 );
 881
 882}
 883
 884/*----------------------------------------------------------------------------
 885| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 886| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
 887| Otherwise, returns 0.
 888*----------------------------------------------------------------------------*/
 889
 890LOCALINLINEFUNC flag le128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
 891{
 892
 893	return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
 894
 895}
 896
 897/*----------------------------------------------------------------------------
 898| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
 899| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
 900| returns 0.
 901*----------------------------------------------------------------------------*/
 902
 903LOCALINLINEFUNC flag lt128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
 904{
 905
 906	return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
 907
 908}
 909
 910/*----------------------------------------------------------------------------
 911| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
 912| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
 913| Otherwise, returns 0.
 914*----------------------------------------------------------------------------*/
 915
 916#if cIncludeFPUUnused
 917LOCALINLINEFUNC flag ne128( ui6b a0, ui6b a1, ui6b b0, ui6b b1 )
 918{
 919
 920	return ( a0 != b0 ) || ( a1 != b1 );
 921
 922}
 923#endif
 924
 925/* ----- end from original file "softfloat-macros" ----- */
 926
 927/*----------------------------------------------------------------------------
 928| Functions and definitions to determine:  (1) whether tininess for underflow
 929| is detected before or after rounding by default, (2) what (if anything)
 930| happens when exceptions are raised, (3) how signaling NaNs are distinguished
 931| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
 932| are propagated from function inputs to output.  These details are target-
 933| specific.
 934*----------------------------------------------------------------------------*/
 935
 936/* ----- from original file "softfloat-specialize" ----- */
 937
 938/*============================================================================
 939
 940This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
 941Arithmetic Package, Release 2b.
 942
 943["original SoftFloat Copyright notice" went here, included near top of this file.]
 944
 945=============================================================================*/
 946
 947/*----------------------------------------------------------------------------
 948| Software IEC/IEEE floating-point underflow tininess-detection mode.
 949*----------------------------------------------------------------------------*/
 950enum {
 951	float_tininess_after_rounding  = 0,
 952	float_tininess_before_rounding = 1
 953};
 954
 955/*----------------------------------------------------------------------------
 956| Underflow tininess-detection mode, statically initialized to default value.
 957| (The declaration in `softfloat.h' must match the `si3r' type here.)
 958*----------------------------------------------------------------------------*/
 959LOCALVAR si3r float_detect_tininess = float_tininess_after_rounding;
 960
 961/*----------------------------------------------------------------------------
 962| Routine to raise any or all of the software IEC/IEEE floating-point
 963| exception flags.
 964*----------------------------------------------------------------------------*/
 965/*----------------------------------------------------------------------------
 966| Raises the exceptions specified by `flags'.  Floating-point traps can be
 967| defined here if desired.  It is currently not possible for such a trap
 968| to substitute a result value.  If traps are not implemented, this routine
 969| should be simply `float_exception_flags |= flags;'.
 970*----------------------------------------------------------------------------*/
 971
 972LOCALFUNC void float_raise( si3r flags )
 973{
 974
 975	float_exception_flags |= flags;
 976
 977}
 978
 979/*----------------------------------------------------------------------------
 980| Internal canonical NaN format.
 981*----------------------------------------------------------------------------*/
 982typedef struct {
 983	flag sign;
 984	ui6b high, low;
 985} commonNaNT;
 986
 987/*----------------------------------------------------------------------------
 988| The pattern for a default generated extended double-precision NaN.  The
 989| `high' and `low' values hold the most- and least-significant bits,
 990| respectively.
 991*----------------------------------------------------------------------------*/
 992#define floatx80_default_nan_high 0xFFFF
 993#define floatx80_default_nan_low  LIT64( 0xC000000000000000 )
 994
 995/*----------------------------------------------------------------------------
 996| Returns 1 if the extended double-precision floating-point value `a' is a
 997| NaN; otherwise returns 0.
 998*----------------------------------------------------------------------------*/
 999
1000LOCALFUNC flag floatx80_is_nan( floatx80 a )
1001{
1002
1003	return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (ui6b) ( a.low<<1 );
1004
1005}
1006
1007/*----------------------------------------------------------------------------
1008| Returns 1 if the extended double-precision floating-point value `a' is a
1009| signaling NaN; otherwise returns 0.
1010*----------------------------------------------------------------------------*/
1011
1012LOCALFUNC flag floatx80_is_signaling_nan( floatx80 a )
1013{
1014	ui6b aLow;
1015
1016	aLow = a.low & ~ LIT64( 0x4000000000000000 );
1017	return
1018		   ( ( a.high & 0x7FFF ) == 0x7FFF )
1019		&& (ui6b) ( aLow<<1 )
1020		&& ( a.low == aLow );
1021
1022}
1023
1024/*----------------------------------------------------------------------------
1025| Returns the result of converting the extended double-precision floating-
1026| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
1027| invalid exception is raised.
1028*----------------------------------------------------------------------------*/
1029
1030LOCALFUNC commonNaNT floatx80ToCommonNaN( floatx80 a )
1031{
1032	commonNaNT z;
1033
1034	if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
1035	z.sign = a.high>>15;
1036	z.low = 0;
1037	z.high = a.low<<1;
1038	return z;
1039
1040}
1041
1042/*----------------------------------------------------------------------------
1043| Returns the result of converting the canonical NaN `a' to the extended
1044| double-precision floating-point format.
1045*----------------------------------------------------------------------------*/
1046
1047LOCALFUNC floatx80 commonNaNToFloatx80( commonNaNT a )
1048{
1049	floatx80 z;
1050
1051	z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
1052	z.high = ( ( (ui4b) a.sign )<<15 ) | 0x7FFF;
1053	return z;
1054
1055}
1056
1057/*----------------------------------------------------------------------------
1058| Takes two extended double-precision floating-point values `a' and `b', one
1059| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
1060| `b' is a signaling NaN, the invalid exception is raised.
1061*----------------------------------------------------------------------------*/
1062
1063LOCALFUNC floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
1064{
1065	flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
1066
1067	aIsNaN = floatx80_is_nan( a );
1068	aIsSignalingNaN = floatx80_is_signaling_nan( a );
1069	bIsNaN = floatx80_is_nan( b );
1070	bIsSignalingNaN = floatx80_is_signaling_nan( b );
1071	a.low |= LIT64( 0xC000000000000000 );
1072	b.low |= LIT64( 0xC000000000000000 );
1073	if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
1074	if ( aIsSignalingNaN ) {
1075		if ( bIsSignalingNaN ) goto returnLargerSignificand;
1076		return bIsNaN ? b : a;
1077	}
1078	else if ( aIsNaN ) {
1079		if ( bIsSignalingNaN | ! bIsNaN ) return a;
1080 returnLargerSignificand:
1081		if ( a.low < b.low ) return b;
1082		if ( b.low < a.low ) return a;
1083		return ( a.high < b.high ) ? a : b;
1084	}
1085	else {
1086		return b;
1087	}
1088
1089}
1090
1091#ifdef FLOAT128
1092
1093/*----------------------------------------------------------------------------
1094| The pattern for a default generated quadruple-precision NaN.  The `high' and
1095| `low' values hold the most- and least-significant bits, respectively.
1096*----------------------------------------------------------------------------*/
1097#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
1098#define float128_default_nan_low  LIT64( 0x0000000000000000 )
1099
1100/*----------------------------------------------------------------------------
1101| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
1102| otherwise returns 0.
1103*----------------------------------------------------------------------------*/
1104
1105LOCALFUNC flag float128_is_nan( float128 a )
1106{
1107
1108	return
1109		   ( LIT64( 0xFFFE000000000000 ) <= (ui6b) ( a.high<<1 ) )
1110		&& ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
1111
1112}
1113
1114/*----------------------------------------------------------------------------
1115| Returns 1 if the quadruple-precision floating-point value `a' is a
1116| signaling NaN; otherwise returns 0.
1117*----------------------------------------------------------------------------*/
1118
1119LOCALFUNC flag float128_is_signaling_nan( float128 a )
1120{
1121
1122	return
1123		   ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
1124		&& ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
1125
1126}
1127
1128/*----------------------------------------------------------------------------
1129| Returns the result of converting the quadruple-precision floating-point NaN
1130| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
1131| exception is raised.
1132*----------------------------------------------------------------------------*/
1133
1134LOCALFUNC commonNaNT float128ToCommonNaN( float128 a )
1135{
1136	commonNaNT z;
1137
1138	if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
1139	z.sign = a.high>>63;
1140	shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
1141	return z;
1142
1143}
1144
1145/*----------------------------------------------------------------------------
1146| Returns the result of converting the canonical NaN `a' to the quadruple-
1147| precision floating-point format.
1148*----------------------------------------------------------------------------*/
1149
1150LOCALFUNC float128 commonNaNToFloat128( commonNaNT a )
1151{
1152	float128 z;
1153
1154	shift128Right( a.high, a.low, 16, &z.high, &z.low );
1155	z.high |= ( ( (ui6b) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
1156	return z;
1157
1158}
1159
1160/*----------------------------------------------------------------------------
1161| Takes two quadruple-precision floating-point values `a' and `b', one of
1162| which is a NaN, and returns the appropriate NaN result.  If either `a' or
1163| `b' is a signaling NaN, the invalid exception is raised.
1164*----------------------------------------------------------------------------*/
1165
1166LOCALFUNC float128 propagateFloat128NaN( float128 a, float128 b )
1167{
1168	flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
1169
1170	aIsNaN = float128_is_nan( a );
1171	aIsSignalingNaN = float128_is_signaling_nan( a );
1172	bIsNaN = float128_is_nan( b );
1173	bIsSignalingNaN = float128_is_signaling_nan( b );
1174	a.high |= LIT64( 0x0000800000000000 );
1175	b.high |= LIT64( 0x0000800000000000 );
1176	if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
1177	if ( aIsSignalingNaN ) {
1178		if ( bIsSignalingNaN ) goto returnLargerSignificand;
1179		return bIsNaN ? b : a;
1180	}
1181	else if ( aIsNaN ) {
1182		if ( bIsSignalingNaN | ! bIsNaN ) return a;
1183 returnLargerSignificand:
1184		if ( lt128( a.high<<1, a.low, b.high<<1, b.low ) ) return b;
1185		if ( lt128( b.high<<1, b.low, a.high<<1, a.low ) ) return a;
1186		return ( a.high < b.high ) ? a : b;
1187	}
1188	else {
1189		return b;
1190	}
1191
1192}
1193
1194#endif
1195
1196/* ----- end from original file "softfloat-specialize" ----- */
1197
1198/* ----- from original file "softfloat.c" ----- */
1199
1200
1201/*============================================================================
1202
1203This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
1204Package, Release 2b.
1205
1206["original SoftFloat Copyright notice" went here, included near top of this file.]
1207
1208=============================================================================*/
1209
1210/*----------------------------------------------------------------------------
1211| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
1212| and 7, and returns the properly rounded 32-bit integer corresponding to the
1213| input.  If `zSign' is 1, the input is negated before being converted to an
1214| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
1215| is simply rounded to an integer, with the inexact exception raised if the
1216| input cannot be represented exactly as an integer.  However, if the fixed-
1217| point input is too large, the invalid exception is raised and the largest
1218| positive or negative integer is returned.
1219*----------------------------------------------------------------------------*/
1220
1221LOCALFUNC si5r roundAndPackInt32( flag zSign, ui6b absZ )
1222{
1223	si3r roundingMode;
1224	flag roundNearestEven;
1225	si3r roundIncrement, roundBits;
1226	si5r z;
1227
1228	roundingMode = float_rounding_mode;
1229	roundNearestEven = ( roundingMode == float_round_nearest_even );
1230	roundIncrement = 0x40;
1231	if ( ! roundNearestEven ) {
1232		if ( roundingMode == float_round_to_zero ) {
1233			roundIncrement = 0;
1234		}
1235		else {
1236			roundIncrement = 0x7F;
1237			if ( zSign ) {
1238				if ( roundingMode == float_round_up ) roundIncrement = 0;
1239			}
1240			else {
1241				if ( roundingMode == float_round_down ) roundIncrement = 0;
1242			}
1243		}
1244	}
1245	roundBits = absZ & 0x7F;
1246	absZ = ( absZ + roundIncrement )>>7;
1247	absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
1248	z = absZ;
1249	if ( zSign ) z = - z;
1250	if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
1251		float_raise( float_flag_invalid );
1252		return zSign ? (si5b) 0x80000000 : 0x7FFFFFFF;
1253	}
1254	if ( roundBits ) float_exception_flags |= float_flag_inexact;
1255	return z;
1256
1257}
1258
1259/*----------------------------------------------------------------------------
1260| Returns the fraction bits of the extended double-precision floating-point
1261| value `a'.
1262*----------------------------------------------------------------------------*/
1263
1264LOCALINLINEFUNC ui6b extractFloatx80Frac( floatx80 a )
1265{
1266
1267	return a.low;
1268
1269}
1270
1271/*----------------------------------------------------------------------------
1272| Returns the exponent bits of the extended double-precision floating-point
1273| value `a'.
1274*----------------------------------------------------------------------------*/
1275
1276LOCALINLINEFUNC si5r extractFloatx80Exp( floatx80 a )
1277{
1278
1279	return a.high & 0x7FFF;
1280
1281}
1282
1283/*----------------------------------------------------------------------------
1284| Returns the sign bit of the extended double-precision floating-point value
1285| `a'.
1286*----------------------------------------------------------------------------*/
1287
1288LOCALINLINEFUNC flag extractFloatx80Sign( floatx80 a )
1289{
1290
1291	return a.high>>15;
1292
1293}
1294
1295/*----------------------------------------------------------------------------
1296| Normalizes the subnormal extended double-precision floating-point value
1297| represented by the denormalized significand `aSig'.  The normalized exponent
1298| and significand are stored at the locations pointed to by `zExpPtr' and
1299| `zSigPtr', respectively.
1300*----------------------------------------------------------------------------*/
1301
1302LOCALPROC
1303 normalizeFloatx80Subnormal( ui6b aSig, si5r *zExpPtr, ui6b *zSigPtr )
1304{
1305	si3r shiftCount;
1306
1307	shiftCount = countLeadingZeros64( aSig );
1308	*zSigPtr = aSig<<shiftCount;
1309	*zExpPtr = 1 - shiftCount;
1310
1311}
1312
1313/*----------------------------------------------------------------------------
1314| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
1315| extended double-precision floating-point value, returning the result.
1316*----------------------------------------------------------------------------*/
1317
1318LOCALINLINEFUNC floatx80 packFloatx80( flag zSign, si5r zExp, ui6b zSig )
1319{
1320	floatx80 z;
1321
1322	z.low = zSig;
1323	z.high = ( ( (ui4b) zSign )<<15 ) + zExp;
1324	return z;
1325
1326}
1327
1328/*----------------------------------------------------------------------------
1329| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1330| and extended significand formed by the concatenation of `zSig0' and `zSig1',
1331| and returns the proper extended double-precision floating-point value
1332| corresponding to the abstract input.  Ordinarily, the abstract value is
1333| rounded and packed into the extended double-precision format, with the
1334| inexact exception raised if the abstract input cannot be represented
1335| exactly.  However, if the abstract value is too large, the overflow and
1336| inexact exceptions are raised and an infinity or maximal finite value is
1337| returned.  If the abstract value is too small, the input value is rounded to
1338| a subnormal number, and the underflow and inexact exceptions are raised if
1339| the abstract input cannot be represented exactly as a subnormal extended
1340| double-precision floating-point number.
1341|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
1342| number of bits as single or double precision, respectively.  Otherwise, the
1343| result is rounded to the full precision of the extended double-precision
1344| format.
1345|     The input significand must be normalized or smaller.  If the input
1346| significand is not normalized, `zExp' must be 0; in that case, the result
1347| returned is a subnormal number, and it must not require rounding.  The
1348| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
1349| Floating-Point Arithmetic.
1350*----------------------------------------------------------------------------*/
1351
1352LOCALFUNC floatx80
1353 roundAndPackFloatx80(
1354	 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1
1355 )
1356{
1357	si3r roundingMode;
1358	flag roundNearestEven, increment, isTiny;
1359	si6r roundIncrement, roundMask, roundBits;
1360
1361	roundingMode = float_rounding_mode;
1362	roundNearestEven = ( roundingMode == float_round_nearest_even );
1363	if ( roundingPrecision == 80 ) goto precision80;
1364	if ( roundingPrecision == 64 ) {
1365		roundIncrement = LIT64( 0x0000000000000400 );
1366		roundMask = LIT64( 0x00000000000007FF );
1367	}
1368	else if ( roundingPrecision == 32 ) {
1369		roundIncrement = LIT64( 0x0000008000000000 );
1370		roundMask = LIT64( 0x000000FFFFFFFFFF );
1371	}
1372	else {
1373		goto precision80;
1374	}
1375	zSig0 |= ( zSig1 != 0 );
1376	if ( ! roundNearestEven ) {
1377		if ( roundingMode == float_round_to_zero ) {
1378			roundIncrement = 0;
1379		}
1380		else {
1381			roundIncrement = roundMask;
1382			if ( zSign ) {
1383				if ( roundingMode == float_round_up ) roundIncrement = 0;
1384			}
1385			else {
1386				if ( roundingMode == float_round_down ) roundIncrement = 0;
1387			}
1388		}
1389	}
1390	roundBits = zSig0 & roundMask;
1391	if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) {
1392		if (    ( 0x7FFE < zExp )
1393			 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1394		   ) {
1395			goto overflow;
1396		}
1397		if ( zExp <= 0 ) {
1398			isTiny =
1399				   ( float_detect_tininess == float_tininess_before_rounding )
1400				|| ( zExp < 0 )
1401				|| ( zSig0 <= zSig0 + roundIncrement );
1402			shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
1403			zExp = 0;
1404			roundBits = zSig0 & roundMask;
1405			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
1406			if ( roundBits ) float_exception_flags |= float_flag_inexact;
1407			zSig0 += roundIncrement;
1408			if ( (si6b) zSig0 < 0 ) zExp = 1;
1409			roundIncrement = roundMask + 1;
1410			if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1411				roundMask |= roundIncrement;
1412			}
1413			zSig0 &= ~ roundMask;
1414			return packFloatx80( zSign, zExp, zSig0 );
1415		}
1416	}
1417	if ( roundBits ) float_exception_flags |= float_flag_inexact;
1418	zSig0 += roundIncrement;
1419	if ( zSig0 < roundIncrement ) {
1420		++zExp;
1421		zSig0 = LIT64( 0x8000000000000000 );
1422	}
1423	roundIncrement = roundMask + 1;
1424	if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1425		roundMask |= roundIncrement;
1426	}
1427	zSig0 &= ~ roundMask;
1428	if ( zSig0 == 0 ) zExp = 0;
1429	return packFloatx80( zSign, zExp, zSig0 );
1430 precision80:
1431	increment = ( (si6b) zSig1 < 0 );
1432	if ( ! roundNearestEven ) {
1433		if ( roundingMode == float_round_to_zero ) {
1434			increment = 0;
1435		}
1436		else {
1437			if ( zSign ) {
1438				increment = ( roundingMode == float_round_down ) && zSig1;
1439			}
1440			else {
1441				increment = ( roundingMode == float_round_up ) && zSig1;
1442			}
1443		}
1444	}
1445	if ( 0x7FFD <= (ui5b) ( zExp - 1 ) ) {
1446		if (    ( 0x7FFE < zExp )
1447			 || (    ( zExp == 0x7FFE )
1448				  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
1449				  && increment
1450				)
1451		   ) {
1452			roundMask = 0;
1453 overflow:
1454			float_raise( float_flag_overflow | float_flag_inexact );
1455			if (    ( roundingMode == float_round_to_zero )
1456				 || ( zSign && ( roundingMode == float_round_up ) )
1457				 || ( ! zSign && ( roundingMode == float_round_down ) )
1458			   ) {
1459				return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1460			}
1461			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1462		}
1463		if ( zExp <= 0 ) {
1464			isTiny =
1465				   ( float_detect_tininess == float_tininess_before_rounding )
1466				|| ( zExp < 0 )
1467				|| ! increment
1468				|| ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
1469			shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
1470			zExp = 0;
1471			if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
1472			if ( zSig1 ) float_exception_flags |= float_flag_inexact;
1473			if ( roundNearestEven ) {
1474				increment = ( (si6b) zSig1 < 0 );
1475			}
1476			else {
1477				if ( zSign ) {
1478					increment = ( roundingMode == float_round_down ) && zSig1;
1479				}
1480				else {
1481					increment = ( roundingMode == float_round_up ) && zSig1;
1482				}
1483			}
1484			if ( increment ) {
1485				++zSig0;
1486				zSig0 &=
1487					~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1488				if ( (si6b) zSig0 < 0 ) zExp = 1;
1489			}
1490			return packFloatx80( zSign, zExp, zSig0 );
1491		}
1492	}
1493	if ( zSig1 ) float_exception_flags |= float_flag_inexact;
1494	if ( increment ) {
1495		++zSig0;
1496		if ( zSig0 == 0 ) {
1497			++zExp;
1498			zSig0 = LIT64( 0x8000000000000000 );
1499		}
1500		else {
1501			zSig0 &= ~ ( ( (ui6b) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1502		}
1503	}
1504	else {
1505		if ( zSig0 == 0 ) zExp = 0;
1506	}
1507	return packFloatx80( zSign, zExp, zSig0 );
1508
1509}
1510
1511/*----------------------------------------------------------------------------
1512| Takes an abstract floating-point value having sign `zSign', exponent
1513| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
1514| and returns the proper extended double-precision floating-point value
1515| corresponding to the abstract input.  This routine is just like
1516| `roundAndPackFloatx80' except that the input significand does not have to be
1517| normalized.
1518*----------------------------------------------------------------------------*/
1519
1520LOCALFUNC floatx80
1521 normalizeRoundAndPackFloatx80(
1522	 si3r roundingPrecision, flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1
1523 )
1524{
1525	si3r shiftCount;
1526
1527	if ( zSig0 == 0 ) {
1528		zSig0 = zSig1;
1529		zSig1 = 0;
1530		zExp -= 64;
1531	}
1532	shiftCount = countLeadingZeros64( zSig0 );
1533	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1534	zExp -= shiftCount;
1535	return
1536		roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
1537
1538}
1539
1540#ifdef FLOAT128
1541
1542/*----------------------------------------------------------------------------
1543| Returns the least-significant 64 fraction bits of the quadruple-precision
1544| floating-point value `a'.
1545*----------------------------------------------------------------------------*/
1546
1547LOCALINLINEFUNC ui6b extractFloat128Frac1( float128 a )
1548{
1549
1550	return a.low;
1551
1552}
1553
1554/*----------------------------------------------------------------------------
1555| Returns the most-significant 48 fraction bits of the quadruple-precision
1556| floating-point value `a'.
1557*----------------------------------------------------------------------------*/
1558
1559LOCALINLINEFUNC ui6b extractFloat128Frac0( float128 a )
1560{
1561
1562	return a.high & LIT64( 0x0000FFFFFFFFFFFF );
1563
1564}
1565
1566/*----------------------------------------------------------------------------
1567| Returns the exponent bits of the quadruple-precision floating-point value
1568| `a'.
1569*----------------------------------------------------------------------------*/
1570
1571LOCALINLINEFUNC si5r extractFloat128Exp( float128 a )
1572{
1573
1574	return ( a.high>>48 ) & 0x7FFF;
1575
1576}
1577
1578/*----------------------------------------------------------------------------
1579| Returns the sign bit of the quadruple-precision floating-point value `a'.
1580*----------------------------------------------------------------------------*/
1581
1582LOCALINLINEFUNC flag extractFloat128Sign( float128 a )
1583{
1584
1585	return a.high>>63;
1586
1587}
1588
1589/*----------------------------------------------------------------------------
1590| Normalizes the subnormal quadruple-precision floating-point value
1591| represented by the denormalized significand formed by the concatenation of
1592| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
1593| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
1594| significand are stored at the location pointed to by `zSig0Ptr', and the
1595| least significant 64 bits of the normalized significand are stored at the
1596| location pointed to by `zSig1Ptr'.
1597*----------------------------------------------------------------------------*/
1598
1599LOCALPROC
1600 normalizeFloat128Subnormal(
1601	 ui6b aSig0,
1602	 ui6b aSig1,
1603	 si5r *zExpPtr,
1604	 ui6b *zSig0Ptr,
1605	 ui6b *zSig1Ptr
1606 )
1607{
1608	si3r shiftCount;
1609
1610	if ( aSig0 == 0 ) {
1611		shiftCount = countLeadingZeros64( aSig1 ) - 15;
1612		if ( shiftCount < 0 ) {
1613			*zSig0Ptr = aSig1>>( - shiftCount );
1614			*zSig1Ptr = aSig1<<( shiftCount & 63 );
1615		}
1616		else {
1617			*zSig0Ptr = aSig1<<shiftCount;
1618			*zSig1Ptr = 0;
1619		}
1620		*zExpPtr = - shiftCount - 63;
1621	}
1622	else {
1623		shiftCount = countLeadingZeros64( aSig0 ) - 15;
1624		shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
1625		*zExpPtr = 1 - shiftCount;
1626	}
1627
1628}
1629
1630/*----------------------------------------------------------------------------
1631| Packs the sign `zSign', the exponent `zExp', and the significand formed
1632| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
1633| floating-point value, returning the result.  After being shifted into the
1634| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
1635| added together to form the most significant 32 bits of the result.  This
1636| means that any integer portion of `zSig0' will be added into the exponent.
1637| Since a properly normalized significand will have an integer portion equal
1638| to 1, the `zExp' input should be 1 less than the desired result exponent
1639| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1640| significand.
1641*----------------------------------------------------------------------------*/
1642
1643LOCALINLINEFUNC float128
1644 packFloat128( flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 )
1645{
1646	float128 z;
1647
1648	z.low = zSig1;
1649	z.high = ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<48 ) + zSig0;
1650	return z;
1651
1652}
1653
1654/*----------------------------------------------------------------------------
1655| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1656| and extended significand formed by the concatenation of `zSig0', `zSig1',
1657| and `zSig2', and returns the proper quadruple-precision floating-point value
1658| corresponding to the abstract input.  Ordinarily, the abstract value is
1659| simply rounded and packed into the quadruple-precision format, with the
1660| inexact exception raised if the abstract input cannot be represented
1661| exactly.  However, if the abstract value is too large, the overflow and
1662| inexact exceptions are raised and an infinity or maximal finite value is
1663| returned.  If the abstract value is too small, the input value is rounded to
1664| a subnormal number, and the underflow and inexact exceptions are raised if
1665| the abstract input cannot be represented exactly as a subnormal quadruple-
1666| precision floating-point number.
1667|     The input significand must be normalized or smaller.  If the input
1668| significand is not normalized, `zExp' must be 0; in that case, the result
1669| returned is a subnormal number, and it must not require rounding.  In the
1670| usual case that the input significand is normalized, `zExp' must be 1 less
1671| than the ``true'' floating-point exponent.  The handling of underflow and
1672| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1673*----------------------------------------------------------------------------*/
1674
1675LOCALFUNC float128
1676 roundAndPackFloat128(
1677	 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1, ui6b zSig2 )
1678{
1679	si3r roundingMode;
1680	flag roundNearestEven, increment, isTiny;
1681
1682	roundingMode = float_rounding_mode;
1683	roundNearestEven = ( roundingMode == float_round_nearest_even );
1684	increment = ( (si6b) zSig2 < 0 );
1685	if ( ! roundNearestEven ) {
1686		if ( roundingMode == float_round_to_zero ) {
1687			increment = 0;
1688		}
1689		else {
1690			if ( zSign ) {
1691				increment = ( roundingMode == float_round_down ) && zSig2;
1692			}
1693			else {
1694				increment = ( roundingMode == float_round_up ) && zSig2;
1695			}
1696		}
1697	}
1698	if ( 0x7FFD <= (ui5b) zExp ) {
1699		if (    ( 0x7FFD < zExp )
1700			 || (    ( zExp == 0x7FFD )
1701				  && eq128(
1702						 LIT64( 0x0001FFFFFFFFFFFF ),
1703						 LIT64( 0xFFFFFFFFFFFFFFFF ),
1704						 zSig0,
1705						 zSig1
1706					 )
1707				  && increment
1708				)
1709		   ) {
1710			float_raise( float_flag_overflow | float_flag_inexact );
1711			if (    ( roundingMode == float_round_to_zero )
1712				 || ( zSign && ( roundingMode == float_round_up ) )
1713				 || ( ! zSign && ( roundingMode == float_round_down ) )
1714			   ) {
1715				return
1716					packFloat128(
1717						zSign,
1718						0x7FFE,
1719						LIT64( 0x0000FFFFFFFFFFFF ),
1720						LIT64( 0xFFFFFFFFFFFFFFFF )
1721					);
1722			}
1723			return packFloat128( zSign, 0x7FFF, 0, 0 );
1724		}
1725		if ( zExp < 0 ) {
1726			isTiny =
1727				   ( float_detect_tininess == float_tininess_before_rounding )
1728				|| ( zExp < -1 )
1729				|| ! increment
1730				|| lt128(
1731					   zSig0,
1732					   zSig1,
1733					   LIT64( 0x0001FFFFFFFFFFFF ),
1734					   LIT64( 0xFFFFFFFFFFFFFFFF )
1735				   );
1736			shift128ExtraRightJamming(
1737				zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1738			zExp = 0;
1739			if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1740			if ( roundNearestEven ) {
1741				increment = ( (si6b) zSig2 < 0 );
1742			}
1743			else {
1744				if ( zSign ) {
1745					increment = ( roundingMode == float_round_down ) && zSig2;
1746				}
1747				else {
1748					increment = ( roundingMode == float_round_up ) && zSig2;
1749				}
1750			}
1751		}
1752	}
1753	if ( zSig2 ) float_exception_flags |= float_flag_inexact;
1754	if ( increment ) {
1755		add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1756		zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1757	}
1758	else {
1759		if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1760	}
1761	return packFloat128( zSign, zExp, zSig0, zSig1 );
1762
1763}
1764
1765/*----------------------------------------------------------------------------
1766| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1767| and significand formed by the concatenation of `zSig0' and `zSig1', and
1768| returns the proper quadruple-precision floating-point value corresponding
1769| to the abstract input.  This routine is just like `roundAndPackFloat128'
1770| except that the input significand has fewer bits and does not have to be
1771| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1772| point exponent.
1773*----------------------------------------------------------------------------*/
1774
1775LOCALFUNC float128
1776 normalizeRoundAndPackFloat128(
1777	 flag zSign, si5r zExp, ui6b zSig0, ui6b zSig1 )
1778{
1779	si3r shiftCount;
1780	ui6b zSig2;
1781
1782	if ( zSig0 == 0 ) {
1783		zSig0 = zSig1;
1784		zSig1 = 0;
1785		zExp -= 64;
1786	}
1787	shiftCount = countLeadingZeros64( zSig0 ) - 15;
1788	if ( 0 <= shiftCount ) {
1789		zSig2 = 0;
1790		shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1791	}
1792	else {
1793		shift128ExtraRightJamming(
1794			zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1795	}
1796	zExp -= shiftCount;
1797	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1798
1799}
1800
1801#endif
1802
1803/*----------------------------------------------------------------------------
1804| Returns the result of converting the 32-bit two's complement integer `a'
1805| to the extended double-precision floating-point format.  The conversion
1806| is performed according to the IEC/IEEE Standard for Binary Floating-Point
1807| Arithmetic.
1808*----------------------------------------------------------------------------*/
1809
1810LOCALFUNC floatx80 int32_to_floatx80( si5r a )
1811{
1812	flag zSign;
1813	ui5r absA;
1814	si3r shiftCount;
1815	ui6b zSig;
1816
1817	if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1818	zSign = ( a < 0 );
1819	absA = zSign ? - a : a;
1820	shiftCount = countLeadingZeros32( absA ) + 32;
1821	zSig = absA;
1822	return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1823
1824}
1825
1826/*----------------------------------------------------------------------------
1827| Returns the result of converting the extended double-precision floating-
1828| point value `a' to the 32-bit two's complement integer format.  The
1829| conversion is performed according to the IEC/IEEE Standard for Binary
1830| Floating-Point Arithmetic---which means in particular that the conversion
1831| is rounded according to the current rounding mode.  If `a' is a NaN, the
1832| largest positive integer is returned.  Otherwise, if the conversion
1833| overflows, the largest integer with the same sign as `a' is returned.
1834*----------------------------------------------------------------------------*/
1835
1836LOCALFUNC si5r floatx80_to_int32( floatx80 a )
1837{
1838	flag aSign;
1839	si5r aExp, shiftCount;
1840	ui6b aSig;
1841
1842	aSig = extractFloatx80Frac( a );
1843	aExp = extractFloatx80Exp( a );
1844	aSign = extractFloatx80Sign( a );
1845	if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0;
1846	shiftCount = 0x4037 - aExp;
1847	if ( shiftCount <= 0 ) shiftCount = 1;
1848	shift64RightJamming( aSig, shiftCount, &aSig );
1849	return roundAndPackInt32( aSign, aSig );
1850
1851}
1852
1853/*----------------------------------------------------------------------------
1854| Returns the result of converting the extended double-precision floating-
1855| point value `a' to the 32-bit two's complement integer format.  The
1856| conversion is performed according to the IEC/IEEE Standard for Binary
1857| Floating-Point Arithmetic, except that the conversion is always rounded
1858| toward zero.  If `a' is a NaN, the largest positive integer is returned.
1859| Otherwise, if the conversion overflows, the largest integer with the same
1860| sign as `a' is returned.
1861*----------------------------------------------------------------------------*/
1862
1863#if cIncludeFPUUnused
1864LOCALFUNC si5r floatx80_to_int32_round_to_zero( floatx80 a )
1865{
1866	flag aSign;
1867	si5r aExp, shiftCount;
1868	ui6b aSig, savedASig;
1869	si5r z;
1870
1871	aSig = extractFloatx80Frac( a );
1872	aExp = extractFloatx80Exp( a );
1873	aSign = extractFloatx80Sign( a );
1874	if ( 0x401E < aExp ) {
1875		if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) aSign = 0;
1876		goto invalid;
1877	}
1878	else if ( aExp < 0x3FFF ) {
1879		if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
1880		return 0;
1881	}
1882	shiftCount = 0x403E - aExp;
1883	savedASig = aSig;
1884	aSig >>= shiftCount;
1885	z = aSig;
1886	if ( aSign ) z = - z;
1887	if ( ( z < 0 ) ^ aSign ) {
1888 invalid:
1889		float_raise( float_flag_invalid );
1890		return aSign ? (si5b) 0x80000000 : 0x7FFFFFFF;
1891	}
1892	if ( ( aSig<<shiftCount ) != savedASig ) {
1893		float_exception_flags |= float_flag_inexact;
1894	}
1895	return z;
1896
1897}
1898#endif
1899
1900#ifdef FLOAT128
1901
1902/*----------------------------------------------------------------------------
1903| Returns the result of converting the extended double-precision floating-
1904| point value `a' to the quadruple-precision floating-point format.  The
1905| conversion is performed according to the IEC/IEEE Standard for Binary
1906| Floating-Point Arithmetic.
1907*----------------------------------------------------------------------------*/
1908
1909LOCALFUNC float128 floatx80_to_float128( floatx80 a )
1910{
1911	flag aSign;
1912	si4r aExp;
1913	ui6b aSig, zSig0, zSig1;
1914
1915	aSig = extractFloatx80Frac( a );
1916	aExp = extractFloatx80Exp( a );
1917	aSign = extractFloatx80Sign( a );
1918	if ( ( aExp == 0x7FFF ) && (ui6b) ( aSig<<1 ) ) {
1919		return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
1920	}
1921	shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
1922	return packFloat128( aSign, aExp, zSig0, zSig1 );
1923
1924}
1925
1926#endif
1927
1928/*----------------------------------------------------------------------------
1929| Rounds the extended double-precision floating-point value `a' to an integer,
1930| and returns the result as an extended quadruple-precision floating-point
1931| value.  The operation is performed according to the IEC/IEEE Standard for
1932| Binary Floating-Point Arithmetic.
1933*----------------------------------------------------------------------------*/
1934
1935LOCALFUNC floatx80 floatx80_round_to_int( floatx80 a )
1936{
1937	flag aSign;
1938	si5r aExp;
1939	ui6b lastBitMask, roundBitsMask;
1940	si3r roundingMode;
1941	floatx80 z;
1942
1943	aExp = extractFloatx80Exp( a );
1944	if ( 0x403E <= aExp ) {
1945		if ( ( aExp == 0x7FFF ) && (ui6b) ( extractFloatx80Frac( a )<<1 ) ) {
1946			return propagateFloatx80NaN( a, a );
1947		}
1948		return a;
1949	}
1950	if ( aExp < 0x3FFF ) {
1951		if (    ( aExp == 0 )
1952			 && ( (ui6b) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
1953			return a;
1954		}
1955		float_exception_flags |= float_flag_inexact;
1956		aSign = extractFloatx80Sign( a );
1957		switch ( float_rounding_mode ) {
1958		 case float_round_nearest_even:
1959			if ( ( aExp == 0x3FFE ) && (ui6b) ( extractFloatx80Frac( a )<<1 )
1960			   ) {
1961				return
1962					packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
1963			}
1964			break;
1965		 case float_round_down:
1966			return
1967				  aSign ?
1968					  packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
1969				: packFloatx80( 0, 0, 0 );
1970		 case float_round_up:
1971			return
1972				  aSign ? packFloatx80( 1, 0, 0 )
1973				: packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
1974		}
1975		return packFloatx80( aSign, 0, 0 );
1976	}
1977	lastBitMask = 1;
1978	lastBitMask <<= 0x403E - aExp;
1979	roundBitsMask = lastBitMask - 1;
1980	z = a;
1981	roundingMode = float_rounding_mode;
1982	if ( roundingMode == float_round_nearest_even ) {
1983		z.low += lastBitMask>>1;
1984		if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1985	}
1986	else if ( roundingMode != float_round_to_zero ) {
1987		if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1988			z.low += roundBitsMask;
1989		}
1990	}
1991	z.low &= ~ roundBitsMask;
1992	if ( z.low == 0 ) {
1993		++z.high;
1994		z.low = LIT64( 0x8000000000000000 );
1995	}
1996	if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
1997	return z;
1998
1999}
2000
2001/*----------------------------------------------------------------------------
2002| Returns the result of adding the absolute values of the extended double-
2003| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
2004| negated before being returned.  `zSign' is ignored if the result is a NaN.
2005| The addition is performed according to the IEC/IEEE Standard for Binary
2006| Floating-Point Arithmetic.
2007*----------------------------------------------------------------------------*/
2008
2009LOCALFUNC floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2010{
2011	si5r aExp, bExp, zExp;
2012	ui6b aSig, bSig, zSig0, zSig1;
2013	si5r expDiff;
2014
2015	aSig = extractFloatx80Frac( a );
2016	aExp = extractFloatx80Exp( a );
2017	bSig = extractFloatx80Frac( b );
2018	bExp = extractFloatx80Exp( b );
2019	expDiff = aExp - bExp;
2020	if ( 0 < expDiff ) {
2021		if ( aExp == 0x7FFF ) {
2022			if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2023			return a;
2024		}
2025		if ( bExp == 0 ) --expDiff;
2026		shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2027		zExp = aExp;
2028	}
2029	else if ( expDiff < 0 ) {
2030		if ( bExp == 0x7FFF ) {
2031			if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2032			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2033		}
2034		if ( aExp == 0 ) ++expDiff;
2035		shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2036		zExp = bExp;
2037	}
2038	else {
2039		if ( aExp == 0x7FFF ) {
2040			if ( (ui6b) ( ( aSig | bSig )<<1 ) ) {
2041				return propagateFloatx80NaN( a, b );
2042			}
2043			return a;
2044		}
2045		zSig1 = 0;
2046		zSig0 = aSig + bSig;
2047		if ( aExp == 0 ) {
2048			normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2049			goto roundAndPack;
2050		}
2051		zExp = aExp;
2052		goto shiftRight1;
2053	}
2054	zSig0 = aSig + bSig;
2055	if ( (si6b) zSig0 < 0 ) goto roundAndPack;
2056 shiftRight1:
2057	shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2058	zSig0 |= LIT64( 0x8000000000000000 );
2059	++zExp;
2060 roundAndPack:
2061	return
2062		roundAndPackFloatx80(
2063			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2064
2065}
2066
2067/*----------------------------------------------------------------------------
2068| Returns the result of subtracting the absolute values of the extended
2069| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
2070| difference is negated before being returned.  `zSign' is ignored if the
2071| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2072| Standard for Binary Floating-Point Arithmetic.
2073*----------------------------------------------------------------------------*/
2074
2075LOCALFUNC floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
2076{
2077	si5r aExp, bExp, zExp;
2078	ui6b aSig, bSig, zSig0, zSig1;
2079	si5r expDiff;
2080	floatx80 z;
2081
2082	aSig = extractFloatx80Frac( a );
2083	aExp = extractFloatx80Exp( a );
2084	bSig = extractFloatx80Frac( b );
2085	bExp = extractFloatx80Exp( b );
2086	expDiff = aExp - bExp;
2087	if ( 0 < expDiff ) goto aExpBigger;
2088	if ( expDiff < 0 ) goto bExpBigger;
2089	if ( aExp == 0x7FFF ) {
2090		if ( (ui6b) ( ( aSig | bSig )<<1 ) ) {
2091			return propagateFloatx80NaN( a, b );
2092		}
2093		float_raise( float_flag_invalid );
2094		z.low = floatx80_default_nan_low;
2095		z.high = floatx80_default_nan_high;
2096		return z;
2097	}
2098	if ( aExp == 0 ) {
2099		aExp = 1;
2100		bExp = 1;
2101	}
2102	zSig1 = 0;
2103	if ( bSig < aSig ) goto aBigger;
2104	if ( aSig < bSig ) goto bBigger;
2105	return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
2106 bExpBigger:
2107	if ( bExp == 0x7FFF ) {
2108		if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2109		return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
2110	}
2111	if ( aExp == 0 ) ++expDiff;
2112	shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2113 bBigger:
2114	sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2115	zExp = bExp;
2116	zSign ^= 1;
2117	goto normalizeRoundAndPack;
2118 aExpBigger:
2119	if ( aExp == 0x7FFF ) {
2120		if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2121		return a;
2122	}
2123	if ( bExp == 0 ) --expDiff;
2124	shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2125 aBigger:
2126	sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2127	zExp = aExp;
2128 normalizeRoundAndPack:
2129	return
2130		normalizeRoundAndPackFloatx80(
2131			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2132
2133}
2134
2135/*----------------------------------------------------------------------------
2136| Returns the result of adding the extended double-precision floating-point
2137| values `a' and `b'.  The operation is performed according to the IEC/IEEE
2138| Standard for Binary Floating-Point Arithmetic.
2139*----------------------------------------------------------------------------*/
2140
2141LOCALFUNC floatx80 floatx80_add( floatx80 a, floatx80 b )
2142{
2143	flag aSign, bSign;
2144
2145	aSign = extractFloatx80Sign( a );
2146	bSign = extractFloatx80Sign( b );
2147	if ( aSign == bSign ) {
2148		return addFloatx80Sigs( a, b, aSign );
2149	}
2150	else {
2151		return subFloatx80Sigs( a, b, aSign );
2152	}
2153
2154}
2155
2156/*----------------------------------------------------------------------------
2157| Returns the result of subtracting the extended double-precision floating-
2158| point values `a' and `b'.  The operation is performed according to the
2159| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2160*----------------------------------------------------------------------------*/
2161
2162LOCALFUNC floatx80 floatx80_sub( floatx80 a, floatx80 b )
2163{
2164	flag aSign, bSign;
2165
2166	aSign = extractFloatx80Sign( a );
2167	bSign = extractFloatx80Sign( b );
2168	if ( aSign == bSign ) {
2169		return subFloatx80Sigs( a, b, aSign );
2170	}
2171	else {
2172		return addFloatx80Sigs( a, b, aSign );
2173	}
2174
2175}
2176
2177/*----------------------------------------------------------------------------
2178| Returns the result of multiplying the extended double-precision floating-
2179| point values `a' and `b'.  The operation is performed according to the
2180| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2181*----------------------------------------------------------------------------*/
2182
2183LOCALFUNC floatx80 floatx80_mul( floatx80 a, floatx80 b )
2184{
2185	flag aSign, bSign, zSign;
2186	si5r aExp, bExp, zExp;
2187	ui6b aSig, bSig, zSig0, zSig1;
2188	floatx80 z;
2189
2190	aSig = extractFloatx80Frac( a );
2191	aExp = extractFloatx80Exp( a );
2192	aSign = extractFloatx80Sign( a );
2193	bSig = extractFloatx80Frac( b );
2194	bExp = extractFloatx80Exp( b );
2195	bSign = extractFloatx80Sign( b );
2196	zSign = aSign ^ bSign;
2197	if ( aExp == 0x7FFF ) {
2198		if (    (ui6b) ( aSig<<1 )
2199			 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) {
2200			return propagateFloatx80NaN( a, b );
2201		}
2202		if ( ( bExp | bSig ) == 0 ) goto invalid;
2203		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2204	}
2205	if ( bExp == 0x7FFF ) {
2206		if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2207		if ( ( aExp | aSig ) == 0 ) {
2208 invalid:
2209			float_raise( float_flag_invalid );
2210			z.low = floatx80_default_nan_low;
2211			z.high = floatx80_default_nan_high;
2212			return z;
2213		}
2214		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2215	}
2216	if ( aExp == 0 ) {
2217		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2218		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2219	}
2220	if ( bExp == 0 ) {
2221		if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2222		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2223	}
2224	zExp = aExp + bExp - 0x3FFE;
2225	mul64To128( aSig, bSig, &zSig0, &zSig1 );
2226	if ( 0 < (si6b) zSig0 ) {
2227		shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2228		--zExp;
2229	}
2230	return
2231		roundAndPackFloatx80(
2232			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2233
2234}
2235
2236/*----------------------------------------------------------------------------
2237| Returns the result of dividing the extended double-precision floating-point
2238| value `a' by the corresponding value `b'.  The operation is performed
2239| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2240*----------------------------------------------------------------------------*/
2241
2242LOCALFUNC floatx80 floatx80_div( floatx80 a, floatx80 b )
2243{
2244	flag aSign, bSign, zSign;
2245	si5r aExp, bExp, zExp;
2246	ui6b aSig, bSig, zSig0, zSig1;
2247	ui6b rem0, rem1, rem2, term0, term1, term2;
2248	floatx80 z;
2249
2250	aSig = extractFloatx80Frac( a );
2251	aExp = extractFloatx80Exp( a );
2252	aSign = extractFloatx80Sign( a );
2253	bSig = extractFloatx80Frac( b );
2254	bExp = extractFloatx80Exp( b );
2255	bSign = extractFloatx80Sign( b );
2256	zSign = aSign ^ bSign;
2257	if ( aExp == 0x7FFF ) {
2258		if ( (ui6b) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
2259		if ( bExp == 0x7FFF ) {
2260			if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2261			goto invalid;
2262		}
2263		return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2264	}
2265	if ( bExp == 0x7FFF ) {
2266		if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2267		return packFloatx80( zSign, 0, 0 );
2268	}
2269	if ( bExp == 0 ) {
2270		if ( bSig == 0 ) {
2271			if ( ( aExp | aSig ) == 0 ) {
2272 invalid:
2273				float_raise( float_flag_invalid );
2274				z.low = floatx80_default_nan_low;
2275				z.high = floatx80_default_nan_high;
2276				return z;
2277			}
2278			float_raise( float_flag_divbyzero );
2279			return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2280		}
2281		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2282	}
2283	if ( aExp == 0 ) {
2284		if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2285		normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2286	}
2287	if ( aSig == 0 ) {
2288		/*
2289			added by pcp. this invalid input seems to
2290			cause hang in estimateDiv128To64. should
2291			validate inputs generally.
2292		*/
2293		return packFloatx80( zSign, 0, 0 );
2294	}
2295	if ( (si6b) bSig >= 0 ) {
2296		/*
2297			added by pcp. another check.
2298			invalid input, most significant bit should be set?
2299		*/
2300		return packFloatx80( zSign, 0, 0 );
2301	}
2302	zExp = aExp - bExp + 0x3FFE;
2303	rem1 = 0;
2304	if ( bSig <= aSig ) {
2305		shift128Right( aSig, 0, 1, &aSig, &rem1 );
2306		++zExp;
2307	}
2308	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2309	mul64To128( bSig, zSig0, &term0, &term1 );
2310	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2311	while ( (si6b) rem0 < 0 ) {
2312		--zSig0;
2313		add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2314	}
2315	zSig1 = estimateDiv128To64( rem1, 0, bSig );
2316	if ( (ui6b) ( zSig1<<1 ) <= 8 ) {
2317		mul64To128( bSig, zSig1, &term1, &term2 );
2318		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2319		while ( (si6b) rem1 < 0 ) {
2320			--zSig1;
2321			add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2322		}
2323		zSig1 |= ( ( rem1 | rem2 ) != 0 );
2324	}
2325	return
2326		roundAndPackFloatx80(
2327			floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
2328
2329}
2330
2331/*----------------------------------------------------------------------------
2332| Returns the remainder of the extended double-precision floating-point value
2333| `a' with respect to the corresponding value `b'.  The operation is performed
2334| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2335*----------------------------------------------------------------------------*/
2336
2337LOCALFUNC floatx80 floatx80_rem( floatx80 a, floatx80 b )
2338{
2339	flag aSign, /* bSign, */ zSign;
2340	si5r aExp, bExp, expDiff;
2341	ui6b aSig0, aSig1, bSig;
2342	ui6b q, term0, term1, alternateASig0, alternateASig1;
2343	floatx80 z;
2344
2345	aSig0 = extractFloatx80Frac( a );
2346	aExp = extractFloatx80Exp( a );
2347	aSign = extractFloatx80Sign( a );
2348	bSig = extractFloatx80Frac( b );
2349	bExp = extractFloatx80Exp( b );
2350	/*
2351		not used. should it be?
2352		bSign = extractFloatx80Sign( b );
2353	*/
2354	if ( aExp == 0x7FFF ) {
2355		if (    (ui6b) ( aSig0<<1 )
2356			 || ( ( bExp == 0x7FFF ) && (ui6b) ( bSig<<1 ) ) ) {
2357			return propagateFloatx80NaN( a, b );
2358		}
2359		goto invalid;
2360	}
2361	if ( bExp == 0x7FFF ) {
2362		if ( (ui6b) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
2363		return a;
2364	}
2365	if ( bExp == 0 ) {
2366		if ( bSig == 0 ) {
2367 invalid:
2368			float_raise( float_flag_invalid );
2369			z.low = floatx80_default_nan_low;
2370			z.high = floatx80_default_nan_high;
2371			return z;
2372		}
2373		normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2374	}
2375	if ( aExp == 0 ) {
2376		if ( (ui6b) ( aSig0<<1 ) == 0 ) return a;
2377		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2378	}
2379	bSig |= LIT64( 0x8000000000000000 );
2380	zSign = aSign;
2381	expDiff = aExp - bExp;
2382	aSig1 = 0;
2383	if ( expDiff < 0 ) {
2384		if ( expDiff < -1 ) return a;
2385		shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2386		expDiff = 0;
2387	}
2388	q = ( bSig <= aSig0 );
2389	if ( q ) aSig0 -= bSig;
2390	expDiff -= 64;
2391	while ( 0 < expDiff ) {
2392		q = estimateDiv128To64( aSig0, aSig1, bSig );
2393		q = ( 2 < q ) ? q - 2 : 0;
2394		mul64To128( bSig, q, &term0, &term1 );
2395		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2396		shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2397		expDiff -= 62;
2398	}
2399	expDiff += 64;
2400	if ( 0 < expDiff ) {
2401		q = estimateDiv128To64( aSig0, aSig1, bSig );
2402		q = ( 2 < q ) ? q - 2 : 0;
2403		q >>= 64 - expDiff;
2404		mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
2405		sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2406		shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2407		while ( le128( term0, term1, aSig0, aSig1 ) ) {
2408			++q;
2409			sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2410		}
2411	}
2412	else {
2413		term1 = 0;
2414		term0 = bSig;
2415	}
2416	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2417	if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2418		 || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2419			  && ( q & 1 ) )
2420	   ) {
2421		aSig0 = alternateASig0;
2422		aSig1 = alternateASig1;
2423		zSign = ! zSign;
2424	}
2425	return
2426		normalizeRoundAndPackFloatx80(
2427			80, zSign, bExp + expDiff, aSig0, aSig1 );
2428
2429}
2430
2431/*----------------------------------------------------------------------------
2432| Returns the square root of the extended double-precision floating-point
2433| value `a'.  The operation is performed according to the IEC/IEEE Standard
2434| for Binary Floating-Point Arithmetic.
2435*----------------------------------------------------------------------------*/
2436
2437LOCALFUNC floatx80 floatx80_sqrt( floatx80 a )
2438{
2439	flag aSign;
2440	si5r aExp, zExp;
2441	ui6b aSig0, aSig1, zSig0, zSig1, doubleZSig0;
2442	ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2443	floatx80 z;
2444
2445	aSig0 = extractFloatx80Frac( a );
2446	aExp = extractFloatx80Exp( a );
2447	aSign = extractFloatx80Sign( a );
2448	if ( aExp == 0x7FFF ) {
2449		if ( (ui6b) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
2450		if ( ! aSign ) return a;
2451		goto invalid;
2452	}
2453	if ( aSign ) {
2454		if ( ( aExp | aSig0 ) == 0 ) return a;
2455 invalid:
2456		float_raise( float_flag_invalid );
2457		z.low = floatx80_default_nan_low;
2458		z.high = floatx80_default_nan_high;
2459		return z;
2460	}
2461	if ( aExp == 0 ) {
2462		if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
2463		normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2464	}
2465	zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
2466	zSig0 = estimateSqrt32( aExp, aSig0>>32 );
2467	shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
2468	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
2469	doubleZSig0 = zSig0<<1;
2470	mul64To128( zSig0, zSig0, &term0, &term1 );
2471	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2472	while ( (si6b) rem0 < 0 ) {
2473		--zSig0;
2474		doubleZSig0 -= 2;
2475		add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
2476	}
2477	zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
2478	if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
2479		if ( zSig1 == 0 ) zSig1 = 1;
2480		mul64To128( doubleZSig0, zSig1, &term1, &term2 );
2481		sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2482		mul64To128( zSig1, zSig1, &term2, &term3 );
2483		sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2484		while ( (si6b) rem1 < 0 ) {
2485			--zSig1;
2486			shortShift128Left( 0, zSig1, 1, &term2, &term3 );
2487			term3 |= 1;
2488			term2 |= doubleZSig0;
2489			add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2490		}
2491		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2492	}
2493	shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
2494	zSig0 |= doubleZSig0;
2495	return
2496		roundAndPackFloatx80(
2497			floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
2498
2499}
2500
2501/*----------------------------------------------------------------------------
2502| Returns 1 if the extended double-precision floating-point value `a' is
2503| equal to the corresponding value `b', and 0 otherwise.  The comparison is
2504| performed according to the IEC/IEEE Standard for Binary Floating-Point
2505| Arithmetic.
2506*----------------------------------------------------------------------------*/
2507
2508#if cIncludeFPUUnused
2509LOCALFUNC flag floatx80_eq( floatx80 a, floatx80 b )
2510{
2511
2512	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2513			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2514		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2515			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2516	   ) {
2517		if (    floatx80_is_signaling_nan( a )
2518			 || floatx80_is_signaling_nan( b ) ) {
2519			float_raise( float_flag_invalid );
2520		}
2521		return 0;
2522	}
2523	return
2524		   ( a.low == b.low )
2525		&& (    ( a.high == b.high )
2526			 || (    ( a.low == 0 )
2527				  && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) )
2528		   );
2529
2530}
2531#endif
2532
2533/*----------------------------------------------------------------------------
2534| Returns 1 if the extended double-precision floating-point value `a' is
2535| less than or equal to the corresponding value `b', and 0 otherwise.  The
2536| comparison is performed according to the IEC/IEEE Standard for Binary
2537| Floating-Point Arithmetic.
2538*----------------------------------------------------------------------------*/
2539
2540#if cIncludeFPUUnused
2541LOCALFUNC flag floatx80_le( floatx80 a, floatx80 b )
2542{
2543	flag aSign, bSign;
2544
2545	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2546			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2547		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2548			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2549	   ) {
2550		float_raise( float_flag_invalid );
2551		return 0;
2552	}
2553	aSign = extractFloatx80Sign( a );
2554	bSign = extractFloatx80Sign( b );
2555	if ( aSign != bSign ) {
2556		return
2557			   aSign
2558			|| (    ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2559				 == 0 );
2560	}
2561	return
2562		  aSign ? le128( b.high, b.low, a.high, a.low )
2563		: le128( a.high, a.low, b.high, b.low );
2564
2565}
2566#endif
2567
2568/*----------------------------------------------------------------------------
2569| Returns 1 if the extended double-precision floating-point value `a' is
2570| less than the corresponding value `b', and 0 otherwise.  The comparison
2571| is performed according to the IEC/IEEE Standard for Binary Floating-Point
2572| Arithmetic.
2573*----------------------------------------------------------------------------*/
2574
2575#if cIncludeFPUUnused
2576LOCALFUNC flag floatx80_lt( floatx80 a, floatx80 b )
2577{
2578	flag aSign, bSign;
2579
2580	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2581			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2582		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2583			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2584	   ) {
2585		float_raise( float_flag_invalid );
2586		return 0;
2587	}
2588	aSign = extractFloatx80Sign( a );
2589	bSign = extractFloatx80Sign( b );
2590	if ( aSign != bSign ) {
2591		return
2592			   aSign
2593			&& (    ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2594				 != 0 );
2595	}
2596	return
2597		  aSign ? lt128( b.high, b.low, a.high, a.low )
2598		: lt128( a.high, a.low, b.high, b.low );
2599
2600}
2601#endif
2602
2603/*----------------------------------------------------------------------------
2604| Returns 1 if the extended double-precision floating-point value `a' is equal
2605| to the corresponding value `b', and 0 otherwise.  The invalid exception is
2606| raised if either operand is a NaN.  Otherwise, the comparison is performed
2607| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2608*----------------------------------------------------------------------------*/
2609
2610#if cIncludeFPUUnused
2611LOCALFUNC flag floatx80_eq_signaling( floatx80 a, floatx80 b )
2612{
2613
2614	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2615			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2616		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2617			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2618	   ) {
2619		float_raise( float_flag_invalid );
2620		return 0;
2621	}
2622	return
2623		   ( a.low == b.low )
2624		&& (    ( a.high == b.high )
2625			 || (    ( a.low == 0 )
2626				  && ( (ui4b) ( ( a.high | b.high )<<1 ) == 0 ) )
2627		   );
2628
2629}
2630#endif
2631
2632/*----------------------------------------------------------------------------
2633| Returns 1 if the extended double-precision floating-point value `a' is less
2634| than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
2635| do not cause an exception.  Otherwise, the comparison is performed according
2636| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2637*----------------------------------------------------------------------------*/
2638
2639#if cIncludeFPUUnused
2640LOCALFUNC flag floatx80_le_quiet( floatx80 a, floatx80 b )
2641{
2642	flag aSign, bSign;
2643
2644	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2645			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2646		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2647			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2648	   ) {
2649		if (    floatx80_is_signaling_nan( a )
2650			 || floatx80_is_signaling_nan( b ) ) {
2651			float_raise( float_flag_invalid );
2652		}
2653		return 0;
2654	}
2655	aSign = extractFloatx80Sign( a );
2656	bSign = extractFloatx80Sign( b );
2657	if ( aSign != bSign ) {
2658		return
2659			   aSign
2660			|| (    ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2661				 == 0 );
2662	}
2663	return
2664		  aSign ? le128( b.high, b.low, a.high, a.low )
2665		: le128( a.high, a.low, b.high, b.low );
2666
2667}
2668#endif
2669
2670/*----------------------------------------------------------------------------
2671| Returns 1 if the extended double-precision floating-point value `a' is less
2672| than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
2673| an exception.  Otherwise, the comparison is performed according to the
2674| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2675*----------------------------------------------------------------------------*/
2676
2677#if cIncludeFPUUnused
2678LOCALFUNC flag floatx80_lt_quiet( floatx80 a, floatx80 b )
2679{
2680	flag aSign, bSign;
2681
2682	if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
2683			  && (ui6b) ( extractFloatx80Frac( a )<<1 ) )
2684		 || (    ( extractFloatx80Exp( b ) == 0x7FFF )
2685			  && (ui6b) ( extractFloatx80Frac( b )<<1 ) )
2686	   ) {
2687		if (    floatx80_is_signaling_nan( a )
2688			 || floatx80_is_signaling_nan( b ) ) {
2689			float_raise( float_flag_invalid );
2690		}
2691		return 0;
2692	}
2693	aSign = extractFloatx80Sign( a );
2694	bSign = extractFloatx80Sign( b );
2695	if ( aSign != bSign ) {
2696		return
2697			   aSign
2698			&& (    ( ( (ui4b) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
2699				 != 0 );
2700	}
2701	return
2702		  aSign ? lt128( b.high, b.low, a.high, a.low )
2703		: lt128( a.high, a.low, b.high, b.low );
2704
2705}
2706#endif
2707
2708#ifdef FLOAT128
2709
2710/*----------------------------------------------------------------------------
2711| Returns the result of converting the quadruple-precision floating-point
2712| value `a' to the extended double-precision floating-point format.  The
2713| conversion is performed according to the IEC/IEEE Standard for Binary
2714| Floating-Point Arithmetic.
2715*----------------------------------------------------------------------------*/
2716
2717LOCALFUNC floatx80 float128_to_floatx80( float128 a )
2718{
2719	flag aSign;
2720	si5r aExp;
2721	ui6b aSig0, aSig1;
2722
2723	aSig1 = extractFloat128Frac1( a );
2724	aSig0 = extractFloat128Frac0( a );
2725	aExp = extractFloat128Exp( a );
2726	aSign = extractFloat128Sign( a );
2727	if ( aExp == 0x7FFF ) {
2728		if ( aSig0 | aSig1 ) {
2729			return commonNaNToFloatx80( float128ToCommonNaN( a ) );
2730		}
2731		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2732	}
2733	if ( aExp == 0 ) {
2734		if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
2735		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2736	}
2737	else {
2738		aSig0 |= LIT64( 0x0001000000000000 );
2739	}
2740	shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
2741	return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
2742
2743}
2744
2745/*----------------------------------------------------------------------------
2746| Returns the result of adding the absolute values of the quadruple-precision
2747| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2748| before being returned.  `zSign' is ignored if the result is a NaN.
2749| The addition is performed according to the IEC/IEEE Standard for Binary
2750| Floating-Point Arithmetic.
2751*----------------------------------------------------------------------------*/
2752
2753LOCALFUNC float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
2754{
2755	si5r aExp, bExp, zExp;
2756	ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
2757	si5r expDiff;
2758
2759	aSig1 = extractFloat128Frac1( a );
2760	aSig0 = extractFloat128Frac0( a );
2761	aExp = extractFloat128Exp( a );
2762	bSig1 = extractFloat128Frac1( b );
2763	bSig0 = extractFloat128Frac0( b );
2764	bExp = extractFloat128Exp( b );
2765	expDiff = aExp - bExp;
2766	if ( 0 < expDiff ) {
2767		if ( aExp == 0x7FFF ) {
2768			if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
2769			return a;
2770		}
2771		if ( bExp == 0 ) {
2772			--expDiff;
2773		}
2774		else {
2775			bSig0 |= LIT64( 0x0001000000000000 );
2776		}
2777		shift128ExtraRightJamming(
2778			bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
2779		zExp = aExp;
2780	}
2781	else if ( expDiff < 0 ) {
2782		if ( bExp == 0x7FFF ) {
2783			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2784			return packFloat128( zSign, 0x7FFF, 0, 0 );
2785		}
2786		if ( aExp == 0 ) {
2787			++expDiff;
2788		}
2789		else {
2790			aSig0 |= LIT64( 0x0001000000000000 );
2791		}
2792		shift128ExtraRightJamming(
2793			aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
2794		zExp = bExp;
2795	}
2796	else {
2797		if ( aExp == 0x7FFF ) {
2798			if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
2799				return propagateFloat128NaN( a, b );
2800			}
2801			return a;
2802		}
2803		add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2804		if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
2805		zSig2 = 0;
2806		zSig0 |= LIT64( 0x0002000000000000 );
2807		zExp = aExp;
2808		goto shiftRight1;
2809	}
2810	aSig0 |= LIT64( 0x0001000000000000 );
2811	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2812	--zExp;
2813	if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
2814	++zExp;
2815 shiftRight1:
2816	shift128ExtraRightJamming(
2817		zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
2818 roundAndPack:
2819	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
2820
2821}
2822
2823/*----------------------------------------------------------------------------
2824| Returns the result of subtracting the absolute values of the quadruple-
2825| precision floating-point values `a' and `b'.  If `zSign' is 1, the
2826| difference is negated before being returned.  `zSign' is ignored if the
2827| result is a NaN.  The subtraction is performed according to the IEC/IEEE
2828| Standard for Binary Floating-Point Arithmetic.
2829*----------------------------------------------------------------------------*/
2830
2831LOCALFUNC float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
2832{
2833	si5r aExp, bExp, zExp;
2834	ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
2835	si5r expDiff;
2836	float128 z;
2837
2838	aSig1 = extractFloat128Frac1( a );
2839	aSig0 = extractFloat128Frac0( a );
2840	aExp = extractFloat128Exp( a );
2841	bSig1 = extractFloat128Frac1( b );
2842	bSig0 = extractFloat128Frac0( b );
2843	bExp = extractFloat128Exp( b );
2844	expDiff = aExp - bExp;
2845	shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
2846	shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
2847	if ( 0 < expDiff ) goto aExpBigger;
2848	if ( expDiff < 0 ) goto bExpBigger;
2849	if ( aExp == 0x7FFF ) {
2850		if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
2851			return propagateFloat128NaN( a, b );
2852		}
2853		float_raise( float_flag_invalid );
2854		z.low = float128_default_nan_low;
2855		z.high = float128_default_nan_high;
2856		return z;
2857	}
2858	if ( aExp == 0 ) {
2859		aExp = 1;
2860		bExp = 1;
2861	}
2862	if ( bSig0 < aSig0 ) goto aBigger;
2863	if ( aSig0 < bSig0 ) goto bBigger;
2864	if ( bSig1 < aSig1 ) goto aBigger;
2865	if ( aSig1 < bSig1 ) goto bBigger;
2866	return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
2867 bExpBigger:
2868	if ( bExp == 0x7FFF ) {
2869		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2870		return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
2871	}
2872	if ( aExp == 0 ) {
2873		++expDiff;
2874	}
2875	else {
2876		aSig0 |= LIT64( 0x4000000000000000 );
2877	}
2878	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2879	bSig0 |= LIT64( 0x4000000000000000 );
2880 bBigger:
2881	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
2882	zExp = bExp;
2883	zSign ^= 1;
2884	goto normalizeRoundAndPack;
2885 aExpBigger:
2886	if ( aExp == 0x7FFF ) {
2887		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
2888		return a;
2889	}
2890	if ( bExp == 0 ) {
2891		--expDiff;
2892	}
2893	else {
2894		bSig0 |= LIT64( 0x4000000000000000 );
2895	}
2896	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
2897	aSig0 |= LIT64( 0x4000000000000000 );
2898 aBigger:
2899	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
2900	zExp = aExp;
2901 normalizeRoundAndPack:
2902	--zExp;
2903	return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
2904
2905}
2906
2907/*----------------------------------------------------------------------------
2908| Returns the result of adding the quadruple-precision floating-point values
2909| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2910| for Binary Floating-Point Arithmetic.
2911*----------------------------------------------------------------------------*/
2912
2913LOCALFUNC float128 float128_add( float128 a, float128 b )
2914{
2915	flag aSign, bSign;
2916
2917	aSign = extractFloat128Sign( a );
2918	bSign = extractFloat128Sign( b );
2919	if ( aSign == bSign ) {
2920		return addFloat128Sigs( a, b, aSign );
2921	}
2922	else {
2923		return subFloat128Sigs( a, b, aSign );
2924	}
2925
2926}
2927
2928/*----------------------------------------------------------------------------
2929| Returns the result of subtracting the quadruple-precision floating-point
2930| values `a' and `b'.  The operation is performed according to the IEC/IEEE
2931| Standard for Binary Floating-Point Arithmetic.
2932*----------------------------------------------------------------------------*/
2933
2934LOCALFUNC float128 float128_sub( float128 a, float128 b )
2935{
2936	flag aSign, bSign;
2937
2938	aSign = extractFloat128Sign( a );
2939	bSign = extractFloat128Sign( b );
2940	if ( aSign == bSign ) {
2941		return subFloat128Sigs( a, b, aSign );
2942	}
2943	else {
2944		return addFloat128Sigs( a, b, aSign );
2945	}
2946
2947}
2948
2949/*----------------------------------------------------------------------------
2950| Returns the result of multiplying the quadruple-precision floating-point
2951| values `a' and `b'.  The operation is performed according to the IEC/IEEE
2952| Standard for Binary Floating-Point Arithmetic.
2953*----------------------------------------------------------------------------*/
2954
2955LOCALFUNC float128 float128_mul( float128 a, float128 b )
2956{
2957	flag aSign, bSign, zSign;
2958	si5r aExp, bExp, zExp;
2959	ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
2960	float128 z;
2961
2962	aSig1 = extractFloat128Frac1( a );
2963	aSig0 = extractFloat128Frac0( a );
2964	aExp = extractFloat128Exp( a );
2965	aSign = extractFloat128Sign( a );
2966	bSig1 = extractFloat128Frac1( b );
2967	bSig0 = extractFloat128Frac0( b );
2968	bExp = extractFloat128Exp( b );
2969	bSign = extractFloat128Sign( b );
2970	zSign = aSign ^ bSign;
2971	if ( aExp == 0x7FFF ) {
2972		if (    ( aSig0 | aSig1 )
2973			 || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
2974			return propagateFloat128NaN( a, b );
2975		}
2976		if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
2977		return packFloat128( zSign, 0x7FFF, 0, 0 );
2978	}
2979	if ( bExp == 0x7FFF ) {
2980		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
2981		if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
2982 invalid:
2983			float_raise( float_flag_invalid );
2984			z.low = float128_default_nan_low;
2985			z.high = float128_default_nan_high;
2986			return z;
2987		}
2988		return packFloat128( zSign, 0x7FFF, 0, 0 );
2989	}
2990	if ( aExp == 0 ) {
2991		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
2992		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2993	}
2994	if ( bExp == 0 ) {
2995		if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
2996		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2997	}
2998	zExp = aExp + bExp - 0x4000;
2999	aSig0 |= LIT64( 0x0001000000000000 );
3000	shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
3001	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
3002	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
3003	zSig2 |= ( zSig3 != 0 );
3004	if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
3005		shift128ExtraRightJamming(
3006			zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
3007		++zExp;
3008	}
3009	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
3010
3011}
3012
3013/*----------------------------------------------------------------------------
3014| Returns the result of dividing the quadruple-precision floating-point value
3015| `a' by the corresponding value `b'.  The operation is performed according to
3016| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3017*----------------------------------------------------------------------------*/
3018
3019LOCALFUNC float128 float128_div( float128 a, float128 b )
3020{
3021	flag aSign, bSign, zSign;
3022	si5r aExp, bExp, zExp;
3023	ui6b aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
3024	ui6b rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3025	float128 z;
3026
3027	aSig1 = extractFloat128Frac1( a );
3028	aSig0 = extractFloat128Frac0( a );
3029	aExp = extractFloat128Exp( a );
3030	aSign = extractFloat128Sign( a );
3031	bSig1 = extractFloat128Frac1( b );
3032	bSig0 = extractFloat128Frac0( b );
3033	bExp = extractFloat128Exp( b );
3034	bSign = extractFloat128Sign( b );
3035	zSign = aSign ^ bSign;
3036	if ( aExp == 0x7FFF ) {
3037		if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
3038		if ( bExp == 0x7FFF ) {
3039			if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
3040			goto invalid;
3041		}
3042		return packFloat128( zSign, 0x7FFF, 0, 0 );
3043	}
3044	if ( bExp == 0x7FFF ) {
3045		if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
3046		return packFloat128( zSign, 0, 0, 0 );
3047	}
3048	if ( bExp == 0 ) {
3049		if ( ( bSig0 | bSig1 ) == 0 ) {
3050			if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
3051 invalid:
3052				float_raise( float_flag_invalid );
3053				z.low = float128_default_nan_low;
3054				z.high = float128_default_nan_high;
3055				return z;
3056			}
3057			float_raise( float_flag_divbyzero );
3058			return packFloat128( zSign, 0x7FFF, 0, 0 );
3059		}
3060		normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
3061	}
3062	if ( aExp == 0 ) {
3063		if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
3064		normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
3065	}
3066	zExp = aExp - bExp + 0x3FFD;
3067	shortShift128Left(
3068		aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
3069	shortShift128Left(
3070		bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
3071	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
3072		shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
3073		++zExp;
3074	}
3075	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
3076	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
3077	sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
3078	while ( (si6b) rem0 < 0 ) {
3079		--zSig0;
3080		add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
3081	}
3082	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
3083	if ( ( zSig1 & 0x3FFF ) <= 4 ) {
3084		mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
3085		sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
3086		while ( (si6b) rem1 < 0 ) {
3087			--zSig1;
3088			add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
3089		}
3090		zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3091	}
3092	shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
3093	return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
3094
3095}
3096
3097#endif
3098
3099typedef unsigned int float32;
3100
3101/*----------------------------------------------------------------------------
3102| Normalizes the subnormal single-precision floating-point value represented
3103| by the denormalized significand `aSig'.  The normalized exponent and
3104| significand are stored at the locations pointed to by `zExpPtr' and
3105| `zSigPtr', respectively.
3106*----------------------------------------------------------------------------*/
3107
3108LOCALPROC
3109 normalizeFloat32Subnormal( ui5b aSig, si4r *zExpPtr, ui5b *zSigPtr )
3110{
3111	si3r shiftCount;
3112
3113	shiftCount = countLeadingZeros32( aSig ) - 8;
3114	*zSigPtr = aSig<<shiftCount;
3115	*zExpPtr = 1 - shiftCount;
3116
3117}
3118
3119/*----------------------------------------------------------------------------
3120| Returns the fraction bits of the single-precision floating-point value `a'.
3121*----------------------------------------------------------------------------*/
3122
3123LOCALINLINEFUNC ui5b extractFloat32Frac( float32 a )
3124{
3125
3126	return a & 0x007FFFFF;
3127
3128}
3129
3130/*----------------------------------------------------------------------------
3131| Returns the exponent bits of the single-precision floating-point value `a'.
3132*----------------------------------------------------------------------------*/
3133
3134LOCALINLINEFUNC si4r extractFloat32Exp( float32 a )
3135{
3136
3137	return ( a>>23 ) & 0xFF;
3138
3139}
3140
3141/*----------------------------------------------------------------------------
3142| Returns the sign bit of the single-precision floating-point value `a'.
3143*----------------------------------------------------------------------------*/
3144
3145LOCALINLINEFUNC flag extractFloat32Sign( float32 a )
3146{
3147
3148	return a>>31;
3149
3150}
3151
3152/*----------------------------------------------------------------------------
3153| Returns 1 if the single-precision floating-point value `a' is a signaling
3154| NaN; otherwise returns 0.
3155*----------------------------------------------------------------------------*/
3156
3157LOCALFUNC flag float32_is_signaling_nan( float32 a )
3158{
3159
3160	return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
3161
3162}
3163
3164/*----------------------------------------------------------------------------
3165| Returns the result of converting the single-precision floating-point NaN
3166| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
3167| exception is raised.
3168*----------------------------------------------------------------------------*/
3169
3170LOCALFUNC commonNaNT float32ToCommonNaN( float32 a )
3171{
3172	commonNaNT z;
3173
3174	if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
3175	z.sign = a>>31;
3176	z.low = 0;
3177	z.high = ( (ui6b) a )<<41;
3178	return z;
3179
3180}
3181
3182/*----------------------------------------------------------------------------
3183| Returns the result of converting the single-precision floating-point value
3184| `a' to the extended double-precision floating-point format.  The conversion
3185| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3186| Arithmetic.
3187*----------------------------------------------------------------------------*/
3188
3189LOCALFUNC floatx80 float32_to_floatx80( float32 a )
3190{
3191	flag aSign;
3192	si4r aExp;
3193	ui5b aSig;
3194
3195	aSig = extractFloat32Frac( a );
3196	aExp = extractFloat32Exp( a );
3197	aSign = extractFloat32Sign( a );
3198	if ( aExp == 0xFF ) {
3199		if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
3200		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3201	}
3202	if ( aExp == 0 ) {
3203		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3204		normalizeFloat32Subnormal( aSig, &aExp, &aSig );
3205	}
3206	aSig |= 0x00800000;
3207	return packFloatx80( aSign, aExp + 0x3F80, ( (ui6b) aSig )<<40 );
3208
3209}
3210
3211/*----------------------------------------------------------------------------
3212| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3213| single-precision floating-point value, returning the result.  After being
3214| shifted into the proper positions, the three fields are simply added
3215| together to form the result.  This means that any integer portion of `zSig'
3216| will be added into the exponent.  Since a properly normalized significand
3217| will have an integer portion equal to 1, the `zExp' input should be 1 less
3218| than the desired result exponent whenever `zSig' is a complete, normalized
3219| significand.
3220*----------------------------------------------------------------------------*/
3221
3222LOCALINLINEFUNC float32 packFloat32( flag zSign, si4r zExp, ui5b zSig )
3223{
3224
3225	return ( ( (ui5b) zSign )<<31 ) + ( ( (ui5b) zExp )<<23 ) + zSig;
3226
3227}
3228
3229/*----------------------------------------------------------------------------
3230| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3231| and significand `zSig', and returns the proper single-precision floating-
3232| point value corresponding to the abstract input.  Ordinarily, the abstract
3233| value is simply rounded and packed into the single-precision format, with
3234| the inexact exception raised if the abstract input cannot be represented
3235| exactly.  However, if the abstract value is too large, the overflow and
3236| inexact exceptions are raised and an infinity or maximal finite value is
3237| returned.  If the abstract value is too small, the input value is rounded to
3238| a subnormal number, and the underflow and inexact exceptions are raised if
3239| the abstract input cannot be represented exactly as a subnormal single-
3240| precision floating-point number.
3241|     The input significand `zSig' has its binary point between bits 30
3242| and 29, which is 7 bits to the left of the usual location.  This shifted
3243| significand must be normalized or smaller.  If `zSig' is not normalized,
3244| `zExp' must be 0; in that case, the result returned is a subnormal number,
3245| and it must not require rounding.  In the usual case that `zSig' is
3246| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3247| The handling of underflow and overflow follows the IEC/IEEE Standard for
3248| Binary Floating-Point Arithmetic.
3249*----------------------------------------------------------------------------*/
3250
3251LOCALFUNC float32 roundAndPackFloat32( flag zSign, si4r zExp, ui5b zSig )
3252{
3253	si3r roundingMode;
3254	flag roundNearestEven;
3255	si3r roundIncrement, roundBits;
3256	flag isTiny;
3257
3258	roundingMode = float_rounding_mode;
3259	roundNearestEven = ( roundingMode == float_round_nearest_even );
3260	roundIncrement = 0x40;
3261	if ( ! roundNearestEven ) {
3262		if ( roundingMode == float_round_to_zero ) {
3263			roundIncrement = 0;
3264		}
3265		else {
3266			roundIncrement = 0x7F;
3267			if ( zSign ) {
3268				if ( roundingMode == float_round_up ) roundIncrement = 0;
3269			}
3270			else {
3271				if ( roundingMode == float_round_down ) roundIncrement = 0;
3272			}
3273		}
3274	}
3275	roundBits = zSig & 0x7F;
3276	if ( 0xFD <= (ui4b) zExp ) {
3277		if (    ( 0xFD < zExp )
3278			 || (    ( zExp == 0xFD )
3279				  && ( (si5b) ( zSig + roundIncrement ) < 0 ) )
3280		   ) {
3281			float_raise( float_flag_overflow | float_flag_inexact );
3282			return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
3283		}
3284		if ( zExp < 0 ) {
3285			isTiny =
3286				   ( float_detect_tininess == float_tininess_before_rounding )
3287				|| ( zExp < -1 )
3288				|| ( zSig + roundIncrement < 0x80000000 );
3289			shift32RightJamming( zSig, - zExp, &zSig );
3290			zExp = 0;
3291			roundBits = zSig & 0x7F;
3292			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
3293		}
3294	}
3295	if ( roundBits ) float_exception_flags |= float_flag_inexact;
3296	zSig = ( zSig + roundIncrement )>>7;
3297	zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
3298	if ( zSig == 0 ) zExp = 0;
3299	return packFloat32( zSign, zExp, zSig );
3300
3301}
3302
3303/*----------------------------------------------------------------------------
3304| Returns the result of converting the canonical NaN `a' to the single-
3305| precision floating-point format.
3306*----------------------------------------------------------------------------*/
3307
3308LOCALFUNC float32 commonNaNToFloat32( commonNaNT a )
3309{
3310
3311	return ( ( (ui5b) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
3312
3313}
3314
3315/*----------------------------------------------------------------------------
3316| Returns the result of converting the extended double-precision floating-
3317| point value `a' to the single-precision floating-point format.  The
3318| conversion is performed according to the IEC/IEEE Standard for Binary
3319| Floating-Point Arithmetic.
3320*----------------------------------------------------------------------------*/
3321
3322LOCALFUNC float32 floatx80_to_float32( floatx80 a )
3323{
3324	flag aSign;
3325	si5r aExp;
3326	ui6b aSig;
3327
3328	aSig = extractFloatx80Frac( a );
3329	aExp = extractFloatx80Exp( a );
3330	aSign = extractFloatx80Sign( a );
3331	if ( aExp == 0x7FFF ) {
3332		if ( (ui6b) ( aSig<<1 ) ) {
3333			return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3334		}
3335		return packFloat32( aSign, 0xFF, 0 );
3336	}
3337	shift64RightJamming( aSig, 33, &aSig );
3338	if ( aExp || aSig ) aExp -= 0x3F81;
3339	return roundAndPackFloat32( aSign, aExp, aSig );
3340
3341}
3342
3343
3344typedef ui6b float64;
3345
3346/*----------------------------------------------------------------------------
3347| Returns the fraction bits of the double-precision floating-point value `a'.
3348*----------------------------------------------------------------------------*/
3349
3350LOCALINLINEFUNC ui6b extractFloat64Frac( float64 a )
3351{
3352
3353	return a & LIT64( 0x000FFFFFFFFFFFFF );
3354
3355}
3356
3357/*----------------------------------------------------------------------------
3358| Returns the exponent bits of the double-precision floating-point value `a'.
3359*----------------------------------------------------------------------------*/
3360
3361LOCALINLINEFUNC si4r extractFloat64Exp( float64 a )
3362{
3363
3364	return ( a>>52 ) & 0x7FF;
3365
3366}
3367
3368/*----------------------------------------------------------------------------
3369| Returns the sign bit of the double-precision floating-point value `a'.
3370*----------------------------------------------------------------------------*/
3371
3372LOCALINLINEFUNC flag extractFloat64Sign( float64 a )
3373{
3374
3375	return a>>63;
3376
3377}
3378
3379/*----------------------------------------------------------------------------
3380| Returns 1 if the double-precision floating-point value `a' is a signaling
3381| NaN; otherwise returns 0.
3382*----------------------------------------------------------------------------*/
3383
3384LOCALFUNC flag float64_is_signaling_nan( float64 a )
3385{
3386
3387	return
3388		   ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
3389		&& ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
3390
3391}
3392
3393/*----------------------------------------------------------------------------
3394| Returns the result of converting the double-precision floating-point NaN
3395| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
3396| exception is raised.
3397*----------------------------------------------------------------------------*/
3398
3399LOCALFUNC commonNaNT float64ToCommonNaN( float64 a )
3400{
3401	commonNaNT z;
3402
3403	if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
3404	z.sign = a>>63;
3405	z.low = 0;
3406	z.high = a<<12;
3407	return z;
3408
3409}
3410
3411/*----------------------------------------------------------------------------
3412| Normalizes the subnormal double-precision floating-point value represented
3413| by the denormalized significand `aSig'.  The normalized exponent and
3414| significand are stored at the locations pointed to by `zExpPtr' and
3415| `zSigPtr', respectively.
3416*----------------------------------------------------------------------------*/
3417
3418LOCALPROC
3419 normalizeFloat64Subnormal( ui6b aSig, si4r *zExpPtr, ui6b *zSigPtr )
3420{
3421	si3r shiftCount;
3422
3423	shiftCount = countLeadingZeros64( aSig ) - 11;
3424	*zSigPtr = aSig<<shiftCount;
3425	*zExpPtr = 1 - shiftCount;
3426
3427}
3428
3429/*----------------------------------------------------------------------------
3430| Returns the result of converting the double-precision floating-point value
3431| `a' to the extended double-precision floating-point format.  The conversion
3432| is performed according to the IEC/IEEE Standard for Binary Floating-Point
3433| Arithmetic.
3434*----------------------------------------------------------------------------*/
3435
3436LOCALFUNC floatx80 float64_to_floatx80( float64 a )
3437{
3438	flag aSign;
3439	si4r aExp;
3440	ui6b aSig;
3441
3442	aSig = extractFloat64Frac( a );
3443	aExp = extractFloat64Exp( a );
3444	aSign = extractFloat64Sign( a );
3445	if ( aExp == 0x7FF ) {
3446		if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
3447		return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3448	}
3449	if ( aExp == 0 ) {
3450		if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3451		normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3452	}
3453	return
3454		packFloatx80(
3455			aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
3456
3457}
3458
3459/*----------------------------------------------------------------------------
3460| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3461| double-precision floating-point value, returning the result.  After being
3462| shifted into the proper positions, the three fields are simply added
3463| together to form the result.  This means that any integer portion of `zSig'
3464| will be added into the exponent.  Since a properly normalized significand
3465| will have an integer portion equal to 1, the `zExp' input should be 1 less
3466| than the desired result exponent whenever `zSig' is a complete, normalized
3467| significand.
3468*----------------------------------------------------------------------------*/
3469
3470LOCALINLINEFUNC float64 packFloat64( flag zSign, si4r zExp, ui6b zSig )
3471{
3472
3473	return ( ( (ui6b) zSign )<<63 ) + ( ( (ui6b) zExp )<<52 ) + zSig;
3474
3475}
3476
3477/*----------------------------------------------------------------------------
3478| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3479| and significand `zSig', and returns the proper double-precision floating-
3480| point value corresponding to the abstract input.  Ordinarily, the abstract
3481| value is simply rounded and packed into the double-precision format, with
3482| the inexact exception raised if the abstract input cannot be represented
3483| exactly.  However, if the abstract value is too large, the overflow and
3484| inexact exceptions are raised and an infinity or maximal finite value is
3485| returned.  If the abstract value is too small, the input value is rounded
3486| to a subnormal number, and the underflow and inexact exceptions are raised
3487| if the abstract input cannot be represented exactly as a subnormal double-
3488| precision floating-point number.
3489|     The input significand `zSig' has its binary point between bits 62
3490| and 61, which is 10 bits to the left of the usual location.  This shifted
3491| significand must be normalized or smaller.  If `zSig' is not normalized,
3492| `zExp' must be 0; in that case, the result returned is a subnormal number,
3493| and it must not require rounding.  In the usual case that `zSig' is
3494| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3495| The handling of underflow and overflow follows the IEC/IEEE Standard for
3496| Binary Floating-Point Arithmetic.
3497*----------------------------------------------------------------------------*/
3498
3499LOCALFUNC float64 roundAndPackFloat64( flag zSign, si4r zExp, ui6b zSig )
3500{
3501	si3r roundingMode;
3502	flag roundNearestEven;
3503	si4r roundIncrement, roundBits;
3504	flag isTiny;
3505
3506	roundingMode = float_rounding_mode;
3507	roundNearestEven = ( roundingMode == float_round_nearest_even );
3508	roundIncrement = 0x200;
3509	if ( ! roundNearestEven ) {
3510		if ( roundingMode == float_round_to_zero ) {
3511			roundIncrement = 0;
3512		}
3513		else {
3514			roundIncrement = 0x3FF;
3515			if ( zSign ) {
3516				if ( roundingMode == float_round_up ) roundIncrement = 0;
3517			}
3518			else {
3519				if ( roundingMode == float_round_down ) roundIncrement = 0;
3520			}
3521		}
3522	}
3523	roundBits = zSig & 0x3FF;
3524	if ( 0x7FD <= (ui4b) zExp ) {
3525		if (    ( 0x7FD < zExp )
3526			 || (    ( zExp == 0x7FD )
3527				  && ( (si6b) ( zSig + roundIncrement ) < 0 ) )
3528		   ) {
3529			float_raise( float_flag_overflow | float_flag_inexact );
3530			return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
3531		}
3532		if ( zExp < 0 ) {
3533			isTiny =
3534				   ( float_detect_tininess == float_tininess_before_rounding )
3535				|| ( zExp < -1 )
3536				|| ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
3537			shift64RightJamming( zSig, - zExp, &zSig );
3538			zExp = 0;
3539			roundBits = zSig & 0x3FF;
3540			if ( isTiny && roundBits ) float_raise( float_flag_underflow );
3541		}
3542	}
3543	if ( roundBits ) float_exception_flags |= float_flag_inexact;
3544	zSig = ( zSig + roundIncrement )>>10;
3545	zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
3546	if ( zSig == 0 ) zExp = 0;
3547	return packFloat64( zSign, zExp, zSig );
3548
3549}
3550
3551/*----------------------------------------------------------------------------
3552| Returns the result of converting the canonical NaN `a' to the double-
3553| precision floating-point format.
3554*----------------------------------------------------------------------------*/
3555
3556LOCALFUNC float64 commonNaNToFloat64( commonNaNT a )
3557{
3558
3559	return
3560		  ( ( (ui6b) a.sign )<<63 )
3561		| LIT64( 0x7FF8000000000000 )
3562		| ( a.high>>12 );
3563
3564}
3565
3566/*----------------------------------------------------------------------------
3567| Returns the result of converting the extended double-precision floating-
3568| point value `a' to the double-precision floating-point format.  The
3569| conversion is performed according to the IEC/IEEE Standard for Binary
3570| Floating-Point Arithmetic.
3571*----------------------------------------------------------------------------*/
3572
3573LOCALFUNC float64 floatx80_to_float64( floatx80 a )
3574{
3575	flag aSign;
3576	si5r aExp;
3577	ui6b aSig, zSig;
3578
3579	aSig = extractFloatx80Frac( a );
3580	aExp = extractFloatx80Exp( a );
3581	aSign = extractFloatx80Sign( a );
3582	if ( aExp == 0x7FFF ) {
3583		if ( (ui6b) ( aSig<<1 ) ) {
3584			return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3585		}
3586		return packFloat64( aSign, 0x7FF, 0 );
3587	}
3588	shift64RightJamming( aSig, 1, &zSig );
3589	if ( aExp || aSig ) aExp -= 0x3C01;
3590	return roundAndPackFloat64( aSign, aExp, zSig );
3591
3592}
3593
3594/* ----- end from original file "softfloat.c" ----- */
3595
3596typedef si4r Bit16s;
3597typedef si5r Bit32s;
3598typedef si6r Bit64s;
3599
3600typedef ui5r Bit32u;
3601typedef ui6r Bit64u;
3602
3603#define int16_indefinite 0x8000
3604
3605/*----------------------------------------------------------------------------
3606| Takes extended double-precision floating-point  NaN  `a' and returns the
3607| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
3608| is raised.
3609*----------------------------------------------------------------------------*/
3610
3611LOCALFUNC floatx80 propagateOneFloatx80NaN(floatx80 *a)
3612{
3613	if (floatx80_is_signaling_nan(*a))
3614		float_raise(float_flag_invalid);
3615
3616	a->low |= LIT64(0xC000000000000000);
3617
3618	return *a;
3619}
3620
3621#define float_flag_denormal 0x02
3622
3623#define floatx80_default_nan_exp 0xFFFF
3624#define floatx80_default_nan_fraction LIT64(0xC000000000000000)
3625
3626#define packFloatx80m(zSign, zExp, zSig) {(zSig), ((zSign) << 15) + (zExp) }
3627
3628/*----------------------------------------------------------------------------
3629| Packs two 64-bit precision integers into into the quadruple-precision
3630| floating-point value, returning the result.
3631*----------------------------------------------------------------------------*/
3632
3633#define packFloat2x128m(zHi, zLo) {(zLo), (zHi)}
3634#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
3635
3636static const floatx80 floatx80_default_nan =
3637	packFloatx80m(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
3638
3639/*----------------------------------------------------------------------------
3640| Software IEC/IEEE floating-point ordering relations
3641*----------------------------------------------------------------------------*/
3642enum {
3643	float_relation_less      = -1,
3644	float_relation_equal     =  0,
3645	float_relation_greater   =  1,
3646	float_relation_unordered =  2
3647};
3648
3649#define EXP_BIAS 0x3FFF
3650
3651
3652/*----------------------------------------------------------------------------
3653| Returns the result of multiplying the extended double-precision floating-
3654| point value `a' and quadruple-precision floating point value `b'. The
3655| operation is performed according to the IEC/IEEE Standard for Binary
3656| Floating-Point Arithmetic.
3657*----------------------------------------------------------------------------*/
3658
3659LOCALFUNC floatx80 floatx80_mul128(floatx80 a, float128 b)
3660{
3661	Bit32s aExp, bExp, zExp;
3662	Bit64u aSig, bSig0, bSig1, zSig0, zSig1, zSig2;
3663	int aSign, bSign, zSign;
3664
3665	// handle unsupported extended double-precision floating encodings
3666	if (0 /* floatx80_is_unsupported(a) */)
3667	{
3668 invalid:
3669		float_raise(float_flag_invalid);
3670		return floatx80_default_nan;
3671	}
3672
3673	aSig = extractFloatx80Frac(a);
3674	aExp = extractFloatx80Exp(a);
3675	aSign = extractFloatx80Sign(a);
3676	bSig0 = extractFloat128Frac0(b);
3677	bSig1 = extractFloat128Frac1(b);
3678	bExp = extractFloat128Exp(b);
3679	bSign = extractFloat128Sign(b);
3680
3681	zSign = aSign ^ bSign;
3682
3683	if (aExp == 0x7FFF) {
3684		if ((Bit64u) (aSig<<1)
3685			 || ((bExp == 0x7FFF) && (bSig0 | bSig1)))
3686		{
3687			floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b));
3688			return propagateFloatx80NaN(a, r);
3689		}
3690		if (bExp == 0) {
3691			if ((bSig0 | bSig1) == 0) goto invalid;
3692			float_raise(float_flag_denormal);
3693		}
3694		return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
3695	}
3696	if (bExp == 0x7FFF) {
3697		if (bSig0 | bSig1) {
3698			floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b));
3699			return propagateFloatx80NaN(a, r);
3700		}
3701		if (aExp == 0) {
3702			if (aSig == 0) goto invalid;
3703			float_raise(float_flag_denormal);
3704		}
3705		return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
3706	}
3707	if (aExp == 0) {
3708		if (aSig == 0) {
3709			if ((bExp == 0) && (bSig0 | bSig1)) float_raise(float_flag_denormal);
3710			return packFloatx80(zSign, 0, 0);
3711		}
3712		float_raise(float_flag_denormal);
3713		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3714	}
3715	if (bExp == 0) {
3716		if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0);
3717		float_raise(float_flag_denormal);
3718		normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
3719	}
3720	else bSig0 |= LIT64(0x0001000000000000);
3721
3722	zExp = aExp + bExp - 0x3FFE;
3723	shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1);
3724	mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2);
3725	if (0 < (Bit64s) zSig0) {
3726		shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
3727		--zExp;
3728	}
3729	return
3730		roundAndPackFloatx80(floatx80_rounding_precision,
3731			 zSign, zExp, zSig0, zSig1);
3732}
3733
3734/* ----- from original file "softfloatx80.h" ----- */
3735
3736/*
3737	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
3738*/
3739
3740
3741/*----------------------------------------------------------------------------
3742| Software IEC/IEEE floating-point class.
3743*----------------------------------------------------------------------------*/
3744typedef enum {
3745	float_zero,
3746	float_NaN,
3747	float_negative_inf,
3748	float_positive_inf,
3749	float_denormal,
3750	float_normalized
3751} float_class_t;
3752
3753
3754/*-----------------------------------------------------------------------------
3755| Calculates the absolute value of the extended double-precision floating-point
3756| value `a'.  The operation is performed according to the IEC/IEEE Standard
3757| for Binary Floating-Point Arithmetic.
3758*----------------------------------------------------------------------------*/
3759
3760#if 0
3761inline floatx80 floatx80_abs(floatx80 *reg)
3762{
3763	reg->high &= 0x7FFF;
3764	return *reg;
3765}
3766#endif
3767
3768/*-----------------------------------------------------------------------------
3769| Changes the sign of the extended double-precision floating-point value 'a'.
3770| The operation is performed according to the IEC/IEEE Standard for Binary
3771| Floating-Point Arithmetic.
3772*----------------------------------------------------------------------------*/
3773
3774LOCALFUNC floatx80 floatx80_chs(floatx80 *x)
3775{
3776	x->high ^= 0x8000;
3777	return *x;
3778}
3779
3780/* ----- end from original file "softfloatx80.h" ----- */
3781
3782/* ----- from original file "softfloatx80.cc" ----- */
3783
3784/*
3785	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
3786*/
3787
3788/*----------------------------------------------------------------------------
3789| Returns the result of converting the extended double-precision floating-
3790| point value `a' to the 16-bit two's complement integer format.  The
3791| conversion is performed according to the IEC/IEEE Standard for Binary
3792| Floating-Point Arithmetic - which means in particular that the conversion
3793| is rounded according to the current rounding mode. If `a' is a NaN or the
3794| conversion overflows, the integer indefinite value is returned.
3795*----------------------------------------------------------------------------*/
3796
3797#if cIncludeFPUUnused
3798LOCALFUNC Bit16s floatx80_to_int16(floatx80 a)
3799{
3800#if 0
3801   if (floatx80_is_unsupported(a))
3802   {
3803		float_raise(float_flag_invalid);
3804		return int16_indefinite;
3805   }
3806#endif
3807
3808   Bit32s v32 = floatx80_to_int32(a);
3809
3810   if ((v32 > 32767) || (v32 < -32768)) {
3811		float_exception_flags = float_flag_invalid; // throw way other flags
3812		return int16_indefinite;
3813   }
3814
3815   return (Bit16s) v32;
3816}
3817#endif
3818
3819/*----------------------------------------------------------------------------
3820| Returns the result of converting the extended double-precision floating-
3821| point value `a' to the 16-bit two's complement integer format.  The
3822| conversion is performed according to the IEC/IEEE Standard for Binary
3823| Floating-Point Arithmetic, except that the conversion is always rounded
3824| toward zero.  If `a' is a NaN or the conversion overflows, the integer
3825| indefinite value is returned.
3826*----------------------------------------------------------------------------*/
3827
3828#if cIncludeFPUUnused
3829LOCALFUNC Bit16s floatx80_to_int16_round_to_zero(floatx80 a)
3830{
3831#if 0
3832   if (floatx80_is_unsupported(a))
3833   {
3834		float_raise(float_flag_invalid);
3835		return int16_indefinite;
3836   }
3837#endif
3838
3839   Bit32s v32 = floatx80_to_int32_round_to_zero(a);
3840
3841   if ((v32 > 32767) || (v32 < -32768)) {
3842		float_exception_flags = float_flag_invalid; // throw way other flags
3843		return int16_indefinite;
3844   }
3845
3846   return (Bit16s) v32;
3847}
3848#endif
3849
3850/*----------------------------------------------------------------------------
3851| Separate the source extended double-precision floating point value `a'
3852| into its exponent and significand, store the significant back to the
3853| 'a' and return the exponent. The operation performed is a superset of
3854| the IEC/IEEE recommended logb(x) function.
3855*----------------------------------------------------------------------------*/
3856
3857LOCALFUNC floatx80 floatx80_extract(floatx80 *a)
3858{
3859	Bit64u aSig = extractFloatx80Frac(*a);
3860	Bit32s aExp = extractFloatx80Exp(*a);
3861	int   aSign = extractFloatx80Sign(*a);
3862
3863#if 0
3864	if (floatx80_is_unsupported(*a))
3865	{
3866		float_raise(float_flag_invalid);
3867		*a = floatx80_default_nan;
3868		return *a;
3869	}
3870#endif
3871
3872	if (aExp == 0x7FFF) {
3873		if ((Bit64u) (aSig<<1))
3874		{
3875			*a = propagateOneFloatx80NaN(a);
3876			return *a;
3877		}
3878		return packFloatx80(0, 0x7FFF, LIT64(0x8000000000000000));
3879	}
3880	if (aExp == 0)
3881	{
3882		if (aSig == 0) {
3883			float_raise(float_flag_divbyzero);
3884			*a = packFloatx80(aSign, 0, 0);
3885			return packFloatx80(1, 0x7FFF, LIT64(0x8000000000000000));
3886		}
3887		float_raise(float_flag_denormal);
3888		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3889	}
3890
3891	a->high = (aSign << 15) + 0x3FFF;
3892	a->low = aSig;
3893	return int32_to_floatx80(aExp - 0x3FFF);
3894}
3895
3896/*----------------------------------------------------------------------------
3897| Scales extended double-precision floating-point value in operand `a' by
3898| value `b'. The function truncates the value in the second operand 'b' to
3899| an integral value and adds that value to the exponent of the operand 'a'.
3900| The operation performed according to the IEC/IEEE Standard for Binary
3901| Floating-Point Arithmetic.
3902*----------------------------------------------------------------------------*/
3903
3904LOCALFUNC floatx80 floatx80_scale(floatx80 a, floatx80 b)
3905{
3906	int shiftCount;
3907	Bit32s scale;
3908
3909	// handle unsupported extended double-precision floating encodings
3910#if 0
3911	if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
3912	{
3913		float_raise(float_flag_invalid);
3914		return floatx80_default_nan;
3915	}
3916#endif
3917
3918	Bit64u aSig = extractFloatx80Frac(a);
3919	Bit32s aExp = extractFloatx80Exp(a);
3920	int aSign = extractFloatx80Sign(a);
3921	Bit64u bSig = extractFloatx80Frac(b);
3922	Bit32s bExp = extractFloatx80Exp(b);
3923	int bSign = extractFloatx80Sign(b);
3924
3925	if (aExp == 0x7FFF) {
3926		if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
3927		{
3928			return propagateFloatx80NaN(a, b);
3929		}
3930		if ((bExp == 0x7FFF) && bSign) {
3931			float_raise(float_flag_invalid);
3932			return floatx80_default_nan;
3933		}
3934		if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
3935		return a;
3936	}
3937	if (bExp == 0x7FFF) {
3938		if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
3939		if ((aExp | aSig) == 0) {
3940			if (! bSign) {
3941				float_raise(float_flag_invalid);
3942				return floatx80_default_nan;
3943			}
3944			return a;
3945		}
3946		if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
3947		if (bSign) return packFloatx80(aSign, 0, 0);
3948		return packFloatx80(aSign, 0x7FFF, LIT64(0x8000000000000000));
3949	}
3950	if (aExp == 0) {
3951		if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
3952		if (aSig == 0) return a;
3953		float_raise(float_flag_denormal);
3954		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
3955		if (bExp < 0x3FFF)
3956			return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0);
3957	}
3958	if (bExp == 0) {
3959		if (bSig == 0) return a;
3960		float_raise(float_flag_denormal);
3961		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
3962	}
3963
3964	if (bExp > 0x400E) {
3965		/* generate appropriate overflow/underflow */
3966		return roundAndPackFloatx80(80, aSign,
3967						  bSign ? -0x3FFF : 0x7FFF, aSig, 0);
3968	}
3969
3970	if (bExp < 0x3FFF) return a;
3971
3972	shiftCount = 0x403E - bExp;
3973	bSig >>= shiftCount;
3974	scale = (Bit32s) bSig;
3975	if (bSign) scale = -scale; /* -32768..32767 */
3976	return
3977		roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
3978}
3979
3980/*----------------------------------------------------------------------------
3981| Determine extended-precision floating-point number class.
3982*----------------------------------------------------------------------------*/
3983
3984#if cIncludeFPUUnused
3985LOCALFUNC float_class_t floatx80_class(floatx80 a)
3986{
3987   Bit32s aExp = extractFloatx80Exp(a);
3988   Bit64u aSig = extractFloatx80Frac(a);
3989
3990   if(aExp == 0) {
3991	   if (aSig == 0)
3992		   return float_zero;
3993
3994	   /* denormal or pseudo-denormal */
3995	   return float_denormal;
3996   }
3997
3998   /* valid numbers have the MS bit set */
3999   if (!(aSig & LIT64(0x8000000000000000)))
4000	   return float_NaN; /* report unsupported as NaNs */
4001
4002   if(aExp == 0x7fff) {
4003	   int aSign = extractFloatx80Sign(a);
4004
4005	   if (((Bit64u) (aSig<< 1)) == 0)
4006		   return (aSign) ? float_negative_inf : float_positive_inf;
4007
4008	   return float_NaN;
4009   }
4010
4011   return float_normalized;
4012}
4013#endif
4014
4015/*----------------------------------------------------------------------------
4016| Compare  between  two extended precision  floating  point  numbers. Returns
4017| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
4018| the    value    'a'   is   less   than   the   corresponding   value   `b',
4019| 'float_relation_greater' if the value 'a' is greater than the corresponding
4020| value `b', or 'float_relation_unordered' otherwise.
4021*----------------------------------------------------------------------------*/
4022
4023#if cIncludeFPUUnused
4024LOCALFUNC int floatx80_compare(floatx80 a, floatx80 b)
4025{
4026	int aSign;
4027	int bSign;
4028	Bit64u aSig;
4029	Bit32s aExp;
4030	Bit64u bSig;
4031	Bit32s bExp;
4032	int less_than;
4033	float_class_t aClass = floatx80_class(a);
4034	float_class_t bClass = floatx80_class(b);
4035
4036	if (aClass == float_NaN || bClass == float_NaN)
4037	{
4038		float_raise(float_flag_invalid);
4039		return float_relation_unordered;
4040	}
4041
4042	if (aClass == float_denormal || bClass == float_denormal)
4043	{
4044		float_raise(float_flag_denormal);
4045	}
4046
4047	aSign = extractFloatx80Sign(a);
4048	bSign = extractFloatx80Sign(b);
4049
4050	if (aClass == float_zero) {
4051		if (bClass == float_zero) return float_relation_equal;
4052		return bSign ? float_relation_greater : float_relation_less;
4053	}
4054
4055	if (bClass == float_zero || aSign != bSign) {
4056		return aSign ? float_relation_less : float_relation_greater;
4057	}
4058
4059	aSig = extractFloatx80Frac(a);
4060	aExp = extractFloatx80Exp(a);
4061	bSig = extractFloatx80Frac(b);
4062	bExp = extractFloatx80Exp(b);
4063
4064	if (aClass == float_denormal)
4065		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4066
4067	if (bClass == float_denormal)
4068		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4069
4070	if (aExp == bExp && aSig == bSig)
4071		return float_relation_equal;
4072
4073	less_than =
4074		aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
4075			  : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
4076
4077	if (less_than) return float_relation_less;
4078
4079	return float_relation_greater;
4080}
4081#endif
4082
4083/*----------------------------------------------------------------------------
4084| Compare  between  two extended precision  floating  point  numbers. Returns
4085| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
4086| the    value    'a'   is   less   than   the   corresponding   value   `b',
4087| 'float_relation_greater' if the value 'a' is greater than the corresponding
4088| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause
4089| an exception.
4090*----------------------------------------------------------------------------*/
4091
4092#if cIncludeFPUUnused
4093LOCALFUNC int floatx80_compare_quiet(floatx80 a, floatx80 b)
4094{
4095	int aSign;
4096	int bSign;
4097	Bit64u aSig;
4098	Bit32s aExp;
4099	Bit64u bSig;
4100	Bit32s bExp;
4101	int less_than;
4102	float_class_t aClass = floatx80_class(a);
4103	float_class_t bClass = floatx80_class(b);
4104
4105	if (aClass == float_NaN || bClass == float_NaN)
4106	{
4107#if 0
4108		if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
4109			float_raise(float_flag_invalid);
4110#endif
4111
4112		if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
4113			float_raise(float_flag_invalid);
4114
4115		return float_relation_unordered;
4116	}
4117
4118	if (aClass == float_denormal || bClass == float_denormal)
4119	{
4120		float_raise(float_flag_denormal);
4121	}
4122
4123	aSign = extractFloatx80Sign(a);
4124	bSign = extractFloatx80Sign(b);
4125
4126	if (aClass == float_zero) {
4127		if (bClass == float_zero) return float_relation_equal;
4128		return bSign ? float_relation_greater : float_relation_less;
4129	}
4130
4131	if (bClass == float_zero || aSign != bSign) {
4132		return aSign ? float_relation_less : float_relation_greater;
4133	}
4134
4135	aSig = extractFloatx80Frac(a);
4136	aExp = extractFloatx80Exp(a);
4137	bSig = extractFloatx80Frac(b);
4138	bExp = extractFloatx80Exp(b);
4139
4140	if (aClass == float_denormal)
4141		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4142
4143	if (bClass == float_denormal)
4144		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4145
4146	if (aExp == bExp && aSig == bSig)
4147		return float_relation_equal;
4148
4149	less_than =
4150		aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig)))
4151			  : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig)));
4152
4153	if (less_than) return float_relation_less;
4154	return float_relation_greater;
4155}
4156#endif
4157
4158/* ----- end from original file "softfloatx80.cc" ----- */
4159
4160/* ----- from original file "fprem.cc" ----- */
4161
4162/*
4163	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4164*/
4165
4166#define USE_estimateDiv128To64
4167
4168/* executes single exponent reduction cycle */
4169LOCALFUNC Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
4170{
4171	Bit64u term0, term1;
4172	Bit64u aSig1 = 0;
4173	Bit64u q;
4174
4175	shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
4176	q = estimateDiv128To64(aSig1, aSig0, bSig);
4177	mul64To128(bSig, q, &term0, &term1);
4178	sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
4179	while ((Bit64s)(*zSig1) < 0) {
4180		--q;
4181		add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
4182	}
4183	return q;
4184}
4185
4186LOCALFUNC floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u *q, int rounding_mode)
4187{
4188	Bit32s aExp, bExp, zExp, expDiff;
4189	Bit64u aSig0, aSig1, bSig;
4190	int aSign;
4191	*q = 0;
4192
4193#if 0
4194	// handle unsupported extended double-precision floating encodings
4195	if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
4196	{
4197		float_raise(float_flag_invalid);
4198		return floatx80_default_nan;
4199	}
4200#endif
4201
4202	aSig0 = extractFloatx80Frac(a);
4203	aExp = extractFloatx80Exp(a);
4204	aSign = extractFloatx80Sign(a);
4205	bSig = extractFloatx80Frac(b);
4206	bExp = extractFloatx80Exp(b);
4207
4208	if (aExp == 0x7FFF) {
4209		if ((Bit64u) (aSig0<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) {
4210			return propagateFloatx80NaN(a, b);
4211		}
4212		float_raise(float_flag_invalid);
4213		return floatx80_default_nan;
4214	}
4215	if (bExp == 0x7FFF) {
4216		if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
4217		if (aExp == 0 && aSig0) {
4218			float_raise(float_flag_denormal);
4219			normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
4220			return (a.low & LIT64(0x8000000000000000)) ?
4221					packFloatx80(aSign, aExp, aSig0) : a;
4222		}
4223		return a;
4224	}
4225	if (bExp == 0) {
4226		if (bSig == 0) {
4227			float_raise(float_flag_invalid);
4228			return floatx80_default_nan;
4229		}
4230		float_raise(float_flag_denormal);
4231		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4232	}
4233	if (aExp == 0) {
4234		if (aSig0 == 0) return a;
4235		float_raise(float_flag_denormal);
4236		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
4237	}
4238	expDiff = aExp - bExp;
4239	aSig1 = 0;
4240
4241	if (expDiff >= 64) {
4242		int n = (expDiff & 0x1f) | 0x20;
4243		remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
4244		zExp = aExp - n;
4245		*q = (Bit64u) -1;
4246	}
4247	else {
4248		zExp = bExp;
4249
4250		if (expDiff < 0) {
4251			if (expDiff < -1)
4252			   return (a.low & LIT64(0x8000000000000000)) ?
4253					packFloatx80(aSign, aExp, aSig0) : a;
4254			shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
4255			expDiff = 0;
4256		}
4257
4258		if (expDiff > 0) {
4259			*q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
4260		}
4261		else {
4262			if (bSig <= aSig0) {
4263			   aSig0 -= bSig;
4264			   *q = 1;
4265			}
4266		}
4267
4268		if (rounding_mode == float_round_nearest_even)
4269		{
4270			Bit64u term0, term1;
4271			shift128Right(bSig, 0, 1, &term0, &term1);
4272
4273			if (! lt128(aSig0, aSig1, term0, term1))
4274			{
4275			   int lt = lt128(term0, term1, aSig0, aSig1);
4276			   int eq = eq128(aSig0, aSig1, term0, term1);
4277
4278			   if ((eq && (*q & 1)) || lt) {
4279				  aSign = !aSign;
4280				  ++*q;
4281			   }
4282			   if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
4283			}
4284		}
4285	}
4286
4287	return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1);
4288}
4289
4290/*----------------------------------------------------------------------------
4291| Returns the remainder of the extended double-precision floating-point value
4292| `a' with respect to the corresponding value `b'.  The operation is performed
4293| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4294*----------------------------------------------------------------------------*/
4295
4296#if cIncludeFPUUnused
4297LOCALFUNC floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u *q)
4298{
4299	return do_fprem(a, b, q, float_round_nearest_even);
4300}
4301#endif
4302
4303/*----------------------------------------------------------------------------
4304| Returns the remainder of the extended double-precision floating-point value
4305| `a' with  respect to  the corresponding value `b'. Unlike previous function
4306| the  function  does not compute  the remainder  specified  in  the IEC/IEEE
4307| Standard  for Binary  Floating-Point  Arithmetic.  This  function  operates
4308| differently  from the  previous  function in  the way  that it  rounds  the
4309| quotient of 'a' divided by 'b' to an integer.
4310*----------------------------------------------------------------------------*/
4311
4312LOCALFUNC floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u *q)
4313{
4314	return do_fprem(a, b, q, float_round_to_zero);
4315}
4316
4317/* ----- end from original file "fprem.cc" ----- */
4318
4319/* ----- from original file "fpu_constant.h" ----- */
4320
4321/*
4322	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4323*/
4324
4325// Pentium CPU uses only 68-bit precision M_PI approximation
4326// #define BETTER_THAN_PENTIUM
4327
4328//////////////////////////////
4329// PI, PI/2, PI/4 constants
4330//////////////////////////////
4331
4332#define FLOATX80_PI_EXP  (0x4000)
4333
4334// 128-bit PI fraction
4335#ifdef BETTER_THAN_PENTIUM
4336#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234))
4337#define FLOAT_PI_LO (LIT64(0xc4c6628b80dc1cd1))
4338#else
4339#define FLOAT_PI_HI (LIT64(0xc90fdaa22168c234))
4340#define FLOAT_PI_LO (LIT64(0xC000000000000000))
4341#endif
4342
4343#define FLOATX80_PI2_EXP  (0x3FFF)
4344#define FLOATX80_PI4_EXP  (0x3FFE)
4345
4346//////////////////////////////
4347// 3PI/4 constant
4348//////////////////////////////
4349
4350#define FLOATX80_3PI4_EXP (0x4000)
4351
4352// 128-bit 3PI/4 fraction
4353#ifdef BETTER_THAN_PENTIUM
4354#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7))
4355#define FLOAT_3PI4_LO (LIT64(0x9394c9e8a0a5159c))
4356#else
4357#define FLOAT_3PI4_HI (LIT64(0x96cbe3f9990e91a7))
4358#define FLOAT_3PI4_LO (LIT64(0x9000000000000000))
4359#endif
4360
4361//////////////////////////////
4362// 1/LN2 constant
4363//////////////////////////////
4364
4365#define FLOAT_LN2INV_EXP  (0x3FFF)
4366
4367// 128-bit 1/LN2 fraction
4368#ifdef BETTER_THAN_PENTIUM
4369#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb))
4370#define FLOAT_LN2INV_LO (LIT64(0xbe87fed0691d3e89))
4371#else
4372#define FLOAT_LN2INV_HI (LIT64(0xb8aa3b295c17f0bb))
4373#define FLOAT_LN2INV_LO (LIT64(0xC000000000000000))
4374#endif
4375
4376/* ----- end from original file "fpu_constant.h" ----- */
4377
4378/* ----- from original file "poly.cc" ----- */
4379
4380/*
4381	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4382*/
4383
4384//                            2         3         4               n
4385// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4386//         0    1         2         3         4               n
4387//
4388//          --       2k                --        2k+1
4389//   p(x) = >  C  * x           q(x) = >  C   * x
4390//          --  2k                     --  2k+1
4391//
4392//   f(x) ~ [ p(x) + x * q(x) ]
4393//
4394
4395LOCALFUNC float128 EvalPoly(float128 x, float128 *arr, unsigned n)
4396{
4397	float128 r2;
4398	float128 x2 = float128_mul(x, x);
4399	unsigned i;
4400
4401#if 0
4402	assert(n > 1);
4403#endif
4404
4405	float128 r1 = arr[--n];
4406	i = n;
4407	while(i >= 2) {
4408		r1 = float128_mul(r1, x2);
4409		i -= 2;
4410		r1 = float128_add(r1, arr[i]);
4411	}
4412	if (i) r1 = float128_mul(r1, x);
4413
4414	r2 = arr[--n];
4415	i = n;
4416	while(i >= 2) {
4417		r2 = float128_mul(r2, x2);
4418		i -= 2;
4419		r2 = float128_add(r2, arr[i]);
4420	}
4421	if (i) r2 = float128_mul(r2, x);
4422
4423	return float128_add(r1, r2);
4424}
4425
4426//                  2         4         6         8               2n
4427// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4428//         0    1         2         3         4               n
4429//
4430//          --       4k                --        4k+2
4431//   p(x) = >  C  * x           q(x) = >  C   * x
4432//          --  2k                     --  2k+1
4433//
4434//                    2
4435//   f(x) ~ [ p(x) + x * q(x) ]
4436//
4437
4438LOCALFUNC float128 EvenPoly(float128 x, float128 *arr, unsigned n)
4439{
4440	 return EvalPoly(float128_mul(x, x), arr, n);
4441}
4442
4443//                        3         5         7         9               2n+1
4444// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4445//          0         1         2         3         4               n
4446//                        2         4         6         8               2n
4447//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
4448//               0    1         2         3         4               n
4449//
4450//          --       4k                --        4k+2
4451//   p(x) = >  C  * x           q(x) = >  C   * x
4452//          --  2k                     --  2k+1
4453//
4454//                        2
4455//   f(x) ~ x * [ p(x) + x * q(x) ]
4456//
4457
4458LOCALFUNC float128 OddPoly(float128 x, float128 *arr, unsigned n)
4459{
4460	 return float128_mul(x, EvenPoly(x, arr, n));
4461}
4462
4463/* ----- end from original file "poly.cc" ----- */
4464
4465/* ----- from original file "fyl2x.cc" ----- */
4466
4467/*
4468	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4469*/
4470
4471static const floatx80 floatx80_one =
4472	packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000));
4473
4474static const float128 float128_one =
4475	packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000));
4476static const float128 float128_two =
4477	packFloat2x128m(LIT64(0x4000000000000000), LIT64(0x0000000000000000));
4478
4479static const float128 float128_ln2inv2 =
4480	packFloat2x128m(LIT64(0x400071547652b82f), LIT64(0xe1777d0ffda0d23a));
4481
4482#define SQRT2_HALF_SIG 	LIT64(0xb504f333f9de6484)
4483
4484#define L2_ARR_SIZE 9
4485
4486static float128 ln_arr[L2_ARR_SIZE] =
4487{
4488	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
4489	PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
4490	PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
4491	PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
4492	PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
4493	PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
4494	PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
4495	PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
4496	PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
4497};
4498
4499LOCALFUNC float128 poly_ln(float128 x1)
4500{
4501/*
4502	//
4503	//                     3     5     7     9     11     13     15
4504	//        1+u         u     u     u     u     u      u      u
4505	// 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
4506	//        1-u         3     5     7     9     11     13     15
4507	//
4508	//                     2     4     6     8     10     12     14
4509	//                    u     u     u     u     u      u      u
4510	//       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
4511	//                    3     5     7     9     11     13     15
4512	//
4513	//           3                          3
4514	//          --       4k                --        4k+2
4515	//   p(u) = >  C  * u           q(u) = >  C   * u
4516	//          --  2k                     --  2k+1
4517	//          k=0                        k=0
4518	//
4519	//          1+u                 2
4520	//   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
4521	//          1-u
4522	//
4523*/
4524	return OddPoly(x1, ln_arr, L2_ARR_SIZE);
4525}
4526
4527/* required sqrt(2)/2 < x < sqrt(2) */
4528LOCALFUNC float128 poly_l2(float128 x)
4529{
4530	/* using float128 for approximation */
4531	float128 x_p1 = float128_add(x, float128_one);
4532	float128 x_m1 = float128_sub(x, float128_one);
4533	x = float128_div(x_m1, x_p1);
4534	x = poly_ln(x);
4535	x = float128_mul(x, float128_ln2inv2);
4536	return x;
4537}
4538
4539LOCALFUNC float128 poly_l2p1(float128 x)
4540{
4541	/* using float128 for approximation */
4542	float128 x_p2 = float128_add(x, float128_two);
4543	x = float128_div(x, x_p2);
4544	x = poly_ln(x);
4545	x = float128_mul(x, float128_ln2inv2);
4546	return x;
4547}
4548
4549// =================================================
4550// FYL2X                   Compute y * log (x)
4551//                                        2
4552// =================================================
4553
4554//
4555// Uses the following identities:
4556//
4557// 1. ----------------------------------------------------------
4558//              ln(x)
4559//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
4560//      2       ln(2)
4561//
4562// 2. ----------------------------------------------------------
4563//                1+u             x-1
4564//   ln (x) = ln -----, when u = -----
4565//                1-u             x+1
4566//
4567// 3. ----------------------------------------------------------
4568//                        3     5     7           2n+1
4569//       1+u             u     u     u           u
4570//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
4571//       1-u             3     5     7           2n+1
4572//
4573
4574LOCALFUNC floatx80 fyl2x(floatx80 a, floatx80 b)
4575{
4576	int aSign;
4577	int bSign;
4578	Bit64u aSig;
4579	Bit32s aExp;
4580	Bit64u bSig;
4581	Bit32s bExp;
4582	int zSign;
4583	int ExpDiff;
4584	Bit64u zSig0, zSig1;
4585	float128 x;
4586
4587	// handle unsupported extended double-precision floating encodings
4588	if (0 /* floatx80_is_unsupported(a) */) {
4589invalid:
4590		float_raise(float_flag_invalid);
4591		return floatx80_default_nan;
4592	}
4593
4594	aSig = extractFloatx80Frac(a);
4595	aExp = extractFloatx80Exp(a);
4596	aSign = extractFloatx80Sign(a);
4597	bSig = extractFloatx80Frac(b);
4598	bExp = extractFloatx80Exp(b);
4599	bSign = extractFloatx80Sign(b);
4600
4601	zSign = bSign ^ 1;
4602
4603	if (aExp == 0x7FFF) {
4604		if ((Bit64u) (aSig<<1)
4605			 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
4606		{
4607			return propagateFloatx80NaN(a, b);
4608		}
4609		if (aSign) goto invalid;
4610		else {
4611			if (bExp == 0) {
4612				if (bSig == 0) goto invalid;
4613				float_raise(float_flag_denormal);
4614			}
4615			return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4616		}
4617	}
4618	if (bExp == 0x7FFF)
4619	{
4620		if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b);
4621		if (aSign && (Bit64u)(aExp | aSig)) goto invalid;
4622		if (aSig && (aExp == 0))
4623			float_raise(float_flag_denormal);
4624		if (aExp < 0x3FFF) {
4625			return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4626		}
4627		if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;
4628		return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4629	}
4630	if (aExp == 0) {
4631		if (aSig == 0) {
4632			if ((bExp | bSig) == 0) goto invalid;
4633			float_raise(float_flag_divbyzero);
4634			return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4635		}
4636		if (aSign) goto invalid;
4637		float_raise(float_flag_denormal);
4638		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4639	}
4640	if (aSign) goto invalid;
4641	if (bExp == 0) {
4642		if (bSig == 0) {
4643			if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
4644			return packFloatx80(bSign, 0, 0);
4645		}
4646		float_raise(float_flag_denormal);
4647		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4648	}
4649	if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0))
4650		return packFloatx80(bSign, 0, 0);
4651
4652	float_raise(float_flag_inexact);
4653
4654	ExpDiff = aExp - 0x3FFF;
4655	aExp = 0;
4656	if (aSig >= SQRT2_HALF_SIG) {
4657		ExpDiff++;
4658		aExp--;
4659	}
4660
4661	/* ******************************** */
4662	/* using float128 for approximation */
4663	/* ******************************** */
4664
4665	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
4666	x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
4667	x = poly_l2(x);
4668	x = float128_add(x, floatx80_to_float128(int32_to_floatx80(ExpDiff)));
4669	return floatx80_mul128(b, x);
4670}
4671
4672// =================================================
4673// FYL2XP1                 Compute y * log (x + 1)
4674//                                        2
4675// =================================================
4676
4677//
4678// Uses the following identities:
4679//
4680// 1. ----------------------------------------------------------
4681//              ln(x)
4682//   log (x) = -------
4683//      2       ln(2)
4684//
4685// 2. ----------------------------------------------------------
4686//                  1+u              x
4687//   ln (x+1) = ln -----, when u = -----
4688//                  1-u             x+2
4689//
4690// 3. ----------------------------------------------------------
4691//                        3     5     7           2n+1
4692//       1+u             u     u     u           u
4693//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
4694//       1-u             3     5     7           2n+1
4695//
4696
4697LOCALFUNC floatx80 fyl2xp1(floatx80 a, floatx80 b)
4698{
4699	Bit32s aExp, bExp;
4700	Bit64u aSig, bSig, zSig0, zSig1, zSig2;
4701	int aSign, bSign;
4702	int zSign;
4703	float128 x;
4704
4705	// handle unsupported extended double-precision floating encodings
4706	if (0 /* floatx80_is_unsupported(a) */) {
4707invalid:
4708		float_raise(float_flag_invalid);
4709		return floatx80_default_nan;
4710	}
4711
4712	aSig = extractFloatx80Frac(a);
4713	aExp = extractFloatx80Exp(a);
4714	aSign = extractFloatx80Sign(a);
4715	bSig = extractFloatx80Frac(b);
4716	bExp = extractFloatx80Exp(b);
4717	bSign = extractFloatx80Sign(b);
4718	zSign = aSign ^ bSign;
4719
4720	if (aExp == 0x7FFF) {
4721		if ((Bit64u) (aSig<<1)
4722			 || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1)))
4723		{
4724			return propagateFloatx80NaN(a, b);
4725		}
4726		if (aSign) goto invalid;
4727		else {
4728			if (bExp == 0) {
4729				if (bSig == 0) goto invalid;
4730				float_raise(float_flag_denormal);
4731			}
4732			return packFloatx80(bSign, 0x7FFF, LIT64(0x8000000000000000));
4733		}
4734	}
4735	if (bExp == 0x7FFF)
4736	{
4737		if ((Bit64u) (bSig<<1))
4738			return propagateFloatx80NaN(a, b);
4739
4740		if (aExp == 0) {
4741			if (aSig == 0) goto invalid;
4742			float_raise(float_flag_denormal);
4743		}
4744
4745		return packFloatx80(zSign, 0x7FFF, LIT64(0x8000000000000000));
4746	}
4747	if (aExp == 0) {
4748		if (aSig == 0) {
4749			if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
4750			return packFloatx80(zSign, 0, 0);
4751		}
4752		float_raise(float_flag_denormal);
4753		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4754	}
4755	if (bExp == 0) {
4756		if (bSig == 0) return packFloatx80(zSign, 0, 0);
4757		float_raise(float_flag_denormal);
4758		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
4759	}
4760
4761	float_raise(float_flag_inexact);
4762
4763	if (aSign && aExp >= 0x3FFF)
4764		return a;
4765
4766	if (aExp >= 0x3FFC) // big argument
4767	{
4768		return fyl2x(floatx80_add(a, floatx80_one), b);
4769	}
4770
4771	// handle tiny argument
4772	if (aExp < EXP_BIAS-70)
4773	{
4774		// first order approximation, return (a*b)/ln(2)
4775		Bit32s zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
4776
4777	mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
4778		if (0 < (Bit64s) zSig0) {
4779			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4780			--zExp;
4781		}
4782
4783		zExp = zExp + bExp - 0x3FFE;
4784	mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
4785		if (0 < (Bit64s) zSig0) {
4786			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4787			--zExp;
4788		}
4789
4790		return
4791			roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
4792	}
4793
4794	/* ******************************** */
4795	/* using float128 for approximation */
4796	/* ******************************** */
4797
4798	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
4799	x = packFloat128(aSign, aExp, zSig0, zSig1);
4800	x = poly_l2p1(x);
4801	return floatx80_mul128(b, x);
4802}
4803
4804/* ----- end from original file "fyl2x.cc" ----- */
4805
4806/* ----- from original file "f2xm1.cc" ----- */
4807
4808/*
4809	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4810*/
4811
4812static const floatx80 floatx80_negone  = packFloatx80m(1, 0x3fff, LIT64(0x8000000000000000));
4813static const floatx80 floatx80_neghalf = packFloatx80m(1, 0x3ffe, LIT64(0x8000000000000000));
4814static const float128 float128_ln2     =
4815	packFloat2x128m(LIT64(0x3ffe62e42fefa39e), LIT64(0xf35793c7673007e6));
4816
4817#define LN2_SIG        LIT64(0xb17217f7d1cf79ac)
4818
4819#define EXP_ARR_SIZE 15
4820
4821static float128 exp_arr[EXP_ARR_SIZE] =
4822{
4823	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
4824	PACK_FLOAT_128(0x3ffe000000000000, 0x0000000000000000), /*  2 */
4825	PACK_FLOAT_128(0x3ffc555555555555, 0x5555555555555555), /*  3 */
4826	PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
4827	PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
4828	PACK_FLOAT_128(0x3ff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
4829	PACK_FLOAT_128(0x3ff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
4830	PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
4831	PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
4832	PACK_FLOAT_128(0x3fe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
4833	PACK_FLOAT_128(0x3fe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
4834	PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
4835	PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
4836	PACK_FLOAT_128(0x3fda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
4837	PACK_FLOAT_128(0x3fd6ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 15 */
4838};
4839
4840/* required -1 < x < 1 */
4841LOCALFUNC float128 poly_exp(float128 x)
4842{
4843/*
4844	//               2     3     4     5     6     7     8     9
4845	//  x           x     x     x     x     x     x     x     x
4846	// e - 1 ~ x + --- + --- + --- + --- + --- + --- + --- + --- + ...
4847	//              2!    3!    4!    5!    6!    7!    8!    9!
4848	//
4849	//                     2     3     4     5     6     7     8
4850	//              x     x     x     x     x     x     x     x
4851	//   = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ]
4852	//              2!    3!    4!    5!    6!    7!    8!    9!
4853	//
4854	//           8                          8
4855	//          --       2k                --        2k+1
4856	//   p(x) = >  C  * x           q(x) = >  C   * x
4857	//          --  2k                     --  2k+1
4858	//          k=0                        k=0
4859	//
4860	//    x
4861	//   e  - 1 ~ x * [ p(x) + x * q(x) ]
4862	//
4863*/
4864	float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE);
4865	return float128_mul(t, x);
4866}
4867
4868// =================================================
4869//                                  x
4870// FX2P1                   Compute 2  - 1
4871// =================================================
4872
4873//
4874// Uses the following identities:
4875//
4876// 1. ----------------------------------------------------------
4877//      x    x*ln(2)
4878//     2  = e
4879//
4880// 2. ----------------------------------------------------------
4881//                      2     3     4     5           n
4882//      x        x     x     x     x     x           x
4883//     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4884//               1!    2!    3!    4!    5!          n!
4885//
4886
4887LOCALFUNC floatx80 f2xm1(floatx80 a)
4888{
4889	Bit64u zSig0, zSig1;
4890	float128 x;
4891
4892#if 0
4893	// handle unsupported extended double-precision floating encodings
4894	if (floatx80_is_unsupported(a))
4895	{
4896		float_raise(float_flag_invalid);
4897		return floatx80_default_nan;
4898	}
4899#endif
4900
4901	Bit64u aSig = extractFloatx80Frac(a);
4902	Bit32s aExp = extractFloatx80Exp(a);
4903	int aSign = extractFloatx80Sign(a);
4904
4905	if (aExp == 0x7FFF) {
4906		if ((Bit64u) (aSig<<1))
4907			return propagateOneFloatx80NaN(&a);
4908
4909		return (aSign) ? floatx80_negone : a;
4910	}
4911
4912	if (aExp == 0) {
4913		if (aSig == 0) return a;
4914		float_raise(float_flag_denormal | float_flag_inexact);
4915		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
4916
4917	tiny_argument:
4918		mul64To128(aSig, LN2_SIG, &zSig0, &zSig1);
4919		if (0 < (Bit64s) zSig0) {
4920			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
4921			--aExp;
4922		}
4923		return
4924			roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1);
4925	}
4926
4927	float_raise(float_flag_inexact);
4928
4929	if (aExp < 0x3FFF)
4930	{
4931		if (aExp < EXP_BIAS-68)
4932			goto tiny_argument;
4933
4934		/* ******************************** */
4935		/* using float128 for approximation */
4936		/* ******************************** */
4937
4938		x = floatx80_to_float128(a);
4939		x = float128_mul(x, float128_ln2);
4940		x = poly_exp(x);
4941		return float128_to_floatx80(x);
4942	}
4943	else
4944	{
4945		if ((a.high == 0xBFFF) && (! (aSig<<1)))
4946		   return floatx80_neghalf;
4947
4948		return a;
4949	}
4950}
4951
4952/* ----- end from original file "f2xm1.cc" ----- */
4953
4954/* ----- from original file "fsincos.cc" ----- */
4955
4956/*
4957	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
4958*/
4959
4960#define USE_estimateDiv128To64
4961
4962#if 0
4963static const floatx80 floatx80_one = packFloatx80m(0, 0x3fff, LIT64(0x8000000000000000));
4964#endif
4965
4966/* reduce trigonometric function argument using 128-bit precision
4967   M_PI approximation */
4968LOCALFUNC Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
4969{
4970	Bit64u term0, term1, term2;
4971	Bit64u aSig1 = 0;
4972	Bit64u q;
4973
4974	shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
4975	q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
4976	mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
4977	sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
4978	while ((Bit64s)(*zSig1) < 0) {
4979		--q;
4980		add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
4981	}
4982	*zSig1 = term2;
4983	return q;
4984}
4985
4986LOCALFUNC int reduce_trig_arg(int expDiff, int *zSign, Bit64u *aSig0, Bit64u *aSig1)
4987{
4988	Bit64u term0, term1, q = 0;
4989
4990	if (expDiff < 0) {
4991		shift128Right(*aSig0, 0, 1, aSig0, aSig1);
4992		expDiff = 0;
4993	}
4994	if (expDiff > 0) {
4995		q = argument_reduction_kernel(*aSig0, expDiff, aSig0, aSig1);
4996	}
4997	else {
4998		if (FLOAT_PI_HI <= *aSig0) {
4999			*aSig0 -= FLOAT_PI_HI;
5000			q = 1;
5001		}
5002	}
5003
5004	shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
5005	if (! lt128(*aSig0, *aSig1, term0, term1))
5006	{
5007		int lt = lt128(term0, term1, *aSig0, *aSig1);
5008		int eq = eq128(*aSig0, *aSig1, term0, term1);
5009
5010		if ((eq && (q & 1)) || lt) {
5011			*zSign = !*zSign;
5012			++q;
5013		}
5014		if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, *aSig0, *aSig1, aSig0, aSig1);
5015	}
5016
5017	return (int)(q & 3);
5018}
5019
5020#define SIN_ARR_SIZE 9
5021#define COS_ARR_SIZE 9
5022
5023static float128 sin_arr[SIN_ARR_SIZE] =
5024{
5025	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
5026	PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
5027	PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
5028	PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
5029	PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
5030	PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
5031	PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
5032	PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
5033	PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00)  /* 17 */
5034};
5035
5036static float128 cos_arr[COS_ARR_SIZE] =
5037{
5038	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
5039	PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
5040	PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
5041	PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
5042	PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
5043	PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
5044	PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
5045	PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
5046	PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0)  /* 16 */
5047};
5048
5049/* 0 <= x <= pi/4 */
5050LOCALINLINEFUNC float128 poly_sin(float128 x)
5051{
5052	//                 3     5     7     9     11     13     15
5053	//                x     x     x     x     x      x      x
5054	// sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
5055	//                3!    5!    7!    9!    11!    13!    15!
5056	//
5057	//                 2     4     6     8     10     12     14
5058	//                x     x     x     x     x      x      x
5059	//   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
5060	//                3!    5!    7!    9!    11!    13!    15!
5061	//
5062	//           3                          3
5063	//          --       4k                --        4k+2
5064	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
5065	//          --  2k                     --  2k+1
5066	//          k=0                        k=0
5067	//
5068	//                          2
5069	//   sin(x) ~ x * [ p(x) + x * q(x) ]
5070	//
5071
5072	return OddPoly(x, sin_arr, SIN_ARR_SIZE);
5073}
5074
5075/* 0 <= x <= pi/4 */
5076LOCALINLINEFUNC float128 poly_cos(float128 x)
5077{
5078	//                 2     4     6     8     10     12     14
5079	//                x     x     x     x     x      x      x
5080	// cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
5081	//                2!    4!    6!    8!    10!    12!    14!
5082	//
5083	//           3                          3
5084	//          --       4k                --        4k+2
5085	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
5086	//          --  2k                     --  2k+1
5087	//          k=0                        k=0
5088	//
5089	//                      2
5090	//   cos(x) ~ [ p(x) + x * q(x) ]
5091	//
5092
5093	return EvenPoly(x, cos_arr, COS_ARR_SIZE);
5094}
5095
5096LOCALINLINEPROC sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
5097{
5098	if (sin_a) *sin_a = a;
5099	if (cos_a) *cos_a = a;
5100}
5101
5102LOCALINLINEPROC sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
5103{
5104	if (sin_a) *sin_a = a;
5105	if (cos_a) *cos_a = floatx80_one;
5106}
5107
5108LOCALFUNC floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient)
5109{
5110	floatx80 result;
5111
5112	if (quotient & 0x1) {
5113		r = poly_cos(r);
5114		neg = 0;
5115	} else  {
5116		r = poly_sin(r);
5117	}
5118
5119	result = float128_to_floatx80(r);
5120	if (quotient & 0x2)
5121		neg = ! neg;
5122
5123	if (neg)
5124		floatx80_chs(&result);
5125
5126	return result;
5127}
5128
5129// =================================================
5130// FSINCOS               Compute sin(x) and cos(x)
5131// =================================================
5132
5133//
5134// Uses the following identities:
5135// ----------------------------------------------------------
5136//
5137//  sin(-x) = -sin(x)
5138//  cos(-x) =  cos(x)
5139//
5140//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
5141//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
5142//
5143//  sin(x+ pi/2)  =  cos(x)
5144//  sin(x+ pi)    = -sin(x)
5145//  sin(x+3pi/2)  = -cos(x)
5146//  sin(x+2pi)    =  sin(x)
5147//
5148
5149LOCALFUNC int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
5150{
5151	float128 r;
5152	Bit64u aSig0, aSig1 = 0;
5153	Bit32s aExp, zExp, expDiff;
5154	int aSign, zSign;
5155	int q = 0;
5156
5157#if 0
5158	// handle unsupported extended double-precision floating encodings
5159	if (floatx80_is_unsupported(a))
5160	{
5161		goto invalid;
5162	}
5163#endif
5164
5165	aSig0 = extractFloatx80Frac(a);
5166	aExp = extractFloatx80Exp(a);
5167	aSign = extractFloatx80Sign(a);
5168
5169	/* invalid argument */
5170	if (aExp == 0x7FFF) {
5171		if ((Bit64u) (aSig0<<1)) {
5172			sincos_invalid(sin_a, cos_a, propagateOneFloatx80NaN(&a));
5173			return 0;
5174		}
5175
5176#if 0
5177	invalid:
5178#endif
5179		float_raise(float_flag_invalid);
5180		sincos_invalid(sin_a, cos_a, floatx80_default_nan);
5181		return 0;
5182	}
5183
5184	if (aExp == 0) {
5185		if (aSig0 == 0) {
5186			sincos_tiny_argument(sin_a, cos_a, a);
5187			return 0;
5188		}
5189
5190		float_raise(float_flag_denormal);
5191
5192		/* handle pseudo denormals */
5193		if (! (aSig0 & LIT64(0x8000000000000000)))
5194		{
5195			float_raise(float_flag_inexact);
5196			if (sin_a)
5197				float_raise(float_flag_underflow);
5198			sincos_tiny_argument(sin_a, cos_a, a);
5199			return 0;
5200		}
5201
5202		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
5203	}
5204
5205	zSign = aSign;
5206	zExp = EXP_BIAS;
5207	expDiff = aExp - zExp;
5208
5209	/* argument is out-of-range */
5210	if (expDiff >= 63)
5211		return -1;
5212
5213	float_raise(float_flag_inexact);
5214
5215	if (expDiff < -1) {    // doesn't require reduction
5216		if (expDiff <= -68) {
5217			a = packFloatx80(aSign, aExp, aSig0);
5218			sincos_tiny_argument(sin_a, cos_a, a);
5219			return 0;
5220		}
5221		zExp = aExp;
5222	}
5223	else {
5224		q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
5225	}
5226
5227	/* **************************** */
5228	/* argument reduction completed */
5229	/* **************************** */
5230
5231	/* using float128 for approximation */
5232	r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
5233
5234	if (aSign) q = -q;
5235	if (sin_a) *sin_a = sincos_approximation(zSign, r,   q);
5236	if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
5237
5238	return 0;
5239}
5240
5241// =================================================
5242// FPTAN                 Compute tan(x)
5243// =================================================
5244
5245//
5246// Uses the following identities:
5247//
5248// 1. ----------------------------------------------------------
5249//
5250//  sin(-x) = -sin(x)
5251//  cos(-x) =  cos(x)
5252//
5253//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
5254//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
5255//
5256//  sin(x+ pi/2)  =  cos(x)
5257//  sin(x+ pi)    = -sin(x)
5258//  sin(x+3pi/2)  = -cos(x)
5259//  sin(x+2pi)    =  sin(x)
5260//
5261// 2. ----------------------------------------------------------
5262//
5263//           sin(x)
5264//  tan(x) = ------
5265//           cos(x)
5266//
5267
5268LOCALFUNC int ftan(floatx80 *a)
5269{
5270	float128 r;
5271	float128 sin_r;
5272	float128 cos_r;
5273	Bit64u aSig0, aSig1 = 0;
5274	Bit32s aExp, zExp, expDiff;
5275	int aSign, zSign;
5276	int q = 0;
5277
5278#if 0
5279	// handle unsupported extended double-precision floating encodings
5280	if (floatx80_is_unsupported(*a))
5281	{
5282		goto invalid;
5283	}
5284#endif
5285
5286	aSig0 = extractFloatx80Frac(*a);
5287	aExp = extractFloatx80Exp(*a);
5288	aSign = extractFloatx80Sign(*a);
5289
5290	/* invalid argument */
5291	if (aExp == 0x7FFF) {
5292		if ((Bit64u) (aSig0<<1))
5293		{
5294			*a = propagateOneFloatx80NaN(a);
5295			return 0;
5296		}
5297
5298#if 0
5299	invalid:
5300#endif
5301		float_raise(float_flag_invalid);
5302		*a = floatx80_default_nan;
5303		return 0;
5304	}
5305
5306	if (aExp == 0) {
5307		if (aSig0 == 0) return 0;
5308		float_raise(float_flag_denormal);
5309		/* handle pseudo denormals */
5310		if (! (aSig0 & LIT64(0x8000000000000000)))
5311		{
5312			float_raise(float_flag_inexact | float_flag_underflow);
5313			return 0;
5314		}
5315		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
5316	}
5317
5318	zSign = aSign;
5319	zExp = EXP_BIAS;
5320	expDiff = aExp - zExp;
5321
5322	/* argument is out-of-range */
5323	if (expDiff >= 63)
5324		return -1;
5325
5326	float_raise(float_flag_inexact);
5327
5328	if (expDiff < -1) {    // doesn't require reduction
5329		if (expDiff <= -68) {
5330			*a = packFloatx80(aSign, aExp, aSig0);
5331			return 0;
5332		}
5333		zExp = aExp;
5334	}
5335	else {
5336		q = reduce_trig_arg(expDiff, &zSign, &aSig0, &aSig1);
5337	}
5338
5339	/* **************************** */
5340	/* argument reduction completed */
5341	/* **************************** */
5342
5343	/* using float128 for approximation */
5344	r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
5345
5346	sin_r = poly_sin(r);
5347	cos_r = poly_cos(r);
5348
5349	if (q & 0x1) {
5350		r = float128_div(cos_r, sin_r);
5351		zSign = ! zSign;
5352	} else {
5353		r = float128_div(sin_r, cos_r);
5354	}
5355
5356	*a = float128_to_floatx80(r);
5357	if (zSign)
5358		floatx80_chs(a);
5359
5360	return 0;
5361}
5362
5363/* ----- end from original file "fsincos.cc" ----- */
5364
5365/* ----- from original file "fpatan.cc" ----- */
5366
5367/*
5368	["original Stanislav Shwartsman Copyright notice" went here, included near top of this file.]
5369*/
5370
5371#define FPATAN_ARR_SIZE 11
5372
5373#if 0
5374static const float128 float128_one =
5375		packFloat2x128m(LIT64(0x3fff000000000000), LIT64(0x0000000000000000));
5376#endif
5377static const float128 float128_sqrt3 =
5378		packFloat2x128m(LIT64(0x3fffbb67ae8584ca), LIT64(0xa73b25742d7078b8));
5379static const floatx80 floatx80_pi  =
5380		packFloatx80m(0, 0x4000, LIT64(0xc90fdaa22168c235));
5381
5382static const float128 float128_pi2 =
5383		packFloat2x128m(LIT64(0x3fff921fb54442d1), LIT64(0x8469898CC5170416));
5384static const float128 float128_pi4 =
5385		packFloat2x128m(LIT64(0x3ffe921fb54442d1), LIT64(0x8469898CC5170416));
5386static const float128 float128_pi6 =
5387		packFloat2x128m(LIT64(0x3ffe0c152382d736), LIT64(0x58465BB32E0F580F));
5388
5389static float128 atan_arr[FPATAN_ARR_SIZE] =
5390{
5391	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
5392	PACK_FLOAT_128(0xbffd555555555555, 0x5555555555555555), /*  3 */
5393	PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
5394	PACK_FLOAT_128(0xbffc249249249249, 0x2492492492492492), /*  7 */
5395	PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
5396	PACK_FLOAT_128(0xbffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
5397	PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
5398	PACK_FLOAT_128(0xbffb111111111111, 0x1111111111111111), /* 15 */
5399	PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2), /* 17 */
5400	PACK_FLOAT_128(0xbffaaf286bca1af2, 0x86bca1af286bca1b), /* 19 */
5401	PACK_FLOAT_128(0x3ffa861861861861, 0x8618618618618618)  /* 21 */
5402};
5403
5404/* |x| < 1/4 */
5405LOCALFUNC float128 poly_atan(float128 x1)
5406{
5407/*
5408	//                 3     5     7     9     11     13     15     17
5409	//                x     x     x     x     x      x      x      x
5410	// atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
5411	//                3     5     7     9     11     13     15     17
5412	//
5413	//                 2     4     6     8     10     12     14     16
5414	//                x     x     x     x     x      x      x      x
5415	//   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
5416	//                3     5     7     9     11     13     15     17
5417	//
5418	//           5                          5
5419	//          --       4k                --        4k+2
5420	//   p(x) = >  C  * x           q(x) = >  C   * x
5421	//          --  2k                     --  2k+1
5422	//          k=0                        k=0
5423	//
5424	//                            2
5425	//    atan(x) ~ x * [ p(x) + x * q(x) ]
5426	//
5427*/
5428	return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE);
5429}
5430
5431// =================================================
5432// FPATAN                  Compute y * log (x)
5433//                                        2
5434// =================================================
5435
5436//
5437// Uses the following identities:
5438//
5439// 1. ----------------------------------------------------------
5440//
5441//   atan(-x) = -atan(x)
5442//
5443// 2. ----------------------------------------------------------
5444//
5445//                             x + y
5446//   atan(x) + atan(y) = atan -------, xy < 1
5447//                             1-xy
5448//
5449//                             x + y
5450//   atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
5451//                             1-xy
5452//
5453//                             x + y
5454//   atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
5455//                             1-xy
5456//
5457// 3. ----------------------------------------------------------
5458//
5459//   atan(x) = atan(INF) + atan(- 1/x)
5460//
5461//                           x-1
5462//   atan(x) = PI/4 + atan( ----- )
5463//                           x+1
5464//
5465//                           x * sqrt(3) - 1
5466//   atan(x) = PI/6 + atan( ----------------- )
5467//                             x + sqrt(3)
5468//
5469// 4. ----------------------------------------------------------
5470//                   3     5     7     9                 2n+1
5471//                  x     x     x     x              n  x
5472//   atan(x) = x - --- + --- - --- + --- - ... + (-1)  ------ + ...
5473//                  3     5     7     9                 2n+1
5474//
5475
5476LOCALFUNC floatx80 fpatan(floatx80 a, floatx80 b)
5477{
5478	float128 a128;
5479	float128 b128;
5480	float128 x;
5481	int swap, add_pi6, add_pi4;
5482	Bit32s xExp;
5483	floatx80 result;
5484	int rSign;
5485
5486	// handle unsupported extended double-precision floating encodings
5487#if 0
5488	if (floatx80_is_unsupported(a)) {
5489		float_raise(float_flag_invalid);
5490		return floatx80_default_nan;
5491	}
5492#endif
5493
5494	Bit64u aSig = extractFloatx80Frac(a);
5495	Bit32s aExp = extractFloatx80Exp(a);
5496	int aSign = extractFloatx80Sign(a);
5497	Bit64u bSig = extractFloatx80Frac(b);
5498	Bit32s bExp = extractFloatx80Exp(b);
5499	int bSign = extractFloatx80Sign(b);
5500
5501	int zSign = aSign ^ bSign;
5502
5503	if (bExp == 0x7FFF)
5504	{
5505		if ((Bit64u) (bSig<<1))
5506			return propagateFloatx80NaN(a, b);
5507
5508		if (aExp == 0x7FFF) {
5509			if ((Bit64u) (aSig<<1))
5510				return propagateFloatx80NaN(a, b);
5511
5512			if (aSign) {   /* return 3PI/4 */
5513				return roundAndPackFloatx80(80, bSign,
5514						FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO);
5515			}
5516			else {         /* return  PI/4 */
5517				return roundAndPackFloatx80(80, bSign,
5518						FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5519			}
5520		}
5521
5522		if (aSig && (aExp == 0))
5523			float_raise(float_flag_denormal);
5524
5525		/* return PI/2 */
5526		return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5527	}
5528	if (aExp == 0x7FFF)
5529	{
5530		if ((Bit64u) (aSig<<1))
5531			return propagateFloatx80NaN(a, b);
5532
5533		if (bSig && (bExp == 0))
5534			float_raise(float_flag_denormal);
5535
5536return_PI_or_ZERO:
5537
5538		if (aSign) {   /* return PI */
5539			return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5540		} else {       /* return  0 */
5541			return packFloatx80(bSign, 0, 0);
5542		}
5543	}
5544	if (bExp == 0)
5545	{
5546		if (bSig == 0) {
5547			 if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
5548			 goto return_PI_or_ZERO;
5549		}
5550
5551		float_raise(float_flag_denormal);
5552		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
5553	}
5554	if (aExp == 0)
5555	{
5556		if (aSig == 0)   /* return PI/2 */
5557			return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5558
5559		float_raise(float_flag_denormal);
5560		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
5561	}
5562
5563	float_raise(float_flag_inexact);
5564
5565	/* |a| = |b| ==> return PI/4 */
5566	if (aSig == bSig && aExp == bExp)
5567		return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO);
5568
5569	/* ******************************** */
5570	/* using float128 for approximation */
5571	/* ******************************** */
5572
5573	a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0);
5574	b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0);
5575	swap = 0;
5576	add_pi6 = 0;
5577	add_pi4 = 0;
5578
5579	if (aExp > bExp || (aExp == bExp && aSig > bSig))
5580	{
5581		x = float128_div(b128, a128);
5582	}
5583	else {
5584		x = float128_div(a128, b128);
5585		swap = 1;
5586	}
5587
5588	xExp = extractFloat128Exp(x);
5589
5590	if (xExp <= EXP_BIAS-40)
5591		goto approximation_completed;
5592
5593	if (x.high >= LIT64(0x3ffe800000000000))        // 3/4 < x < 1
5594	{
5595		/*
5596		arctan(x) = arctan((x-1)/(x+1)) + pi/4
5597		*/
5598		float128 t1 = float128_sub(x, float128_one);
5599		float128 t2 = float128_add(x, float128_one);
5600		x = float128_div(t1, t2);
5601		add_pi4 = 1;
5602	}
5603	else
5604	{
5605		/* argument correction */
5606		if (xExp >= 0x3FFD)                     // 1/4 < x < 3/4
5607		{
5608			/*
5609			arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
5610			*/
5611			float128 t1 = float128_mul(x, float128_sqrt3);
5612			float128 t2 = float128_add(x, float128_sqrt3);
5613			x = float128_sub(t1, float128_one);
5614			x = float128_div(x, t2);
5615			add_pi6 = 1;
5616		}
5617	}
5618
5619	x = poly_atan(x);
5620	if (add_pi6) x = float128_add(x, float128_pi6);
5621	if (add_pi4) x = float128_add(x, float128_pi4);
5622
5623approximation_completed:
5624	if (swap) x = float128_sub(float128_pi2, x);
5625	result = float128_to_floatx80(x);
5626	if (zSign) floatx80_chs(&result);
5627	rSign = extractFloatx80Sign(result);
5628	if (!bSign && rSign)
5629		return floatx80_add(result, floatx80_pi);
5630	if (bSign && !rSign)
5631		return floatx80_sub(result, floatx80_pi);
5632	return result;
5633}
5634
5635/* ----- end from original file "fpatan.cc" ----- */
5636
5637/* end soft float stuff */
5638
5639typedef floatx80 myfpr;
5640
5641LOCALPROC myfp_FromExtendedFormat(myfpr *r, ui4r v2, ui5r v1, ui5r v0)
5642{
5643	r->high = v2;
5644	r->low = (((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF);
5645}
5646
5647LOCALPROC myfp_ToExtendedFormat(myfpr *dd, ui4r *v2, ui5r *v1, ui5r *v0)
5648{
5649	*v0 = ((ui6b) dd->low) & 0xFFFFFFFF;
5650	*v1 = (((ui6b) dd->low) >> 32) & 0xFFFFFFFF;
5651	*v2 = dd->high;
5652}
5653
5654LOCALPROC myfp_FromDoubleFormat(myfpr *r, ui5r v1, ui5r v0)
5655{
5656	float64 t = (float64)((((ui6b)v1) << 32) | (v0 & 0xFFFFFFFF));
5657
5658	*r = float64_to_floatx80(t);
5659}
5660
5661LOCALPROC myfp_ToDoubleFormat(myfpr *dd, ui5r *v1, ui5r *v0)
5662{
5663	float64 t = floatx80_to_float64(*dd);
5664
5665	*v0 = ((ui6b) t) & 0xFFFFFFFF;
5666	*v1 = (((ui6b) t) >> 32) & 0xFFFFFFFF;
5667}
5668
5669LOCALPROC myfp_FromSingleFormat(myfpr *r, ui5r x)
5670{
5671	*r = float32_to_floatx80(x);
5672}
5673
5674LOCALFUNC ui5r myfp_ToSingleFormat(myfpr *ff)
5675{
5676	return floatx80_to_float32(*ff);
5677}
5678
5679LOCALPROC myfp_FromLong(myfpr *r, ui5r x)
5680{
5681	*r = int32_to_floatx80( x );
5682}
5683
5684LOCALFUNC ui5r myfp_ToLong(myfpr *x)
5685{
5686	return floatx80_to_int32( *x );
5687}
5688
5689LOCALFUNC blnr myfp_IsNan(myfpr *x)
5690{
5691	return floatx80_is_nan(*x);
5692}
5693
5694LOCALFUNC blnr myfp_IsInf(myfpr *x)
5695{
5696	return ( ( x->high & 0x7FFF ) == 0x7FFF ) && (0 == ((ui6b) ( x->low<<1 )));
5697}
5698
5699LOCALFUNC blnr myfp_IsZero(myfpr *x)
5700{
5701	return ( ( x->high & 0x7FFF ) == 0x0000 ) && (0 == ((ui6b) ( x->low<<1 )));
5702}
5703
5704LOCALFUNC blnr myfp_IsNeg(myfpr *x)
5705{
5706	return ( ( x->high & 0x8000 ) != 0x0000 );
5707}
5708
5709LOCALPROC myfp_Add(myfpr *r, const myfpr *a, const myfpr *b)
5710{
5711	*r = floatx80_add(*a, *b);
5712}
5713
5714LOCALPROC myfp_Sub(myfpr *r, const myfpr *a, const myfpr *b)
5715{
5716	*r = floatx80_sub(*a, *b);
5717}
5718
5719LOCALPROC myfp_Mul(myfpr *r, const myfpr *a, const myfpr *b)
5720{
5721	*r = floatx80_mul(*a, *b);
5722}
5723
5724LOCALPROC myfp_Div(myfpr *r, const myfpr *a, const myfpr *b)
5725{
5726	*r = floatx80_div(*a, *b);
5727}
5728
5729LOCALPROC myfp_Rem(myfpr *r, const myfpr *a, const myfpr *b)
5730{
5731	*r = floatx80_rem(*a, *b);
5732}
5733
5734LOCALPROC myfp_Sqrt(myfpr *r, myfpr *x)
5735{
5736	*r = floatx80_sqrt(*x);
5737}
5738
5739LOCALPROC myfp_Mod(myfpr *r, myfpr *a, myfpr *b)
5740{
5741	Bit64u q;
5742	*r = floatx80_remainder(*a, *b, &q);
5743		/* should save low byte of q */
5744}
5745
5746LOCALPROC myfp_Scale(myfpr *r, myfpr *a, myfpr *b)
5747{
5748	*r = floatx80_scale(*a, *b);
5749}
5750
5751LOCALPROC myfp_GetMan(myfpr *r, myfpr *x)
5752{
5753	*r = *x;
5754	(void) floatx80_extract(r);
5755}
5756
5757LOCALPROC myfp_GetExp(myfpr *r, myfpr *x)
5758{
5759	floatx80 t0 = *x;
5760	*r = floatx80_extract(&t0);
5761}
5762
5763LOCALPROC myfp_floor(myfpr *r, myfpr *x)
5764{
5765	si3r SaveRoundingMode = float_rounding_mode;
5766
5767	float_rounding_mode = float_round_down;
5768	*r = floatx80_round_to_int(*x);
5769	float_rounding_mode = SaveRoundingMode;
5770}
5771
5772LOCALPROC myfp_IntRZ(myfpr *r, myfpr *x)
5773{
5774	si3r SaveRoundingMode = float_rounding_mode;
5775
5776	float_rounding_mode = float_round_to_zero;
5777	*r = floatx80_round_to_int(*x);
5778	float_rounding_mode = SaveRoundingMode;
5779}
5780
5781LOCALPROC myfp_Int(myfpr *r, myfpr *x)
5782{
5783	*r = floatx80_round_to_int(*x);
5784}
5785
5786LOCALPROC myfp_RoundToSingle(myfpr *r, myfpr *x)
5787{
5788	float32 t0 = floatx80_to_float32(*x);
5789
5790	*r = float32_to_floatx80(t0);
5791}
5792
5793LOCALPROC myfp_RoundToDouble(myfpr *r, myfpr *x)
5794{
5795	float64 t0 = floatx80_to_float64(*x);
5796
5797	*r = float64_to_floatx80(t0);
5798}
5799
5800LOCALPROC myfp_Abs(myfpr *r, myfpr *x)
5801{
5802	*r = *x;
5803	r->high &= 0x7FFF;
5804}
5805
5806LOCALPROC myfp_Neg(myfpr *r, myfpr *x)
5807{
5808	*r = *x;
5809	r->high ^= 0x8000;
5810}
5811
5812LOCALPROC myfp_TwoToX(myfpr *r, myfpr *x)
5813{
5814	floatx80 t2;
5815	floatx80 t3;
5816	floatx80 t4;
5817	floatx80 t5;
5818	myfp_floor(&t2, x);
5819	t3 = floatx80_sub(*x, t2);
5820	t4 = f2xm1(t3);
5821	t5 = floatx80_add(t4, floatx80_one);
5822	*r = floatx80_scale(t5, t2);
5823}
5824
5825LOCALPROC myfp_TenToX(myfpr *r, myfpr *x)
5826{
5827	floatx80 t1;
5828	const floatx80 t = /* 1.0 / log(2.0) */
5829		packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5830	const floatx80 t2 = /* log(10.0) */
5831		packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17));
5832	t1 = floatx80_mul(floatx80_mul(*x, t), t2);
5833	myfp_TwoToX(r, &t1);
5834}
5835
5836LOCALPROC myfp_EToX(myfpr *r, myfpr *x)
5837{
5838	floatx80 t1;
5839	const floatx80 t = /* 1.0 / log(2.0) */
5840		packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5841	t1 = floatx80_mul(*x, t);
5842	myfp_TwoToX(r, &t1);
5843}
5844
5845LOCALPROC myfp_EToXM1(myfpr *r, myfpr *x)
5846{
5847	floatx80 t1;
5848	floatx80 t2;
5849	floatx80 t3;
5850	floatx80 t4;
5851	floatx80 t5;
5852	floatx80 t6;
5853	const floatx80 t = /* 1.0 / log(2.0) */
5854		packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
5855	t1 = floatx80_mul(*x, t);
5856	myfp_floor(&t2, &t1);
5857	t3 = floatx80_sub(t1, t2);
5858	t4 = f2xm1(t3);
5859	if (myfp_IsZero(&t2)) {
5860		*r = t4;
5861	} else {
5862		t5 = floatx80_add(t4, floatx80_one);
5863		t6 = floatx80_scale(t5, t2);
5864		*r = floatx80_sub(t6, floatx80_one);
5865	}
5866}
5867
5868LOCALPROC myfp_Log2(myfpr *r, myfpr *x)
5869{
5870	*r = fyl2x(*x, floatx80_one);
5871}
5872
5873LOCALPROC myfp_LogN(myfpr *r, myfpr *x)
5874{
5875	const floatx80 t = /* log(2.0) */
5876		packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
5877	*r = fyl2x(*x, t);
5878}
5879
5880LOCALPROC myfp_Log10(myfpr *r, myfpr *x)
5881{
5882	const floatx80 t = /* log10(2.0) = ln(2) / ln(10), unknown accuracy */
5883		packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798));
5884	*r = fyl2x(*x, t);
5885}
5886
5887LOCALPROC myfp_LogNP1(myfpr *r, myfpr *x)
5888{
5889	const floatx80 t = /* log(2.0) */
5890		packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
5891	*r = fyl2xp1(*x, t);
5892}
5893
5894LOCALPROC myfp_Sin(myfpr *r, myfpr *x)
5895{
5896	(void) fsincos(*x, r, 0);
5897}
5898
5899LOCALPROC myfp_Cos(myfpr *r, myfpr *x)
5900{
5901	(void) fsincos(*x, 0, r);
5902}
5903
5904LOCALPROC myfp_Tan(myfpr *r, myfpr *x)
5905{
5906	*r = *x;
5907	(void) ftan(r);
5908}
5909
5910LOCALPROC myfp_ATan(myfpr *r, myfpr *x)
5911{
5912	*r = fpatan(floatx80_one, *x);
5913}
5914
5915LOCALPROC myfp_ASin(myfpr *r, myfpr *x)
5916{
5917	floatx80 x2 = floatx80_mul(*x, *x);
5918	floatx80 mx2 = floatx80_sub(floatx80_one, x2);
5919	floatx80 cx = floatx80_sqrt(mx2);
5920
5921	*r = fpatan(cx, *x);
5922}
5923
5924LOCALPROC myfp_ACos(myfpr *r, myfpr *x)
5925{
5926	floatx80 x2 = floatx80_mul(*x, *x);
5927	floatx80 mx2 = floatx80_sub(floatx80_one, x2);
5928	floatx80 cx = floatx80_sqrt(mx2);
5929
5930	*r = fpatan(*x, cx);
5931}
5932
5933static const floatx80 floatx80_zero =
5934	packFloatx80m(0, 0x0000, LIT64(0x0000000000000000));
5935
5936static const floatx80 floatx80_Two =
5937	packFloatx80m(0, 0x4000, LIT64(0x8000000000000000));
5938
5939static const floatx80 floatx80_Ten =
5940	packFloatx80m(0, 0x4002, LIT64(0xa000000000000000));
5941
5942LOCALPROC myfp_Sinh(myfpr *r, myfpr *x)
5943{
5944	myfpr ex;
5945	myfpr nx;
5946	myfpr enx;
5947	myfpr t1;
5948
5949	myfp_EToX(&ex, x);
5950	myfp_Neg(&nx, x);
5951	myfp_EToX(&enx, &nx);
5952	myfp_Sub(&t1, &ex, &enx);
5953	myfp_Div(r, &t1, &floatx80_Two);
5954}
5955
5956LOCALPROC myfp_Cosh(myfpr *r, myfpr *x)
5957{
5958	myfpr ex;
5959	myfpr nx;
5960	myfpr enx;
5961	myfpr t1;
5962
5963	myfp_EToX(&ex, x);
5964	myfp_Neg(&nx, x);
5965	myfp_EToX(&enx, &nx);
5966	myfp_Add(&t1, &ex, &enx);
5967	myfp_Div(r, &t1, &floatx80_Two);
5968}
5969
5970LOCALPROC myfp_Tanh(myfpr *r, myfpr *x)
5971{
5972	myfpr x2;
5973	myfpr ex2;
5974	myfpr ex2m1;
5975	myfpr ex2p1;
5976
5977	myfp_Mul(&x2, x, &floatx80_Two);
5978	myfp_EToX(&ex2, &x2);
5979	myfp_Sub(&ex2m1, &ex2, &floatx80_one);
5980	myfp_Add(&ex2p1, &ex2, &floatx80_one);
5981	myfp_Div(r, &ex2m1, &ex2p1);
5982}
5983
5984LOCALPROC myfp_ATanh(myfpr *r, myfpr *x)
5985{
5986	myfpr onepx;
5987	myfpr onemx;
5988	myfpr dv;
5989	myfpr ldv;
5990
5991	myfp_Add(&onepx, x, &floatx80_one);
5992	myfp_Sub(&onemx, x, &floatx80_one);
5993	myfp_Div(&dv, &onepx, &onemx);
5994	myfp_LogN(&ldv, &dv);
5995	myfp_Div(r, &ldv, &floatx80_Two);
5996}
5997
5998LOCALPROC myfp_SinCos(myfpr *r_sin, myfpr *r_cos, myfpr *source)
5999{
6000	(void) fsincos(*source, r_sin, r_cos);
6001}
6002
6003LOCALFUNC blnr myfp_getCR(myfpr *r, ui4b opmode)
6004{
6005	switch (opmode) {
6006		case 0x00:
6007			*r = floatx80_pi; /* M_PI */
6008			break;
6009		case 0x0B:
6010			{
6011				const floatx80 t =
6012					packFloatx80m(0, 0x3ffd, LIT64(0x9a209a84fbcff798));
6013				*r = t; /* log10(2.0) = ln(2) / ln(10), unknown accuracy */
6014			}
6015			break;
6016		case 0x0C:
6017			{
6018				const floatx80 t =
6019					packFloatx80m(0, 0x4000, LIT64(0xadf85458a2bb4a9b));
6020				*r = t; /* exp(1.0), unknown accuracy */
6021			}
6022			break;
6023		case 0x0D:
6024			{
6025				const floatx80 t =
6026					packFloatx80m(0, 0x3fff, LIT64(0xb8aa3b295c17f0bc));
6027				*r = t; /* 1.0 / log(2.0) */
6028			}
6029			break;
6030		case 0x0E:
6031			{
6032				const floatx80 t =
6033					packFloatx80m(0, 0x3ffd, LIT64(0xde5bd8a937287195));
6034				*r = t; /* 1.0 / log(10.0), unknown accuracy */
6035			}
6036			break;
6037		case 0x0F:
6038			*r = floatx80_zero; /* 0.0 */
6039			break;
6040		case 0x30:
6041			{
6042				const floatx80 t =
6043					packFloatx80m(0, 0x3ffe, LIT64(0xb17217f7d1cf79ac));
6044				*r = t; /* log(2.0) */
6045			}
6046			break;
6047		case 0x31:
6048			{
6049				const floatx80 t =
6050					packFloatx80m(0, 0x4000, LIT64(0x935d8dddaaa8ac17));
6051				*r = t; /* log(10.0) */
6052			}
6053			break;
6054		case 0x32:
6055			*r = floatx80_one; /* 1.0 */
6056			break;
6057		case 0x33:
6058			*r = floatx80_Ten; /* 10.0 */
6059			break;
6060		case 0x34:
6061			{
6062				const floatx80 t =
6063					packFloatx80m(0, 0x4005, LIT64(0xc800000000000000));
6064				*r = t; /* 100.0 */
6065			}
6066			break;
6067		case 0x35:
6068			{
6069				const floatx80 t =
6070					packFloatx80m(0, 0x400c, LIT64(0x9c40000000000000));
6071				*r = t; /* 10000.0 */
6072			}
6073			break;
6074		case 0x36:
6075			{
6076				const floatx80 t =
6077					packFloatx80m(0, 0x4019, LIT64(0xbebc200000000000));
6078				*r = t; /* 1.0e8 */
6079			}
6080			break;
6081		case 0x37:
6082			{
6083				const floatx80 t =
6084					packFloatx80m(0, 0x4034, LIT64(0x8e1bc9bf04000000));
6085				*r = t; /* 1.0e16 */
6086			}
6087			break;
6088		case 0x38:
6089			{
6090				const floatx80 t =
6091					packFloatx80m(0, 0x4069, LIT64(0x9dc5ada82b70b59e));
6092				*r = t; /* 1.0e32 */
6093			}
6094			break;
6095		case 0x39:
6096			{
6097				const floatx80 t =
6098					packFloatx80m(0, 0x40d3, LIT64(0xc2781f49ffcfa6d5));
6099				*r = t; /* 1.0e64 */
6100			}
6101			break;
6102		case 0x3A:
6103			{
6104				const floatx80 t =
6105					packFloatx80m(0, 0x41a8, LIT64(0x93ba47c980e98ce0));
6106				*r = t; /* 1.0e128 */
6107			}
6108			break;
6109		case 0x3B:
6110			{
6111				const floatx80 t =
6112					packFloatx80m(0, 0x4351, LIT64(0xaa7eebfb9df9de8e));
6113				*r = t; /* 1.0e256 */
6114			}
6115			break;
6116		case 0x3C:
6117			{
6118				const floatx80 t =
6119					packFloatx80m(0, 0x46a3, LIT64(0xe319a0aea60e91c7));
6120				*r = t; /* 1.0e512 */
6121			}
6122			break;
6123		case 0x3D:
6124			{
6125				const floatx80 t =
6126					packFloatx80m(0, 0x4d48, LIT64(0xc976758681750c17));
6127				*r = t; /* 1.0e1024 */
6128			}
6129			break;
6130		case 0x3E:
6131			{
6132				const floatx80 t =
6133					packFloatx80m(0, 0x5a92, LIT64(0x9e8b3b5dc53d5de5));
6134				*r = t; /* 1.0e2048 */
6135			}
6136			break;
6137		case 0x3F:
6138			{
6139				const floatx80 t =
6140					packFloatx80m(0, 0x7525, LIT64(0xc46052028a20979b));
6141				*r = t; /* 1.0e4096 */
6142			}
6143			break;
6144		default:
6145			return falseblnr;
6146	}
6147	return trueblnr;
6148}
6149
6150/* Floating point control register */
6151
6152LOCALPROC myfp_SetFPCR(ui5r v)
6153{
6154	switch ((v >> 4) & 0x03) {
6155		case 0:
6156			float_rounding_mode = float_round_nearest_even;
6157			break;
6158		case 1:
6159			float_rounding_mode = float_round_to_zero;
6160			break;
6161		case 2:
6162			float_rounding_mode = float_round_down;
6163			break;
6164		case 3:
6165			float_rounding_mode = float_round_up;
6166			break;
6167	}
6168	switch ((v >> 6) & 0x03) {
6169		case 0:
6170			floatx80_rounding_precision = 80;
6171			break;
6172		case 1:
6173			floatx80_rounding_precision = 32;
6174			break;
6175		case 2:
6176			floatx80_rounding_precision = 64;
6177			break;
6178		case 3:
6179			ReportAbnormalID(0x0201,
6180				"Bad rounding precision in myfp_SetFPCR");
6181			floatx80_rounding_precision = 80;
6182			break;
6183	}
6184	if (0 != (v & 0xF)) {
6185		ReportAbnormalID(0x0202,
6186			"Reserved bits not zero in myfp_SetFPCR");
6187	}
6188}
6189
6190LOCALFUNC ui5r myfp_GetFPCR(void)
6191{
6192	ui5r v = 0;
6193
6194	switch (float_rounding_mode) {
6195		case float_round_nearest_even:
6196			/* v |= (0 << 4); */
6197			break;
6198		case float_round_to_zero:
6199			v |= (1 << 4);
6200			break;
6201		case float_round_down:
6202			v |= (2 << 4);
6203			break;
6204		case float_round_up:
6205			v |= (3 << 4);
6206			break;
6207	}
6208
6209	if (80 == floatx80_rounding_precision) {
6210		/* v |= (0 << 6); */
6211	} else if (32 == floatx80_rounding_precision) {
6212		v |= (1 << 6);
6213	} else if (64 == floatx80_rounding_precision) {
6214		v |= (2 << 6);
6215	} else {
6216		ReportAbnormalID(0x0203,
6217			"Bad rounding precision in myfp_GetFPCR");
6218	}
6219
6220	return v;
6221}
6222
6223LOCALVAR struct myfp_envStruct
6224{
6225	ui5r FPSR;  /* Floating point status register */
6226} myfp_env;
6227
6228LOCALPROC myfp_SetFPSR(ui5r v)
6229{
6230	myfp_env.FPSR = v;
6231}
6232
6233LOCALFUNC ui5r myfp_GetFPSR(void)
6234{
6235	return myfp_env.FPSR;
6236}
6237
6238LOCALFUNC ui3r myfp_GetConditionCodeByte(void)
6239{
6240	return (myfp_env.FPSR >> 24) & 0x0F;
6241}
6242
6243LOCALPROC myfp_SetConditionCodeByte(ui3r v)
6244{
6245	myfp_env.FPSR = ((myfp_env.FPSR & 0x00FFFFFF)
6246		| (v << 24));
6247}
6248
6249LOCALPROC myfp_SetConditionCodeByteFromResult(myfpr *result)
6250{
6251	/* Set condition codes here based on result */
6252
6253	int c_nan  = myfp_IsNan(result) ? 1 : 0;
6254	int c_inf  = myfp_IsInf(result) ? 1 : 0;
6255	int c_zero = myfp_IsZero(result) ? 1 : 0;
6256	int c_neg  = myfp_IsNeg(result) ? 1 : 0;
6257
6258	myfp_SetConditionCodeByte(c_nan
6259		| (c_inf  << 1)
6260		| (c_zero << 2)
6261		| (c_neg  << 3));
6262}