src/libm/Source/PowerPC/tableLogD.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / tableLogD.c
at fixPythonPipStalling 999 lines 47 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*     File tableLogD.c,                                                        *
 25*     Functions log, log1p and log10.                                          *
 26*     Implementation of log( x ) based on IBM/Taligent table method.           *
 27*                                                                              *
 28*     Copyright � 1997-2001 Apple Computer, Inc.  All rights reserved.         *
 29*                                                                              *
 30*     Written by Ali Sazegari, started on April 1997.                          *
 31*     Modified and ported by Robert A. Murley (ram) for Mac OS X.              *
 32*                                                                              *
 33*     A MathLib v4 file.                                                       *
 34*                                                                              *
 35*     Nov   07 2001: removed log2 to prevent conflict with CarbonCore.         *
 36*     Nov   06 2001: commented out warning about Intel architectures.          *
 37*                    changed i386 stub to call abort().                        *
 38*     Nov   02 2001: added stub for i386 version of log2.                      *
 39*     April 28 1997: port of the ibm/taligent log routines.                    *
 40*     Sept  06 2001: replaced __setflm with FEGETENVD/FESETENVD.               *
 41*                    replaced DblInHex typedef with hexdouble.                 *
 42*                    used standard exception symbols from fenv.h.              *
 43*                    added #ifdef __ppc__.                                     *
 44*                    used faster fixed-point instructions to calculate 'n'     *
 45*     Sept  09 2001: added more comments.                                      *
 46*     Sept  10 2001: added macros to detect PowerPC and correct compiler.      *
 47*     Sept  18 2001: added <CoreServices/CoreServices.h> to get <fp.h> and     *
 48*                    <fenv.h>.                                                 *
 49*     Oct   08 2001: removed <CoreServices/CoreServices.h>.                    *
 50*                    changed compiler errors to warnings.                      *
 51*                                                                              *
 52*     W A R N I N G:                                                           *
 53*     These routines require a 64-bit double precision IEEE-754 model.         *
 54*     They are written for PowerPC only and are expecting the compiler         *
 55*     to generate the correct sequence of multiply-add fused instructions.     *
 56*                                                                              *
 57*     These routines are not intended for 32-bit Intel architectures.          *
 58*                                                                              *
 59*     A version of gcc higher than 932 is required.                            *
 60*                                                                              *
 61*     GCC compiler options:                                                    *
 62*            optimization level 3 (-O3)                                        *
 63*            -fschedule-insns -finline-functions -funroll-all-loops            *
 64*                                                                              *
 65*******************************************************************************/
 66
 67#include      "math.h"
 68#include      "fp_private.h"
 69#include      "fenv_private.h"
 70
 71#define  LOGORITHMIC_NAN  "36"
 72
 73/*******************************************************************************
 74*      Floating-point constants.                                               *
 75*******************************************************************************/
 76
 77static const double ln2            = 6.931471805599452862e-1;
 78static const double twoTo128       = 0x1.0p+128; // 3.402823669209384635e+38;
 79static const double largestDenorm  = 2.2250738585072009e-308;   // { 0x000fffff, 0xffffffff }
 80static const hexdouble infinity     = HEXDOUBLE(0x7ff00000, 0x00000000);
 81
 82/*******************************************************************************
 83*      Approximation coefficients.                                             *
 84*******************************************************************************/
 85
 86static const double d2 = -0.5000000000000000000000;             // { 0xBFE00000, 0x00000000 }
 87static const double d3 =  0.3333333333333360968212;             // { 0x3FD55555, 0x55555587 }
 88static const double d4 = -0.2500000000000056849516;             // { 0xBFD00000, 0x00000066 }
 89static const double d5 =  0.1999999996377879912068;             // { 0x3FC99999, 0x98D278CB }
 90static const double d6 = -0.1666666662009609743117;             // { 0xBFC55555, 0x54554F15 }
 91static const double d7 =  0.1428690115662276259345;             // { 0x3FC24988, 0x2224F72F }
 92static const double d8 = -0.1250122079043555957999;             // { 0xBFC00066, 0x68466517 }
 93
 94extern const uint32_t logTable[];
 95
 96struct logTableEntry 
 97       {
 98       double X;
 99       double G;
100       hexdouble F;
101       };
102
103static const hexdouble kConvDouble     = HEXDOUBLE(0x43300000, 0x80000000);
104
105#if !defined(BUILDING_FOR_CARBONCORE_LEGACY)
106
107/*******************************************************************************
108*      Floating-point constants.                                               *
109*******************************************************************************/
110
111static const double ln2Tail        = 2.319046813846299558e-17;
112static const double log10e         = 4.342944819032518200e-01;  // head of log10 of e { 0x3fdbcb7b, 0x1526e50e }
113static const double log10eTail     = 1.098319650216765200e-17;  // tail of log10 of e { 0x3c695355, 0xbaaafad4 }
114
115/*******************************************************************************
116*      Approximation coefficients.                                             *
117*******************************************************************************/
118
119static const double c2 = -0.5000000000000000042725;
120static const double c3 =  0.3333333333296328456505;
121static const double c4 = -0.2499999999949632195759;
122static const double c5 =  0.2000014541571685319141;
123static const double c6 = -0.1666681511199968257150;
124
125/*******************************************************************************
126*                                                                              *
127*    The base e logorithm function.  Caller�s rounding direction is honored.   *
128*                                                                              *
129********************************************************************************
130*                                                                              *
131*    calls the intrinsic functions __setflm, __FABS.                           *
132*    raised exceptions are inexact, divide by zero and invalid.                *
133*                                                                              *
134*******************************************************************************/
135
136double log ( double x ) 
137{
138       hexdouble yInHex, xInHex, OldEnvironment;
139       register double n, zTail, high, low, z, z2, temp1, temp2, temp3, result;
140       register uint32_t i;
141       struct logTableEntry *tablePointer = ( struct logTableEntry * ) logTable;
142       
143       register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_twoTo128, FPR_ln2, FPR_kConvDouble;
144       register double FPR_r, FPR_s, FPR_t, FPR_u, FPR_y;
145       register struct logTableEntry *pT;
146       register int32_t nLong; // converted to double in two widely separated steps, avoiding LSU hazards
147       hexdouble nInHex;
148       
149       xInHex.d = x;						nInHex.i.hi = 0x43300000; // store upper half
150
151       FPR_z = 0.0;						FPR_half = 0.5;
152       FPR_one = 1.0;						FPR_u = 1.0;
153       FPR_twoTo128 = twoTo128;					FPR_ln2 = ln2;
154       
155       FEGETENVD( FPR_env );
156       __ENSURE( FPR_z, FPR_twoTo128, FPR_ln2 );		__ENSURE( FPR_u, FPR_half, FPR_one );		
157       FESETENVD( FPR_z );
158              
159       if (likely( FPR_z <= x && x < infinity.d )) 
160       {                                             // x is finite and non-negative
161              if (likely( x > largestDenorm ))  
162              {                                      // x is normal
163                     i = xInHex.i.hi & 0x000fffff;
164                     yInHex.i.lo = xInHex.i.lo;
165                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
166                     if ( yInHex.i.hi & 0x00080000 ) 
167                     {                               // 1.5 <= y < 2.0
168                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1022);
169                            nLong ^= 0x80000000; // flip sign bit
170                            nInHex.i.lo = nLong; // store lower half
171                            i = ( i >> 13 ) & 0x3f;         // table lookup index
172                            FPR_u = FPR_half;
173                     }
174                     else 
175                     {                               // 1.0 <= y < 1.5
176                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1023);
177                            nLong ^= 0x80000000; // flip sign bit
178                            nInHex.i.lo = nLong; // store lower half
179                            i = ( i >> 12 ) + 64;           // table lookup index
180                            // FPR_u = FPR_one; via initialization above
181                     }
182              }
183              else if (likely( x != FPR_z )) 
184              {                                      // x is nonzero, denormal
185                     xInHex.d = __FMUL( x, FPR_twoTo128 );
186                    __NOOP;
187                    __NOOP;
188                    __NOOP;
189                     i = xInHex.i.hi & 0x000fffff;
190                     yInHex.i.lo = xInHex.i.lo;
191                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
192                     if ( yInHex.i.hi & 0x00080000 ) 
193                     {                               // 1.5 <= y < 2.0
194                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1150);
195                            nLong ^= 0x80000000; // flip sign bit
196                            nInHex.i.lo = nLong; // store lower half
197                            i = ( i >> 13 ) & 0x3f;         // table lookup index
198                            FPR_u = FPR_half;
199                     }
200                     else 
201                     {                               // 1.0 <= y < 1.5
202                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1151);
203                            nLong ^= 0x80000000; // flip sign bit
204                            nInHex.i.lo = nLong; // store lower half
205                            i = ( i >> 12 ) + 64;           // table lookup index
206                            // FPR_u = FPR_one; via initialization above
207                     }
208              }
209              else                                          // x is 0.0
210              {
211                     OldEnvironment.d = FPR_env;
212					 __NOOP;
213					 __NOOP;
214					 __NOOP;
215                     OldEnvironment.i.lo |= FE_DIVBYZERO;
216                     result = -infinity.d;
217                     FESETENVD_GRP( OldEnvironment.d );
218                     return result;
219              }
220              
221              pT = &(tablePointer[i]);
222              FPR_r = pT->X;					FPR_t = pT->G;
223              
224              FPR_y = yInHex.d;					
225              FPR_s = __FMSUB( FPR_u, FPR_y, FPR_r );		__NOOP;
226              
227              z = __FMUL( FPR_s, FPR_t );			
228              FPR_u = __FNMSUB( z, FPR_r, FPR_s );		z2 = __FMUL( z, z );
229              zTail = __FMUL( FPR_u, FPR_t);
230                            
231              if ( (uint32_t)nLong == 0x80000000u /* iff n == 0.0 */ ) 
232              {                                     // 0.75 <= x < 1.5
233                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
234                     
235                     if ( x == FPR_one ) 
236                     {
237                            FESETENVD( FPR_env );
238                            return FPR_z;
239                     }
240                     
241                     FPR_d8 = d8;				FPR_d6 = d6;
242                     
243                     FPR_d7 = d7;				FPR_d5 = d5;
244                     
245                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
246                     FPR_d4 = d4;				
247                     
248                     FPR_d3 = d3;
249                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
250
251                     FPR_d2 = d2;				
252                     FPR_t = pT->F.d;				__NOOP;
253                     
254                     temp1 = __FMADD( temp1, z, temp2 );	temp3 = z + FPR_t;
255                     
256                     temp2 = __FMADD( temp1, z, FPR_d2 );	FPR_u = FPR_t - temp3;
257                     
258                     FPR_s = FPR_u + z;				FPR_r = __FNMSUB( z, zTail, zTail );
259                     
260                     low = FPR_s + FPR_r;
261                     
262                     result = __FMADD( temp2, z2, low );
263                     
264                     result += temp3;
265                     
266                     FESETENVD( FPR_env );
267                     __PROG_INEXACT( FPR_ln2 );
268                     
269                     return result;
270              }
271              else if ( pT->F.i.hi != 0 ) 
272              {                                     // n != 0, and y not close to 1
273                     register double FPR_c2, FPR_c3, FPR_c4, FPR_c5, FPR_c6;
274                     
275                     FPR_c6 = c6;				FPR_c4 = c4;
276                     
277                     FPR_c5 = c5;				FPR_c3 = c3;
278                     
279                     temp3 = __FMADD( FPR_c6, z2, FPR_c4 );	temp2 = __FMADD( FPR_c5, z2, FPR_c3 );
280                     FPR_kConvDouble = kConvDouble.d;	
281                     
282                     FPR_s = ln2Tail;
283                     n = nInHex.d;			// float load double 				
284                     n -= FPR_kConvDouble; 		// subtract magic value			
285                     
286                     __NOOP;				FPR_t = pT->F.d; 
287                     low = __FMADD( n, FPR_s, zTail );		high = z + FPR_t;
288                     
289                     temp3 = __FMUL( temp3, z2 );		FPR_u = FPR_t - high;
290                     		
291                     temp2 = __FMADD( temp2, z, temp3 );	zTail = FPR_u + z;
292
293                     FPR_c2 = c2;
294                     temp2 = temp2 + FPR_c2;			temp1 = __FMADD( n, FPR_ln2, low );
295                     
296                     FPR_t = __FMSUB( n, FPR_ln2, temp1 );	temp3 = high + temp1;
297                     
298                     FPR_s = temp1 - temp3;			low = FPR_t + low;				
299                     
300                     temp1 = FPR_s + high;			FPR_r = __FMADD( temp2, z2, low );
301                     
302                     result = ( FPR_r + temp1 ) + zTail;
303                     
304                     result += temp3;
305                     
306                     FESETENVD( FPR_env );
307                     __PROG_INEXACT( FPR_ln2 );
308                     
309                     return result;
310              }
311              else 
312              {                                     // n != 0 and y close to 1                     
313                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
314                    
315                     FPR_d8 = d8;				FPR_d6 = d6;
316                     
317                     FPR_d7 = d7;				FPR_d5 = d5;
318                     
319                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
320                     FPR_kConvDouble = kConvDouble.d;	
321                                          
322                     FPR_t = ln2Tail;
323                     n = nInHex.d; 				
324                     n -= FPR_kConvDouble;			
325                     
326                     low = __FMADD( n, FPR_t, zTail );		__NOOP;
327                     FPR_d2 = d2;
328                     
329                     FPR_d4 = d4;				FPR_d3 = d3;
330                     
331                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
332                     temp3 = __FMADD( n, FPR_ln2, low );	__NOOP;
333                     				
334                     temp1 = __FMADD( temp1, z, temp2 );	FPR_s = __FMSUB( n, FPR_ln2, temp3 );
335                     
336                     temp2 = __FMADD( temp1, z, FPR_d2 );	low = FPR_s + low;
337                                          
338                     result = __FMADD( temp2, z2, low ) + z;
339                     result += temp3;
340                     
341                     FESETENVD( FPR_env );
342                     __PROG_INEXACT( FPR_ln2 );
343                     
344                     return result;
345              }
346       }
347       
348       OldEnvironment.d = FPR_env;
349       if ( x == FPR_z )
350       {
351              OldEnvironment.i.lo |= FE_DIVBYZERO;
352              x = -infinity.d;
353       }
354       else if ( x < FPR_z )
355       {                                           // x < 0.0
356              OldEnvironment.i.lo |= SET_INVALID;
357              x = nan ( LOGORITHMIC_NAN );
358       }
359              
360       FESETENVD_GRP( OldEnvironment.d );
361       return x;
362       
363}
364
365/*******************************************************************************
366*                                                                              *
367*    The base 10 logorithm function.  Caller�s rounding direction is honored.  *
368*                                                                              *
369*******************************************************************************/
370
371double log10 ( double x ) 
372{
373       hexdouble yInHex, xInHex, OldEnvironment;
374       register double n, zTail, high, low, z, z2, temp1, temp2, temp3, result, resultLow;
375       register uint32_t i;
376       struct logTableEntry *tablePointer = ( struct logTableEntry * ) logTable;
377       
378       register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_twoTo128, FPR_ln2, FPR_kConvDouble;
379       register double FPR_r, FPR_s, FPR_t, FPR_u, FPR_y;
380       register struct logTableEntry *pT;
381       register int32_t nLong; // converted to double in two widely separated steps, avoiding LSU hazards
382       hexdouble nInHex;
383       
384       xInHex.d = x;						nInHex.i.hi = 0x43300000; // store upper half
385
386       FPR_z = 0.0;						FPR_half = 0.5;
387       FPR_one = 1.0;						FPR_u = 1.0;
388       FPR_twoTo128 = twoTo128;					FPR_ln2 = ln2;
389       
390       FEGETENVD( FPR_env );
391       __ENSURE( FPR_z, FPR_twoTo128, FPR_ln2 );		__ENSURE( FPR_u, FPR_half, FPR_one );		
392       FESETENVD( FPR_z );
393              
394       if (likely( FPR_z <= x && x < infinity.d )) 
395       {                                             // x is finite and non-negative
396              if (likely( x > largestDenorm ))  
397              {                                      // x is normal
398                     i = xInHex.i.hi & 0x000fffff;
399                     yInHex.i.lo = xInHex.i.lo;
400                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
401                     if ( yInHex.i.hi & 0x00080000 ) 
402                     {                               // 1.5 <= y < 2.0
403                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1022);
404                            nLong ^= 0x80000000; // flip sign bit
405                            nInHex.i.lo = nLong; // store lower half
406                            i = ( i >> 13 ) & 0x3f;         // table lookup index
407                            FPR_u = FPR_half;
408                     }
409                     else 
410                     {                               // 1.0 <= y < 1.5
411                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1023);
412                            nLong ^= 0x80000000; // flip sign bit
413                            nInHex.i.lo = nLong; // store lower half
414                            i = ( i >> 12 ) + 64;           // table lookup index
415                            // FPR_u = FPR_one; via initialization above
416                     }
417              }
418              else if (likely( x != FPR_z )) 
419              {                                      // x is nonzero, denormal
420                     xInHex.d = __FMUL( x, FPR_twoTo128 );
421					 __NOOP;
422					 __NOOP;
423					 __NOOP;
424                     i = xInHex.i.hi & 0x000fffff;
425                     yInHex.i.lo = xInHex.i.lo;
426                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
427                     if ( yInHex.i.hi & 0x00080000 ) 
428                     {                               // 1.5 <= y < 2.0
429                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1150);
430                            nLong ^= 0x80000000; // flip sign bit
431                            nInHex.i.lo = nLong; // store lower half
432                            i = ( i >> 13 ) & 0x3f;         // table lookup index
433                            FPR_u = FPR_half;
434                     }
435                     else 
436                     {                               // 1.0 <= y < 1.5
437                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1151);
438                            nLong ^= 0x80000000; // flip sign bit
439                            nInHex.i.lo = nLong; // store lower half
440                            i = ( i >> 12 ) + 64;           // table lookup index
441                            // FPR_u = FPR_one; via initialization above
442                     }
443              }
444              else                                          // x is 0.0
445              {
446                     OldEnvironment.d = FPR_env;
447					 __NOOP;
448					 __NOOP;
449					 __NOOP;
450                     OldEnvironment.i.lo |= FE_DIVBYZERO;
451                     result = -infinity.d;
452                     FESETENVD_GRP( OldEnvironment.d );
453                     return result;
454              }
455              
456              pT = &(tablePointer[i]);
457              FPR_r = pT->X;					FPR_t = pT->G;
458              
459              FPR_y = yInHex.d;					
460              FPR_s = __FMSUB( FPR_u, FPR_y, FPR_r );		__NOOP;
461              
462              z = __FMUL( FPR_s, FPR_t );			
463              FPR_u = __FNMSUB( z, FPR_r, FPR_s );		z2 = __FMUL( z, z );
464              zTail = __FMUL( FPR_u, FPR_t);
465                            
466              if ( (uint32_t)nLong == 0x80000000u /* iff n == 0.0 */ ) 
467              {                                     // 0.75 <= x < 1.5
468                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
469                     
470                     if ( x == FPR_one ) 
471                     {
472                            FESETENVD( FPR_env );
473                            return FPR_z;
474                     }
475                     
476                     FPR_d8 = d8;				FPR_d6 = d6;
477                     
478                     FPR_d7 = d7;				FPR_d5 = d5;
479                     
480                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
481                     FPR_d4 = d4;				
482                     
483                     FPR_d3 = d3;
484                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
485
486                     FPR_d2 = d2;				
487                     FPR_t = pT->F.d;				__NOOP;
488                     
489                     temp1 = __FMADD( temp1, z, temp2 );	temp3 = z + FPR_t;
490                     
491                     temp2 = __FMADD( temp1, z, FPR_d2 );	FPR_u = FPR_t - temp3;
492                     
493                     FPR_s = FPR_u + z;				FPR_r = __FNMSUB( z, zTail, zTail );
494                     
495                     low = FPR_s + FPR_r;
496
497                     FPR_r = __FMADD( temp2, z2, low );
498                     FPR_s = log10e;				FPR_t = log10eTail;
499                                          
500                     result = FPR_r + temp3;
501                     resultLow = temp3 - result + FPR_r;
502                     FPR_u = __FMUL( result, FPR_t );
503                     resultLow = __FMADD( resultLow, FPR_s, FPR_u );
504                     result = __FMADD( result, FPR_s, resultLow );
505
506                     FESETENVD( FPR_env );
507                     __PROG_INEXACT( FPR_ln2 );
508                     
509                     return result;
510              }
511              else if ( pT->F.i.hi != 0 ) 
512              {                                     // n != 0, and y not close to 1
513                     register double FPR_c2, FPR_c3, FPR_c4, FPR_c5, FPR_c6;
514                     
515                     FPR_c6 = c6;				FPR_c4 = c4;
516                     
517                     FPR_c5 = c5;				FPR_c3 = c3;
518                     
519                     temp3 = __FMADD( FPR_c6, z2, FPR_c4 );	temp2 = __FMADD( FPR_c5, z2, FPR_c3 );
520                     FPR_kConvDouble = kConvDouble.d;	
521                     
522                     FPR_s = ln2Tail;
523                     n = nInHex.d; 				// float load double				
524                     n -= FPR_kConvDouble; 			// subtract magic value			
525                     
526                     __NOOP;				FPR_t = pT->F.d;
527                     low = __FMADD( n, FPR_s, zTail );		high = z + FPR_t;
528                     
529                     temp3 = __FMUL( temp3, z2 );		FPR_u = FPR_t - high;
530                     		
531                     temp2 = __FMADD( temp2, z, temp3 );	zTail = FPR_u + z;
532
533                     FPR_c2 = c2;
534                     temp2 = temp2 + FPR_c2;			temp1 = __FMADD( n, FPR_ln2, low );
535                     
536                     FPR_t = __FMSUB( n, FPR_ln2, temp1 );	temp3 = high + temp1;
537                     
538                     FPR_s = temp1 - temp3;			low = FPR_t + low;				
539                     
540                     temp1 = FPR_s + high;			FPR_r = __FMADD( temp2, z2, low );
541                     FPR_s = log10e;				FPR_t = log10eTail;
542
543                     result = ( ( FPR_r + temp1 ) + zTail ) + temp3;
544                     resultLow = temp3 - result + zTail + temp1 + FPR_r;
545                     FPR_u = __FMUL( result, FPR_t );
546                     resultLow = __FMADD( resultLow, FPR_s, FPR_u );
547                     result = __FMADD( result, FPR_s, resultLow );
548                     
549                     FESETENVD( FPR_env );
550                     __PROG_INEXACT( FPR_ln2 );
551                     
552                     return result;
553              }
554              else
555              {                                     // n != 0 and y close to 1                     
556                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
557                    
558                     FPR_d8 = d8;				FPR_d6 = d6;
559                     
560                     FPR_d7 = d7;				FPR_d5 = d5;
561                     
562                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
563                     FPR_kConvDouble = kConvDouble.d;	
564                                          
565                     FPR_t = ln2Tail;
566                     n = nInHex.d; 				
567                     n -= FPR_kConvDouble;			
568                     
569                     low = __FMADD( n, FPR_t, zTail );		__NOOP;
570                     FPR_d2 = d2;
571                     
572                     FPR_d4 = d4;				FPR_d3 = d3;
573                     
574                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
575                     temp3 = __FMADD( n, FPR_ln2, low );	__NOOP;
576                     				
577                     temp1 = __FMADD( temp1, z, temp2 );	FPR_s = __FMSUB( n, FPR_ln2, temp3 );
578                     
579                     temp2 = __FMADD( temp1, z, FPR_d2 );	low = FPR_s + low;
580                     
581                     FPR_r = __FMADD( temp2, z2, low );
582                     FPR_s = log10e;				FPR_t = log10eTail;
583                                          
584                     result = ( FPR_r + z ) + temp3;
585                     resultLow = temp3 - result + z + FPR_r;
586                     FPR_u = __FMUL( result, FPR_t );
587                     resultLow = __FMADD( resultLow, FPR_s, FPR_u );
588                     result = __FMADD( result, FPR_s, resultLow );
589                     
590                     FESETENVD( FPR_env );
591                     __PROG_INEXACT( FPR_ln2 );
592                     
593                     return result;
594              }
595       }
596       
597       OldEnvironment.d = FPR_env;
598	   __NOOP;
599	   __NOOP;
600	   __NOOP;
601	   
602       if ( x == FPR_z )
603       {
604              OldEnvironment.i.lo |= FE_DIVBYZERO;
605              x = -infinity.d;
606       }
607       else if ( x < FPR_z )
608       {                                           // x < 0.0
609              OldEnvironment.i.lo |= SET_INVALID;
610              x = nan ( LOGORITHMIC_NAN );
611       }
612              
613       FESETENVD_GRP( OldEnvironment.d );
614       return x;
615       
616}
617
618/*******************************************************************************
619*                                                                              *
620*    The base e log(1+x) function.  Caller�s rounding direction is honored.    *
621*                                                                              *
622*******************************************************************************/
623
624double log1p ( double x ) 
625{
626       hexdouble yInHex, xInHex, OldEnvironment;
627       register double yLow, n, zTail, high, low, z, z2, temp1, temp2, temp3, result;
628       register uint32_t i;
629       struct logTableEntry *tablePointer = ( struct logTableEntry * ) logTable;
630       
631       register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_negOne, FPR_ln2, FPR_kConvDouble;
632       register double FPR_r, FPR_s, FPR_t, FPR_u, FPR_y;
633       register struct logTableEntry *pT;
634       register int32_t nLong; // converted to double in two widely separated steps, avoiding LSU hazards
635       hexdouble nInHex;
636       
637       nInHex.i.hi = 0x43300000; // store upper half
638
639       FPR_z = 0.0;						FPR_half = 0.5;
640       FPR_one = 1.0;						FPR_u = 1.0;
641       FPR_ln2 = ln2;						FPR_negOne = -1.0;
642       
643       FEGETENVD( FPR_env );
644       __ENSURE( FPR_z, FPR_negOne, FPR_ln2 );			__ENSURE( FPR_u, FPR_half, FPR_one );		
645       FESETENVD( FPR_z );
646       
647//     N.B. x == NAN fails all comparisons and falls through to the return.
648
649       if (likely( ( x > FPR_negOne ) && ( x < infinity.d ) )) 
650       {
651              FPR_y = FPR_one + x;                          // yInHex.d cannot be a denormal number
652              yInHex.d = FPR_y;
653              yLow = ( x < FPR_one ) ? ( FPR_one - FPR_y ) + x : ( x - FPR_y ) + FPR_one; 
654              
655              i = yInHex.i.hi & 0x000fffff;
656              nLong = (int32_t) ( yInHex.i.hi >> 20 ) - 1023;
657              xInHex.i.lo = 0x0;
658              xInHex.i.hi = 0x7fe00000 - ( yInHex.i.hi & 0x7ff00000 );
659              yInHex.i.hi = i | 0x3ff00000;                // now 1.0 <= y < 2.0
660              if ( yInHex.i.hi & 0x00080000 ) 
661              {                                     // 1.5 <= y < 2.0
662                     nLong += 1;
663                     FPR_u = FPR_half;
664                     i = ( i >> 13 ) & 0x3f;               // table lookup index
665              }
666              else 
667              {                                     // 1.0 <= y < 1.5
668                     i = ( i >> 12 ) + 64;                 // table lookupndex
669                     // FPR_u = FPR_one; via initialization above
670              }
671              
672              nLong ^= 0x80000000; // flip sign bit
673              nInHex.i.lo = nLong; // store lower half
674              
675              pT = &(tablePointer[(int32_t)i]);
676              FPR_r = pT->X;					FPR_t = pT->G;
677            
678              FPR_y = yInHex.d;					
679              FPR_s = __FMSUB( FPR_u, FPR_y, FPR_r );		yLow = __FMUL( yLow, xInHex.d );
680              
681              z = __FMUL( FPR_s, FPR_t );			yLow = __FMUL( yLow, FPR_u );
682              FPR_u = __FNMSUB( z, FPR_r, FPR_s );		z2 = __FMUL( z, z );
683              FPR_u = FPR_u + yLow;
684              zTail = __FMUL( FPR_u, FPR_t);
685
686              if ( (uint32_t)nLong == 0x80000000u /* iff n == 0.0 */ ) 
687              {
688                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
689
690                     if ( FPR_y == FPR_one )
691                     {
692                            FESETENVD( FPR_env );
693                            if ( x != FPR_z ) 
694                            {
695                                   if ( __FABS( x ) <= largestDenorm )
696                                        __PROG_UF_INEXACT( largestDenorm );
697                                   else
698                                        __PROG_INEXACT( FPR_ln2 );
699                            }
700                            return x;
701                     }
702                     
703                     FPR_d8 = d8;				FPR_d6 = d6;
704                     
705                     FPR_d7 = d7;				FPR_d5 = d5;
706                     
707                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
708                     FPR_d4 = d4;				
709                     
710                     FPR_d3 = d3;
711                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
712
713                     FPR_d2 = d2;				
714                     FPR_t = pT->F.d;				__NOOP;
715                     
716                     temp1 = __FMADD( temp1, z, temp2 );	temp3 = z + FPR_t;
717                     
718                     temp2 = __FMADD( temp1, z, FPR_d2 );	FPR_u = FPR_t - temp3;
719                     
720                     FPR_s = FPR_u + z;				FPR_r = __FNMSUB( z, zTail, zTail );
721                     
722                     low = FPR_s + FPR_r;
723                     
724                     result = __FMADD( temp2, z2, low );
725              }
726              else if ( pT->F.i.hi != 0 ) 
727              {                                     // n != 0, and y not close to 1
728                     register double FPR_c2, FPR_c3, FPR_c4, FPR_c5, FPR_c6;
729                     
730                     FPR_c6 = c6;				FPR_c4 = c4;
731                     
732                     FPR_c5 = c5;				FPR_c3 = c3;
733                     
734                     temp3 = __FMADD( FPR_c6, z2, FPR_c4 );	temp2 = __FMADD( FPR_c5, z2, FPR_c3 );
735                     FPR_kConvDouble = kConvDouble.d;
736                     
737                     FPR_s = ln2Tail;
738                     n = nInHex.d; 				// float load double				
739                     n -= FPR_kConvDouble; 			// subtract magic value			
740                     
741                     __NOOP;				FPR_t = pT->F.d;
742                     low = __FMADD( n, FPR_s, zTail );		high = z + FPR_t;
743                     
744                     temp3 = __FMUL( temp3, z2 );		FPR_u = FPR_t - high;
745                     		
746                     temp2 = __FMADD( temp2, z, temp3 );	zTail = FPR_u + z;
747
748                     FPR_c2 = c2;
749                     temp2 = temp2 + FPR_c2;			temp1 = __FMADD( n, FPR_ln2, low );
750                     
751                     FPR_t = __FMSUB( n, FPR_ln2, temp1 );	temp3 = high + temp1;
752                     
753                     FPR_s = temp1 - temp3;			low = FPR_t + low;				
754                     
755                     temp1 = FPR_s + high;			FPR_r = __FMADD( temp2, z2, low );
756                     
757                     result = ( FPR_r + temp1 ) + zTail;
758              }
759              else 
760              {
761                     register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
762                    
763                     FPR_d8 = d8;				FPR_d6 = d6;
764                     
765                     FPR_d7 = d7;				FPR_d5 = d5;
766                     
767                     temp1 = __FMADD( FPR_d8, z2, FPR_d6 );	temp2 = __FMADD( FPR_d7, z2, FPR_d5);
768                     FPR_kConvDouble = kConvDouble.d;	
769                                          
770                     FPR_t = ln2Tail;
771                     n = nInHex.d; 				
772                     n -= FPR_kConvDouble;			
773
774                     low = __FMADD( n, FPR_t, zTail );		__NOOP;
775                     FPR_d2 = d2;
776                     
777                     FPR_d4 = d4;				FPR_d3 = d3;
778
779                     temp1 = __FMADD( temp1, z2, FPR_d4 );	temp2 = __FMADD( temp2, z2, FPR_d3 );
780                     temp3 = __FMADD( n, FPR_ln2, low );	__NOOP;
781                                          				
782                     temp1 = __FMADD( temp1, z, temp2 );	FPR_s = __FMSUB( n, FPR_ln2, temp3 );
783                     
784                     temp2 = __FMADD( temp1, z, FPR_d2 );	low = FPR_s + low;
785
786                     low = low + yLow;
787                     result = __FMADD( temp2, z2, low ) + z;
788              }
789
790              result += temp3;
791            
792              FESETENVD( FPR_env );
793              __PROG_INEXACT( FPR_ln2 );
794            
795              return result;
796       }
797       
798       OldEnvironment.d = FPR_env;
799       if ( x == FPR_negOne ) 
800       {
801              OldEnvironment.i.lo |= FE_DIVBYZERO;
802              x = -infinity.d;
803       }
804       else if ( x < FPR_negOne )
805       {
806              OldEnvironment.i.lo |= SET_INVALID;
807              x = nan ( LOGORITHMIC_NAN );
808       }
809       
810       FESETENVD_GRP( OldEnvironment.d );
811       return x;
812}
813
814#else /* BUILDING_FOR_CARBONCORE_LEGACY */
815
816/*******************************************************************************
817*      Floating-point constants.                                               *
818*******************************************************************************/
819
820static const double log2e          = 1.4426950408889634e+0;     // head of log2  of e { 0x3ff71547, 0x652b82fe }
821static const double log2eTail      = 2.0355273740931033e-17;    // tail of log2  of e { 0x3c7777d0, 0xffda0d24 }
822
823/*******************************************************************************
824*                                                                              *
825*    The base 2 logorithm function.  Caller�s rounding direction is honored.   *
826*                                                                              *
827*******************************************************************************/
828double log2 ( double x ) 
829{
830       hexdouble yInHex, xInHex, OldEnvironment;
831       register double n, zTail, low, z, z2, temp1, temp2, temp3, result;
832       register uint32_t i;
833       struct logTableEntry *tablePointer = ( struct logTableEntry * ) logTable;
834       
835       register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_twoTo128, FPR_ln2, FPR_kConvDouble;
836       register double FPR_r, FPR_s, FPR_t, FPR_u, FPR_y;
837       register struct logTableEntry *pT;
838       register int32_t nLong; // converted to double in two widely separated steps, avoiding LSU hazards
839       hexdouble nInHex;
840       
841       xInHex.d = x;						nInHex.i.hi = 0x43300000; // store upper half
842
843       FPR_z = 0.0;						FPR_half = 0.5;
844       FPR_one = 1.0;						FPR_u = 1.0;
845       FPR_twoTo128 = twoTo128;					FPR_ln2 = ln2;
846       
847       FEGETENVD( FPR_env );
848       __ENSURE( FPR_z, FPR_twoTo128, FPR_ln2 );		__ENSURE( FPR_u, FPR_half, FPR_one );		
849       FESETENVD( FPR_z );
850              
851       if ( FPR_z <= x && x < infinity.d ) 
852       {                                             // x is finite and non-negative
853              if ( x > largestDenorm )  
854              {                                      // x is normal
855                     i = xInHex.i.hi & 0x000fffff;
856                     yInHex.i.lo = xInHex.i.lo;
857                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
858                     if ( yInHex.i.hi & 0x00080000 ) 
859                     {                               // 1.5 <= y < 2.0
860                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1022);
861                            nLong ^= 0x80000000; // flip sign bit
862                            nInHex.i.lo = nLong; // store lower half
863                            i = ( i >> 13 ) & 0x3f;         // table lookup index
864                            FPR_u = FPR_half;
865                     }
866                     else 
867                     {                               // 1.0 <= y < 1.5
868                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1023);
869                            nLong ^= 0x80000000; // flip sign bit
870                            nInHex.i.lo = nLong; // store lower half
871                            i = ( i >> 12 ) + 64;           // table lookup index
872                            // FPR_u = FPR_one; via initialization above
873                     }
874              }
875              else if ( x != FPR_z ) 
876              {                                      // x is nonzero, denormal
877                     xInHex.d = __FMUL( x, FPR_twoTo128 );
878                    __NOOP;
879                    __NOOP;
880                    __NOOP;
881                     i = xInHex.i.hi & 0x000fffff;
882                     yInHex.i.lo = xInHex.i.lo;
883                     yInHex.i.hi = i | 0x3ff00000;          // now 1.0 <= y < 2.0
884                     if ( yInHex.i.hi & 0x00080000 ) 
885                     {                               // 1.5 <= y < 2.0
886                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1150);
887                            nLong ^= 0x80000000; // flip sign bit
888                            nInHex.i.lo = nLong; // store lower half
889                            i = ( i >> 13 ) & 0x3f;         // table lookup index
890                            FPR_u = FPR_half;
891                     }
892                     else 
893                     {                               // 1.0 <= y < 1.5
894                            nLong = ((int32_t) ( xInHex.i.hi >> 20 ) - 1151);
895                            nLong ^= 0x80000000; // flip sign bit
896                            nInHex.i.lo = nLong; // store lower half
897                            i = ( i >> 12 ) + 64;           // table lookup index
898                            // FPR_u = FPR_one; via initialization above
899                     }
900              }
901              else                                          // x is 0.0
902              {
903                     OldEnvironment.d = FPR_env;
904					 __NOOP;
905					 __NOOP;
906					 __NOOP;
907                     OldEnvironment.i.lo |= FE_DIVBYZERO;
908                     result = -infinity.d;
909                     FESETENVD_GRP( OldEnvironment.d );
910                     return result;
911              }
912              
913              {
914                register double FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
915
916                FPR_d8 = d8;					FPR_d6 = d6;
917                
918                FPR_d7 = d7;					FPR_d5 = d5;
919
920                pT = &(tablePointer[(int32_t)i]);
921                FPR_r = pT->X;					FPR_t = pT->G;
922              
923                FPR_y = yInHex.d;					
924                FPR_s = __FMSUB( FPR_u, FPR_y, FPR_r );		FPR_kConvDouble = kConvDouble.d;
925              
926                z = __FMUL( FPR_s, FPR_t );			
927                FPR_u = __FNMSUB( z, FPR_r, FPR_s );		z2 = __FMUL( z, z );
928                zTail = __FMUL( FPR_u, FPR_t);
929              
930                
931                temp1 = __FMADD( FPR_d8, z2, FPR_d6 );		temp2 = __FMADD( FPR_d7, z2, FPR_d5);
932                FPR_d4 = d4;				
933                
934                FPR_d3 = d3;
935                temp1 = __FMADD( temp1, z2, FPR_d4 );		temp2 = __FMADD( temp2, z2, FPR_d3 );
936    
937                FPR_d2 = d2;				
938                FPR_t = pT->F.d;				n = nInHex.d;
939                
940                temp1 = __FMADD( temp1, z, temp2 );		temp3 = z + FPR_t;
941                
942                temp2 = __FMADD( temp1, z, FPR_d2 );		FPR_u = FPR_t - temp3;
943                
944                FPR_s = FPR_u + z;				FPR_r = __FNMSUB( z, zTail, zTail );
945                
946                low = FPR_s + FPR_r;
947    
948                FPR_r = __FMADD( temp2, z2, low );
949                FPR_s = log2e;					FPR_t = log2eTail;
950                
951                temp1 = FPR_r + temp3;				n -= FPR_kConvDouble;
952                temp2 = temp3 - temp1 + FPR_r;
953                FPR_u = __FMUL( temp1, FPR_t );		
954                temp3 = __FMADD( temp2, FPR_s, FPR_u );
955              }
956              
957              if ( (uint32_t)nLong == 0x80000000u /* iff n == 0.0 */ ) 
958              {
959                     result =  __FMADD( temp1, FPR_s, temp3 );
960                     
961                     FESETENVD( FPR_env );
962                     if ( FPR_y != FPR_one )
963                        __PROG_INEXACT( FPR_ln2 );
964                        
965                     return result;
966              }
967              else 
968              {                                      // n != 0                     			
969                     result = __FMADD( temp1, FPR_s, n);
970                     FPR_t =  n - result;
971                     temp3 = __FMADD( temp1, FPR_s, FPR_t ) + temp3;
972                     result += temp3;
973                     
974                     FESETENVD( FPR_env );
975                     if ( FPR_y != FPR_one )
976                        __PROG_INEXACT( FPR_ln2 );
977                        
978                     return result;
979              }
980       }
981       
982       OldEnvironment.d = FPR_env;
983       if ( x == FPR_z )
984       {
985              OldEnvironment.i.lo |= FE_DIVBYZERO;
986              x = -infinity.d;
987       }
988       else if ( x < FPR_z )
989       {                                           
990              OldEnvironment.i.lo |= SET_INVALID;
991              x = nan ( LOGORITHMIC_NAN );
992       }
993              
994       FESETENVD_GRP( OldEnvironment.d );
995       return x;
996       
997}
998
999#endif /* BUILDING_FOR_CARBONCORE_LEGACY */