/* * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * The contents of this file constitute Original Code as defined in and * are subject to the Apple Public Source License Version 1.1 (the * "License"). You may not use this file except in compliance with the * License. Please obtain a copy of the License at * http://www.apple.com/publicsource and read it before using this file. * * This Original Code and all software distributed under the License are * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the * License for the specific language governing rights and limitations * under the License. * * @APPLE_LICENSE_HEADER_END@ */ /******************************************************************************* * * * File power.c, * * Function pow ( x, y ), * * Implementation of x^y for the PowerPC. * * * * W A R N I N G: For double precision IEEE machines with MAF * * instructions. * * * * Copyright © 1991-1997 Apple Computer, Inc. All rights reserved. * * * * Written by Ali Sazegari. * * * * June 2 1997 first port of the ibm/taligent power. * * * *******************************************************************************/ #include "math.h" #include "fenv.h" #include "fp_private.h" #include "fenv_private.h" #define POWER_NAN "37" #define SET_INVALID 0x01000000 /******************************************************************************* * Floating-point constants. * *******************************************************************************/ static const double ln2 = 6.931471805599452862e-1; static const double ln2Tail = 2.319046813846299558e-17; static const double kRint = 6.7553994410557440000e+15; // 0x43380000, 0x00000000 static const double maxExp = 7.0978271289338397000e+02; // 0x40862e42, 0xfefa39ef static const double min2Exp = -7.4513321910194122000e+02; // 0xc0874910, 0xd52d3052 static const double denormal = 2.9387358770557188000e-39; // 0x37f00000, 0x00000000 static const double twoTo128 = 0x1.0p+128; // 3.402823669209384635e+38; static const double twoTo52 = 0x1.0p+52; // 4503599627370496.0; static const double oneOverLn2 = 1.4426950408889633000e+00; // 0x3ff71547, 0x652b82fe static const hexdouble huge = HEXDOUBLE(0x7ff00000, 0x00000000); /******************************************************************************* * Approximation coefficients for high precision exp and log. * *******************************************************************************/ static const double c2 = -0.5000000000000000042725; static const double c3 = 0.3333333333296328456505; static const double c4 = -0.2499999999949632195759; static const double c5 = 0.2000014541571685319141; static const double c6 = -0.1666681511199968257150; static const double d2 = -0.5000000000000000000000; // bfe0000000000000 static const double d3 = 0.3333333333333360968212; // 3FD5555555555587 static const double d4 = -0.2500000000000056849516; // BFD0000000000066 static const double d5 = 0.1999999996377879912068; // 3FC9999998D278CB static const double d6 = -0.1666666662009609743117; // BFC5555554554F15 static const double d7 = 0.1428690115662276259345; // 3FC249882224F72F static const double d8 = -0.1250122079043555957999; // BFC0006668466517 static const double cc4 = 0.5000000000000000000; static const double cc3 = 0.16666666666663809522820; static const double cc2 = 0.04166666666666111110939; static const double cc1 = 0.008333338095239329810170; static const double cc0 = 0.001388889583333492938381; /******************************************************************************* * Log and exp table entries definition. * *******************************************************************************/ extern uint32_t logTable[]; extern uint32_t expTable[]; struct logTableEntry { double X; double G; hexdouble F; }; struct expTableEntry { double x; double f; }; static double PowerInner ( double, double, hexdouble ); /******************************************************************************* * Private function _NearbyInt. * *******************************************************************************/ // If x is integral, returns x. // If x is not an integer, returns either floor(x) or ceil(x) (i.e. some vaule // different than x). On this basis, _NearbyInt can be used to detect integral x. // Arithmetic could raise INEXACT, so protect callers flags. static inline double _NearbyInt ( double x ) __attribute__((always_inline)); static inline double _NearbyInt ( double x ) { register double result, FPR_env, FPR_absx, FPR_Two52, FPR_z; FPR_absx = __FABS( x ); FPR_z = 0.0; FPR_Two52 = twoTo52; FEGETENVD( FPR_env ); __ENSURE( FPR_z, FPR_Two52, FPR_z ); if (likely( FPR_absx < FPR_Two52 )) // |x| < 2.0^52 { FESETENVD( FPR_z ); if ( x < FPR_z ) result = ( ( x - FPR_Two52 ) + FPR_Two52 ); else result = ( ( x + FPR_Two52 ) - FPR_Two52 ); FESETENVD( FPR_env ); return result; } else // |x| >= 2.0^52 return ( x ); } double pow ( double base, double exponent ) { register int isExpOddInt; double absBase, result; hexdouble u, v, OldEnvironment; register uint32_t expExp; register uint32_t GPR_mant, GPR_exp; register double FPR_z, FPR_half, FPR_one, FPR_t; v.d = exponent; u.d = base; GPR_mant = 0x000fffffu; GPR_exp = 0x7ff00000u; FPR_z = 0.0; FPR_one = 1.0; __ENSURE( FPR_z, FPR_z, FPR_one ); if ( ( ( v.i.hi & GPR_mant ) | v.i.lo ) == 0 ) { // exponent is power of 2 (or +-Inf) expExp = v.i.hi & 0xfff00000u; if ( expExp == 0x40000000 ) return base * base; // if exponent == 2.0 else if ( exponent == FPR_z ) return FPR_one; // if exponent == 0.0 else if ( expExp == 0x3ff00000 ) return base; // if exponent == 1.0 else if ( expExp == 0xbff00000 ) return FPR_one/base; // if exponent == -1.0 } if ( ( v.i.hi & GPR_exp ) < GPR_exp ) { // exponent is finite if ( ( u.i.hi & GPR_exp ) < GPR_exp ) { // base is finite if ( base > FPR_z ) return PowerInner( base, exponent, u ); // base is positive else if ( base < FPR_z ) { if ( _NearbyInt ( exponent ) != exponent ) // exponent is non-integer { FEGETENVD_GRP( OldEnvironment.d ); OldEnvironment.i.lo |= SET_INVALID; result = nan ( POWER_NAN ); FESETENVD_GRP( OldEnvironment.d ); return result; } FPR_half = 0.5; u.i.hi ^= 0x80000000; result = PowerInner( -base, exponent, u ); FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX if ( _NearbyInt ( FPR_t ) != FPR_t ) // exponent is odd result = - result; return ( result ); } else { // base is 0.0: if ( _NearbyInt ( exponent ) == exponent ) // exponent is integral { FPR_half = 0.5; FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX isExpOddInt = ( _NearbyInt ( FPR_t ) != FPR_t ); } else isExpOddInt = 0; if ( exponent > FPR_z ) return ( ( isExpOddInt ) ? base : FPR_z ); else // exponent < 0.0 return ( ( isExpOddInt ) ? FPR_one/base : FPR_one/__FABS( base ) ); } } else if (base != base) return base; else { // base == +-Inf if ( base > FPR_z ) return ( exponent > FPR_z ) ? huge.d : FPR_z; else { if ( _NearbyInt ( exponent ) == exponent ) // exponent is integral { FPR_half = 0.5; FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX isExpOddInt = ( _NearbyInt ( FPR_t ) != FPR_t ); } else isExpOddInt = 0; return ( exponent > 0 ) ? ( isExpOddInt ? -huge.d : +huge.d ) : ( isExpOddInt ? -FPR_z : FPR_z ); } } } else if ( exponent != exponent ) return (base == 1.0 ? base : base + exponent); else { // exponent is +-Inf if ( base != base ) return base; absBase = __FABS( base ); if ( ( exponent > FPR_z && absBase > FPR_one ) || ( exponent < FPR_z && absBase < FPR_one ) ) return huge.d; else if ( ( exponent > FPR_z && absBase < FPR_one ) || ( exponent < FPR_z && absBase > FPR_one ) ) return FPR_z; else // now: Abs( base ) == 1.0 return FPR_one; } } // Called when our environment is installed (and callers is stashed safely aside). static inline double _NearbyIntDfltEnv ( double x ) __attribute__((always_inline)); static inline double _NearbyIntDfltEnv ( double x ) { register double result, FPR_absx, FPR_Two52, FPR_z; FPR_absx = __FABS( x ); FPR_z = 0.0; FPR_Two52 = twoTo52; __ENSURE( FPR_z, FPR_Two52, FPR_z ); if (likely( FPR_absx < FPR_Two52 )) // |x| < 2.0^52 { if ( x < FPR_z ) result = ( ( x - FPR_Two52 ) + FPR_Two52 ); else result = ( ( x + FPR_Two52 ) - FPR_Two52 ); return result; } else // |x| >= 2.0^52 return ( x ); } static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308; static const double kMaxNormal = 1.7976931348623157e308; static const hexdouble kConvULDouble = HEXDOUBLE(0x43300000, 0x00000000); // for *unsigned* to double static double PowerInner ( double base, double exponent, hexdouble u ) { hexdouble dInHex, scale, exp; register int32_t i; register int n; register double z, zLow, high, low, zSquared, temp1, temp2, temp3, result, tail, d, x, xLow, y, yLow, power; struct logTableEntry *logTablePointer = ( struct logTableEntry * ) logTable; struct expTableEntry *expTablePointer = ( struct expTableEntry * ) expTable + 177; register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_512, FPR_t, FPR_G, FPR_X, FPR_ud; register double FPR_q, FPR_kConvDouble; register struct logTableEntry *pT; register struct expTableEntry *pE; FPR_z = 0.0; FPR_one = 1.0; FPR_half = 0.5; FPR_512 = 512.0; FPR_kConvDouble = kConvULDouble.d; dInHex.i.hi = 0x43300000; // store upper half if (unlikely( base == FPR_one )) return FPR_one; if (likely( u.i.hi >= 0x000fffffu )) // normal case { FPR_q = 1023.0; } else { // denormal case u.d = __FMUL( base, twoTo128 ); // scale to normal range FPR_q = 1151.0; __NOOP; __NOOP; } dInHex.i.lo = u.i.hi >> 20; // store lower half i = u.i.hi & 0x000fffff; u.i.hi = i | 0x3ff00000; FEGETENVD( FPR_env); // save environment, set default __ENSURE( FPR_z, FPR_half, FPR_512 ); __ENSURE( FPR_z, FPR_half, FPR_one ); FESETENVD( FPR_z ); // Handle exact integral powers and roots of two specially if ( 0 == (( u.i.hi & 0x000fffff ) | u.i.lo) ) // base is power of 2 { d = dInHex.d; // float load double d -= FPR_kConvDouble; // subtract magic value d -= FPR_q; z = __FMUL( d, exponent ); if ( z == _NearbyIntDfltEnv( z ) ) { // Clip z so conversion to int is safe if (z > 2097.0) n = 2098; else if (z < -2098.0) n = -2099; else { n = (int) z; // The common case. Emits fctiwz, stfd } FESETENVD( FPR_env ); return scalbn( FPR_one, n ); // lwz "n" -- make sure this is on cycle following stfd! } } if ( u.i.hi & 0x00080000 ) { // 1.5 <= x < 2.0 n = 1; i = ( i >> 13 ) & 0x3f; // table lookup index FPR_ud = __FMUL( u.d, FPR_half ); u.d = FPR_ud; } else { // 1.0 <= x < 1.5 n = 0; i = ( i >> 12 ) + 64; // table lookup index FPR_ud = u.d; } pT = &(logTablePointer[i]); FPR_X = pT->X; FPR_G = pT->G; FPR_t = FPR_ud - FPR_X; z = __FMUL( FPR_t, FPR_G ); zSquared = __FMUL( z, z ); FPR_t = __FNMSUB( z, FPR_X, FPR_t ); zLow = __FMUL( FPR_t, FPR_G ); if ( n == 0 ) { // n = 0 register double FPR_Fd, FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8; FPR_d8 = d8; FPR_d6 = d6; FPR_d7 = d7; FPR_d5 = d5; FPR_Fd = pT->F.d; temp1 = __FMADD( FPR_d8, zSquared, FPR_d6); temp2 = __FMADD( FPR_d7, zSquared, FPR_d5); FPR_d4 = d4; FPR_d3 = d3; temp3 = z + FPR_Fd; FPR_t = __FNMSUB( z, zLow, zLow ); temp1 = __FMADD( temp1, zSquared, FPR_d4); temp2 = __FMADD( temp2, zSquared, FPR_d3); FPR_d2 = d2; temp1 = __FMADD( temp1, z, temp2); low = FPR_Fd - temp3 + z + FPR_t; temp2 = __FMADD( temp1, z, FPR_d2); FPR_t = __FMADD( temp2, zSquared, low ); result = FPR_t + temp3; tail = temp3 - result + FPR_t; } else if ( pT->F.i.hi != 0 ) { // n = 1 register double FPR_Fd, FPR_ln2, FPR_ln2Tail, FPR_c2, FPR_c3, FPR_c4, FPR_c5, FPR_c6; FPR_c6 = c6; FPR_c4 = c4; FPR_c5 = c5; FPR_c3 = c3; temp3 = __FMADD( FPR_c6, zSquared, FPR_c4 ); temp2 = __FMADD( FPR_c5, zSquared, FPR_c3); FPR_ln2Tail = ln2Tail; FPR_ln2 = ln2; FPR_Fd = pT->F.d; low = FPR_ln2Tail + zLow; high = z + FPR_Fd; temp3 = __FMUL( temp3, zSquared ); FPR_c2 = c2; zLow = FPR_Fd - high + z; temp1 = FPR_ln2 + low; temp2 = __FMADD( temp2, z, temp3 ) + FPR_c2; low = ( FPR_ln2 - temp1 ) + low; temp3 = high + temp1; temp1 = ( temp1 - temp3 ) + high; FPR_t = __FMADD( temp2, zSquared, low ); result = ( ( FPR_t + temp1 ) + zLow ) + temp3; tail = temp3 - result + zLow + temp1 + FPR_t; } else { // n = 1. register double FPR_ln2, FPR_ln2Tail, FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8; FPR_d8 = d8; FPR_d6 = d6; FPR_d7 = d7; FPR_d5 = d5; temp1 = __FMADD( FPR_d8, zSquared, FPR_d6); temp2 = __FMADD( FPR_d7, zSquared, FPR_d5); FPR_ln2Tail = ln2Tail; FPR_ln2 = ln2; low = FPR_ln2Tail + zLow; __NOOP; FPR_d4 = d4; FPR_d3 = d3; temp1 = __FMADD( temp1, zSquared, FPR_d4); temp2 = __FMADD( temp2, zSquared, FPR_d3); FPR_d2 = d2; temp1 = __FMADD( temp1, z, temp2); temp3 = FPR_ln2 + low; temp2 = __FMADD( temp1, z, FPR_d2); low = ( FPR_ln2 - temp3 ) + low; FPR_t = __FMADD( temp2, zSquared, low ); result = ( FPR_t + z ) + temp3; tail = temp3 - result + z + FPR_t; } { register double FPR_ln2, FPR_ln2Tail; d = dInHex.d; // float load double d -= FPR_kConvDouble; // subtract magic value d -= FPR_q; FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail; temp1 = __FMUL( d, FPR_ln2 ); temp2 = __FMSUB( d, FPR_ln2, temp1 ); z = temp1 + result; FPR_t = tail + temp2; zLow = temp1 - z + result + __FMADD(d, FPR_ln2Tail, FPR_t); temp3 = exponent * zLow; x = __FMADD( exponent, z, temp3 ); xLow = __FMSUB( exponent, z, x ) + temp3; } if ( __FABS( x ) < FPR_512 ) { // abs( x ) < 512 register double FPR_ln2, FPR_ln2Tail, FPR_ln2Inv, FPR_kRint, FPR_f; register double FPR_cc0, FPR_cc1, FPR_cc2, FPR_cc3, FPR_cc4; FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail; FPR_ln2Inv = oneOverLn2; FPR_kRint = kRint; FPR_t = __FMADD( x, FPR_ln2Inv, FPR_kRint ); exp.d = FPR_t; FPR_t -= FPR_kRint; y = __FNMSUB( FPR_ln2, FPR_t, x); yLow = __FMUL( FPR_ln2Tail, FPR_t ); u.d = __FMADD( FPR_512, y, FPR_kRint ); scale.i.lo = 0; scale.i.hi = ( exp.i.lo + 1023 ) << 20; FPR_cc0 = cc0; FPR_cc2 = cc2; FPR_cc1 = cc1; FPR_cc3 = cc3; FPR_cc4 = cc4; i = u.i.lo; pE = &(expTablePointer[(int32_t)i]); d = y - pE->x; z = d - yLow; zSquared = __FMUL( z, z ); zLow = d - z - yLow + xLow; FPR_t = scale.d; FPR_f = pE->f; temp1 = __FMADD( FPR_cc0, zSquared, FPR_cc2 ); temp2 = __FMADD( FPR_cc1, zSquared, FPR_cc3 ); temp1 = __FMADD( temp1, zSquared, FPR_cc4 ); temp2 = __FMADD( temp2, z, temp1 ); temp1 = __FMADD( temp2, zSquared, z ); result = __FMUL( FPR_t, FPR_f ); temp2 = __FMUL( result , ( temp1 + ( zLow + zLow * temp1 ) )); // __FMUL thwarts gcc MAF result += temp2; if ( exponent > FPR_z && base == _NearbyIntDfltEnv(base) && exponent == _NearbyIntDfltEnv(exponent)) { FESETENVD( FPR_env ); /* NOTHING: +ve integral base and +ve integral exponent deliver exact result */ } else { FESETENVD( FPR_env ); __PROG_INEXACT ( FPR_ln2 ); } return result; } if ( ( x <= maxExp ) && ( x > min2Exp ) ) { register double FPR_ln2, FPR_ln2Tail, FPR_ln2Inv, FPR_kRint, FPR_f; register double FPR_cc0, FPR_cc1, FPR_cc2, FPR_cc3, FPR_cc4; FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail; FPR_ln2Inv = oneOverLn2; FPR_kRint = kRint; FPR_t = __FMADD( x, FPR_ln2Inv, FPR_kRint ); exp.d = FPR_t; if ( x >= FPR_512 ) { power = 2.0; scale.i.lo = 0; scale.i.hi = ( exp.i.lo + 1023 - 1 ) << 20; } else { power = denormal; scale.i.lo = 0; scale.i.hi = ( exp.i.lo + 1023 + 128 ) << 20; } FPR_t -= FPR_kRint; exp.d = FPR_t; y = __FNMSUB( FPR_ln2, FPR_t, x); yLow = __FMUL( FPR_ln2Tail, FPR_t ); u.d = __FMADD( FPR_512, y, FPR_kRint ); FPR_cc0 = cc0; FPR_cc2 = cc2; FPR_cc1 = cc1; FPR_cc3 = cc3; FPR_cc4 = cc4; i = u.i.lo; pE = &(expTablePointer[(int32_t)i]); d = y - pE->x; z = d - yLow; zSquared = __FMUL( z, z ); zLow = d - z - yLow + xLow; FPR_t = scale.d; FPR_f = pE->f; temp1 = __FMADD( FPR_cc0, zSquared, FPR_cc2 ); temp2 = __FMADD( FPR_cc1, zSquared, FPR_cc3 ); temp1 = __FMADD( temp1, zSquared, FPR_cc4 ); temp2 = __FMADD( temp2, z, temp1 ); temp1 = __FMADD( temp2, zSquared, z ); result = __FMUL( FPR_t, FPR_f ); temp2 = __FMUL( result , ( temp1 + ( zLow + zLow * temp1 ) ) ); // __FMUL thwarts gcc MAF result = ( result + temp2 ) * power; } else if ( x > maxExp ) result = huge.d; else result = FPR_z; FESETENVD( FPR_env ); if ( __FABS( result ) == huge.d ) __PROG_OF_INEXACT( kMaxNormal ); // Inf: set overflow/inexact flag else if ( result == FPR_z ) __PROG_UF_INEXACT( kMinNormal ); // zero: set underflow/inexact flag else __PROG_INEXACT ( ln2 ); return result; }