this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* File power.c, *
25* Function pow ( x, y ), *
26* Implementation of x^y for the PowerPC. *
27* *
28* W A R N I N G: For double precision IEEE machines with MAF *
29* instructions. *
30* *
31* Copyright � 1991-1997 Apple Computer, Inc. All rights reserved. *
32* *
33* Written by Ali Sazegari. *
34* *
35* June 2 1997 first port of the ibm/taligent power. *
36* *
37*******************************************************************************/
38
39#include "math.h"
40#include "fenv.h"
41#include "fp_private.h"
42#include "fenv_private.h"
43
44#define POWER_NAN "37"
45#define SET_INVALID 0x01000000
46
47/*******************************************************************************
48* Floating-point constants. *
49*******************************************************************************/
50
51static const double ln2 = 6.931471805599452862e-1;
52static const double ln2Tail = 2.319046813846299558e-17;
53static const double kRint = 6.7553994410557440000e+15; // 0x43380000, 0x00000000
54static const double maxExp = 7.0978271289338397000e+02; // 0x40862e42, 0xfefa39ef
55static const double min2Exp = -7.4513321910194122000e+02; // 0xc0874910, 0xd52d3052
56static const double denormal = 2.9387358770557188000e-39; // 0x37f00000, 0x00000000
57static const double twoTo128 = 0x1.0p+128; // 3.402823669209384635e+38;
58static const double twoTo52 = 0x1.0p+52; // 4503599627370496.0;
59static const double oneOverLn2 = 1.4426950408889633000e+00; // 0x3ff71547, 0x652b82fe
60static const hexdouble huge = HEXDOUBLE(0x7ff00000, 0x00000000);
61
62/*******************************************************************************
63* Approximation coefficients for high precision exp and log. *
64*******************************************************************************/
65
66static const double c2 = -0.5000000000000000042725;
67static const double c3 = 0.3333333333296328456505;
68static const double c4 = -0.2499999999949632195759;
69static const double c5 = 0.2000014541571685319141;
70static const double c6 = -0.1666681511199968257150;
71
72static const double d2 = -0.5000000000000000000000; // bfe0000000000000
73static const double d3 = 0.3333333333333360968212; // 3FD5555555555587
74static const double d4 = -0.2500000000000056849516; // BFD0000000000066
75static const double d5 = 0.1999999996377879912068; // 3FC9999998D278CB
76static const double d6 = -0.1666666662009609743117; // BFC5555554554F15
77static const double d7 = 0.1428690115662276259345; // 3FC249882224F72F
78static const double d8 = -0.1250122079043555957999; // BFC0006668466517
79
80static const double cc4 = 0.5000000000000000000;
81static const double cc3 = 0.16666666666663809522820;
82static const double cc2 = 0.04166666666666111110939;
83static const double cc1 = 0.008333338095239329810170;
84static const double cc0 = 0.001388889583333492938381;
85
86/*******************************************************************************
87* Log and exp table entries definition. *
88*******************************************************************************/
89
90extern uint32_t logTable[];
91extern uint32_t expTable[];
92
93struct logTableEntry
94 {
95 double X;
96 double G;
97 hexdouble F;
98 };
99
100struct expTableEntry
101 {
102 double x;
103 double f;
104 };
105
106static double PowerInner ( double, double, hexdouble );
107
108/*******************************************************************************
109* Private function _NearbyInt. *
110*******************************************************************************/
111
112// If x is integral, returns x.
113// If x is not an integer, returns either floor(x) or ceil(x) (i.e. some vaule
114// different than x). On this basis, _NearbyInt can be used to detect integral x.
115// Arithmetic could raise INEXACT, so protect callers flags.
116
117static inline double _NearbyInt ( double x ) __attribute__((always_inline));
118static inline double _NearbyInt ( double x )
119{
120 register double result, FPR_env, FPR_absx, FPR_Two52, FPR_z;
121
122 FPR_absx = __FABS( x );
123 FPR_z = 0.0; FPR_Two52 = twoTo52;
124
125 FEGETENVD( FPR_env );
126 __ENSURE( FPR_z, FPR_Two52, FPR_z );
127
128 if (likely( FPR_absx < FPR_Two52 )) // |x| < 2.0^52
129 {
130 FESETENVD( FPR_z );
131 if ( x < FPR_z )
132 result = ( ( x - FPR_Two52 ) + FPR_Two52 );
133 else
134 result = ( ( x + FPR_Two52 ) - FPR_Two52 );
135 FESETENVD( FPR_env );
136 return result;
137 }
138 else // |x| >= 2.0^52
139 return ( x );
140}
141
142double pow ( double base, double exponent )
143{
144 register int isExpOddInt;
145 double absBase, result;
146 hexdouble u, v, OldEnvironment;
147 register uint32_t expExp;
148
149 register uint32_t GPR_mant, GPR_exp;
150 register double FPR_z, FPR_half, FPR_one, FPR_t;
151
152 v.d = exponent; u.d = base;
153 GPR_mant = 0x000fffffu; GPR_exp = 0x7ff00000u;
154 FPR_z = 0.0; FPR_one = 1.0;
155 __ENSURE( FPR_z, FPR_z, FPR_one );
156
157 if ( ( ( v.i.hi & GPR_mant ) | v.i.lo ) == 0 )
158 { // exponent is power of 2 (or +-Inf)
159 expExp = v.i.hi & 0xfff00000u;
160 if ( expExp == 0x40000000 )
161 return base * base; // if exponent == 2.0
162 else if ( exponent == FPR_z )
163 return FPR_one; // if exponent == 0.0
164 else if ( expExp == 0x3ff00000 )
165 return base; // if exponent == 1.0
166 else if ( expExp == 0xbff00000 )
167 return FPR_one/base; // if exponent == -1.0
168 }
169
170 if ( ( v.i.hi & GPR_exp ) < GPR_exp )
171 { // exponent is finite
172 if ( ( u.i.hi & GPR_exp ) < GPR_exp )
173 { // base is finite
174 if ( base > FPR_z )
175 return PowerInner( base, exponent, u ); // base is positive
176 else if ( base < FPR_z )
177 {
178 if ( _NearbyInt ( exponent ) != exponent ) // exponent is non-integer
179 {
180 FEGETENVD_GRP( OldEnvironment.d );
181 OldEnvironment.i.lo |= SET_INVALID;
182 result = nan ( POWER_NAN );
183 FESETENVD_GRP( OldEnvironment.d );
184 return result;
185 }
186
187 FPR_half = 0.5;
188 u.i.hi ^= 0x80000000;
189 result = PowerInner( -base, exponent, u );
190 FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX
191 if ( _NearbyInt ( FPR_t ) != FPR_t ) // exponent is odd
192 result = - result;
193 return ( result );
194 }
195 else
196 { // base is 0.0:
197 if ( _NearbyInt ( exponent ) == exponent ) // exponent is integral
198 {
199 FPR_half = 0.5;
200 FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX
201 isExpOddInt = ( _NearbyInt ( FPR_t ) != FPR_t );
202 }
203 else
204 isExpOddInt = 0;
205
206 if ( exponent > FPR_z )
207 return ( ( isExpOddInt ) ? base : FPR_z );
208 else // exponent < 0.0
209 return ( ( isExpOddInt ) ? FPR_one/base : FPR_one/__FABS( base ) );
210 }
211 }
212 else if (base != base)
213 return base;
214 else
215 { // base == +-Inf
216 if ( base > FPR_z )
217 return ( exponent > FPR_z ) ? huge.d : FPR_z;
218 else
219 {
220 if ( _NearbyInt ( exponent ) == exponent ) // exponent is integral
221 {
222 FPR_half = 0.5;
223 FPR_t = FPR_half * exponent; // XXX undeserved UF INEXACT for tiny exponent XXX
224 isExpOddInt = ( _NearbyInt ( FPR_t ) != FPR_t );
225 }
226 else
227 isExpOddInt = 0;
228 return ( exponent > 0 ) ?
229 ( isExpOddInt ? -huge.d : +huge.d ) : ( isExpOddInt ? -FPR_z : FPR_z );
230 }
231 }
232 }
233 else if ( exponent != exponent )
234 return (base == 1.0 ? base : base + exponent);
235 else
236 { // exponent is +-Inf
237 if ( base != base )
238 return base;
239 absBase = __FABS( base );
240 if ( ( exponent > FPR_z && absBase > FPR_one ) || ( exponent < FPR_z && absBase < FPR_one ) )
241 return huge.d;
242 else if ( ( exponent > FPR_z && absBase < FPR_one ) || ( exponent < FPR_z && absBase > FPR_one ) )
243 return FPR_z;
244 else // now: Abs( base ) == 1.0
245 return FPR_one;
246 }
247}
248
249// Called when our environment is installed (and callers is stashed safely aside).
250static inline double _NearbyIntDfltEnv ( double x ) __attribute__((always_inline));
251static inline double _NearbyIntDfltEnv ( double x )
252{
253 register double result, FPR_absx, FPR_Two52, FPR_z;
254
255 FPR_absx = __FABS( x );
256 FPR_z = 0.0; FPR_Two52 = twoTo52;
257
258 __ENSURE( FPR_z, FPR_Two52, FPR_z );
259
260 if (likely( FPR_absx < FPR_Two52 )) // |x| < 2.0^52
261 {
262 if ( x < FPR_z )
263 result = ( ( x - FPR_Two52 ) + FPR_Two52 );
264 else
265 result = ( ( x + FPR_Two52 ) - FPR_Two52 );
266 return result;
267 }
268 else // |x| >= 2.0^52
269 return ( x );
270}
271
272static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308;
273static const double kMaxNormal = 1.7976931348623157e308;
274static const hexdouble kConvULDouble = HEXDOUBLE(0x43300000, 0x00000000); // for *unsigned* to double
275
276static double PowerInner ( double base, double exponent, hexdouble u )
277{
278 hexdouble dInHex, scale, exp;
279 register int32_t i;
280 register int n;
281 register double z, zLow, high, low, zSquared, temp1, temp2, temp3, result, tail,
282 d, x, xLow, y, yLow, power;
283 struct logTableEntry *logTablePointer = ( struct logTableEntry * ) logTable;
284 struct expTableEntry *expTablePointer = ( struct expTableEntry * ) expTable + 177;
285
286 register double FPR_env, FPR_z, FPR_half, FPR_one, FPR_512, FPR_t, FPR_G, FPR_X, FPR_ud;
287 register double FPR_q, FPR_kConvDouble;
288 register struct logTableEntry *pT;
289 register struct expTableEntry *pE;
290
291 FPR_z = 0.0; FPR_one = 1.0;
292 FPR_half = 0.5; FPR_512 = 512.0;
293 FPR_kConvDouble = kConvULDouble.d; dInHex.i.hi = 0x43300000; // store upper half
294
295 if (unlikely( base == FPR_one ))
296 return FPR_one;
297
298 if (likely( u.i.hi >= 0x000fffffu )) // normal case
299 {
300 FPR_q = 1023.0;
301 }
302 else
303 { // denormal case
304 u.d = __FMUL( base, twoTo128 ); // scale to normal range
305 FPR_q = 1151.0;
306 __NOOP;
307 __NOOP;
308 }
309
310 dInHex.i.lo = u.i.hi >> 20; // store lower half
311
312 i = u.i.hi & 0x000fffff;
313 u.i.hi = i | 0x3ff00000;
314
315 FEGETENVD( FPR_env); // save environment, set default
316 __ENSURE( FPR_z, FPR_half, FPR_512 ); __ENSURE( FPR_z, FPR_half, FPR_one );
317 FESETENVD( FPR_z );
318
319 // Handle exact integral powers and roots of two specially
320 if ( 0 == (( u.i.hi & 0x000fffff ) | u.i.lo) ) // base is power of 2
321 {
322 d = dInHex.d; // float load double
323 d -= FPR_kConvDouble; // subtract magic value
324 d -= FPR_q;
325
326 z = __FMUL( d, exponent );
327 if ( z == _NearbyIntDfltEnv( z ) )
328 {
329 // Clip z so conversion to int is safe
330 if (z > 2097.0)
331 n = 2098;
332 else if (z < -2098.0)
333 n = -2099;
334 else
335 {
336 n = (int) z; // The common case. Emits fctiwz, stfd
337 }
338
339 FESETENVD( FPR_env );
340 return scalbn( FPR_one, n ); // lwz "n" -- make sure this is on cycle following stfd!
341 }
342 }
343
344 if ( u.i.hi & 0x00080000 )
345 { // 1.5 <= x < 2.0
346 n = 1;
347 i = ( i >> 13 ) & 0x3f; // table lookup index
348 FPR_ud = __FMUL( u.d, FPR_half );
349 u.d = FPR_ud;
350 }
351 else
352 { // 1.0 <= x < 1.5
353 n = 0;
354 i = ( i >> 12 ) + 64; // table lookup index
355 FPR_ud = u.d;
356 }
357
358 pT = &(logTablePointer[i]);
359
360 FPR_X = pT->X; FPR_G = pT->G;
361 FPR_t = FPR_ud - FPR_X;
362
363 z = __FMUL( FPR_t, FPR_G );
364
365 zSquared = __FMUL( z, z ); FPR_t = __FNMSUB( z, FPR_X, FPR_t );
366 zLow = __FMUL( FPR_t, FPR_G );
367
368
369 if ( n == 0 )
370 { // n = 0
371 register double FPR_Fd, FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
372
373 FPR_d8 = d8; FPR_d6 = d6;
374
375 FPR_d7 = d7; FPR_d5 = d5;
376
377 FPR_Fd = pT->F.d;
378
379 temp1 = __FMADD( FPR_d8, zSquared, FPR_d6); temp2 = __FMADD( FPR_d7, zSquared, FPR_d5);
380 FPR_d4 = d4;
381
382 FPR_d3 = d3;
383 temp3 = z + FPR_Fd; FPR_t = __FNMSUB( z, zLow, zLow );
384
385 temp1 = __FMADD( temp1, zSquared, FPR_d4); temp2 = __FMADD( temp2, zSquared, FPR_d3);
386 FPR_d2 = d2;
387
388 temp1 = __FMADD( temp1, z, temp2); low = FPR_Fd - temp3 + z + FPR_t;
389
390 temp2 = __FMADD( temp1, z, FPR_d2);
391
392 FPR_t = __FMADD( temp2, zSquared, low );
393
394 result = FPR_t + temp3;
395
396 tail = temp3 - result + FPR_t;
397 }
398 else if ( pT->F.i.hi != 0 )
399 { // n = 1
400 register double FPR_Fd, FPR_ln2, FPR_ln2Tail, FPR_c2, FPR_c3, FPR_c4, FPR_c5, FPR_c6;
401
402 FPR_c6 = c6; FPR_c4 = c4;
403
404 FPR_c5 = c5; FPR_c3 = c3;
405
406 temp3 = __FMADD( FPR_c6, zSquared, FPR_c4 ); temp2 = __FMADD( FPR_c5, zSquared, FPR_c3);
407 FPR_ln2Tail = ln2Tail;
408
409 FPR_ln2 = ln2; FPR_Fd = pT->F.d;
410 low = FPR_ln2Tail + zLow;
411
412 high = z + FPR_Fd; temp3 = __FMUL( temp3, zSquared );
413 FPR_c2 = c2;
414
415 zLow = FPR_Fd - high + z; temp1 = FPR_ln2 + low;
416
417 temp2 = __FMADD( temp2, z, temp3 ) + FPR_c2; low = ( FPR_ln2 - temp1 ) + low;
418
419 temp3 = high + temp1;
420
421 temp1 = ( temp1 - temp3 ) + high;
422
423 FPR_t = __FMADD( temp2, zSquared, low );
424
425 result = ( ( FPR_t + temp1 ) + zLow ) + temp3;
426
427 tail = temp3 - result + zLow + temp1 + FPR_t;
428 }
429 else
430 { // n = 1.
431 register double FPR_ln2, FPR_ln2Tail, FPR_d2, FPR_d3, FPR_d4, FPR_d5, FPR_d6, FPR_d7, FPR_d8;
432
433 FPR_d8 = d8; FPR_d6 = d6;
434
435 FPR_d7 = d7; FPR_d5 = d5;
436
437 temp1 = __FMADD( FPR_d8, zSquared, FPR_d6); temp2 = __FMADD( FPR_d7, zSquared, FPR_d5);
438 FPR_ln2Tail = ln2Tail;
439
440 FPR_ln2 = ln2;
441 low = FPR_ln2Tail + zLow; __NOOP;
442
443 FPR_d4 = d4; FPR_d3 = d3;
444
445 temp1 = __FMADD( temp1, zSquared, FPR_d4); temp2 = __FMADD( temp2, zSquared, FPR_d3);
446 FPR_d2 = d2;
447
448 temp1 = __FMADD( temp1, z, temp2); temp3 = FPR_ln2 + low;
449
450 temp2 = __FMADD( temp1, z, FPR_d2); low = ( FPR_ln2 - temp3 ) + low;
451
452 FPR_t = __FMADD( temp2, zSquared, low );
453
454 result = ( FPR_t + z ) + temp3;
455
456 tail = temp3 - result + z + FPR_t;
457 }
458
459 {
460 register double FPR_ln2, FPR_ln2Tail;
461
462 d = dInHex.d; // float load double
463 d -= FPR_kConvDouble; // subtract magic value
464 d -= FPR_q;
465
466 FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail;
467
468 temp1 = __FMUL( d, FPR_ln2 );
469
470 temp2 = __FMSUB( d, FPR_ln2, temp1 ); z = temp1 + result;
471
472 FPR_t = tail + temp2;
473
474 zLow = temp1 - z + result + __FMADD(d, FPR_ln2Tail, FPR_t);
475
476 temp3 = exponent * zLow;
477
478 x = __FMADD( exponent, z, temp3 );
479
480 xLow = __FMSUB( exponent, z, x ) + temp3;
481 }
482
483 if ( __FABS( x ) < FPR_512 )
484 { // abs( x ) < 512
485 register double FPR_ln2, FPR_ln2Tail, FPR_ln2Inv, FPR_kRint, FPR_f;
486 register double FPR_cc0, FPR_cc1, FPR_cc2, FPR_cc3, FPR_cc4;
487
488 FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail;
489 FPR_ln2Inv = oneOverLn2; FPR_kRint = kRint;
490
491 FPR_t = __FMADD( x, FPR_ln2Inv, FPR_kRint );
492 exp.d = FPR_t;
493
494 FPR_t -= FPR_kRint;
495
496 y = __FNMSUB( FPR_ln2, FPR_t, x); yLow = __FMUL( FPR_ln2Tail, FPR_t );
497
498 u.d = __FMADD( FPR_512, y, FPR_kRint );
499
500 scale.i.lo = 0;
501 scale.i.hi = ( exp.i.lo + 1023 ) << 20;
502
503 FPR_cc0 = cc0; FPR_cc2 = cc2;
504 FPR_cc1 = cc1; FPR_cc3 = cc3;
505 FPR_cc4 = cc4;
506
507 i = u.i.lo;
508
509 pE = &(expTablePointer[(int32_t)i]);
510
511 d = y - pE->x;
512 z = d - yLow;
513
514 zSquared = __FMUL( z, z ); zLow = d - z - yLow + xLow;
515
516 FPR_t = scale.d; FPR_f = pE->f;
517
518 temp1 = __FMADD( FPR_cc0, zSquared, FPR_cc2 ); temp2 = __FMADD( FPR_cc1, zSquared, FPR_cc3 );
519
520 temp1 = __FMADD( temp1, zSquared, FPR_cc4 );
521
522 temp2 = __FMADD( temp2, z, temp1 );
523
524 temp1 = __FMADD( temp2, zSquared, z ); result = __FMUL( FPR_t, FPR_f );
525
526 temp2 = __FMUL( result , ( temp1 + ( zLow + zLow * temp1 ) )); // __FMUL thwarts gcc MAF
527
528 result += temp2;
529
530 if ( exponent > FPR_z && base == _NearbyIntDfltEnv(base) && exponent == _NearbyIntDfltEnv(exponent))
531 {
532 FESETENVD( FPR_env ); /* NOTHING: +ve integral base and +ve integral exponent deliver exact result */
533 }
534 else
535 {
536 FESETENVD( FPR_env );
537 __PROG_INEXACT ( FPR_ln2 );
538 }
539
540 return result;
541 }
542
543 if ( ( x <= maxExp ) && ( x > min2Exp ) )
544 {
545 register double FPR_ln2, FPR_ln2Tail, FPR_ln2Inv, FPR_kRint, FPR_f;
546 register double FPR_cc0, FPR_cc1, FPR_cc2, FPR_cc3, FPR_cc4;
547
548 FPR_ln2 = ln2; FPR_ln2Tail = ln2Tail;
549 FPR_ln2Inv = oneOverLn2; FPR_kRint = kRint;
550
551 FPR_t = __FMADD( x, FPR_ln2Inv, FPR_kRint );
552 exp.d = FPR_t;
553
554 if ( x >= FPR_512 )
555 {
556 power = 2.0;
557 scale.i.lo = 0;
558 scale.i.hi = ( exp.i.lo + 1023 - 1 ) << 20;
559 }
560 else
561 {
562 power = denormal;
563 scale.i.lo = 0;
564 scale.i.hi = ( exp.i.lo + 1023 + 128 ) << 20;
565 }
566
567 FPR_t -= FPR_kRint;
568 exp.d = FPR_t;
569
570 y = __FNMSUB( FPR_ln2, FPR_t, x); yLow = __FMUL( FPR_ln2Tail, FPR_t );
571
572 u.d = __FMADD( FPR_512, y, FPR_kRint );
573
574 FPR_cc0 = cc0; FPR_cc2 = cc2;
575 FPR_cc1 = cc1; FPR_cc3 = cc3;
576 FPR_cc4 = cc4;
577
578 i = u.i.lo;
579
580 pE = &(expTablePointer[(int32_t)i]);
581
582 d = y - pE->x;
583 z = d - yLow;
584
585 zSquared = __FMUL( z, z ); zLow = d - z - yLow + xLow;
586
587 FPR_t = scale.d; FPR_f = pE->f;
588
589 temp1 = __FMADD( FPR_cc0, zSquared, FPR_cc2 ); temp2 = __FMADD( FPR_cc1, zSquared, FPR_cc3 );
590
591 temp1 = __FMADD( temp1, zSquared, FPR_cc4 );
592
593 temp2 = __FMADD( temp2, z, temp1 );
594
595 temp1 = __FMADD( temp2, zSquared, z );
596
597 result = __FMUL( FPR_t, FPR_f );
598
599 temp2 = __FMUL( result , ( temp1 + ( zLow + zLow * temp1 ) ) ); // __FMUL thwarts gcc MAF
600
601 result = ( result + temp2 ) * power;
602 }
603 else if ( x > maxExp )
604 result = huge.d;
605 else
606 result = FPR_z;
607
608 FESETENVD( FPR_env );
609 if ( __FABS( result ) == huge.d )
610 __PROG_OF_INEXACT( kMaxNormal ); // Inf: set overflow/inexact flag
611 else if ( result == FPR_z )
612 __PROG_UF_INEXACT( kMinNormal ); // zero: set underflow/inexact flag
613 else
614 __PROG_INEXACT ( ln2 );
615
616 return result;
617}