src/libm/Source/PowerPC/nextafter.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / nextafter.c
at fixPythonPipStalling 426 lines 18 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*     File nextafter.c,                                                        *
 25*     Function nextafterd for PowerPC based machines.                          *
 26*                                                                              *
 27*     The computation of IEEE-754 nextafter using a floating-point method.     *
 28*                                                                              *
 29*     Copyright � 1991-2001 Apple Computer, Inc.  All rights reserved.         *
 30*                                                                              *
 31*     Written by A. Sazegari, started on September 1991.                       *
 32*     Modified and ported by Robert A. Murley (ram) for Mac OS X.              *
 33*                                                                              *
 34*     A MathLib v4 file.                                                       *
 35*                                                                              *
 36*     Change History (most recent last):                                       *
 37*                                                                              *
 38*     October    01  1991: passes all test vectors with no tolerances.         *
 39*     November   11  1991: changed +INF representation from 7FFF8000...        *
 40*                          to 7FFF000... same thing for -INF.                  *
 41*     November   15  1991: changed classdouble and copysign to CLASSIFY and    *
 42*                          COPYSIGN.                                           *
 43*     November   17  1991: changed classdouble to CLASSEXTENDEDint.  passes    *
 44*                          the test suite.                                     *
 45*     February   07  1992: changed COPYSIGN with COPYSIGNold.                  *
 46*     September  24  1992: took the "#include support.h" out.                  *
 47*     December   03  1992: first rs6000 port.                                  *
 48*     December   13  1992: added the environmental support.                    *
 49*     December   15  1993: created a version without environmental enquires,   *
 50*                          added the rounding direction control previously     *
 51*                          taken care of by feholdexcept (not really!) and     *
 52*                          feupdateenv.  changed pack4.h to fp_private.h.      *
 53*     October    12  1994: fixed the environmental problem and tested it.      *
 54*     September  21  1995: corrected the error in the restoration of the       *
 55*                          rounding mode.                                      *
 56*     June       27  2001: ported routine to OSX, replaced calls to fenv.c     *
 57*                          with inline routines. This technique caused the     *
 58*                          routine to speed up by a factor of three.           *
 59*     July       16  2001: replaced __setflm with FEGETENVD/FESETENVD.         *
 60*     August     28  2001: added description of routine.                       *
 61*     September  05  2001: added #ifdef __ppc__.                               *
 62*     September  10  2001: added macros to detect PowerPC and correct compiler.*
 63*     September  10  2001: added more comments.                                *
 64*     September  18  2001: added <CoreServices/CoreServices.h> to get <fenv.h>.*
 65*     October    08  2001: removed <CoreServices/CoreServices.h>.              *
 66*                          changed compiler errors to warnings.                *
 67*     November   02  2001: added stub for i386 version of nextafterd.          *
 68*     November   06  2001: commented out warning about Intel architectures.    *
 69*                          changed i386 stub to call abort().                  *
 70*                                                                              *
 71*     W A R N I N G:                                                           *
 72*     These routines require a 64-bit double precision IEEE-754 model.         *
 73*     They are written for PowerPC only and are expecting the compiler         *
 74*     to generate the correct sequence of multiply-add fused instructions.     *
 75*                                                                              *
 76*     These routines are not intended for 32-bit Intel architectures.          *
 77*                                                                              *
 78*     A version of gcc higher than 932 is required.                            *
 79*                                                                              *
 80*     GCC compiler options:                                                    *
 81*           optimization level 3 (-O3)                                         *
 82*           -fschedule-insns -finline-functions -funroll-all-loops             *
 83*                                                                              *
 84*******************************************************************************/
 85
 86#include      "math.h"
 87#include      "fp_private.h"
 88#include      "fenv_private.h"
 89
 90/*******************************************************************************
 91********************************************************************************
 92*                        N  E  X  T  A  F  T  E  R  D                          *
 93********************************************************************************
 94*                                                                              *
 95*      Computes the next representable double value after 'x' in the direction *
 96*      of 'y'.  if x == y then y is returned.                                  *
 97*      This algorithm uses the smallest positive number with rounding          *
 98*      directions to compute the nextafter of a double number.                 *
 99*                                                                              *
100*      Functions used in this program: __fpclassify.                           *
101*                                                                              *
102*******************************************************************************/   
103
104static int ___fpclassifyd ( double arg )
105{
106      uint32_t exponent;
107      hexdouble      x;
108            
109      x.d = arg;
110      __NOOP;
111      __NOOP;
112      __NOOP;
113      
114      exponent = x.i.hi & 0x7ff00000;
115      if ( exponent == 0x7ff00000 )
116      {
117            if ( ( ( x.i.hi & 0x000fffff ) | x.i.lo ) == 0 )
118                  return FP_INFINITE;
119            else
120                  return ( x.i.hi & dQuietNan ) ? FP_QNAN : FP_SNAN; 
121      }
122      else if ( exponent != 0)
123            return FP_NORMAL;
124      else
125      {
126            if ( ( ( x.i.hi & 0x000fffff ) | x.i.lo ) == 0 )
127                  return FP_ZERO;
128            else
129                  return FP_SUBNORMAL;
130      }
131}
132
133static double __nextafter ( double x, double y )
134      {
135      static const hexdouble EPSILON = HEXDOUBLE(0x00000000, 0x00000001);
136      static const hexdouble PosINF  = HEXDOUBLE(0x7ff00000, 0x00000000);
137      static const hexdouble NegINF  = HEXDOUBLE(0xfff00000, 0x00000000);
138      static const hexdouble PosBig  = HEXDOUBLE(0x7fefffff, 0xffffffff);
139      static const hexdouble NegBig  = HEXDOUBLE(0xffefffff, 0xffffffff);
140      double arg;
141      hexdouble      temp, xsign, ysign;
142      register int newexc;
143      fenv_t envp;
144      
145//    Save old environment
146      FEGETENVD_GRP(temp.d);
147      envp = temp.i.lo;
148      
149//    Clear all flags in temp variable and set rounding to nearest
150      temp.i.lo &= (FE_NO_FLAGS & FE_NO_ENABLES & FE_NO_RND);
151      temp.i.lo |= FE_TONEAREST;
152      
153      if (unlikely( ( x != x ) || ( y != y ) ))      // one of the arguments is a NaN
154            arg = y + x;
155      else if ( x == y )      /*  Exit here in case the answer is �INF.      */
156            {                 /*  Otherwise unwanted flags will be set.      */
157            // Save exceptions, restore environment with saved exceptions
158            newexc = temp.i.lo & FE_ALL_EXCEPT;
159            temp.i.lo = envp;
160            if ((newexc & FE_INVALID) != 0)
161                  temp.i.lo |= SET_INVALID;
162            temp.i.lo |=  newexc & ( FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW );
163            FESETENVD_GRP(temp.d);
164            if ( ( x ==  0.0 ) && ( y == 0.0 )) // (*0, -0)-> -0, (*0, +0)-> +0 i.e. "y"
165                return y;
166            else
167                return x;
168            }
169      else if (unlikely( ( x == PosINF.d ) || ( x == NegINF.d ) ))
170            arg = ( x > 0 ) ? PosBig.d : NegBig.d;      // x and/or y is INF
171      else if ( x == 0.0 )
172            {
173            xsign.d = EPSILON.d;      // copy y's sign to EPSILON
174            ysign.d = y;
175		    __NOOP;
176		    __NOOP;
177
178            xsign.i.hi &= 0x7fffffff;
179            xsign.i.hi |= ysign.i.hi & 0x80000000;
180		    __NOOP;
181		    __NOOP;
182		    __NOOP;
183            arg = xsign.d;
184            temp.i.lo |=  FE_INEXACT | FE_UNDERFLOW;
185            }
186      else if ( ( ( x < 0.0 ) && ( x < y ) ) || ( ( x > 0.0 ) && ( x > y ) ) )
187            {            /*  Always clear intermediate spurious inexact.     */
188            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_TOWARDZERO;
189            FESETENVD_GRP(temp.d);
190            arg = ( x < 0.0 ) ? x + EPSILON.d : x - EPSILON.d;
191            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
192            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
193                  temp.i.lo &= FE_CLR_FX;
194            }
195      else if ( ( x < 0.0 ) && ( x > y ) )
196            {
197            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_DOWNWARD;
198            FESETENVD_GRP(temp.d);
199            arg = x - EPSILON.d;
200            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
201            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
202                  temp.i.lo &= FE_CLR_FX;
203            }
204      else
205            {
206            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_UPWARD;
207            FESETENVD_GRP(temp.d);
208            arg = x + EPSILON.d;
209            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
210            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
211                  temp.i.lo &= FE_CLR_FX;
212            }
213      
214/*******************************************************************************
215*     Set the flags according to the menu du jour.                             *
216*******************************************************************************/
217
218      switch ( ___fpclassifyd ( arg ) )
219            {
220            case FP_ZERO:
221                  xsign.d = arg;      // copy sign from x to arg
222                  ysign.d = x;
223				  __NOOP;
224				  __NOOP;
225				  
226                  xsign.i.hi &= 0x7fffffff;
227                  xsign.i.hi |= ( ysign.i.hi & 0x80000000 );
228				  __NOOP;
229				  __NOOP;
230				  __NOOP;
231				  
232                  arg = xsign.d;
233                  /* FALL THROUGH */
234            case FP_SUBNORMAL:
235                  temp.i.lo |=  FE_INEXACT | FE_UNDERFLOW;
236                  break;
237            case FP_INFINITE:
238                  temp.i.lo |=  FE_INEXACT | FE_OVERFLOW;
239                  break;
240            }
241      
242/*******************************************************************************
243*     Not to worry about the rounding directions.  feupdateenv saves the day.  *
244*     In this version, one must restore the rounding direction.                *
245*******************************************************************************/
246      
247      newexc = temp.i.lo & FE_ALL_EXCEPT;
248      temp.i.lo = envp;
249      if ((newexc & FE_INVALID) != 0)
250            temp.i.lo |= SET_INVALID;
251      temp.i.lo |=  newexc & ( FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW );
252      FESETENVD_GRP(temp.d);
253      
254      return arg;
255      }
256
257#if !defined(BUILDING_FOR_CARBONCORE_LEGACY)
258
259double nextafter(double x, double y)
260{
261        return __nextafter( x, y );
262}
263
264#else /* BUILDING_FOR_CARBONCORE_LEGACY */      
265/* Legacy nextafterd() API */
266
267double nextafterd(double x, double y)
268{
269        return __nextafter( x, y );
270}
271
272static int ___fpclassifyf ( float x )
273{
274   uint32_t iexp;
275   hexsingle      z;
276   
277   z.fval = x;
278   __NOOP;
279   __NOOP;
280   __NOOP;
281   iexp = z.lval & 0x7f800000;             // isolate float exponent
282   
283   if (iexp == 0x7f800000) {               // NaN or INF case
284      if ((z.lval & 0x007fffff) == 0)
285         return FP_INFINITE;
286      else if ((z.lval & fQuietNan) != 0)
287         return FP_QNAN;
288      else
289         return FP_SNAN;
290   }
291   
292   if (iexp != 0)                             // normal float
293      return FP_NORMAL;
294      
295   if ((z.lval & 0x007fffff) == 0)
296      return FP_ZERO;             // zero
297   else
298      return FP_SUBNORMAL;        //must be subnormal
299}
300
301float nextafterf ( float x, float y )
302      {
303      static const hexsingle EPSILON = { 0x00000001 };
304      static const hexsingle PosINF  = { 0x7f800000 };
305      static const hexsingle NegINF  = { 0xff800000 };
306      static const hexsingle PosBig  = { 0x7f7fffff };
307      static const hexsingle NegBig  = { 0xff7fffff };
308      double arg;
309      hexdouble      temp;
310      hexsingle      xsign, ysign;
311      register int newexc;
312      fenv_t envp;
313      
314//    Save old environment
315      FEGETENVD_GRP(temp.d);
316      envp = temp.i.lo;
317      
318//    Clear all flags in temp variable and set rounding to nearest
319      temp.i.lo &= (FE_NO_FLAGS & FE_NO_ENABLES & FE_NO_RND);
320      temp.i.lo |= FE_TONEAREST;
321      
322      if (unlikely( ( x != x ) || ( y != y ) ))      // one of the arguments is a NaN
323            arg = y + x;
324      else if ( x == y )      /*  Exit here in case the answer is �INF.      */
325            {                 /*  Otherwise unwanted flags will be set.      */
326            // Save exceptions, restore environment with saved exceptions
327            newexc = temp.i.lo & FE_ALL_EXCEPT;
328            temp.i.lo = envp;
329            if ((newexc & FE_INVALID) != 0)
330                  temp.i.lo |= SET_INVALID;
331            temp.i.lo |=  newexc & ( FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW );
332            FESETENVD_GRP(temp.d);
333            if ( ( x ==  0.0 ) && ( y == 0.0 )) // (*0, -0)-> -0, (*0, +0)-> +0
334                return y;
335            else
336                return x;
337            }
338      else if (unlikely( ( x == PosINF.fval ) || ( x == NegINF.fval ) ))
339            arg = ( x > 0 ) ? PosBig.fval : NegBig.fval;      // x and/or y is INF
340      else if ( x == 0.0 )
341            {
342            xsign.fval = EPSILON.fval;      // copy y's sign to EPSILON
343            ysign.fval = y;
344		    __NOOP;
345		    __NOOP;
346
347            xsign.lval &= 0x7fffffff;
348            xsign.lval |= ysign.lval & 0x80000000;
349		    __NOOP;
350		    __NOOP;
351		    __NOOP;
352
353            arg = xsign.fval;
354            temp.i.lo |=  FE_INEXACT | FE_UNDERFLOW;
355            }
356      else if ( ( ( x < 0.0 ) && ( x < y ) ) || ( ( x > 0.0 ) && ( x > y ) ) )
357            {            /*  Always clear intermediate spurious inexact.     */
358            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_TOWARDZERO;
359            FESETENVD_GRP(temp.d);
360            arg = ( x < 0.0 ) ? x + EPSILON.fval : x - EPSILON.fval;
361            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
362            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
363                  temp.i.lo &= FE_CLR_FX;
364            }
365      else if ( ( x < 0.0 ) && ( x > y ) )
366            {
367            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_DOWNWARD;
368            FESETENVD_GRP(temp.d);
369            arg = x - EPSILON.fval;
370            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
371            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
372                  temp.i.lo &= FE_CLR_FX;
373            }
374      else
375            {
376            temp.i.lo = (temp.i.lo & FE_NO_RND) | FE_UPWARD;
377            FESETENVD_GRP(temp.d);
378            arg = x + EPSILON.fval;
379            temp.i.lo &= ~( FE_INEXACT | FE_UNDERFLOW );
380            if ((temp.i.lo & FE_ALL_EXCEPT) == 0)
381                  temp.i.lo &= FE_CLR_FX;
382            }
383      
384/*******************************************************************************
385*     Set the flags according to the menu du jour.                             *
386*******************************************************************************/
387
388      switch ( ___fpclassifyf ( (float) arg ) )
389            {
390            case FP_ZERO:
391                  xsign.fval = (float) arg;      // copy sign from x to arg
392                  ysign.fval = x;
393				  __NOOP;
394				  __NOOP;
395
396                  xsign.lval &= 0x7fffffff;
397                  xsign.lval |= ( ysign.lval & 0x80000000 );
398				  __NOOP;
399				  __NOOP;
400				  __NOOP;
401				  
402                  arg = xsign.fval;
403            case FP_SUBNORMAL:
404                  temp.i.lo |=  FE_INEXACT | FE_UNDERFLOW;
405                  break;
406            case FP_INFINITE:
407                  temp.i.lo |=  FE_INEXACT | FE_OVERFLOW;
408                  break;
409            }
410      
411/*******************************************************************************
412*     Not to worry about the rounding directions.  feupdateenv saves the day.  *
413*     In this version, one must restore the rounding direction.                *
414*******************************************************************************/
415      
416      newexc = temp.i.lo & FE_ALL_EXCEPT;
417      temp.i.lo = envp;
418      if ((newexc & FE_INVALID) != 0)
419            temp.i.lo |= SET_INVALID;
420      temp.i.lo |=  newexc & ( FE_INEXACT | FE_DIVBYZERO | FE_UNDERFLOW | FE_OVERFLOW );
421      FESETENVD_GRP(temp.d);
422      
423      return (float) arg;
424      }
425
426#endif /* BUILDING_FOR_CARBONCORE_LEGACY */