src/libm/Source/PowerPC/atan2.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / PowerPC / atan2.c
at fixPythonPipStalling 258 lines 13 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
  3 *
  4 * @APPLE_LICENSE_HEADER_START@
  5 * 
  6 * The contents of this file constitute Original Code as defined in and
  7 * are subject to the Apple Public Source License Version 1.1 (the
  8 * "License").  You may not use this file except in compliance with the
  9 * License.  Please obtain a copy of the License at
 10 * http://www.apple.com/publicsource and read it before using this file.
 11 * 
 12 * This Original Code and all software distributed under the License are
 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 17 * License for the specific language governing rights and limitations
 18 * under the License.
 19 * 
 20 * @APPLE_LICENSE_HEADER_END@
 21 */
 22/*******************************************************************************
 23*                                                                              *
 24*     File atan2.c,                                                            *
 25*     Function atan2(y/x),                                                     *
 26*     Implementation of arc tangent of ( y / x ) for the PowerPC.              *
 27*                                                                              *
 28*     Copyright c 1991-2001 Apple Computer, Inc.  All rights reserved.         *
 29*                                                                              *
 30*     Written by Ali Sazegari, started on November 1991,                       *
 31*     Modified and ported by Robert A. Murley (ram) for Mac OS X.              *
 32*                                                                              *
 33*     A MathLib v4 file.                                                       *
 34*                                                                              *
 35*     December 03 1992: first rs6000 port.                                     *
 36*     January  05 1993: added the environmental controls.                      *
 37*     July     07 1993: fixed the argument of __fpclassifyd.                   *
 38*     July     11 1993: changed feholdexcept to _feprocentry to set rounding   *
 39*                       to zero. removed the constant nan, fixed all edge      *
 40*                       cases to reflect the fpce-nceg requirements.           *
 41*     September19 1994: changed all the environemntal enquiries to __setflm.   *
 42*     October  06 1994: initialized CurrentEnvironment to correct an invalid   *
 43*                        flag problem.                                         *
 44*     July     23 2001: replaced __setflm with FEGETENVD/FESETENVD;            *
 45*                       replaced DblInHex typedef with hexdouble.              *
 46*     September 07 2001: added #ifdef __ppc__.                                 *
 47*     September 09 2001: added more comments.                                  *
 48*     September 10 2001: added macros to detect PowerPC and correct compiler.  *
 49*     September 18 2001: added <CoreServices/CoreServices.h> to get to <fp.h>  *
 50*                        and <fenv.h>.                                         *
 51*     September 25 2001: removed fenv_access pragma.                           *
 52*     October   05 2001: added const double pi.                                *
 53*     October   08 2001: removed <CoreServices/CoreServices.h>.                *
 54*                        changed compiler errors to warnings.                  *
 55*     November  06 2001: commented out warning about Intel architectures.      *
 56*                                                                              *
 57*     W A R N I N G:                                                           *
 58*     These routines require a 64-bit double precision IEEE-754 model.         *
 59*     They are written for PowerPC only and are expecting the compiler         *
 60*     to generate the correct sequence of multiply-add fused instructions.     *
 61*                                                                              *
 62*     These routines are not intended for 32-bit Intel architectures.          *
 63*                                                                              *
 64*     A version of gcc higher than 932 is required.                            *
 65*                                                                              *
 66*     GCC compiler options:                                                    *
 67*            optimization level 3 (-O3)                                        *
 68*            -fschedule-insns -finline-functions -funroll-all-loops            *
 69*                                                                              *
 70*******************************************************************************/
 71
 72#include    "math.h"
 73#include    "fenv_private.h"
 74#include    "fp_private.h"
 75
 76/*******************************************************************************
 77*            Functions needed for the computation.                             *
 78*******************************************************************************/
 79
 80/*     the following fp.h functions are used:                                 */
 81/*     atan, __fpclassifyd, copysign and __signbitd.                          */
 82
 83/*******************************************************************************
 84*     This function is odd.  The positive interval is computed and for         *
 85*     negative values, the sign is reflected in the computation.               *
 86*******************************************************************************/
 87static const double kMinNormal = 0x1.0p-1022;                 // 2.2250738585072014e-308;
 88static const double kMaxNormal = 1.7976931348623157e308;
 89static const double kHalf = 0.5;
 90
 91extern double atanCore ( double );
 92extern double atanCoreInv ( double );
 93
 94double atan2 ( double y, double x )
 95{
 96      register double result;      
 97      register double FPR_env, FPR_z, FPR_half, FPR_pi, FPR_kMinNormal, FPR_kMaxNormal, FPR_absx, FPR_absy, FPR_t;
 98      
 99      FPR_z = 0.0;					
100      
101/*******************************************************************************
102*     If argument is SNaN then a QNaN has to be returned and the invalid       *
103*     flag signaled.                                                           * 
104*******************************************************************************/
105
106      if (unlikely( ( x != x ) || ( y != y ) ))
107            return x + y;
108      
109      FEGETENVD( FPR_env );
110      FESETENVD( FPR_z );
111      
112      __NOOP;           // takes slot 0  following the mtfsf
113      FPR_t = y / x;	// takes slot 1 (hence fpu1) following the mtfsf
114      FPR_pi = M_PI;
115      
116      FPR_kMinNormal = kMinNormal;			FPR_kMaxNormal = kMaxNormal; 
117           
118      FPR_absy = __FABS ( y );
119      FPR_half = kHalf;                                
120      FPR_absx = __FABS ( x );
121                  
122/*******************************************************************************
123*     The next switch will decipher what sort of argument we have:             *
124*                                                                              *
125*     atan2 ( �0, x ) = �0, if x > 0,                                          *
126*     atan2 ( �0, +0) = �0,                                                    *
127*     atan2 ( �0, x ) = ��, if x < 0,                                          *
128*     atan2 ( �0, -0) = ��,                                                    *
129*     atan2 ( y, �0 ) = �/2, if y > 0,                                         *
130*     atan2 ( y, �0 ) = -�/2, if y < 0,                                        *
131*     atan2 ( �y, � ) = �0, for finite y > 0,                                  *
132*     atan2 ( ��, x ) = ��/2, for finite x,                                    *
133*     atan2 ( �y, -�) = ��, for finite y > 0,                                  *
134*     atan2 ( ��, � ) = ��/4,                                                  *
135*     atan2 ( ��, -�) = �3�/4.                                                 *
136*                                                                              *
137*     note that the non obvious cases are y and x both infinite or both zero.  *
138*     for more information, see �Branch Cuts for Complex Elementary Functions, *
139*     or much Much Ado About Nothing�s Sign bit�, by W. Kahan, Proceedings of  *
140*     the joint IMA/SIAM conference on The state of the Art in Numerical       *
141*     Analysis, 14-18 April 1986, Clarendon Press (1987).                      *
142*                                                                              *
143*     atan2(y,0) does not raise the divide-by-zero exception, nor does         *
144*     atan2(0,0) raise the invalid exception.                                  *
145*******************************************************************************/
146
147      if (likely( FPR_absx <= FPR_kMaxNormal )) // slot 0 hence fpu0
148      {
149            if (unlikely( x == FPR_z )) // slot 0 hence fpu 0
150            {
151                  if ( y > FPR_z )
152                        result = __FMUL( FPR_half, FPR_pi );
153                  else if ( y < FPR_z )
154                        result = __FMUL( -FPR_half, FPR_pi );
155                  else
156                  {
157                        if ( signbit ( x ) ) // x is +-0
158                              result = copysign ( FPR_pi, y ); // y is +-0
159                        else
160                        {
161                              FESETENVD( FPR_env );
162                              return y;			// Exact zero result.
163                        }
164                  }
165                  FESETENVD( FPR_env );
166                  __PROG_INEXACT( FPR_pi );
167                  return result;
168            }
169      }
170      else
171      {	    // Infinite x
172            if ( x > FPR_z )
173            {
174                if ( FPR_absy <= FPR_kMaxNormal )
175                {
176                        FESETENVD( FPR_env );
177                        result = copysign ( FPR_z, y );
178                        return result;
179                }
180                else
181                {
182                        if ( y > FPR_z )
183                            result = __FMUL( 0.25f, FPR_pi );
184                        else
185                            result = __FMUL( -0.25f, FPR_pi );
186                }
187            }
188            else
189            {
190                if ( FPR_absy <= FPR_kMaxNormal )
191                        result = copysign ( FPR_pi, y );
192                else
193                {
194                        if ( y > FPR_z )
195                            result = __FMUL( 0.75f, FPR_pi );
196                        else
197                            result = __FMUL( -0.75f, FPR_pi );
198                }
199            }
200            FESETENVD( FPR_env );
201            __PROG_INEXACT( FPR_pi );
202            return result;
203      }
204      
205      if (likely( FPR_absy <= FPR_kMaxNormal ))
206      {
207            if (unlikely( y == FPR_z ))
208            {
209                  if ( x > FPR_z )
210                  {
211                        FESETENVD( FPR_env );
212                        return y;
213                  }
214                  else
215                        result = copysign ( FPR_pi, y ); // y is +-0
216                        
217                  FESETENVD( FPR_env );
218                  __PROG_INEXACT( FPR_pi );
219                  return result;
220            }
221      }
222      else
223      {		  // Infinite y
224                  if ( y > FPR_z )
225                        result =  __FMUL( FPR_half, FPR_pi );
226                  else
227                        result =  __FMUL( -FPR_half, FPR_pi );
228                  FESETENVD( FPR_env );
229                  __PROG_INEXACT( FPR_pi );
230                  return result;
231      }
232      
233/*******************************************************************************
234*     End of the special case section. atan2 is mostly a collection of special *
235*     case functions.  Next we will carry out the main computation which at    *
236*     this point will only receive non-zero normal or denormal numbers.        *
237*******************************************************************************/
238      if (FPR_absy > FPR_absx)
239            result = atanCoreInv ( __FABS( FPR_t ) );
240      else
241            result = atanCore ( __FABS( FPR_t ) );
242      
243      if ( x < FPR_z )
244            result = FPR_pi - result;
245            
246      FPR_t = __FABS( result );
247
248      FESETENVD( FPR_env );
249      if ( FPR_t >= FPR_kMinNormal || FPR_t == FPR_z )
250            __PROG_INEXACT( FPR_pi );
251      else
252            __PROG_UF_INEXACT( FPR_kMinNormal );
253
254      if ( y > FPR_z )
255            return result;
256      else
257            return -result;
258}