src/libm/Source/Intel/fenv.c at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / fenv.c
at fixPythonPipStalling 384 lines 15 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 *  fenv.c
  3 *  xmmLibm
  4 *
  5 *  Created by iano on 6/21/05.
  6 *  Copyright 2005 __MyCompanyName__. All rights reserved.
  7 *
  8 */
  9
 10#include "xmmLibm_prefix.h"
 11
 12#include "fenv.h"
 13#include <xmmintrin.h>
 14
 15#define DEFAULT_CONTROL 0x037f
 16#define DEFAULT_STATUS  0x0
 17#define DEFAULT_RESERVED "\0\0\0\0\0\0\0"
 18
 19#define GET_FSW()      ({ unsigned short _result; asm volatile ("fnstsw %0" : "=m" (_result)::"memory"); /*return*/ _result; })
 20#define GET_FCW()      ({ unsigned short _result; asm volatile ("fnstcw %0" : "=m" (_result)::"memory"); /*return*/ _result; })
 21#define SET_FCW(_a)    { unsigned short _aa = _a; asm volatile ("fldcw %0" : :"m" (_aa)); }
 22
 23typedef struct {
 24    unsigned short __control;
 25    unsigned short __reserved1;
 26    unsigned short __status;
 27    unsigned short __reserved2;
 28    unsigned int __private3;
 29    unsigned int __private4;
 30    unsigned int __private5;
 31    unsigned int __private6;
 32    unsigned int __private7;
 33} __fpustate_t;
 34
 35#define FE_ALL_RND  ( FE_TONEAREST | FE_TOWARDZERO | FE_UPWARD | FE_DOWNWARD )
 36
 37static inline int _fesetexceptflag(const fexcept_t *flagp, int excepts ) ALWAYS_INLINE;
 38static inline int _fesetexceptflag(const fexcept_t *flagp, int excepts )
 39{
 40    int state;
 41    __fpustate_t currfpu;
 42    unsigned int mxcsr;
 43    unsigned int exceptMask = excepts & FE_ALL_EXCEPT;
 44    unsigned int andMask = ~exceptMask;                     // clear just the bits indicated
 45    unsigned int orMask =  *flagp & exceptMask;             // latch the specified bits
 46    
 47    //read the state
 48    mxcsr = _mm_getcsr();                                   //read the MXCSR state
 49    asm volatile ("fnstenv %0" : "=m" (currfpu) );          //read x87 state
 50
 51    //fix up the MXCSR state
 52    mxcsr &= andMask;
 53    mxcsr |= orMask;
 54
 55    //fix up the x87 state
 56    state = currfpu.__status;
 57    state &= andMask; 	   
 58    state |= orMask; 
 59    currfpu.__status = state; 
 60
 61    //store the state
 62    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (mxcsr), "m" (currfpu));
 63	return 0;
 64}
 65
 66static inline int _fegetexceptflag(fexcept_t *flagp, int excepts);
 67static inline int _fegetexceptflag(fexcept_t *flagp, int excepts)
 68{
 69    fexcept_t fsw = GET_FSW();              //get the x87 status word
 70    unsigned int mxcsr = _mm_getcsr();      //get the mxcsr
 71    fexcept_t result = mxcsr | fsw;
 72
 73    result &= excepts & FE_ALL_EXCEPT;
 74    
 75    *flagp = result;
 76	return 0;
 77}
 78
 79
 80#if defined( BUILDING_FOR_CARBONCORE_LEGACY ) || defined(DARLING)
 81
 82const fenv_t _FE_DFL_ENV                        = { DEFAULT_CONTROL, 
 83                                                    DEFAULT_STATUS, 
 84													DEFAULT_MXCSR, 
 85                                                    DEFAULT_RESERVED};
 86                                                    
 87const fenv_t _FE_DFL_DISABLE_SSE_DENORMS_ENV    = { DEFAULT_CONTROL, 
 88                                                    DEFAULT_STATUS, 
 89													DEFAULT_MXCSR | 0x8040, 
 90                                                    DEFAULT_RESERVED };
 91
 92
 93/*******************************************************************************
 94*     The function "feclearexcept" clears the supported floating point         *
 95*     exceptions represented by its argument.                                  *
 96*******************************************************************************/
 97
 98int  feclearexcept(int excepts)
 99{
100    fexcept_t zero = 0;
101    return _fesetexceptflag( &zero, excepts );
102}
103
104
105
106/*******************************************************************************
107*     The function "feraiseexcept" raises the supported floating-point         *
108*     exceptions represented by its argument. The order in which these         *
109*     floating-point exceptions are raised is unspecified.                     *
110*******************************************************************************/
111
int  feraiseexcept(int excepts)
{
    // Use the requested set both as the selection mask and as the flag image,
    // so every requested flag ends up set.
    fexcept_t wanted = excepts;
    const int status = _fesetexceptflag ( &wanted, excepts );

    // fwait after the flags are in place raises the exception(s).
    __asm__ volatile ("fwait" :"=X" (wanted)::"memory");
    return status;
}
120
121
122
123
124
125
126/*******************************************************************************
127*     The function "fetestexcept" determines which of the specified subset of  *
128*     the floating-point exception flags are currently set.  The excepts       *
129*     argument specifies the floating-point status flags to be queried. This   *
130*     function returns the value of the bitwise OR of the floating-point       *
131*     exception macros corresponding to the currently set floating-point       *
132*     exceptions included in excepts.                                          *
133*                                                                              *
134*     On MacOS X for Intel, the result is the value of union of the            *
135*     corresponding result from the x87 and SSE floating point states.         *
136*******************************************************************************/
137
int  fetestexcept(int excepts )
{
    const fexcept_t x87Status = GET_FSW();          // x87 status word
    const unsigned int sseStatus = _mm_getcsr();    // MXCSR

    // A flag counts as set if either unit has it set; report only the
    // supported flags the caller asked about.
    return (sseStatus | x87Status) & (excepts & FE_ALL_EXCEPT);
}
149
150
151/*******************************************************************************
152*     The following functions provide control of rounding direction modes.     *
153*******************************************************************************/
154
155/*******************************************************************************
156*     The function "fegetround" returns the value of the rounding direction    *
157*     macro which represents the current rounding direction, or a negative     *
*     value if there is no such rounding direction macro or the current        *
*     rounding direction is not determinable.                                  *
160*******************************************************************************/
161
162int  fegetround(void)
163{
164    int fcw = GET_FCW();
165
166    return (fcw & FE_ALL_RND);
167}
168
169
170/*******************************************************************************
171*     The function "fesetround" establishes the rounding direction represented *
172*     by its argument "round". If the argument is not equal to the value of a  *
173*     rounding direction macro, the rounding direction is not changed.  It     *
174*     returns zero if and only if the argument is equal to a rounding          *
175*     direction macro.                                                         *
176*******************************************************************************/
177
178int  fesetround(int round )
179{
180    if ((round & ~FE_ALL_RND))
181        return 1;
182    else
183    {
184        unsigned short fcw = GET_FCW();
185        int mxcsr = _mm_getcsr();
186        
187        fcw &= ~FE_ALL_RND;
188        fcw |= round;
189        mxcsr &= ~( FE_ALL_RND << 3 );
190        mxcsr |= round << 3;
191    
192        _mm_setcsr( mxcsr );
193        SET_FCW( fcw );
194
195        return 0;
196    }
197}
198
199
200/*******************************************************************************
201*    The following functions manage the floating-point environment, exception  *
202*    flags and dynamic modes, as one entity.                                   *
203*******************************************************************************/
204
205/*******************************************************************************
*    The fegetenv function stores the current floating-point environment in    *
207*    the object pointed to by envp.                                            *
208*******************************************************************************/
209int  fegetenv(fenv_t *envp)
210{
211    __fpustate_t currfpu;
212    int mxcsr = _mm_getcsr();
213    
214    asm volatile ("fnstenv %0" : "=m" (currfpu) :: "memory");
215    
216    envp->__control = currfpu.__control;
217    envp->__status = currfpu.__status;
218    envp->__mxcsr = mxcsr;
219	((int*) envp->__reserved)[0] = 0;
220	((int*) envp->__reserved)[1] = 0;
221	   
222	// fnstenv masks floating-point exceptions.  We restore the state here
223	// in case any exceptions were originally unmasked.
224	asm volatile ("fldenv %0" : : "m" (currfpu));
225	
226	return 0;
227}
228
229/*******************************************************************************
230*    The feholdexcept function saves the current floating-point environment in *
231*    the object pointed to by envp, clears the floating-point status flags,    *
232*    and then installs a non-stop (continue on floating-point exceptions)      *
233*    mode, if available, for all floating-point exceptions. The feholdexcept   *
234*    function returns zero if and only if non-stop floating-point exceptions   *
235*    handling was successfully installed.                                      *
236*******************************************************************************/
237int   feholdexcept(fenv_t *envp)
238{
239    __fpustate_t currfpu;
240    int mxcsr;
241    
242    mxcsr = _mm_getcsr();
243	asm volatile ("fnstenv %0" : "=m" (*&currfpu) :: "memory");
244    
245    envp->__control = currfpu.__control;
246    envp->__status = currfpu.__status;
247    envp->__mxcsr = mxcsr;
248	((int*) envp->__reserved)[0] = 0;
249	((int*) envp->__reserved)[1] = 0;
250	
251    currfpu.__control |= FE_ALL_EXCEPT; // FPU shall handle all exceptions
252	currfpu.__status &= ~FE_ALL_EXCEPT;
253    mxcsr |= FE_ALL_EXCEPT << 7;  // left shifted because control mask is <<7 of the flags
254	mxcsr &= ~FE_ALL_EXCEPT;
255
256    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (*&mxcsr), "m" (*&currfpu));
257    
258    return 0;
259}
260
// DEFAULT_MXCSR with bits 0x8000 and 0x0040 also set (the FZ and DAZ bits the
// macro name refers to).
#define MXCSR_PLUS_FZ_DAZ	( DEFAULT_MXCSR | 0x8000 | 0x0040 )
262
263
264/*******************************************************************************
*    The fesetenv function establishes the floating-point environment          *
266*    represented by the object pointed to by envp. The argument envp shall     *
267*    point to an object set by a call to fegetenv or feholdexcept, or equal to *
268*    a floating-point environment macro -- we define only *FE_DFL_ENV and      *
269*    FE_DISABLE_SSE_DENORMS_ENV -- to be C99 standard compliant and portable   *
*    to other architectures. Note that fesetenv merely installs the state of   *
271*    the floating-point status flags represented through its argument, and     *
272*    does not raise these floating-point exceptions.                           *
273*                                                                              *
274*    On MacOS X for Intel you may test and set the bits in *envp yourself,     *
275*    provided that you conditionalize the code appropriately to preserve       *
276*    portability and you follow the various strictures and suggestions         *
277*    provided by Intel in appropriate processor documentation. Please be aware *
278*    that because there are two hardware locations for setting and reading     *
279*    floating point environment, this function (and others like it) are not    *
280*    atomic -- that is, for a brief period of time during the function call    *
281*    your new environment will have been applied to one but not both of the    *
282*    floating point engines (x87 and SSE). In addition, the behavior of some   *
283*    higher level interfaces (fegetround) is undefined if the x87 and SSE      *
284*    floating point units rounding modes are configured differently.  Please   *
285*    use common sense.                                                         *
286*******************************************************************************/
287int  fesetenv(const fenv_t *envp)
288{
289    __fpustate_t currfpu;
290    asm volatile ("fnstenv %0" : "=m" (currfpu));
291    
292    currfpu.__control = envp->__control;
293    currfpu.__status = envp->__status;
294    
295    asm volatile ("ldmxcsr %0 ; fldenv %1" : : "m" (envp->__mxcsr), "m" (currfpu));
296	return 0;
297}
298
299
300
301/*******************************************************************************
302*    The feupdateenv function saves the currently raised floating-point        *
303*    exceptions in its automatic storage, installs the floating-point          *
304*    environment represented by the object pointed to by envp, and then raises *
305*    the saved floating-point exceptions. The argument envp shall point to an  *
306*    object set by a call to feholdexcept or fegetenv or equal a               *
307*    floating-point environment macro.                                         *
308*                                                                              *
*    Please see the description of fesetenv for additional ways to create      *
310*    a fenv_t object, which are valid only for MacOS X for Intel.              *
311*******************************************************************************/
312int  feupdateenv(const fenv_t *envp)
313{
314    __fpustate_t currfpu;
315    asm volatile ("fnstenv %0" : "=m" (currfpu));
316    
317    currfpu.__control = envp->__control;
318    currfpu.__status = envp->__status;
319    
320    asm volatile ("ldmxcsr %0 ; fldenv %1; fwait " : : "m" (envp->__mxcsr), "m" (currfpu));
321	return 0;
322}
323
324/* Legacy entry point */
void fegetexcept ( fexcept_t *flagp, int excepts )
{
    // Forwards to the shared helper; its int result is not reported by this
    // void entry point.
    (void) _fegetexceptflag ( flagp, excepts );
}
329
330/* Legacy entry point */
void fesetexcept ( fexcept_t *flagp, int excepts )
{
    // Forwards to the shared helper; its int result is not reported by this
    // void entry point.
    (void) _fesetexceptflag ( flagp, excepts );
}
335
336#else
337
338/*******************************************************************************
339*    The function "fegetexceptflag" stores a implementation-defined            *
340*    representation of the states of the floating-point status flags indicated *
341*    by its integer argument excepts in the object pointed to by the argument, * 
342*    flagp.                                                                    *
343*******************************************************************************/
344
int  fegetexceptflag(fexcept_t *flagp, int excepts)
{
    // Public entry point; all the work is done by the shared inline helper.
    const int status = _fegetexceptflag( flagp, excepts );

    return status;
}
349      
350/*******************************************************************************
351*     The function "fesetexceptflag" sets or clears the floating point status  *
352*     flags indicated by the argument excepts to the states stored in the      *
353*     object pointed to by flagp. The value of the *flagp shall have been set  *
354*     by a previous call to fegetexceptflag whose second argument represented  *
355*     at least those floating-point exceptions represented by the argument     *
356*     excepts. This function does not raise floating-point exceptions; it just *
357*     sets the state of the flags.                                             *
358*******************************************************************************/
359
int  fesetexceptflag(const fexcept_t *flagp, int excepts )
{
    // Public entry point; all the work is done by the shared inline helper.
    const int status = _fesetexceptflag( flagp, excepts );

    return status;
}
364
// Translate the current rounding direction into a small integer code:
// 0 toward zero, 1 to nearest, 2 upward, 3 downward, -1 for anything else.
int __fegetfltrounds( void );
int __fegetfltrounds( void )
{
    const int mode = fegetround();

    if ( mode == FE_TOWARDZERO )
        return 0;
    if ( mode == FE_TONEAREST )
        return 1;
    if ( mode == FE_UPWARD )
        return 2;
    if ( mode == FE_DOWNWARD )
        return 3;

    return -1;
}
382
383#endif
384