src/libm/Source/ARM/f2i.h at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / f2i.h
at fixPythonPipStalling 130 lines 3.2 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
/*
 *	f2i.h
 *
 *		by Ian Ollmann
 *
 *	C utility routines for rounding float to integer with round-to-nearest-like behavior.
 *	We do not specify here which way ties round.  The functions are independent of the rounding mode,
 *	except that the value +-0x1.fffffep-2f may round away from zero depending on the rounding mode.
 *	
 *	The C entry points f2i and d2i assume the following:
 *
 *		All values are in the range: |x| < 0x1.0p31
 *		NaN does not occur
 *		x is not large. See comments under Flags_Behavior for more.
 */
 17
 18#ifndef F2I_H
 19#define F2I_H
 20
 21#include <stdint.h>
 22
 23#ifndef __arm__ 
 24	#error	This is a __arm__ specific header
 25#endif
 26
 27#ifndef FLAGS_BEHAVIOR
 28#define FLAGS_BEHAVIOR
 29typedef enum
 30{
 31	kCorrectFlags = -1,					// sets inexact when and only when it should.  Returns the wrong answer if |x| >= 0x1.0p31.
 32	kLazyFlags =     0,					// I don't care whether inexact is set or not. Returns the wrong answer if |x| >= 0x1.0p23.
 33	kNoFlags =		 1					// never sets flags.                           Returns the wrong answer if |x| >= 0x1.0p31.
 34}Flags_Behavior;
 35#endif
 36
 37static inline int32_t	f2i_cheesy( float x, Flags_Behavior behavior ) __attribute__ ((always_inline));
 38static inline int32_t	f2i_cheesy( float x, Flags_Behavior behavior )
 39{
 40	float rounded;
 41	int32_t	result;
 42	union{ float f; uint32_t u; }u;
 43
 44	// switch should be optimized away
 45	switch( behavior )
 46	{
 47		case	kCorrectFlags:
 48
 49			result = (int32_t) x;
 50			rounded = (float) result;
 51			if( x != rounded )
 52			{
 53#if defined( AVOID_PIC )				
 54				u.f = x;
 55				u.u = (u.u & 0x80000000U) | 0x3f000000U;		// copysign( 0.5, x )
 56				result = (int32_t) (x + u.f);
 57#else
 58				{
 59					static const float	half[2] = { 0.5f, -0.5f };
 60					result = (int32_t)( x + half[ x < 0.0f ] );
 61				}
 62#endif
 63			}
 64			return result;
 65
 66		case	kLazyFlags:
 67#if defined( AVOID_PIC )				
 68			u.f = x;
 69			u.u = (u.u & 0x80000000U) | 0x3f000000U;		// copysign( 0.5, x )
 70			result = (int32_t) (x + u.f);
 71#else
 72			{
 73				static const float	half[2] = { 0.5f, -0.5f };
 74				result = (int32_t)( x + half[ x < 0.0f ] );
 75			}
 76#endif
 77			return result;
 78
 79		case	kNoFlags:
 80			{
 81				u.f = x;
 82				uint32_t fabsx = u.u & 0x7fffffffU;
 83				int32_t exp = fabsx >> 23;
 84				result = 0;
 85
 86				if( exp >= 126U )
 87				{ // a lookup table can be used to make this part faster, see powf implementation.
 88					int32_t leftShift = exp - (127+23);
 89					int32_t rightShift = -leftShift;
 90					int32_t mask = leftShift >> 31;
 91					int32_t signX = u.u ^ fabsx;
 92					leftShift &= ~mask;
 93					rightShift &= mask;
 94					signX >>= 31;
 95	
 96					// convert float to fixed point
 97					result = ( u.u & 0x007fffffU) | 0x00800000U;
 98
 99					// round
100					result += 0x00800000U >> (exp - 126);
101
102					// correct position of fixed point
103					result >>= rightShift;
104					result <<= leftShift;
105					
106					//fix sign
107					result ^= signX;
108					result -= signX;
109				}
110				return result;
111			}
112			break;
113	}
114	
115	return 0;
116}
117
118static inline long lrintf_private( float x ) __attribute__ ((always_inline));
119static inline long lrintf_private( float x )
120{
121	register float temp;
122	register long  result;
123
124	__asm__ __volatile__ ( "ftosis %0, %1" : "=w" (temp) : "w" (x) );
125	__asm__ __volatile__ ( "fmrs %0, %1" : "=r" (result) : "w" (temp) );
126
127	return result;
128}
129
130#endif	/* F2I_H */