src/libm/Source/ARM/d2i.h at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / d2i.h
at fixPythonPipStalling 135 lines 3.5 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
/*
 *	d2i.h
 *
 *		by Ian Ollmann
 *
 *	C utility routines for rounding double to integer, with round-to-nearest-like behavior.
 *	We do not specify here which way ties round.  Functions are rounding mode independent, except that
 *	the value +-0x1.fffffffffffffp-2 may round away from zero depending on rounding mode.
 *
 *	The C entry points f2i and d2i assume the following:
 *
 *		All values are in the range: |x| <= 0x1.0p31 - 1
 *		NaN does not occur
 *		x is not large. See comments under Flags_Behavior for more.
 *
 *	For double -> int64 conversions, please see <rdar://problems/5316332&5316398>
 */
 19
 20#ifndef D2I_H
 21#define D2I_H
 22
 23#ifndef __arm__
 24	#error header valid for __arm__ only
 25#endif
 26
 27#include <stdint.h>
 28
 29#ifndef FLAGS_BEHAVIOR
 30#define FLAGS_BEHAVIOR
 31typedef enum
 32{
 33	kCorrectFlags = -1,					// sets inexact when and only when it should.  Returns the wrong answer if |x| >= 0x1.0p31.
 34	kLazyFlags =     0,					// I don't care whether inexact is set or not. Returns the wrong answer if |x| >= 0x1.0p31.
 35	kNoFlags =		 1					// never sets flags.                           Returns the wrong answer if |x| >= 0x1.0p31.
 36}Flags_Behavior;
 37#endif
 38
 39static inline int32_t	d2i_cheesy( double x, Flags_Behavior behavior ) __attribute__ ((always_inline));
 40static inline int32_t	d2i_cheesy( double x, Flags_Behavior behavior )
 41{
 42	double rounded;
 43	int32_t	result;
 44	union{ double d; uint64_t u; }u;
 45
 46	// switch should be optimized away
 47	switch( behavior )
 48	{
 49		case	kCorrectFlags:
 50
 51			result = (int32_t) x;
 52			rounded = (float) result;
 53			if( x != rounded )
 54			{
 55#if defined( AVOID_PIC )				
 56				u.d = x;
 57				u.u = (u.u & 0x8000000000000000ULL) | 0x3fe0000000000000ULL;		// copysign( 0.5, x )
 58				result = (int32_t) (x + u.d);
 59#else
 60				{
 61					static const double	half[2] = { 0.5, -0.5 };
 62					result = (int32_t)( x + half[ x < 0.0 ] );
 63				}
 64#endif
 65			}
 66			return result;
 67
 68		case	kLazyFlags:
 69#if defined( AVOID_PIC )				
 70			u.d = x;
 71			u.u = (u.u & 0x8000000000000000ULL) | 0x3fe0000000000000ULL;		// copysign( 0.5, x )
 72			result = (int32_t) (x + u.d);
 73#else
 74			{
 75				static const double	half[2] = { 0.5, -0.5 };
 76				result = (int32_t)( x + half[ x < 0.0 ] );
 77			}
 78#endif
 79			return result;
 80
 81		case	kNoFlags:
 82			{
 83				u.d = x;
 84				uint64_t fabsx = u.u & 0x7fffffffffffffffULL;
 85				int32_t exp = (int32_t)(fabsx >> 52);
 86				result = 0;
 87
 88				if( exp >= 1022U )
 89				{ // a lookup table can be used to make this part faster, see powf implementation.
 90					int32_t leftShift = exp - (1023+52);
 91					int32_t rightShift = -leftShift;
 92					int32_t mask = leftShift >> 31;
 93					int32_t signX = (int32_t)((u.u ^ fabsx) >> 32);
 94					leftShift &= ~mask;
 95					rightShift &= mask;
 96					signX >>= 31;
 97	
 98					// convert float to fixed point
 99					int64_t r = ( u.u & 0x000fffffffffffffULL) | 0x0010000000000000ULL;
100
101					// round
102					r += 0x0008000000000000ULL >> (exp - 1022);
103
104					// correct position of fixed point
105					r >>= rightShift;
106					r <<= leftShift;
107
108					result = (int32_t) r;
109					
110					//fix sign
111					result ^= signX;
112					result -= signX;
113				}
114				return result;
115			}
116			break;
117	}
118	
119	return 0;
120}
121
// Compliant implementation of C99 lrint for ARM VFP.
// The conversion is done by a single FTOSID instruction; the integer result is
// produced in a single-precision VFP register and then copied to a core register.
static inline long lrint_private( double x ) __attribute__ ((always_inline));
static inline long lrint_private( double x )
{
	register float intBits;		// VFP register holding the converted integer's bit pattern
	register long converted;

	// double -> signed 32-bit integer; %P1 names the double-precision register that holds x
	__asm__ __volatile__ ( "ftosid %0, %P1" : "=w" (intBits) : "w" (x) );

	// move the raw 32 bits from the VFP register to a core register, without any conversion
	__asm__ __volatile__ ( "fmrs %0, %1" : "=r" (converted) : "w" (intBits) );

	return converted;
}
134
135#endif	/* D2I_H */