this repo has no description
at fixPythonPipStalling 135 lines 3.5 kB view raw
1 2/* 3 * d2i.h 4 * 5 * by Ian Ollmann 6 * 7 * C utility routines for rounding double to integer, with round to nearest like behavior. 8 * We do not specify here which way ties round. Functions are rounding mode independent, except that 9 * the value +-0x1.fffffffffffffp-2 may round away from zero depending on rounding mode. 10 * 11 * The C entry points f2i and d2i assume the following: 12 * 13 * All values are in the range: |x| <= 0x1.0p31 - 1 14 * NaN does not occur 15 * x is not large. See comments under Flags_Behavior for more. 16 * 17 * For double -> int64 conversions, please see <rdar://problems/5316332&5316398> 18 */ 19 20#ifndef D2I_H 21#define D2I_H 22 23#ifndef __arm__ 24 #error header valid for __arm__ only 25#endif 26 27#include <stdint.h> 28 29#ifndef FLAGS_BEHAVIOR 30#define FLAGS_BEHAVIOR 31typedef enum 32{ 33 kCorrectFlags = -1, // sets inexact when and only when it should. Returns the wrong answer if |x| >= 0x1.0p31. 34 kLazyFlags = 0, // I don't care whether inexact is set or not. Returns the wrong answer if |x| >= 0x1.0p31. 35 kNoFlags = 1 // never sets flags. Returns the wrong answer if |x| >= 0x1.0p31. 36}Flags_Behavior; 37#endif 38 39static inline int32_t d2i_cheesy( double x, Flags_Behavior behavior ) __attribute__ ((always_inline)); 40static inline int32_t d2i_cheesy( double x, Flags_Behavior behavior ) 41{ 42 double rounded; 43 int32_t result; 44 union{ double d; uint64_t u; }u; 45 46 // switch should be optimized away 47 switch( behavior ) 48 { 49 case kCorrectFlags: 50 51 result = (int32_t) x; 52 rounded = (float) result; 53 if( x != rounded ) 54 { 55#if defined( AVOID_PIC ) 56 u.d = x; 57 u.u = (u.u & 0x8000000000000000ULL) | 0x3fe0000000000000ULL; // copysign( 0.5, x ) 58 result = (int32_t) (x + u.d); 59#else 60 { 61 static const double half[2] = { 0.5, -0.5 }; 62 result = (int32_t)( x + half[ x < 0.0 ] ); 63 } 64#endif 65 } 66 return result; 67 68 case kLazyFlags: 69#if defined( AVOID_PIC ) 70 u.d = x; 71 u.u = (u.u & 0x8000000000000000ULL) | 0x3fe0000000000000ULL; // copysign( 0.5, x ) 72 result = (int32_t) (x + u.d); 73#else 74 { 75 static const double half[2] = { 0.5, -0.5 }; 76 result = (int32_t)( x + half[ x < 0.0 ] ); 77 } 78#endif 79 return result; 80 81 case kNoFlags: 82 { 83 u.d = x; 84 uint64_t fabsx = u.u & 0x7fffffffffffffffULL; 85 int32_t exp = (int32_t)(fabsx >> 52); 86 result = 0; 87 88 if( exp >= 1022U ) 89 { // a lookup table can be used to make this part faster, see powf implementation. 90 int32_t leftShift = exp - (1023+52); 91 int32_t rightShift = -leftShift; 92 int32_t mask = leftShift >> 31; 93 int32_t signX = (int32_t)((u.u ^ fabsx) >> 32); 94 leftShift &= ~mask; 95 rightShift &= mask; 96 signX >>= 31; 97 98 // convert float to fixed point 99 int64_t r = ( u.u & 0x000fffffffffffffULL) | 0x0010000000000000ULL; 100 101 // round 102 r += 0x0008000000000000ULL >> (exp - 1022); 103 104 // correct position of fixed point 105 r >>= rightShift; 106 r <<= leftShift; 107 108 result = (int32_t) r; 109 110 //fix sign 111 result ^= signX; 112 result -= signX; 113 } 114 return result; 115 } 116 break; 117 } 118 119 return 0; 120} 121 122// Compliant implementation of C99 lrint 123static inline long lrint_private( double x ) __attribute__ ((always_inline)); 124static inline long lrint_private( double x ) 125{ 126 register float temp; 127 register long result; 128 129 __asm__ __volatile__ ( "ftosid %0, %P1" : "=w" (temp) : "w" (x) ); 130 __asm__ __volatile__ ( "fmrs %0, %1" : "=r" (result) : "w" (temp) ); 131 132 return result; 133} 134 135#endif /* D2I_H */