/* xmmlibm_prefix.h — basic types and SSE helper macros used internally in xmmLibm. */
//
// Basic types used internally in xmmLibm
//

#ifndef __XMMLIBM_PREFIX_H__
#define __XMMLIBM_PREFIX_H__

#include <stdint.h>
#include <stdbool.h>    // the _mm_ucomieq_* macros below expand to `bool`

// The types we use.
// GCC/Clang generic vector types, each 16 bytes wide (one XMM register).
typedef float    xFloat  __attribute__ ((vector_size (16)));
typedef double   xDouble __attribute__ ((vector_size (16)));
typedef int32_t  xSInt32 __attribute__ ((vector_size (16)));
typedef uint32_t xUInt32 __attribute__ ((vector_size (16)));
typedef int64_t  xSInt64 __attribute__ ((vector_size (16)));
typedef uint64_t xUInt64 __attribute__ ((vector_size (16)));
typedef double   DoubleConstant_sd;    // scalar constant, paired with GET_CONSTANT_sd
typedef xDouble  DoubleConstant_pd;    // packed constant, paired with GET_CONSTANT_pd

// Bit-pattern access to a double via a union (avoids strict-aliasing casts)
typedef union
{
    uint64_t u;
    double   d;
}hexdouble;

// Bit-pattern access to a float via a union (avoids strict-aliasing casts)
typedef union
{
    uint32_t u;
    float    f;
}hexfloat;

#if defined( __i386__ ) || defined( __x86_64__ )
    // SSE/SSE2 intrinsics — only available (and only used) on x86, so the
    // includes live inside the architecture guard to keep the header
    // compilable on other targets.
    #include <xmmintrin.h>
    #include <emmintrin.h>

    // MXCSR register layout: sticky exception flags in bits 0-5,
    // exception masks in bits 7-12, rounding control in bits 13-14.
    #define DEFAULT_MXCSR   0x1F80
    #define INVALID_FLAG    0x0001
    #define DENORMAL_FLAG   0x0002
    #define DIVIDE_0_FLAG   0x0004
    #define OVERFLOW_FLAG   0x0008
    #define UNDERFLOW_FLAG  0x0010
    #define INEXACT_FLAG    0x0020
    #define ALL_FLAGS       0x3F
    #define DEFAULT_MASK    ( DEFAULT_MXCSR | ALL_FLAGS )
    #define INVALID_MASK    0x0080
    #define DENORMAL_MASK   0x0100
    #define DIVIDE_0_MASK   0x0200
    #define OVERFLOW_MASK   0x0400
    #define UNDERFLOW_MASK  0x0800
    #define INEXACT_MASK    0x1000
    #define ALL_MASKS       0x1F80

    // Rounding-control field values (bits 13-14 of MXCSR).
    #define ROUND_TO_NEAREST        0x0000
    #define ROUND_TO_ZERO           0x6000
    #define ROUND_TO_INFINITY       0x4000
    #define ROUND_TO_NEG_INFINITY   0x2000
    #define ROUND_MASK              ROUND_TO_ZERO   // 0x6000 covers both RC bits

    static const xDouble minusZeroD = {-0.0, -0.0};
    static const xFloat  minusZeroF = {-0.0f, -0.0f, -0.0f, -0.0f };

    // Macros to handle conversions between scalar types and vector types.
    // Older Apple GCC builds (< 5342) lack _mm_set_sd/_mm_cvtsd_f64 and
    // friends, so they go through load/store intrinsics instead.
#if __APPLE_CC__-0 < 5342
    #define DOUBLE_2_XDOUBLE( _d )  _mm_load_sd( &(_d) )
    #define FLOAT_2_XFLOAT( _f )    _mm_load_ss( &(_f) )
    #define XDOUBLE_2_DOUBLE( _xd ) ({ double _d; _mm_store_sd( &_d, _xd ); /*return*/ _d; })
    #define XFLOAT_2_FLOAT( _xf )   ({ float _f; _mm_store_ss( &_f, _xf ); /*return*/ _f; })
    #define FLOAT_2_XDOUBLE( _f )   ({ xFloat _xf = _mm_load_ss( &(_f) ); xDouble _xd = _mm_cvtss_sd( (xDouble) _xf, _xf ); /*return*/ _xd; })
    #define XDOUBLE_2_FLOAT( _xd )  ({ float _f; xDouble _d = _xd; xFloat _xf = _mm_cvtsd_ss( (xFloat) _d, _d ); _mm_store_ss( &_f, _xf ); /* return*/ _f; })
#else
    #define DOUBLE_2_XDOUBLE( _d )  _mm_set_sd( _d )
    #define FLOAT_2_XFLOAT( _f )    _mm_set_ss( _f )
    #define XDOUBLE_2_DOUBLE( _xd ) _mm_cvtsd_f64( _xd )
    #define XFLOAT_2_FLOAT( _xf )   _mm_cvtss_f32( _xf )
    #define FLOAT_2_XDOUBLE( _f )   ({ xFloat _xf = FLOAT_2_XFLOAT(_f); xDouble _xd = _mm_cvtss_sd( (xDouble) _xf, _xf ); /*return*/ _xd; })
    #define XDOUBLE_2_FLOAT( _xd )  ({ float _f; xDouble _d = _xd; xFloat _xf = _mm_cvtsd_ss( (xFloat) _d, _d ); _f = XFLOAT_2_FLOAT( _xf); /* return*/ _f; })
#endif

    // Macros to do constant expansion: emit the scalar once (_sd) or
    // splatted into both lanes (_pd), matching DoubleConstant_sd/_pd.
    #define GET_CONSTANT_sd( _p )   _p
    #define GET_CONSTANT_pd( _p )   { _p, _p }

    // Macros to extend the set of intrinsics to allow for memory operands:
    // each one performs the scalar-double op with the second operand read
    // directly from memory (*_m64), avoiding a separate load.
    #define _mm_cmplt_sdm( _xd, _m64 ) ({ register xDouble _r = _xd; asm( "cmpltsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_cmple_sdm( _xd, _m64 ) ({ register xDouble _r = _xd; asm( "cmplesd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    // gt/ge have no direct SSE encoding: load the memory operand and use
    // the reversed lt/le comparison instead.
    #define _mm_cmpgt_sdm( _xd, _m64 ) ({ register xDouble _r; asm( "movsd %1, %0" : "=x" (_r) : "m" (*(_m64)) ); _r = (xDouble) _mm_cmplt_sd( _r, _xd ); /*return*/ _r; })
    #define _mm_cmpge_sdm( _xd, _m64 ) ({ register xDouble _r; asm( "movsd %1, %0" : "=x" (_r) : "m" (*(_m64)) ); _r = (xDouble) _mm_cmple_sd( _r, _xd ); /*return*/ _r; })
    #define _mm_cmpeq_sdm( _xd, _m64 ) ({ register xDouble _r = _xd; asm( "cmpeqsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_cmpne_sdm( _xd, _m64 ) ({ register xDouble _r = _xd; asm( "cmpneqsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_add_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "addsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_sub_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "subsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_mul_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "mulsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_max_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "maxsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_min_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "minsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    #define _mm_div_sdm( _xd, _m64 )   ({ register xDouble _r = _xd; asm( "divsd %1, %0" : "+x" (_r) : "m" (*(_m64)) ); /*return*/ _r; })
    // Bitwise select: lane-wise (_mask ? _xd2 : _xd1).
    #define _mm_sel_pd( _xd1, _xd2, _mask) ({ xDouble _m = _mask; _m = _mm_or_pd( _mm_and_pd(_xd2, _m ), _mm_andnot_pd( _m, _xd1 ) ); /* return */ _m; })

    // Unordered scalar equality tests returning a bool (ZF after ucomi*).
    #define _mm_ucomieq_sdm( _xd, _m64 ) ({ bool _r; asm("ucomisd %1, %2\n\tsete %b0" : "=q" (_r) : "m" (*(_m64)), "X" (_xd)); /*return */ _r; })
    #define _mm_ucomieq_ssm( _xf, _m32 ) ({ bool _r; asm("ucomiss %1, %2\n\tsete %b0" : "=q" (_r) : "m" (*(_m32)), "X" (_xf)); /*return */ _r; })

    // Single-precision counterparts of the memory-operand macros above.
    #define _mm_cmplt_ssm( _xf, _m32 ) ({ register xFloat _r = _xf; asm( "cmpltss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_cmple_ssm( _xf, _m32 ) ({ register xFloat _r = _xf; asm( "cmpless %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_cmpgt_ssm( _xf, _m32 ) ({ register xFloat _r; asm( "movss %1, %0" : "=x" (_r) : "m" (*(_m32)) ); _r = (xFloat) __builtin_ia32_cmpltss( _r, _xf ); /*return*/ _r; })
    #define _mm_cmpge_ssm( _xf, _m32 ) ({ register xFloat _r; asm( "movss %1, %0" : "=x" (_r) : "m" (*(_m32)) ); _r = (xFloat) __builtin_ia32_cmpless( _r, _xf ); /*return*/ _r; })
    #define _mm_cmpeq_ssm( _xf, _m32 ) ({ register xFloat _r = _xf; asm( "cmpeqss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_cmpne_ssm( _xf, _m32 ) ({ register xFloat _r = _xf; asm( "cmpneqss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_add_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "addss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_sub_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "subss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_mul_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "mulss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_max_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "maxss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_min_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "minss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    #define _mm_div_ssm( _xf, _m32 )   ({ register xFloat _r = _xf; asm( "divss %1, %0" : "+x" (_r) : "m" (*(_m32)) ); /*return*/ _r; })
    // Bitwise select: lane-wise (_mask ? _xf2 : _xf1).
    #define _mm_sel_ps( _xf1, _xf2, _mask) ({ xFloat _m = _mask; _m = _mm_or_ps( _mm_and_ps(_xf2, _m ), _mm_andnot_ps( _m, _xf1 ) ); /* return */ _m; })

    // Test the low-lane comparison-mask bit produced by the cmp* macros.
    #define _mm_istrue_sd( _test )  (1 == ( _mm_movemask_pd( _test ) & 1 ))
    #define _mm_isfalse_sd( _test ) (0 == ( _mm_movemask_pd( _test ) & 1 ))

    #define _mm_istrue_ss( _test )  (1 == ( _mm_movemask_ps( _test ) & 1 ))
    #define _mm_isfalse_ss( _test ) (0 == ( _mm_movemask_ps( _test ) & 1 ))

    // Fast scalb: builds 2**_i by placing the biased exponent (_i + 1023)
    // into the exponent field of a double. Doesn't check for overflow.
    #define twoToTheM( _i ) ({ xDouble _xi = (xDouble) _mm_slli_epi64( _mm_cvtsi32_si128( (_i) + 1023 ), 52 ); /*return*/ _xi; })

    // Work around for broken _mm_sqrt_sd intrinsic in gcc-4.0.
    #define _MM_SQRT_SD( x ) __builtin_ia32_sqrtsd( x )

    // NOTE(review): original lacked the ';' after the result expression,
    // which is a syntax error in GCC statement expressions — fixed.
    #define _mm_cvtsi64_si128( x ) ({ int64_t _x = x; xSInt64 _r; asm volatile( "movq %1, %0" : "=x" (_r) : "m" (*&_x)); _r; })

    // REQUIRED_* macros: perform the arithmetic through volatile asm so the
    // operation (and its floating-point status-flag side effects) cannot be
    // folded or eliminated by the optimizer.
    #define REQUIRED_ADD_sd( _x, _y )       ({ xDouble __x = _x; xDouble __y = _y; asm volatile( "addsd %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    #define REQUIRED_ADD_sdm( _x, _m64 )    ({ xDouble __x = _x; asm volatile( "addsd %1, %0" : "+x" (__x ) : "m" (*(_m64))); /*return*/ __x; })
    #define REQUIRED_MULTIPLY_sd( _x, _y )  ({ xDouble __x = _x; xDouble __y = _y; asm volatile( "mulsd %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    // NOTE(review): original declared `xDouble __y = _y;` here, but this
    // macro has no _y parameter — expansion could never compile. Removed,
    // matching REQUIRED_ADD_sdm/REQUIRED_DIVIDE_sdm.
    #define REQUIRED_MULTIPLY_sdm( _x, _m64 ) ({ xDouble __x = _x; asm volatile( "mulsd %1, %0" : "+x" (__x ) : "m" (*(_m64))); /*return*/ __x; })
    #define REQUIRED_DIVIDE_sd( _x, _y )    ({ xDouble __x = _x; xDouble __y = _y; asm volatile( "divsd %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    #define REQUIRED_DIVIDE_sdm( _x, _m64 ) ({ xDouble __x = _x; asm volatile( "divsd %1, %0" : "+x" (__x ) : "m" (*(_m64))); /*return*/ __x; })

    #define REQUIRED_ADD_ss( _x, _y )       ({ xFloat __x = _x; xFloat __y = _y; asm volatile( "addss %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    #define REQUIRED_ADD_ssm( _x, _m32 )    ({ xFloat __x = _x; asm volatile( "addss %1, %0" : "+x" (__x ) : "m" (*(_m32))); /*return*/ __x; })
    #define REQUIRED_MULTIPLY_ss( _x, _y )  ({ xFloat __x = _x; xFloat __y = _y; asm volatile( "mulss %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    // NOTE(review): same undeclared-_y bug as REQUIRED_MULTIPLY_sdm — fixed.
    #define REQUIRED_MULTIPLY_ssm( _x, _m32 ) ({ xFloat __x = _x; asm volatile( "mulss %1, %0" : "+x" (__x ) : "m" (*(_m32))); /*return*/ __x; })
    #define REQUIRED_DIVIDE_ss( _x, _y )    ({ xFloat __x = _x; xFloat __y = _y; asm volatile( "divss %1, %0" : "+x" (__x ) : "x" (__y)); /*return*/ __x; })
    #define REQUIRED_DIVIDE_ssm( _x, _m32 ) ({ xFloat __x = _x; asm volatile( "divss %1, %0" : "+x" (__x ) : "m" (*(_m32))); /*return*/ __x; })

    // Raise the IEEE invalid exception: pcmpeqd builds an all-ones (NaN)
    // pattern, then cmpltsd on it performs a signaling compare of NaNs.
    #define SET_INVALID_FLAG() { __m128d __x = _mm_setzero_pd(); __asm__ __volatile__( "pcmpeqd %0, %0 \n\t cmpltsd %0, %0" : "+x" (__x) ); }

#if defined( __SSE3__ )
    // Truncating long double -> int64 conversion via SSE3 fisttp (no
    // rounding-mode change needed).
    #define CVTTLD_SI64( _ld ) ({ long double _x = _ld; int64_t _r; asm( "fldt %1 \n\t fisttpll %0" : "=m" (*&_r): "m" (*&_x) ); /*return*/ _r; })
#else
    #warning SSE3 disabled
    static inline int64_t CVTTLD_SI64( long double ld ){ return (int64_t) ld; }
#endif
    // Round-to-current-mode long double -> int64 conversion (x87 fistp).
    #define CVTLD_SI64( _ld ) ({ long double _x = _ld; int64_t _r; asm( "fldt %1 \n\t fistpll %0" : "=m" (*&_r): "m" (*&_x) ); /*return*/ _r; })

    // Branch-prediction hints.
    #define EXPECT_TRUE( _a )  __builtin_expect( (_a), 1 )
    #define EXPECT_FALSE( _a ) __builtin_expect( (_a), 0 )


#endif /* __i386__ || __x86_64__ */

#if defined( XMMLIBM_DEBUG )
    #warning always inline disabled
    #define ALWAYS_INLINE
#else
    #define ALWAYS_INLINE __attribute__ ((always_inline))
#endif

#endif /* __XMMLIBM_PREFIX_H__ */