this repo has no description
at fixPythonPipStalling 255 lines 11 kB view raw
1 2/* 3 * expl.s 4 * 5 * by Ian Ollmann 6 * 7 * C99 expl for Mac OS X for Intel 8 */ 9 10#include "abi.h" 11#include "machine/asm.h" 12 13.literal4 14inf: .long 0x7f800000 15minf: .long 0xff800000 16 17.const 18.align 4 19maxldbl: .quad 0xffffffffffffffff, 0x7ffe 20minldbl: .quad 0x8000000000000000, 0x0001 21ln2_hi: .quad 0xB17217F7D1D00000, 0x3ffe 22ln2_lo: .quad 0x8654361C4C67FC0D, 0xbFCE 23 24 25#if defined( __i386__ ) 26 #define RELATIVE_ADDR( _a ) ((_a) - 0b)(%ecx) 27#else 28 #define RELATIVE_ADDR( _a ) _a(%rip) 29#endif 30 31.text 32ENTRY( expl ) 33 fldt FRAME_SIZE( STACKP ) // { x } 34 movswl 8+FRAME_SIZE( STACKP ), %eax // sign + exponent 35 movl %eax, %edx // sign + exponent 36 andl $0x7fff, %eax // exponent 37 subl $(16383-64), %eax // subtract out most of the bias -- push expoents less than -64 negative 38 cmpl $(64+14), %eax // if( exponent < -64 || exponent > 14 || isnan(x) 39 ja 2f // goto 2 40 41 fldl2e // { log2(e), x } 42 andl $0x80000000, %edx // signof (x) 43 fmul %st(1), %st(0) // { x * log2(e), x } 44 orl $0x3f000000, %edx // copysign( 0.5f, x ) 45 movl %edx, FRAME_SIZE(STACKP) 46 fadds FRAME_SIZE( STACKP ) // { x * log2(e) + copysign( 0.5, x ), x } 47 48 //truncate the top of stack 49#if defined( __SSE3__ ) 50 fisttpl FRAME_SIZE(STACKP) // { x } 51 movl FRAME_SIZE(STACKP), %edx 52 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x } 53#else 54 fstpl FRAME_SIZE(STACKP) // { x } 55 cvtsd2si FRAME_SIZE(STACKP), %edx 56 movl %edx, FRAME_SIZE(STACKP) 57 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x } 58#endif 59 60 // PIC 61#if defined( __i386__ ) 62 call 0f 630: popl %ecx 64#endif 65 66 // reduce x to the fractional part 67 fldt RELATIVE_ADDR(ln2_hi) // { ln(2) hi part, round(2(x * log2(e)), x } 68 fmul %st(1), %st(0) // { ln(2) hi part * round(2(x * log2(e)), round(2(x * log2(e)), x } 69 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) } 70 fldt RELATIVE_ADDR(ln2_lo) // { ln2 71 fmul %st(1), %st(0) // { round(2(x * log2(e)) * ln(2) lo part, round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) } 72 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), reduced x } 73 fxch // { reduced x, round(2(x * log2(e)) } 74 75 // multiply by log2(e) 76 fldl2e // { log2(e), x - ln(2) hi part * round(2(x * log2(e)) - round(2(x * log2(e)) * ln(2) lo part } 77 fmulp // { log2(e) * reduced x, round(2(x * log2(e)) } 78 79 // calculate 2**(log2(e) * reduced x ) 80 f2xm1 // { 2**f - 1, round(2(x * log2(e)) } 81 fld1 // { 1, 2**f - 1, round(2(x * log2(e)) } 82 faddp // { 2**f, round(2(x * log2(e)) } 83 84 // scale results 85 fscale // { result, , round(2(x * log2(e)) } 86 fstp %st(1) 87 88 // The scale may fail to set underflow. Check to see if we need to do that 89 fldt RELATIVE_ADDR( minldbl ) // { 0x1.0p-16382, result } 90 fucomip %st(1), %st(0) // { 0x1.0p-16382, result } 91 ja 1f 92 93 // Nope! We're done 94 ret 95 961: // set underflow 97 fldt RELATIVE_ADDR( minldbl ) // { 0x1.0p-16382, result } 98 fmul %st(0), %st(0) 99 fstp %st(0) 100 ret 101 102//0x1.p-64 > |x| || 0x1.p14 < |x| || isnan(x) 1032: jg 3f 104 105 // 0x1.p-64 > |x| 106 fld1 // { 1, x } 107 faddp // { 1 + x } 108 ret 109 110 111// 0x1.p14 < |x| || isnan(x) 1123: 113#if defined( __i386__ ) 114 call 0f 1150: popl %ecx 116#endif 117 cmpl $0, %edx // if ( x < 0 ) 118 jl 4f // goto 4 119 120 // 0x1.p14 < x || isnan(x) 121 fldt RELATIVE_ADDR(maxldbl) // { LDBL_MAX, x } 122 fmulp // { x * LDBL_MAX } set overflow, Inf or quiet nan 123 ret 124 125 126 // -0x1.p14 > x || isnan(x) 1274: flds RELATIVE_ADDR( minf ) // { -inf, x } set overflow, Inf or quiet nan 128 fucomip %st(1), %st(0) // { x } 129 je 6f 130 131 // set underflow, return 0 1325: fldt RELATIVE_ADDR( minldbl ) // { LDBL_MIN, x } 133 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x } 134 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x } 135 fchs // { -LDBL_MIN, LDBL_MIN * x } 136 fmulp // { -LDBL_MIN * (LDBL_MIN * x) } 137 ret 138 139 140 // x == -inf or nan 1416: jp 5b 142 143 // x == -inf 144 fstp %st(0) 145 fldz 146 ret 147 148 149 150 151ENTRY( expl$fenv_access_off ) 152 fldt FRAME_SIZE( STACKP ) // { x } 153 movswl 8+FRAME_SIZE( STACKP ), %eax // sign + exponent 154 movl %eax, %edx // sign + exponent 155 andl $0x7fff, %eax // exponent 156 subl $(16383-64), %eax // subtract out most of the bias -- push expoents less than -64 negative 157 cmpl $(64+14), %eax // if( exponent < -64 || exponent > 14 || isnan(x) 158 ja 2f // goto 2 159 160 fldl2e // { log2(e), x } 161 andl $0x80000000, %edx // signof (x) 162 fmul %st(1), %st(0) // { x * log2(e), x } 163 orl $0x3f000000, %edx // copysign( 0.5f, x ) 164 movl %edx, FRAME_SIZE(STACKP) 165 fadds FRAME_SIZE( STACKP ) // { x * log2(e) + copysign( 0.5, x ), x } 166 167 //truncate the top of stack 168#if defined( __SSE3__ ) 169 fisttpl FRAME_SIZE(STACKP) // { x } 170 movl FRAME_SIZE(STACKP), %edx 171 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x } 172#else 173 fstpl FRAME_SIZE(STACKP) // { x } 174 cvtsd2si FRAME_SIZE(STACKP), %edx 175 movl %edx, FRAME_SIZE(STACKP) 176 fildl FRAME_SIZE(STACKP) // { round(2(x * log2(e)), x } 177#endif 178 179 // PIC 180#if defined( __i386__ ) 181 call 0f 1820: popl %ecx 183#endif 184 185 // reduce x to the fractional part 186 flds RELATIVE_ADDR(ln2_hi) // { ln(2) hi part, round(2(x * log2(e)), x } 187 fmul %st(1), %st(0) // { ln(2) hi part * round(2(x * log2(e)), round(2(x * log2(e)), x } 188 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) } 189 fldt RELATIVE_ADDR(ln2_lo) // { ln2 190 fmul %st(1), %st(0) // { round(2(x * log2(e)) * ln(2) lo part, round(2(x * log2(e)), x - log2(e) hi part * round(2(x * log2(e)) } 191 fsubrp %st(0), %st(2) // { round(2(x * log2(e)), reduced x } 192 fxch // { reduced x, round(2(x * log2(e)) } 193 194 // multiply by log2(e) 195 fldl2e // { log2(e), x - ln(2) hi part * round(2(x * log2(e)) - round(2(x * log2(e)) * ln(2) lo part } 196 fmulp // { log2(e) * reduced x, round(2(x * log2(e)) } 197 198 // calculate 2**(log2(e) * reduced x ) 199 f2xm1 // { 2**f - 1, round(2(x * log2(e)) } 200 fld1 // { 1, 2**f - 1, round(2(x * log2(e)) } 201 faddp // { 2**f, round(2(x * log2(e)) } 202 203 // scale results 204 fscale // { result, , round(2(x * log2(e)) } 205 fstp %st(1) 206 ret 207 208 209//0x1.p-64 > |x| || 0x1.p14 < |x| || isnan(x) 2102: jg 3f 211 212 // 0x1.p-64 > |x| 213 fld1 // { 1, x } 214 faddp // { 1 + x } 215 ret 216 217 218// 0x1.p14 < |x| || isnan(x) 2193: 220#if defined( __i386__ ) 221 call 0f 2220: popl %ecx 223#endif 224 cmpl $0, %edx // if ( x < 0 ) 225 jl 4f // goto 4 226 227 // 0x1.p14 < x || isnan(x) 228 fldt RELATIVE_ADDR(maxldbl) // { LDBL_MAX, x } 229 fmulp // { x * LDBL_MAX } set overflow, Inf or quiet nan 230 ret 231 232 233 // -0x1.p14 > x || isnan(x) 2344: flds RELATIVE_ADDR( minf ) // { -inf, x } set overflow, Inf or quiet nan 235 fucomip %st(1), %st(0) // { x } 236 je 6f 237 238 // set underflow, return 0 2395: fldt RELATIVE_ADDR( minldbl ) // { LDBL_MIN, x } 240 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x } 241 fmul %st(0), %st(1) // { LDBL_MIN, LDBL_MIN * x } 242 fchs // { -LDBL_MIN, LDBL_MIN * x } 243 fmulp // { -LDBL_MIN * (LDBL_MIN * x) } 244 ret 245 246 247 // x == -inf or nan 2486: jp 5b 249 250 // x == -inf 251 fstp %st(0) 252 fldz 253 ret 254 255