this repo has no description
at fixPythonPipStalling 284 lines 9.1 kB view raw
/*
 *  lroundl.s
 *
 *      by Ian Ollmann
 *
 *  Apple Inc. Copyright (c) 2007. All rights reserved.
 *
 */

/*
 *  Entry points:  lroundl, llroundl
 *
 *  Syntax:  AT&T operand order, run through the C preprocessor.
 *
 *  Input:   one 80-bit x87 extended value x, read from FRAME_SIZE( STACKP ).
 *           FRAME_SIZE and STACKP are macros from abi.h (not visible in this
 *           file); presumably they address the first stack argument.
 *
 *  Output:  x rounded to the nearest integer, halfway cases away from zero.
 *             x86_64 lroundl / llroundl : %rax
 *             32-bit lroundl            : %eax
 *             32-bit llroundl           : %edx:%eax
 *           Out of range input returns the most positive / most negative
 *           integer of the matching sign; NaN returns the most negative one.
 *
 *  Method:  the biased exponent is rebased by 0x3ffe so that |x| < 0.5 turns
 *           negative.  One unsigned compare then sends "too small", "too
 *           large" and NaN to label 1, and a signed branch on the same flags
 *           separates "too small" from the rest.  In range values are rounded
 *           with integer shifts on the significand: shift until the 0.5 bit
 *           is bit 0, shift once more and add that bit back in.  The sign is
 *           applied afterwards with an xor / subtract against a 0 or -1 mask.
 *
 *  Floating point flags are raised with throw-away x87 operations:
 *             fistp of x            -> inexact (tiny x) or invalid (overflow, NaN)
 *             |x| + 0x1.0p63        -> inexact when x has fraction bits
 *
 *  Clobbers: flags, the argument slot on the stack (scratch fistp stores),
 *            x86_64: %rax %rcx %rdx
 *            32-bit lroundl: %eax %ecx %edx
 *            32-bit llroundl: %eax %ecx %edx %xmm0 %xmm1 %xmm2
 *           Every path pops the x87 register it pushed.
 */

#include "abi.h"
#include <machine/asm.h>

.align 2
.literal4
two63:      .long       0x5f000000              // 0x1.0p63f
mtwo63:     .long       0xdf000000              // -0x1.0p63f
one:        .long       1                       // integer 1, mask for the rounding bit in 32-bit llroundl
inf:        .long       0x7f800000              // +infinity as a float; not referenced in this file

.align 3
.literal8
cutoff32:   .double     2147483647.5            // 2**31-0.5
mcutoff32:  .double     -2147483648.5           // -(2**31)-0.5

.align 4
.literal16
cutoff:     .quad       0xffffffffffffffff, 0x403d      // 0x1.0p63 - 0.5 as an 80-bit extended value
sign:       .quad       0x0, 0xffffffffffffffff         // sign masks indexed by the sign bit: 0 -> 0, 1 -> all ones

.text
#if defined( __x86_64__ )

// 64-bit build: both names share one body, the result is 64 bits wide.
ENTRY( lroundl )
ENTRY( llroundl )
        movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + exponent word of x
        andl    $0x7fff, %edx                   // exponent of x
        movq    FRAME_SIZE( STACKP ), %rax      // 64-bit significand of x
        subl    $0x3ffe, %edx                   // push exponents less than -1 negative
        fldt    FRAME_SIZE( STACKP )            // { x }
        cmpl    $(63+1), %edx                   // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
        jae     1f                              //      goto 1   (flags are reused by the jge at 1)

        // 0.5 <= |x| < 0x1.0p63
        fldt    cutoff( %rip )                  // { 0x1.0p63 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        je      3f                              // x == 0x1.0p63 - 0.5 rounds up to 0x1.0p63: overflow

        // Shift the significand right so that units bit is at units + 1 position
        movl    $63, %ecx
        subl    %edx, %ecx                      // 63 - (exponent+1)
        shrq    %cl, %rax                       // shift so that the units bit is at the +1 position
        movq    %rax, %rdx                      // set aside a copy
        shrq    $1, %rax                        // finish with a shift by 1 -- the total can reach 64, which one shift cannot encode
        andq    $1, %rdx                        // isolate the leading fractional bit
        addq    %rdx, %rax                      // round the magnitude up when that bit is set

        // fix sign
        movswq  8+FRAME_SIZE( STACKP ), %rdx    // read the sign + exponent, sign extended to 64 bits
        sarq    $16, %rdx                       // remove exponent: 0 for positive x, -1 for negative x
        xorq    %rdx, %rax                      // flip the bits of the result when negative
        subq    %rdx, %rax                      // add 1 when negative to complete the twos-complement negate

        // set inexact as necessary
        fabs                                    // { |x| }
        fadds   two63(%rip)                     // { |x| + 0x1.0p63 }   inexact if x has fraction bits
        fstp    %st(0)                          // throw away numerical result

        ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:      jge     2f                              // signed view of the cmpl above: rebased exponent >= 64

        // |x| < 0.5
        xorq    %rax, %rax                      // result is 0
        fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary, pops x
        ret

// |x| >= 0x1.0p63 || isnan(x)
2:      movswq  8+FRAME_SIZE( STACKP ), %rdx    // sign + exponent, sign extended
        flds    mtwo63( %rip )                  // { -0x1.0p63, x }
        fucomip %st(1), %st(0)                  // { x }   equal or unordered both set ZF
        je      4f
        fistpl  FRAME_SIZE( STACKP )            // set invalid
        shrq    $63, %rdx                       // 1 if x is negative, else 0
        subq    $1, %rdx                        // 0 if x is negative, else -1
        movq    $0x8000000000000000, %rax
        xorq    %rdx, %rax                      // most negative for x < 0, most positive for x > 0
        ret


// 0x1.0p63 - 0.5, positive overflow
3:      fistpl  FRAME_SIZE( STACKP )            // set invalid
        movq    $0x7fffffffffffffff, %rax
        ret

// -0x1.0p63 or nan
4:      jp      5f                              // parity set means the compare was unordered
        fstp    %st(0)                          // exactly -0x1.0p63: representable, no flags raised
        movq    $0x8000000000000000, %rax
        ret

// nan
5:      fistpl  FRAME_SIZE( STACKP )            // set invalid
        movq    $0x8000000000000000, %rax
        ret

#else

// Not __x86_64__: 32-bit code.  Literals are addressed relative to the program
// counter, obtained with a call / pop pair into %ecx.

// lroundl, 32-bit result in %eax
ENTRY( lroundl )
        movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + exponent word of x
        andl    $0x7fff, %edx                   // exponent of x
        movl    4+FRAME_SIZE( STACKP ), %eax    // high 32 bits of the significand
        subl    $0x3ffe, %edx                   // push exponents less than -1 negative
        fldt    FRAME_SIZE( STACKP )            // { x }
        cmpl    $(31+1), %edx                   // if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
        jae     1f                              //      goto 1   (flags are reused by the jge at 1)

        // 0.5 <= |x| < 0x1.0p31
        call    0f                              // fetch the program counter for literal addressing
0:      popl    %ecx
        fldl    (cutoff32-0b)(%ecx)             // { 0x1.0p31 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        jbe     3f                              // x >= 0x1.0p31 - 0.5 rounds to 0x1.0p31: positive overflow

        // set inexact
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }   inexact if x has fraction bits
        fstp    %st(0)                          // throw away numerical result

        // round
        movl    $31, %ecx
        subl    %edx, %ecx                      // 31 - (exponent+1)
        shrl    %cl, %eax                       // units bit now sits at the +1 position
        movl    %eax, %edx                      // set aside a copy
        shrl    $1, %eax                        // finish the shift
        andl    $1, %edx                        // isolate the leading fractional bit
        addl    %edx, %eax                      // round the magnitude up when that bit is set

        // fix sign
        movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + exponent, sign extended
        sarl    $16, %edx                       // 0 for positive x, -1 for negative x
        xorl    %edx, %eax                      // flip the bits of the result when negative
        subl    %edx, %eax                      // add 1 when negative to complete the negate
        ret

// |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x)
1:      jge     2f                              // signed view of the cmpl above: rebased exponent >= 32

        // |x| < 0.5
        xorl    %eax, %eax                      // result is 0
        fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary
        ret

// |x| >= 0x1.0p31 || isnan(x)
2:      movswl  8+FRAME_SIZE( STACKP), %edx     // sign + exponent, sign extended
        call    0f                              // fetch the program counter for literal addressing
0:      popl    %ecx
        fldl    ( mcutoff32-0b)(%ecx)           // { -0x1.0p31 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        jae     4f                              // x <= -0x1.0p31 - 0.5: negative overflow (not taken when unordered)
        fldl    ( cutoff32-0b )(%ecx)           // { 0x1.0p31 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        jbe     3f                              // x >= 0x1.0p31 - 0.5, or unordered (nan)

        // non overflowing result: -0x1.0p31 - 0.5 < x <= -0x1.0p31
        shrl    $31, %edx                       // 1 if x is negative, else 0
        subl    $1, %edx                        // 0 if x is negative, else -1
        movl    $0x80000000, %eax
        xorl    %edx, %eax

        // set inexact
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }
        fstp    %st(0)                          // throw away numerical result
        ret

// positive overflow
// NOTE(review): the overflow paths at 3 and 4 store 16 bits (fistps), not 32.
// Presumably this guarantees the conversion is out of range, and so raises
// invalid, under every x87 rounding mode; a 32-bit store of a value such as
// 2147483647.5 could round into range under directed rounding.  Confirm before
// changing the store width.
3:      jp      5f                              // parity set means the compare was unordered
        fistps  FRAME_SIZE( STACKP )            // set invalid
        movl    $0x7fffffff, %eax
        ret

// negative overflow
4:      fistps  FRAME_SIZE( STACKP )            // set invalid
        movl    $0x80000000, %eax
        ret

// nan
5:      fistpl  FRAME_SIZE( STACKP )            // set invalid
        movl    $0x80000000, %eax
        ret


// llroundl, 64-bit result in %edx:%eax.  The 64-bit integer work is done in
// the low quadword of xmm registers.
ENTRY( llroundl )
        movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + exponent word of x
        andl    $0x7fff, %edx                   // exponent of x
        movq    FRAME_SIZE( STACKP ), %xmm0     // 64-bit significand of x
        subl    $0x3ffe, %edx                   // push exponents less than -1 negative
        fldt    FRAME_SIZE( STACKP )            // { x }
        cmpl    $(63+1), %edx                   // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
        jae     1f                              //      goto 1   (flags are reused by the jge at 1)

        call    0f                              // fetch the program counter for literal addressing
0:      popl    %ecx

        // 0.5 <= |x| < 0x1.0p63
        fldt    (cutoff-0b)( %ecx )             // { 0x1.0p63 - 0.5, x }
        fucomip %st(1), %st(0)                  // { x }
        je      3f                              // x == 0x1.0p63 - 0.5 rounds up to 0x1.0p63: overflow

        // Shift the significand right so that units bit is at units + 1 position
        movl    $63, %eax
        movd    (one-0b)(%ecx), %xmm2           // 1
        subl    %edx, %eax                      // 63 - (exponent+1)
        movd    %eax, %xmm1                     // shift count
        psrlq   %xmm1, %xmm0                    // shift so that the units bit is at the +1 position
        movq    %xmm0, %xmm1                    // set aside a copy
        psrlq   $1, %xmm0                       // finish the shift with a shift right by 1 bit
        pand    %xmm2, %xmm1                    // isolate the leading fractional bit
        paddq   %xmm1, %xmm0                    // round the magnitude up when that bit is set

        // set inexact as necessary
        fabs                                    // { |x| }
        fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 }   inexact if x has fraction bits
        fstp    %st(0)                          // throw away numerical result

        // fix sign
        movswl  8+FRAME_SIZE( STACKP ), %eax    // read the sign + exponent, sign extended
        shrl    $31, %eax                       // 1 if x is negative, else 0
        movq    (sign-0b)(%ecx, %eax,8), %xmm1  // 0 or all ones, picked by the sign bit
        pxor    %xmm1, %xmm0                    // flip the bits of the result when negative
        psubq   %xmm1, %xmm0                    // add 1 when negative to complete the negate
        movd    %xmm0, %eax                     // low half of the result
        psrlq   $32, %xmm0
        movd    %xmm0, %edx                     // high half of the result

        ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:      jge     2f                              // signed view of the cmpl above: rebased exponent >= 64

        // |x| < 0.5
        xorl    %eax, %eax                      // result is 0
        xorl    %edx, %edx
        fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary
        ret

// |x| >= 0x1.0p63 || isnan(x)
2:      movswl  8+FRAME_SIZE( STACKP ), %eax    // sign + exponent, sign extended
        call    0f                              // fetch the program counter for literal addressing
0:      popl    %ecx
        flds    (mtwo63-0b)( %ecx )             // { -0x1.0p63, x }
        fucomip %st(1), %st(0)                  // { x }   equal or unordered both set ZF
        je      4f
        fistpl  FRAME_SIZE( STACKP )            // set invalid
        shrl    $31, %eax                       // 1 if x is negative, else 0
        subl    $1, %eax                        // low half: 0 if x is negative, else 0xffffffff
        movl    $0x80000000, %edx
        xorl    %eax, %edx                      // high half: 0x80000000 if negative, else 0x7fffffff
        ret


// 0x1.0p63 - 0.5, positive overflow
3:      fistpl  FRAME_SIZE( STACKP )            // set invalid
        movl    $-1, %eax
        movl    $0x7fffffff, %edx
        ret

// -0x1.0p63 or nan
4:      jp      5f                              // parity set means the compare was unordered
        fstp    %st(0)                          // exactly -0x1.0p63: representable, no flags raised
        movl    $0x80000000, %edx
        xorl    %eax, %eax
        ret

// nan
5:      fistpl  FRAME_SIZE( STACKP )            // set invalid
        movl    $0x80000000, %edx
        xorl    %eax, %eax
        ret



#endif