this repo has no description
at fixPythonPipStalling 217 lines 8.0 kB view raw
/*
 *  lround.s
 *
 *      by Ian Ollmann
 *
 *  Copyright (c) 2007, Apple Inc.  All Rights Reserved.
 *
 *  Implementation of C99 lround and llround functions for i386 and x86_64.
 *
 *  Syntax: AT&T (op src, dst), run through the C preprocessor.
 *
 *  Both functions round a double to the nearest integer, with halfway cases
 *  rounded away from zero.  The rounding is done with integer shifts on the
 *  significand so the result does not depend on the current rounding mode;
 *  a trailing floating-point operation is issued purely to raise the inexact
 *  flag when x is not already an integer.
 *
 *  x86_64:  argument in %xmm0, result in %rax.  lround and llround share one
 *           body (both entry points label the same code).
 *  i386:    argument read from the stack at FRAME_SIZE(STACKP); lround returns
 *           in %eax, llround returns in %edx:%eax.
 *
 *  ENTRY, FRAME_SIZE and STACKP are expected to come from the included
 *  headers (they are not defined in this file).
 */

#include <machine/asm.h>
#include "abi.h"

.literal8
half:       .quad   0x3fe0000000000000      // 0.5
mtwo63:     .quad   0xc3e0000000000000      // -2**63
mtwo31:     .quad   0xc1e0000000000000      // -2**31
two52:      .quad   0x4330000000000000      // 2**52
twom32:     .quad   0x3df0000000000000      // 2**-32
implicit:   .quad   0x8000000000000000      // bit 63: explicit leading significand bit after <<11; also the sign-bit mask
cutoff:     .double 2147483647.5            // 2**31 - 0.5    (smallest x whose rounded value overflows a 32-bit long)
mcutoff:    .double -2147483648.5           // -(2**31) - 0.5 (largest  x whose rounded value underflows a 32-bit long)

.text
#if defined( __x86_64__ )

//-----------------------------------------------------------------------------
// long lround( double x );  long long llround( double x );        (x86_64)
//
// In:     %xmm0 = x
// Out:    %rax  = x rounded to nearest, ties away from zero
// Clobb:  %rcx, %rdx, flags
//-----------------------------------------------------------------------------
ENTRY( lround )
ENTRY( llround )
    movd        %xmm0,              %rax    // raw bits of x
    movq        %rax,               %rdx    // keep a copy of the raw bits (also read on the overflow path at 2:)
    shrq        $52,                %rax
    andq        $0x7ff,             %rax    // exponent + bias
    subq        $0x3fe,             %rax    // exponent + 1; negative when |x| < 0.5
    cmpq        $64,                %rax    // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
    jae         1f                          //      goto 1   (unsigned compare: negative values land here too)

    // 0.5 <= |x| < 0x1.0p63
    shlq        $11,                %rdx    // put most significant stored bit just below bit 63 (sign bit shifted out)
    orq         implicit(%rip),     %rdx    // make implicit bit explicit

    // shift value right so that the integer (units) bit is at position 1
    // and the 0.5 bit is at position 0
    movq        $63,                %rcx
    subq        %rax,               %rcx    // 63 - (exponent+1)
    shrq        %cl,                %rdx

    addq        $1,                 %rdx    // add 0.5: round half away from zero (operating on |x|)
    shrq        $1,                 %rdx    // move unit bit to correct position

    // Fix sign: rax = (|result| ^ mask) - mask, mask = x < 0 ? -1 : 0
    movd        %xmm0,              %rax
    sarq        $63,                %rax
    movq        %rax,               %rcx
    xorq        %rdx,               %rax
    subq        %rcx,               %rax

    // set inexact as necessary; the converted value in %rdx is discarded
    cvttsd2si   %xmm0,              %rdx
    ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
// Flags are still those of the cmpq above (jae does not modify them):
// signed >= 64 means large or NaN, signed < 64 here means exponent+1 < 0.
1:  jge         2f

    // |x| < 0.5
    cvttsd2si   %xmm0,              %rax    // truncates to 0; raises inexact when x != 0
    ret

    // |x| >= 0x1.0p63 || isnan(x)
2:  ucomisd     %xmm0,              %xmm0   // PF set iff x is NaN
    cvttsd2si   %xmm0,              %rax    // set invalid (as necessary), prepare 0x8000000000000000; EFLAGS unchanged
    jp          3f                          // NaN: return 0x8000000000000000
    negq        %rdx                        // raw bits of x negated: sign bit becomes set iff x was positive
    sarq        $63,                %rdx    // x > 0 ? -1 : 0
    xorq        %rdx,               %rax    // positive overflow -> 0x7fffffffffffffff
3:  ret


#else /* __i386__ */

//-----------------------------------------------------------------------------
// long long llround( double x );                                     (i386)
//
// In:     x on the stack at FRAME_SIZE( STACKP )
// Out:    %edx:%eax = x rounded to nearest, ties away from zero
// Clobb:  %ecx, %xmm0-%xmm4, %xmm6, %xmm7, flags
//-----------------------------------------------------------------------------
ENTRY( llround )
    movl        4+FRAME_SIZE( STACKP ), %eax    // x.hi
    movsd       FRAME_SIZE( STACKP ),   %xmm0   // x
    andl        $0x7fffffff,        %eax        // |x|.hi
    subl        $0x3fe00000,        %eax        // (exponent+1) << 20 plus mantissa bits; negative when |x| < 0.5
    call        0f                              // get PC into %ecx for position-independent data access
0:  popl        %ecx
    cmpl        $((63+1)<<20),      %eax        // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
    jae         1f                              //      goto 1

    // 0.5 <= |x| < 0x1.0p63
    movq        (implicit-0b)(%ecx), %xmm1
    movapd      %xmm0,              %xmm7       // save x for the sign fix and inexact step
    shrl        $20,                %eax        // move exponent+1 to units position
    psllq       $11,                %xmm0       // put the most significant bit in the leading position
    movl        $63,                %edx
    orpd        %xmm1,              %xmm0       // make implicit bit explicit
    subl        %eax,               %edx        // subtract exponent+1 from 63
    movd        %edx,               %xmm2       // move to xmm
    psrlq       %xmm2,              %xmm0       // shift units bit to units+1 position
    pcmpeqb     %xmm3,              %xmm3       // -1LL
    psubq       %xmm3,              %xmm0       // subtract -1 == add 0.5: round away from zero
    psrlq       $1,                 %xmm0       // shift units bit to units position

    //fix sign: result = (|result| ^ mask) - mask
    movsd       (two52-0b)(%ecx ),  %xmm4       // 2**52 (used by the inexact step below)
    xorpd       %xmm6,              %xmm6       // 0
    cmpnltsd    %xmm7,              %xmm6       // !(0 < x) ? -1LL : 0  -- same as x < 0 here since |x| >= 0.5
    pxor        %xmm6,              %xmm0
    psubq       %xmm6,              %xmm0

    // move result to GPR
    movd        %xmm0,              %eax
    psrlq       $32,                %xmm0
    movd        %xmm0,              %edx

    //set inexact
    andnpd      %xmm7,              %xmm1       // |x|   (clear the sign bit using the implicit/sign mask)
    minsd       %xmm4,              %xmm1       // min( |x|, 0x1.0p52 ) -- avoid spurious inexact for |x| > 0x1.0p52
    addsd       %xmm4,              %xmm1       // add 0x1.0p52, set inexact

    ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
// Flags are still those of the cmpl above (jae does not modify them).
1:  jge         2f

    // |x| < 0.5
    cvttsd2si   %xmm0,              %eax        // truncates to 0; raises inexact when x != 0
    xorl        %edx,               %edx
    ret

    // |x| >= 0x1.0p63 || isnan(x)
2:  xorl        %eax,               %eax
    ucomisd     (mtwo63-0b)(%ecx),  %xmm0       // special case for -0x1.0p63, nan (unordered also sets ZF)
    je          3f

    //overflow
    xorpd       %xmm1,              %xmm1
    cmpltsd     %xmm0,              %xmm1       // 0 < x ? -1 : 0
    movd        %xmm1,              %eax        // low word: 0xffffffff for positive overflow, 0 for negative
    cvttsd2si   %xmm0,              %edx        // set invalid, edx = 0x80000000
    xorl        %eax,               %edx        // high word: 0x7fffffff for positive overflow, 0x80000000 for negative
    ret

// special case for -0x1.0p63: exactly representable, so no invalid is raised
3:  jp          4f                              //nans end up here, so get rid of them
    movl        $0x80000000,        %edx
    xorl        %eax,               %eax
    ret

//nan
4:  cvttsd2si   %xmm0,              %edx        // set invalid, edx = 0x80000000 (eax is already 0 from 2:)
    ret


//-----------------------------------------------------------------------------
// long lround( double x );                                           (i386)
//
// In:     x on the stack at FRAME_SIZE( STACKP )
// Out:    %eax = x rounded to nearest, ties away from zero
// Clobb:  %ecx, %edx, %xmm0-%xmm3, %xmm7, flags
//-----------------------------------------------------------------------------
ENTRY( lround )
    movl        4+FRAME_SIZE( STACKP ), %eax    // x.hi
    movsd       FRAME_SIZE( STACKP ),   %xmm0   // x
    andl        $0x7fffffff,        %eax        // |x|.hi
    subl        $0x3fe00000,        %eax        // (exponent+1) << 20 plus mantissa bits; negative when |x| < 0.5
    call        0f                              // get PC into %ecx for position-independent data access
0:  popl        %ecx
    movq        (implicit-0b)(%ecx), %xmm1
    cmpl        $((31+1)<<20),      %eax        // if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
    jae         1f                              //      goto 1

    // weed out positive overflow cases
    ucomisd     (cutoff-0b)(%ecx),  %xmm0       // if( x >= 0x1.0p31-0.5 )  -- would round up to 2**31
    jae         3f

    // 0.5 <= |x| < 0x1.0p31
    movapd      %xmm0,              %xmm7       // save x for the inexact step
    shrl        $20,                %eax        // move exponent+1 to units position
    psllq       $11,                %xmm0       // put the most significant bit in the leading position
    movl        $63,                %edx
    orpd        %xmm1,              %xmm0       // make implicit bit explicit
    subl        %eax,               %edx        // subtract exponent+1 from 63
    movd        %edx,               %xmm2       // move to xmm
    psrlq       %xmm2,              %xmm0       // shift units bit to units+1 position
    pcmpeqb     %xmm3,              %xmm3       // -1LL
    psubq       %xmm3,              %xmm0       // subtract -1 == add 0.5: round away from zero
    psrlq       $1,                 %xmm0       // shift units bit to units position

    //fix sign: eax = (|result| ^ mask) - mask, mask = x < 0 ? -1 : 0
    movl        4+FRAME_SIZE( STACKP ), %edx
    movd        %xmm0,              %eax
    sarl        $31,                %edx
    xorl        %edx,               %eax
    subl        %edx,               %eax

    //set inexact
    andnpd      %xmm7,              %xmm1       // |x|
    addsd       (two52-0b)(%ecx ),  %xmm1       // |x| += 0x1.0p52, set inexact
    ret

// |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x)
1:  andnpd      %xmm0,              %xmm1       // |x|  (NOTE(review): not read on any path below)
    ucomisd     (mcutoff-0b)(%ecx), %xmm0       // x <= -0x1.0p31-0.5, or nan (unordered sets CF and ZF)
    jbe         2f

    // weed out positive overflow cases
    ucomisd     (cutoff-0b)(%ecx),  %xmm0       // if( x >= 0x1.0p31-0.5 )
    jae         3f

    // -0x1.0p31-0.5 < x <= -0x1.0p31 || |x| < 0.5
    // truncation toward zero gives the correctly rounded result in both ranges
    cvttsd2si   %xmm0,              %eax
    ret

// negative overflow cases and nan
2:  pcmpeqb     %xmm0,              %xmm0       // all ones == nan
    cvttsd2si   %xmm0,              %eax        // set invalid, eax = 0x80000000
    ret

// positive overflow cases
3:  movl        $0x7fffffff,        %eax        // result is 0x7fffffff
    pcmpeqb     %xmm0,              %xmm0       // all ones == nan
    cvttsd2si   %xmm0,              %edx        // set invalid (converted value in edx is discarded)
    ret

#endif