this repo has no description
1
2/*
3 * lround.s
4 *
5 * by Ian Ollmann
6 *
7 * Copyright (c) 2007, Apple Inc. All Rights Reserved.
8 *
9 * Implementation of C99 lround and llround functions for i386 and x86_64.
10 */
11
12#include <machine/asm.h>
13#include "abi.h"
14
.literal8
// 8-byte constants shared by the routines below.  The .quad entries are
// IEEE-754 double-precision bit patterns; the value is given on each line.
half:		.quad	0x3fe0000000000000	// 0.5
mtwo63:		.quad	0xc3e0000000000000	// -2**63
mtwo31:		.quad	0xc1e0000000000000	// -2**31
two52:		.quad	0x4330000000000000	// 2**52
twom32:		.quad	0x3df0000000000000	// 2**-32
implicit:	.quad	0x8000000000000000	// bit 63 only: ORed in as the explicit leading significand bit, and-not-ed out as the sign bit
cutoff:		.double	2147483647.5		// 2**31 - 0.5: smallest x that rounds (ties away from zero) to 2**31
mcutoff:	.double	-2147483648.5		// -(2**31) - 0.5: largest x that rounds (ties away from zero) below -2**31
24
25.text
26#if defined( __x86_64__ )
//-----------------------------------------------------------------------
// long lround( double x ) / long long llround( double x )	-- x86_64
// The two entry points share one body; the result is produced in the
// full 64-bit %rax.
// In:    %xmm0 = x
// Out:   %rax  = x rounded to the nearest integer, ties away from zero.
//                NaN and negative overflow give 0x8000000000000000,
//                positive overflow gives 0x7fffffffffffffff.
// Clobb: %rcx, %rdx, flags
// Note:  cvttsd2si is used for its floating-point exception side effects
//        (inexact / invalid); it does not modify the integer flags, which
//        the branches after it rely on.
//-----------------------------------------------------------------------
ENTRY( lround )
ENTRY( llround )
	movd	%xmm0, %rax			// rax = raw bits of x
	movq	%rax, %rdx			// rdx = second copy, becomes the significand
	shrq	$52, %rax
	andq	$0x7ff, %rax			// exponent + bias
	subq	$0x3fe, %rax			// rax = exponent + 1; negative iff |x| < 0.5
	cmpq	$64, %rax			// if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
	jae	1f				//	goto 1   (unsigned test, so negative rax is caught too)

	// 0.5 <= |x| < 0x1.0p63
	shlq	$11, %rdx			// fraction now occupies bits 62..11
	orq	implicit(%rip), %rdx		// make implicit bit explicit at bit 63

	// shift value right so that the integer (units) bit is at position 1
	// and the one-half bit is at position 0
	movq	$63, %rcx
	subq	%rax, %rcx			// 63 - (exponent+1)
	shrq	%cl, %rdx

	addq	$1, %rdx			// add one half: round away from zero
	shrq	$1, %rdx			// move unit bit to correct position

	// Fix sign: rax = (magnitude ^ mask) - mask, with mask = x < 0 ? -1 : 0
	movd	%xmm0, %rax
	sarq	$63, %rax
	movq	%rax, %rcx
	xorq	%rdx, %rax
	subq	%rcx, %rax

	// set inexact as necessary; the converted value left in rdx is discarded
	cvttsd2si	%xmm0, %rdx
	ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:	jge	2f				// flags are still those of the cmpq: signed >= 64 means large, inf or nan

	// |x| < 0.5
	cvttsd2si	%xmm0, %rax		// result is 0; sets inexact if x is nonzero
	ret

	// |x| >= 0x1.0p63 || isnan(x)
2:	ucomisd	%xmm0, %xmm0			// PF set iff x is NaN
	cvttsd2si	%xmm0, %rax		// set invalid (as necessary), prepare 0x8000000000000000
	jp	3f				// NaN: return 0x8000000000000000
	negq	%rdx				// rdx still holds the bits of x; -rdx is negative iff x > 0
	sarq	$63, %rdx			// x > 0 ? -1 : 0
	xorq	%rdx, %rax			// positive overflow becomes 0x7fffffffffffffff
3:	ret
75
76
77#else /* __i386__ */
78
//-----------------------------------------------------------------------
// long long llround( double x )	-- i386
// In:    x is read from the stack at FRAME_SIZE( STACKP )
//        (FRAME_SIZE / STACKP come from the included headers)
// Out:   %edx:%eax = x rounded to the nearest integer, ties away from zero.
//                    NaN and negative overflow give 0x8000000000000000,
//                    positive overflow gives 0x7fffffffffffffff.
// Clobb: %ecx, %xmm0-%xmm4, %xmm6, %xmm7, flags
//-----------------------------------------------------------------------
ENTRY( llround )
	movl	4+FRAME_SIZE( STACKP ), %eax	// x.hi
	movsd	FRAME_SIZE( STACKP ), %xmm0	// x
	andl	$0x7fffffff, %eax		// |x|.hi
	subl	$0x3fe00000, %eax		// negative iff |x| < 0.5
	call	0f				// push the address of label 0 ...
0:	popl	%ecx				// ... ecx = base for pc-relative constant access
	cmpl	$((63+1)<<20), %eax		// if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
	jae	1f				//	goto 1

	// 0.5 <= |x| < 0x1.0p63
	movq	(implicit-0b)(%ecx), %xmm1	// 0x8000000000000000
	movapd	%xmm0, %xmm7			// keep a copy of x for the sign and inexact steps
	shrl	$20, %eax			// move exponent+1 to units position
	psllq	$11, %xmm0			// put the most significant bit in the leading position
	movl	$63, %edx
	orpd	%xmm1, %xmm0			// make implicit bit explicit
	subl	%eax, %edx			// subtract exponent+1 from 63
	movd	%edx, %xmm2			// move to xmm
	psrlq	%xmm2, %xmm0			// shift units bit to units+1 position
	pcmpeqb	%xmm3, %xmm3			// -1LL
	psubq	%xmm3, %xmm0			// subtract -1 (add one half): round away from zero
	psrlq	$1, %xmm0			// shift units bit to units position

	// fix sign: result = (magnitude ^ mask) - mask
	movsd	(two52-0b)(%ecx ), %xmm4	// 2**52, used by the inexact step below
	xorpd	%xmm6, %xmm6			// 0
	cmpnltsd	%xmm7, %xmm6		// !(0 < x): x < 0 ? -1LL : 0   (x is neither 0 nor nan here)
	pxor	%xmm6, %xmm0
	psubq	%xmm6, %xmm0

	// move result to GPR
	movd	%xmm0, %eax			// low 32 bits
	psrlq	$32, %xmm0
	movd	%xmm0, %edx			// high 32 bits

	// set inexact
	andnpd	%xmm7, %xmm1			// |x|
	minsd	%xmm4, %xmm1			// min( |x|, 0x1.0p52 ) -- avoid spurious inexact for |x| > 0x1.0p52
	addsd	%xmm4, %xmm1			// add 0x1.0p52, set inexact

	ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
1:	jge	2f				// flags are still those of the cmpl: signed >= means large, inf or nan

	// |x| < 0.5
	cvttsd2si	%xmm0, %eax		// result is 0; sets inexact if x is nonzero
	xorl	%edx, %edx
	ret

	// |x| >= 0x1.0p63 || isnan(x)
2:	xorl	%eax, %eax			// low word of the -0x1.0p63 and nan results
	ucomisd	(mtwo63-0b)(%ecx), %xmm0	// special case for -0x1.0p63, nan
	je	3f				// ZF is set for equal and for unordered

	// overflow
	xorpd	%xmm1, %xmm1
	cmpltsd	%xmm0, %xmm1			// 0 < x ? -1LL : 0
	movd	%xmm1, %eax			// low word: 0xffffffff for positive, 0 for negative overflow
	cvttsd2si	%xmm0, %edx		// set invalid, edx = 0x80000000
	xorl	%eax, %edx			// high word: 0x7fffffff for positive, 0x80000000 for negative
	ret

// special case for -0x1.0p63
3:	jp	4f				// nans end up here, so get rid of them
	movl	$0x80000000, %edx		// exact result 0x8000000000000000, no exception raised
	ret					// eax is already 0 from label 2

// nan
4:	cvttsd2si	%xmm0, %edx		// set invalid, edx = 0x80000000; eax is already 0 from label 2
	ret
151
152
//-----------------------------------------------------------------------
// long lround( double x )	-- i386, 32-bit result
// In:    x is read from the stack at FRAME_SIZE( STACKP )
//        (FRAME_SIZE / STACKP come from the included headers)
// Out:   %eax = x rounded to the nearest integer, ties away from zero.
//               NaN and negative overflow give 0x80000000,
//               positive overflow gives 0x7fffffff.
// Clobb: %ecx, %edx, %xmm0-%xmm3, %xmm7, flags
//-----------------------------------------------------------------------
ENTRY( lround )
	movl	4+FRAME_SIZE( STACKP ), %eax	// x.hi
	movsd	FRAME_SIZE( STACKP ), %xmm0	// x
	andl	$0x7fffffff, %eax		// |x|.hi
	subl	$0x3fe00000, %eax		// negative iff |x| < 0.5
	call	0f				// push the address of label 0 ...
0:	popl	%ecx				// ... ecx = base for pc-relative constant access
	movq	(implicit-0b)(%ecx), %xmm1	// 0x8000000000000000; only the main path below reads it
	cmpl	$((31+1)<<20), %eax		// if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
	jae	1f				//	goto 1

	// weed out positive overflow cases
	ucomisd	(cutoff-0b)(%ecx), %xmm0	// if( x >= 0x1.0p31-0.5 )
	jae	3f				//	x would round up to 2**31

	// 0.5 <= |x| < 0x1.0p31
	movapd	%xmm0, %xmm7			// keep a copy of x for the inexact step
	shrl	$20, %eax			// move exponent+1 to units position
	psllq	$11, %xmm0			// put the most significant bit in the leading position
	movl	$63, %edx
	orpd	%xmm1, %xmm0			// make implicit bit explicit
	subl	%eax, %edx			// subtract exponent+1 from 63
	movd	%edx, %xmm2			// move to xmm
	psrlq	%xmm2, %xmm0			// shift units bit to units+1 position
	pcmpeqb	%xmm3, %xmm3			// -1LL
	psubq	%xmm3, %xmm0			// subtract -1 (add one half): round away from zero
	psrlq	$1, %xmm0			// shift units bit to units position

	// fix sign: eax = (magnitude ^ mask) - mask, with mask = x < 0 ? -1 : 0
	movl	4+FRAME_SIZE( STACKP ), %edx	// x.hi again, for its sign bit
	movd	%xmm0, %eax
	sarl	$31, %edx
	xorl	%edx, %eax
	subl	%edx, %eax

	// set inexact
	andnpd	%xmm7, %xmm1			// |x|
	addsd	(two52-0b)(%ecx ), %xmm1	// |x| += 0x1.0p52, set inexact
	ret

// |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x)
1:	ucomisd	(mcutoff-0b)(%ecx), %xmm0	// if( x <= -0x1.0p31-0.5 || isnan(x) )
	jbe	2f				//	negative overflow or nan

	// weed out positive overflow cases
	ucomisd	(cutoff-0b)(%ecx), %xmm0	// if( x >= 0x1.0p31-0.5 )
	jae	3f

	// -0x1.0p31-0.5 < x <= -0x1.0p31 || |x| < 0.5
	cvttsd2si	%xmm0, %eax		// truncation gives -2**31 or 0; sets inexact if x is not an integer
	ret

// negative overflow cases and nan
2:	pcmpeqb	%xmm0, %xmm0			// all ones: a nan bit pattern
	cvttsd2si	%xmm0, %eax		// set invalid, eax = 0x80000000
	ret

// positive overflow cases
3:	movl	$0x7fffffff, %eax		// result is 0x7fffffff
	pcmpeqb	%xmm0, %xmm0			// all ones: a nan bit pattern
	cvttsd2si	%xmm0, %edx		// set invalid; the value in edx is discarded
	ret
216
217#endif