this repo has no description
1
/*
 * lroundl.s
 *
 * by Ian Ollmann
 *
 * Apple Inc. Copyright (c) 2007. All rights reserved.
 *
 */
10
11#include "abi.h"
12#include <machine/asm.h>
13
// Constant pool shared by the lroundl / llroundl implementations in this file.
// NOTE(review): each .align precedes its section directive, so it is applied to
// whichever section is current at that point -- confirm this is intended.

// 4-byte literals
.align 2
.literal4
two63:      .long 0x5f000000            // float  0x1.0p63, added to |x| to raise inexact
mtwo63:     .long 0xdf000000            // float -0x1.0p63, the one in-range value with exponent 63
one:        .long 1                     // integer 1, mask for the leading fractional bit
inf:        .long 0x7f800000            // float +infinity bit pattern (not referenced in the code visible here)

// 8-byte literals: overflow thresholds for the 32-bit lroundl
.align 3
.literal8
cutoff32:   .double 2147483647.5        // 2**31 - 0.5: smallest x that rounds above INT32_MAX
mcutoff32:  .double -2147483648.5       // -(2**31 + 0.5): largest x that rounds below INT32_MIN


// 16-byte literals
.align 4
.literal16
cutoff:     .quad 0xffffffffffffffff, 0x403d    // long double 0x1.0p63 - 0.5 (significand, sign+exponent)
sign:       .quad 0x0, 0xffffffffffffffff       // indexed by the sign bit: 0 for positive, all ones for negative
31
32.text
33#if defined( __x86_64__ )
34
// long      lroundl( long double x );
// long long llroundl( long double x );
//
// x86_64 implementation; both entry points share one body because both return
// types are 64 bits wide here. Rounds x to the nearest integer with halfway
// cases rounded away from zero and returns the result in %rax.
//
// x is read from FRAME_SIZE( STACKP ). The same argument slot is reused as a
// scratch destination for the fistpl stores, which are executed only for the
// floating-point flags they raise.
//
//   |x| < 0.5                      -> 0, inexact raised as necessary
//   0.5 <= |x| < 0x1.0p63          -> rounded with integer shifts on the significand
//   x == 0x1.0p63 - 0.5            -> invalid, 0x7fffffffffffffff
//   x == -0x1.0p63                 -> 0x8000000000000000
//   any other |x| >= 0x1.0p63      -> invalid, 0x7fff... (x > 0) or 0x8000... (x < 0)
//   NaN                            -> invalid, 0x8000000000000000
//
// Every path pops x off the x87 stack before returning.
ENTRY( lroundl )
ENTRY( llroundl )
    movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + biased exponent of x
    andl    $0x7fff, %edx                   // exponent of x
    movq    FRAME_SIZE( STACKP ), %rax      // significand of x
    subl    $0x3ffe, %edx                   // push exponents less than -1 negative
    fldt    FRAME_SIZE( STACKP )            // { x }
    cmpl    $(63+1), %edx                   // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
    jae     1f                              //      goto 1 (unsigned test, so negative %edx is caught too)

    // 0.5 <= |x| < 0x1.0p63
    fldt    cutoff( %rip )                  // { 0x1.0p63 - 0.5, x }
    fucomip %st(1), %st(0)                  // { x }
    je      3f                              // x == 0x1.0p63 - 0.5 would round up to 2**63: overflow

    // Shift the significand right so that the units bit is at the units + 1 position
    movl    $63, %ecx
    subl    %edx, %ecx                      // 63 - (exponent+1)
    shrq    %cl, %rax                       // shift so that the units bit is at the +1 position
    movq    %rax, %rdx                      // set aside a copy
    shrq    $1, %rax                        // finish with a separate 1-bit shift: the total can reach 64, which a single shrq cannot do
    andq    $1, %rdx                        // isolate the leading fractional bit
    addq    %rdx, %rax                      // round the magnitude up when that bit is set

    // fix sign
    movswq  8+FRAME_SIZE( STACKP ), %rdx    // read the sign + exponent, sign extended
    sarq    $16, %rdx                       // remove exponent: 0 for x > 0, -1 for x < 0
    xorq    %rdx, %rax                      // flip the bits of the result if x < 0
    subq    %rdx, %rax                      // ... and add 1 to complete the two's complement negate

    // set inexact as necessary
    fabs                                    // { |x| }
    fadds   two63(%rip)                     // { |x| + 0x1.0p63 } inexact if x has fractional bits
    fstp    %st(0)                          // throw away numerical result.

    ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
// Flags are still those of the cmpl above; the signed test separates the two cases.
1:  jge     2f

    // |x| < 0.5
    xorq    %rax, %rax
    fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary
    ret

// |x| >= 0x1.0p63 || isnan(x)
2:  movswq  8+FRAME_SIZE( STACKP ), %rdx    // sign + exponent, sign extended to 64 bits
    flds    mtwo63( %rip )                  // { -0x1.0p63, x }
    fucomip %st(1), %st(0)                  // { x }
    je      4f                              // equal or unordered
    fistpl  FRAME_SIZE( STACKP )            // set invalid
    shrq    $63, %rdx                       // 1 if x < 0, else 0
    subq    $1, %rdx                        // 0 if x < 0, else -1
    movq    $0x8000000000000000, %rax
    xorq    %rdx, %rax                      // 0x8000... for x < 0, 0x7fff... for x > 0
    ret


// 0x1.0p63 - 0.5, positive overflow
3:  fistpl  FRAME_SIZE( STACKP )            // set invalid
    movq    $0x7fffffffffffffff, %rax
    ret

// -0x1.0p63 or nan
4:  jp      5f                              // parity set by fucomip means unordered: nan
    fstp    %st(0)                          // -0x1.0p63 is representable: just pop x, no flags
    movq    $0x8000000000000000, %rax
    ret

// nan
5:  fistpl  FRAME_SIZE( STACKP )            // set invalid
    movq    $0x8000000000000000, %rax
    ret
108
109#else
110
// long lroundl( long double x );
//
// Implementation for the non-x86_64 build. Rounds x to the nearest integer with
// halfway cases rounded away from zero and returns the 32-bit result in %eax.
//
// x is read from FRAME_SIZE( STACKP ); the same slot is reused as a scratch
// destination for the fistp stores that are executed only to raise flags.
// Constants are addressed relative to a base obtained with call/pop.
//
//   |x| < 0.5                          -> 0, inexact raised as necessary
//   0.5 <= |x|, x < 2**31 - 0.5,
//              x > -(2**31 + 0.5)      -> rounded value, inexact as necessary
//   x >= 2**31 - 0.5                   -> invalid, 0x7fffffff
//   x <= -(2**31 + 0.5)                -> invalid, 0x80000000
//   NaN                                -> invalid, 0x80000000
//
// Every path pops x off the x87 stack before returning.
ENTRY( lroundl )
    movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + biased exponent of x
    andl    $0x7fff, %edx                   // exponent of x
    movl    4+FRAME_SIZE( STACKP ), %eax    // high 32 bits of the significand
    subl    $0x3ffe, %edx                   // push exponents less than -1 negative
    fldt    FRAME_SIZE( STACKP )            // { x }
    cmpl    $(31+1), %edx                   // if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
    jae     1f                              //      goto 1 (unsigned test, so negative %edx is caught too)

    // 0.5 <= |x| < 0x1.0p31
    call    0f
0:  popl    %ecx                            // %ecx = address of label 0, base for the constants
    fldl    (cutoff32-0b)(%ecx)             // { 2**31 - 0.5, x }
    fucomip %st(1), %st(0)                  // { x }
    jbe     3f                              // 2**31 - 0.5 <= x: positive overflow

    // set inexact
    fabs                                    // { |x| }
    fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 } inexact if x has fractional bits
    fstp    %st(0)                          // throw away numerical result

    // round (%ecx is free again: the constants are no longer needed)
    movl    $31, %ecx
    subl    %edx, %ecx                      // 31 - (exponent+1)
    shrl    %cl, %eax                       // shift so that the units bit is at the +1 position
    movl    %eax, %edx                      // set aside a copy
    shrl    $1, %eax                        // finish with a separate 1-bit shift
    andl    $1, %edx                        // isolate the leading fractional bit
    addl    %edx, %eax                      // round the magnitude up when that bit is set

    // fix sign
    movswl  8+FRAME_SIZE( STACKP ), %edx    // read the sign + exponent, sign extended
    sarl    $16, %edx                       // remove exponent: 0 for x > 0, -1 for x < 0
    xorl    %edx, %eax                      // flip the bits of the result if x < 0
    subl    %edx, %eax                      // ... and add 1 to complete the two's complement negate
    ret

// |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x)
// Flags are still those of the cmpl above; the signed test separates the two cases.
1:  jge     2f

    // |x| < 0.5
    xorl    %eax, %eax
    fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary
    ret

// |x| >= 0x1.0p31 || isnan(x)
2:  movswl  8+FRAME_SIZE( STACKP), %edx     // sign + exponent, sign extended to 32 bits
    call    0f
0:  popl    %ecx                            // %ecx = address of label 0, base for the constants
    fldl    ( mcutoff32-0b)(%ecx)           // { -(2**31 + 0.5), x }
    fucomip %st(1), %st(0)                  // { x }
    jae     4f                              // -(2**31 + 0.5) >= x: negative overflow (not taken for nan)
    fldl    ( cutoff32-0b )(%ecx)           // { 2**31 - 0.5, x }
    fucomip %st(1), %st(0)                  // { x }
    jbe     3f                              // 2**31 - 0.5 <= x, or unordered (nan)

    // non overflowing result: -(2**31 + 0.5) < x <= -2**31, which rounds to -2**31
    shrl    $31, %edx                       // 1 if x < 0, else 0
    subl    $1, %edx                        // 0 if x < 0, else -1
    movl    $0x80000000, %eax
    xorl    %edx, %eax

    // set inexact
    fabs                                    // { |x| }
    fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 } inexact if x has fractional bits
    fstp    %st(0)                          // throw away numerical result
    ret

// positive overflow (or nan: parity set by the last fucomip means unordered)
3:  jp      5f
    fistps  FRAME_SIZE( STACKP )            // 16-bit store: x is out of int16 range whatever the rounding mode, so this sets invalid
    movl    $0x7fffffff, %eax
    ret

// negative overflow
4:  fistps  FRAME_SIZE( STACKP )            // 16-bit store, as above: sets invalid
    movl    $0x80000000, %eax
    ret

// nan
5:  fistpl  FRAME_SIZE( STACKP )            // set invalid
    movl    $0x80000000, %eax
    ret
192
193
// long long llroundl( long double x );
//
// Implementation for the non-x86_64 build. Rounds x to the nearest integer with
// halfway cases rounded away from zero and returns the 64-bit result in
// %edx:%eax (high:low). The 64-bit shifts and the sign fix-up are done in
// %xmm0-%xmm2.
//
// x is read from FRAME_SIZE( STACKP ); the same slot is reused as a scratch
// destination for the fistpl stores that are executed only to raise flags.
// Constants are addressed relative to a base obtained with call/pop.
//
//   |x| < 0.5                      -> 0, inexact raised as necessary
//   0.5 <= |x| < 0x1.0p63          -> rounded with 64-bit shifts on the significand
//   x == 0x1.0p63 - 0.5            -> invalid, 0x7fffffffffffffff
//   x == -0x1.0p63                 -> 0x8000000000000000
//   any other |x| >= 0x1.0p63      -> invalid, 0x7fff... (x > 0) or 0x8000... (x < 0)
//   NaN                            -> invalid, 0x8000000000000000
//
// Every path pops x off the x87 stack before returning.
ENTRY( llroundl )
    movswl  8+FRAME_SIZE( STACKP ), %edx    // sign + biased exponent of x
    andl    $0x7fff, %edx                   // exponent of x
    movq    FRAME_SIZE( STACKP ), %xmm0     // significand of x
    subl    $0x3ffe, %edx                   // push exponents less than -1 negative
    fldt    FRAME_SIZE( STACKP )            // { x }
    cmpl    $(63+1), %edx                   // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
    jae     1f                              //      goto 1 (unsigned test, so negative %edx is caught too)

    call    0f
0:  popl    %ecx                            // %ecx = address of label 0, base for the constants

    // 0.5 <= |x| < 0x1.0p63
    fldt    (cutoff-0b)( %ecx )             // { 0x1.0p63 - 0.5, x }
    fucomip %st(1), %st(0)                  // { x }
    je      3f                              // x == 0x1.0p63 - 0.5 would round up to 2**63: overflow

    // Shift the significand right so that the units bit is at the units + 1 position
    movl    $63, %eax
    movd    (one-0b)(%ecx), %xmm2           // 1
    subl    %edx, %eax                      // 63 - (exponent+1)
    movd    %eax, %xmm1                     // shift count
    psrlq   %xmm1, %xmm0                    // shift so that the units bit is at the +1 position
    movq    %xmm0, %xmm1                    // set aside a copy
    psrlq   $1, %xmm0                       // finish with a separate 1-bit shift: the total can reach 64
    pand    %xmm2, %xmm1                    // isolate the leading fractional bit
    paddq   %xmm1, %xmm0                    // round the magnitude up when that bit is set

    // set inexact as necessary
    fabs                                    // { |x| }
    fadds   (two63-0b)(%ecx)                // { |x| + 0x1.0p63 } inexact if x has fractional bits
    fstp    %st(0)                          // throw away numerical result.

    // fix sign
    movswl  8+FRAME_SIZE( STACKP ), %eax    // read the sign + exponent, sign extended
    shrl    $31, %eax                       // remove exponent: 1 if x < 0, else 0
    movq    (sign-0b)(%ecx, %eax,8), %xmm1  // sign[0] = 0 or sign[1] = all ones
    pxor    %xmm1, %xmm0                    // flip the bits of the result if x < 0
    psubq   %xmm1, %xmm0                    // ... and add 1 to complete the two's complement negate
    movd    %xmm0, %eax                     // low 32 bits of the result
    psrlq   $32, %xmm0
    movd    %xmm0, %edx                     // high 32 bits of the result

    ret

// |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)
// Flags are still those of the cmpl above; the signed test separates the two cases.
1:  jge     2f

    // |x| < 0.5
    xorl    %eax, %eax
    xorl    %edx, %edx
    fistpl  FRAME_SIZE( STACKP )            // set inexact as necessary
    ret

// |x| >= 0x1.0p63 || isnan(x)
2:  movswl  8+FRAME_SIZE( STACKP ), %eax    // sign + exponent, sign extended to 32 bits
    call    0f
0:  popl    %ecx                            // %ecx = address of label 0, base for the constants
    flds    (mtwo63-0b)( %ecx )             // { -0x1.0p63, x }
    fucomip %st(1), %st(0)                  // { x }
    je      4f                              // equal or unordered
    fistpl  FRAME_SIZE( STACKP )            // set invalid
    shrl    $31, %eax                       // 1 if x < 0, else 0
    subl    $1, %eax                        // low word: 0 if x < 0, else 0xffffffff
    movl    $0x80000000, %edx
    xorl    %eax, %edx                      // high word: 0x80000000 if x < 0, else 0x7fffffff
    ret


// 0x1.0p63 - 0.5, positive overflow
3:  fistpl  FRAME_SIZE( STACKP )            // set invalid
    movl    $-1, %eax
    movl    $0x7fffffff, %edx
    ret

// -0x1.0p63 or nan
4:  jp      5f                              // parity set by fucomip means unordered: nan
    fstp    %st(0)                          // -0x1.0p63 is representable: just pop x, no flags
    movl    $0x80000000, %edx
    xorl    %eax, %eax
    ret

// nan
5:  fistpl  FRAME_SIZE( STACKP )            // set invalid
    movl    $0x80000000, %edx
    xorl    %eax, %eax
    ret
281
282
283
284#endif