src/libm/Source/Intel/frexp.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / frexp.S
at fixPythonPipStalling 307 lines 10 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
  2/*
  3 *  frexp.s
  4 *
  5 *      by Ian Ollmann
  6 *
  7 *  Copyright © 2007 Apple Inc. All Rights Reserved.
  8 */
  9 
 10#define LOCAL_STACK_SIZE	(32 - FRAME_SIZE)
   // LOCAL_STACK_SIZE is the amount frexp and frexpl subtract from STACKP on entry (SUBP)
   // and add back before every ret (ADDP). frexpf does not use it.
   // NOTE(review): FRAME_SIZE, STACKP, SUBP, ADDP, ENTRY, FIRST_ARG_OFFSET and SECOND_ARG_OFFSET
   // are not defined in this file; presumably they come from the two headers below -- confirm.
 11
 12#include "machine/asm.h"
 13#include "abi.h"
 14
   // RESULT_P names the register holding the pointer through which each routine stores
   // the 32-bit exponent. The i386 paths load it from the stack themselves; the other
   // paths never load it, so the pointer is assumed to arrive already in %rdi
   // (TODO confirm against the calling convention in use).
 15#if defined( __LP64__ )
 16    #define RESULT_P       %rdi
 17#else
 18    #define RESULT_P       %edx
 19#endif
 20
 21ENTRY( frexpf )
       // frexpf: split a single-precision value x into a significand and a power of two.
       //
       //   In:   x         i386: 32-bit float at FRAME_SIZE(STACKP); otherwise: low 32 bits of %xmm0
       //         RESULT_P  pointer to a 32-bit integer. On i386 it is loaded from
       //                   4+FRAME_SIZE(STACKP); otherwise it is used as it arrives.
       //   Out:  the sign and significand bits of x with the exponent field replaced by
       //         0x3f000000 (2**-1), so the magnitude is in [0.5, 1).
       //         i386: pushed on the x87 stack with flds; otherwise: in %xmm0.
       //         (RESULT_P) receives the power of two e such that x = result * 2**e.
       //   Zero, infinity and NaN: x itself is returned and 0 is stored through RESULT_P.
       //   Scratch: %eax, %ecx, flags. On i386 the argument slot on the stack is overwritten
       //            with the result before it is loaded onto the x87 stack.
 22    //Fetch the arguments
 23#if defined( __i386__ )
 24    movl    FRAME_SIZE( STACKP ),           %eax
 25    movl    4+FRAME_SIZE( STACKP ),         RESULT_P
 26#else
 27    movd    %xmm0,                          %eax
 28#endif
 29
 30    movl    %eax,                           %ecx        //  x (raw bits, kept for the significand)
 31    andl    $0x7f800000,                    %eax        //  biased exponent, still in bits 23..30
 32    addl    $0x00800000,                    %eax        //  add one to exponent: 0 becomes 0x00800000, 0xff wraps to 0x80000000
 33    cmpl    $0x00800000,                    %eax        // if( x == 0 || isdenormal(x) || isinf(x) || isnan(x) )
 34    jle     1f                                          //      goto 1 (signed compare: the wrapped inf/NaN value is negative)
 35
 36    andl    $0x807fffff,                    %ecx        //  sign + significand
 37    sarl    $23,                            %eax        //  shift exponent to units position
 38    orl     $0x3f000000,                    %ecx        //  set return exponent to -1
 39    subl    $127,                           %eax        //  remove bias; the +1 added above stays because the result is in [0.5, 1)
 40    
 41    //return result
 42#if defined( __i386__ )
 43    movl    %ecx,                           FRAME_SIZE( STACKP )
 44    flds    FRAME_SIZE( STACKP )
 45#else
 46    movd    %ecx,                           %xmm0
 47#endif
 48    movl    %eax,                           (RESULT_P)  //  store exponent
 49    ret
 50
 51//  0, denorm, nan, inf
       // The flags tested here are still those of the cmpl above (jle does not change them):
       // "less" means the exponent field was 0xff, "equal" means it was 0.
 521:  jl      3f                                          // NaN, inf goto 3
 53
 54    // 0 or denormal
 55    movl    %ecx,                           %eax        // x
 56    andl    $0x7fffffff,                    %ecx        // |x|
 57    jz      3f                                          // if the value is zero, goto 3 (ZF comes from the andl)
 58
 59    // denormal
 60    bsrl    %ecx,                           %ecx       // index of the highest set significand bit (0..22)
 61    subl    $23,                            %ecx       // index - 23 (negative)
 62    negl    %ecx                                       // fix sign: 23 - index = left shift that brings that bit to bit 23
 63    shl     %cl,                            %eax       // move high bit to lowest exponent bit
 64    addl    $125,                           %ecx       // find exponent: 125 + shift = 148 - index
 65    andl    $0x007fffff,                    %eax       // trim high bit (it becomes the implicit leading one)
 66    negl    %ecx                                       // exponent = index - 148
 67    movl    %ecx,                           (RESULT_P) // write out exponent -- we need the register
 68
 69#if defined( __i386__ )
 70    movl    FRAME_SIZE( STACKP ),           %ecx       // we lost the sign bit along the way. Get it back
 71#else
 72    movd    %xmm0,                          %ecx       // we lost the sign bit along the way. Get it back
 73#endif
 74    orl     $0x3f000000,                    %eax       // exponent field for 2**-1
 75    andl    $0x80000000,                    %ecx       // isolate the sign bit of x
 76    orl     %ecx,                           %eax       // sign | 2**-1 exponent | normalized significand
 77
 78#if defined( __i386__ )
 79    movl    %eax,                           FRAME_SIZE( STACKP )
 80    flds    FRAME_SIZE(STACKP)
 81#else
 82    movd    %eax,                           %xmm0
 83#endif
 84    ret
 85    
 863:  // 0, inf, NaN: return the input and report an exponent of 0
 87#if defined( __i386__ )
 88    flds    FRAME_SIZE(STACKP)
 89#endif
 90    movl    $0,                             (RESULT_P)
 91    ret
 92
 93
 94
 95ENTRY( frexp )
       // frexp: split a double-precision value x into a significand and a power of two.
       //
       //   In:   x         i386: 8 bytes at FIRST_ARG_OFFSET(STACKP); otherwise: low 64 bits of %xmm0
       //         RESULT_P  pointer to a 32-bit integer. On i386 it is loaded from
       //                   8+FIRST_ARG_OFFSET(STACKP); otherwise it is used as it arrives.
       //   Out:  x with its exponent field replaced by 0x3fe (2**-1), so the magnitude is in
       //         [0.5, 1). i386: left on the x87 stack; otherwise: in %xmm0.
       //         (RESULT_P) receives the power of two e such that x = result * 2**e.
       //   Zero, infinity and NaN: x itself is returned and 0 is stored through RESULT_P.
       //   Scratch: %eax, %ecx, %xmm1, flags, LOCAL_STACK_SIZE bytes of stack
       //            (plus %rdx and all of %rax/%rcx on the non-i386 denormal path).
 96    SUBP    $LOCAL_STACK_SIZE,  STACKP
 97    
 98//Load the high 32 bits of the argument (sign, exponent, top 20 mantissa bits) and, on i386, the result pointer
 99#if defined( __i386__ )
100    movl    4+FIRST_ARG_OFFSET(STACKP),     %eax
101    movl    8+FIRST_ARG_OFFSET(STACKP),     RESULT_P
102#else
103    movd    %xmm0,                          %rax
104    shrq    $32,                            %rax
105#endif    
106    
107    //make a copy of mantissa top bits and sign
108    movl    %eax,                           %ecx
109    
110    //expose the exponent
111    andl    $0x7FF00000,                    %eax
112    
113    //extract sign + mantissa high bits
114    andl    $0x800FFFFF,                    %ecx
115
116    //add 1 to the exponent (an all-zero field becomes 0x00100000, an all-ones field wraps to 0x80000000)
117    addl    $0x00100000,                    %eax
118    
119    //or in -1 + bias as the new exponent for the mantissa
120    orl     $0x3fe00000,                    %ecx
121
122    //test to see if we are in an exceptional case.  (0x00100000 -> 0 or denormal, 0x80000000 -> NaN or Inf)
       //the compare is signed, so the wrapped NaN/Inf value counts as less than 0x00100000
123    cmpl    $0x00100000,                    %eax
124    jle     1f
125    
126    //Merge high and low mantissa parts and move to return register
       //punpckldq leaves { original low 32 bits, rebuilt high 32 bits } in the low 64 bits of xmm0
127    movd    %ecx,                           %xmm1
128#if defined( __i386__ )
129    movd    FIRST_ARG_OFFSET(STACKP),       %xmm0
130#endif
131    punpckldq %xmm1,                         %xmm0
132#if defined( __i386__ )
133    movq    %xmm0,                          (STACKP)
134    fldl    (STACKP)
135#endif
136
137    //move exponent to units position, and store
138    sarl    $20,                            %eax
139    addl    $-1023,                         %eax            //remove bias; the +1 added above stays because the result is in [0.5, 1)
140    movl    %eax,                           (RESULT_P)
141
142    //exit
143    ADDP    $LOCAL_STACK_SIZE,   STACKP
144    ret
145
146    //special case code
       //flags are still those of the cmpl above: equal means the exponent field was zero
1471:  je      3f                              //handle zeros and denormals in 3
148
1492:  movl    $0,                             (RESULT_P)
150  //Infs, zeros and NaNs -- return input value
151#if defined( __i386__ )
152    fldl    FIRST_ARG_OFFSET( STACKP )
153#endif
154    ADDP    $LOCAL_STACK_SIZE,   STACKP
155    ret
156
157//zeros and denormals
1583:  
159#if defined( __i386__ )
160	fldl    FIRST_ARG_OFFSET( STACKP )       // { 0 or denormal }
161    fldz                                    //  { 0, 0 or denormal }
162    fucomip %st(1), %st(0)                  //  { 0 or denormal }   ZF set when x == 0
163    fstpt   (STACKP)                        //  save out in 80-bit format (normalizes denorms), and empty stack
164    je      2b                              //  zero: still testing the fucomip flags, fstpt leaves them alone
165
166    movsxw  8(STACKP),      %eax            //  load sign + exponent
167    movl    %eax,           %ecx            //  copy to ecx
168    andl    $0x7fff,        %eax            //  extract exponent
169    andl    $0x8000,        %ecx            //  extract sign
170    addl    $-16382,        %eax            //  subtract bias from exponent (16383), plus one because the result is in [0.5, 1)
171    orl     $0x3ffe,        %ecx            //  copy sign bit to a new exponent of -1
172    movl    %eax,           (RESULT_P)      //  store our exponent to the result pointer
173    movw    %cx,            8(STACKP)       //  write the other exponent back to the 80-bit FP value
174    fldt    (STACKP)                        //  load the mantissa in
175
176    ADDP    $LOCAL_STACK_SIZE,   STACKP
177    ret
178
179#else
	// separate zero from denormal: ucomisd sets ZF when x compares equal to 0.0
180	xorpd	%xmm1,			%xmm1
181	ucomisd	%xmm0,			%xmm1
182	je		2b
183	
184	// read value from xmm
185	movd	%xmm0,			%rax			
186	movq	%rax,			%rcx			//set aside a copy of x; it is reduced to the sign bit below
187	
188	//take absolute value (shift the sign bit out and back in as zero)
189	shlq	$1,				%rax
190	shrq	$1,				%rax
191	
192	//save sign bit for later: x ^ |x| leaves only the sign bit in rcx
193	xorq	%rax,			%rcx			
194	
195	//read |x| as integer -- multiplies denormal by 2**(1022+52) to get normal number
196	cvtsi2sd	%rax,		%xmm1
197	movd		%xmm1,		%rax
198
199	//prepare an exponent mask
200	movq	$0x7ff,			%rdx
201	shlq	$52,			%rdx			// 0x7ff0000000000000
202	
203	//extract exponent and mantissa
204	andq	%rax,			%rdx			// exponent
205	xorq	%rdx,			%rax			// mantissa
206	
207	//reduce exponent to units position
208	shrq	$52,			%rdx
209	
210	//or in sign bit to mantissa
211	orq		%rcx,			%rax
212
213	//prepare 0.5
214	movq	$0x3fe,			%rcx
215	shlq	$52,			%rcx			// 0x3fe0000000000000
216	
217	//subtract out the bias in the exponent:
	//1022 = bias less one (result is in [0.5, 1)), 1022+52 = the scaling applied by the integer conversion above
218	subq	$(1022+52+1022),	%rdx
219	
220	//set mantissa exponent to 2**-1
221	orq		%rcx,			%rax
222	
223	//write out the exponent
224	movl	%edx,			(RESULT_P)
225	
226	//move result to destination register
227	movd	%rax,			%xmm0
228	
229    ADDP    $LOCAL_STACK_SIZE,   STACKP
230	ret
231#endif
232
233    
234
235ENTRY( frexpl )
       // frexpl: split an 80-bit extended-precision value x into a significand and a power of two.
       //
       //   In:   x         10 bytes read from the stack at FIRST_ARG_OFFSET(STACKP) on every target
       //         RESULT_P  pointer to a 32-bit integer. On i386 it is loaded from
       //                   SECOND_ARG_OFFSET(STACKP); otherwise it is used as it arrives.
       //   Out:  x with its sign+exponent word replaced by (sign | 0x3ffe), i.e. exponent 2**-1,
       //         so the magnitude is in [0.5, 1); left on the x87 stack.
       //         (RESULT_P) receives the power of two e such that x = result * 2**e.
       //   Zero, infinity and NaN: x itself is returned and 0 is stored through RESULT_P.
       //   Scratch: %eax, %ecx, flags, x87 stack, LOCAL_STACK_SIZE bytes of stack.
236    SUBP    $LOCAL_STACK_SIZE,  STACKP
237    
238    //Load the sign + exponent (16-bit word at byte offset 8 of the 80-bit value)
239    movsxw  8+FIRST_ARG_OFFSET(STACKP),     %eax
240    movl    %eax,                           %ecx
241
242    //Load result pointer if necessary
243#if defined( __i386__ )
244    movl    SECOND_ARG_OFFSET(STACKP),      RESULT_P
245#endif
246    //Copy value to stack
247    fldt    FIRST_ARG_OFFSET(STACKP)
248    fstpt   (STACKP)
249
250    //extract exponent
       //after the +1, a zero exponent field reads 1 and an all-ones field reads 0x8000,
       //which is negative as a signed 16-bit value, so one signed compare catches both
251    andl    $0x7fff,                        %eax
252    addl    $1,                             %eax
253    cmpw    $1,                             %ax
254    jle     1f                              //special case code for zero, nan, denormal, inf
255    addl    $-16383,                        %eax    //remove bias; the +1 added above stays because the result is in [0.5, 1)
256    movl    %eax,                           (RESULT_P)
257
258    //prepare new exponent for mantissa
259    andl    $0x8000,                        %ecx    //keep only the sign bit
260    orl     $0x3ffe,                        %ecx    //biased exponent for 2**-1
261    movw    %cx,                            8(STACKP)
262    fldt    (STACKP)
263    
264    //exit
265    ADDP    $LOCAL_STACK_SIZE,   STACKP
266    ret
267
       //special cases. The fldt does not touch the integer flags, so the je below still tests
       //the cmpw above: equal means the exponent field was zero.
2681:  fldt    FIRST_ARG_OFFSET(STACKP)
269    je      3f                  //denormals and zeros are handled elsewhere
270
       //inf, NaN and (from 3 below) zero: x is already on the x87 stack; report an exponent of 0
2712:  movl    $0,                             (RESULT_P)
272    ADDP    $LOCAL_STACK_SIZE,   STACKP
273    ret
274
275//zero or denormal: separate the two
       // fucomip compares st(0) = 0 against st(1) = x and pops the zero.
       // ZF is set when x == 0; CF is set when 0 is below x, i.e. x is positive.
       // CF is consumed by the fcmovb further down; nothing in between changes the integer flags.
2763:  fldz
277    fucomip %st(1), %st(0)
278    je      2b                  
279    fstp    %st(0)
280
281    //we have a denormal. Load the mantissa as an integer, and convert to normal floating point number.
282    fildll  (STACKP)                            // {|f| * 2**(16382+63)}    //note: fails for malformed normals that have 0 exponent, but leading bit set. However, since this is a stack copy of the data, we naively hope that the hardware took care of that.
283    fld     %st(0)                              // {|f| * 2**(16382+63), |f| * 2**(16382+63)}
284    fchs                                        // {-|f| * 2**(16382+63), |f| * 2**(16382+63)}
285    fcmovb  %st(1),         %st(0)              // { f * 2**(16382+63), |f| * 2**(16382+63) }   positive copy is taken when x is positive
286    fstpt   (STACKP)                            // { |f| * 2**(16382+63) }
287    fstp    %st(0)                              // {}
288    
289//repeat frexpl with new bias
290    //Load the sign + exponent
291    movsxw  8(STACKP),                      %eax
292    movl    %eax,                           %ecx
293    
294    //extract exponent
       //-16383 removes the bias, -(16382+63) undoes the scaling from the integer load,
       //+1 because the result is in [0.5, 1)
295    andl    $0x7fff,                        %eax
296    addl    $(-16383-16382-63+1),           %eax
297    movl    %eax,                           (RESULT_P)
298    
299    //prepare new exponent for mantissa
300    andl    $0x8000,                        %ecx
301    orl     $0x3ffe,                        %ecx
302    movw    %cx,                            8(STACKP)
303    fldt    (STACKP)
304
305    //exit
306    ADDP    $LOCAL_STACK_SIZE,   STACKP
307    ret