src/libm/Source/Intel/ilogb.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / ilogb.S
at fixPythonPipStalling 438 lines 14 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 *  ilogb.s
  3 *
  4 *      by Ian Ollmann
  5 *
  6 *  Copyright © 2007 Apple Inc. All Rights Reserved.
  7 */
  8 
  9#include <machine/asm.h>
 10#include "abi.h"
 11
 12ENTRY( ilogbf )
// ilogbf: unbiased binary exponent of a single-precision argument, returned as an integer in %eax.
//   The argument is read from FRAME_SIZE(STACKP) on i386 and from %xmm0 otherwise.
//   normal x     -> exponent field - 127
//   denormal x   -> index of the leading set bit of |x| - (126+23)
//   zero         -> 0x80000000, invalid flag raised
//   NaN          -> 0x80000000, invalid flag raised
//   +/-infinity  -> 0x7fffffff, invalid flag raised
 13//move argument to eax
 14#if defined( __i386__ )
 15    movl    FRAME_SIZE(STACKP), %eax
 16#else
 17    movd    %xmm0,  %eax
 18#endif
 19
 20    andl    $0x7f800000,        %eax        //expose exponent
 21    addl    $0x00800000,        %eax        //add 1 to exponent, promotes NaN/Inf to 2**31
 22    cmpl    $0x00800000,        %eax        //test for exceptional case
 23    jle     1f                              //jump to special code for zeros, NaNs, Infs and denorms
 24    sarl    $23,                %eax        //move exponent to unit precision
 25    subl    $128,               %eax        //remove (bias+1)
 26    ret
 27    
 281:  je  2f                                  //handle zeros/denorms at 2 (flags are still those of the cmpl above: equal means the exponent field was 0)
 29
 30    //load initial value
 31#if defined( __i386__ )
 32    movss   FRAME_SIZE(STACKP), %xmm0
 33#endif
 34
 35    cvtss2si    %xmm0,          %eax        //set invalid and produce a 0x80000000
 36    cmpeqss %xmm0,              %xmm0       // x == x : all ones for infinity, 0 for NaN
 37    movd    %xmm0,              %edx        // edx = -1 for infinity, 0 for NaN
 38    addl    %edx,               %eax        // c = |x| == inf ? 0x7fffffff : 0x80000000
 39    ret
 40    
 412:  //reload initial value
 42#if defined( __i386__ )
 43    movl    FRAME_SIZE(STACKP), %edx
 44    movss   FRAME_SIZE(STACKP), %xmm0
 45#else
 46    movd    %xmm0,  %edx
 47#endif
 48
 49    //set invalid for zero
 50    xorps       %xmm1,  %xmm1
 51    cmpeqss     %xmm1,  %xmm0                  // xmm0 = (x == 0) ? all ones : 0
 52    cvtss2si    %xmm0,  %ecx                   // all ones is a NaN: zero input raises invalid and gives 0x80000000; denormal input gives 0
 53
 54    andl    $0x7fffffff,        %edx        //take absolute value
 55    bsrl    %edx,               %eax        //get position of leading set bit
 56    subl    $(126+23),          %eax        //subtract out bias for denorm
 57    cmp     $0,                 %edx
 58    cmove   %ecx,               %eax        //x == 0: return the cvtss2si result (bsrl leaves %eax undefined for a zero source)
 59    ret
 60    
 61ENTRY( ilogb )
// ilogb: unbiased binary exponent of a double-precision argument, returned as an integer in %eax.
//   The argument is read from FRAME_SIZE(STACKP) on i386 and from %xmm0 otherwise.
//   normal x     -> exponent field - 1023
//   denormal x   -> index of the leading set bit of |x| - (1022+52)
//   zero         -> 0x80000000, invalid flag raised
//   NaN          -> 0x80000000, invalid flag raised
//   +/-infinity  -> 0x7fffffff, invalid flag raised
 62//move high 32-bits of argument to eax
 63#if defined( __i386__ )
 64    //We take this roundabout route to avoid a store forwarding stall, which costs us 8 cycles on Yonah.
 65    movq    FRAME_SIZE(STACKP), %xmm0
 66    psrlq   $32,    %xmm0
 67    movd    %xmm0,  %eax
 68#else
 69    movd    %xmm0,  %rax
 70    shrq    $32,    %rax
 71#endif
 72
 73    andl    $0x7ff00000,        %eax        //expose exponent
 74    addl    $0x00100000,        %eax        //add 1 to exponent, promotes NaN/Inf to 2**31
 75    cmpl    $0x00100000,        %eax        //test for exceptional cases
 76    jle     1f                              //jump to special code for zeros, NaNs, Infs and denorms
 77    sarl    $20,                %eax        //move exponent to unit precision
 78    subl    $1024,              %eax        //remove (bias+1)
 79    ret
 80
 811:  je      2f                              //handle zeros/denorms at 2 (flags are still those of the cmpl above)
 82
 83    //reload initial value
 84#if defined( __i386__ )
 85    movsd   FRAME_SIZE(STACKP), %xmm0
 86#endif
 87    cvtsd2si    %xmm0,          %eax        //set invalid and produce a 0x80000000
 88    cmpeqsd %xmm0,              %xmm0       // x == x : all ones for infinity, 0 for NaN
 89    movd    %xmm0,              %edx        // low 32 bits of the mask: -1 for infinity, 0 for NaN
 90    addl    %edx,               %eax        // c = |x| == inf ? 0x7fffffff : 0x80000000
 91    ret
 92    
 932: // zero/denorm case 
 94    //reload initial value
 95#if defined( __i386__ )
 96    movsd   FRAME_SIZE(STACKP), %xmm0
 97    movl    4+FRAME_SIZE(STACKP), %edx      // high 32 bits of x
 98#else
 99    movd    %xmm0,  %rdx
100#endif
101    xorpd   %xmm1,  %xmm1
102    cmpeqsd   %xmm1,  %xmm0                  // xmm0 = (x == 0) ? all ones : 0
103    cvtsd2si  %xmm0, %ecx       //set invalid for zero, and produce ILOGB_ZERO. Denorms produce 0 here.
104    
105#if defined( __i386__ )
    // 32-bit build: locate the leading set bit of the 64-bit |x| one 32-bit word at a time
106    andl    $0x7fffffff,    %edx    //take absolute value
107    movl    $32,            %eax    //assume the leading bit lives in the high word: bit index offset 32
108    cmp     $0,             %edx
109    cmove   %edx,           %eax    //high word is zero: offset becomes 0 (%edx is 0 here)
110    cmove   FRAME_SIZE(STACKP), %edx    //...and the low word is searched instead
111    bsrl    %edx,           %edx    //index of the leading set bit within the chosen word
112    subl    $(1022+52),     %eax    //subtract out bias for denorm
113    addl    %edx,           %eax    //eax = bit index within the 64-bit value - (1022+52)
114    cmp     $0,             %ecx
115    cmovne  %ecx,           %eax    //x == 0: return the 0x80000000 produced by cvtsd2si
116    ret
117
118#else
119    //take absolute value
120    salq    $1,     %rdx
121    shrq    $1,     %rdx
122    bsrq    %rdx,   %rax            //index of the leading set bit of |x|
123    subl    $(1022+52), %eax        //subtract out bias for denorm
124    cmp     $0,     %rdx
125    cmove   %ecx,   %eax            //x == 0: return the 0x80000000 produced by cvtsd2si
126    ret
127#endif
128
129ENTRY( ilogbl )
// ilogbl: unbiased binary exponent of a long double argument, returned as an integer in %eax.
//   The argument is always read from the stack: 64-bit mantissa at FRAME_SIZE(STACKP),
//   sign/exponent word at 8+FRAME_SIZE(STACKP).
//   normal x     -> exponent field - 16383
//   denormal x   -> index of the leading set mantissa bit - (16382+63)
//   zero         -> 0x80000000, invalid flag raised
//   NaN          -> 0x80000000, invalid flag raised
//   +/-infinity  -> 0x7fffffff, invalid flag raised
130    movw    8+FRAME_SIZE( STACKP ),     %ax     // sign + exponent word
131    andw    $0x7fff,                    %ax     // drop the sign bit
132    addw    $1,                         %ax     // add 1 to exponent: NaN/Inf (0x7fff) becomes 0x8000, negative as a 16-bit value
133    cmpw    $1,                         %ax     // test for exceptional cases
134    jle     1f                                  // zeros, denorms, NaNs and Infs are handled at 1
135    andl    $0xffff,                    %eax    // only %ax was written above; clear the stale upper half of %eax
136    subl    $16384,                     %eax    // remove (bias+1)
137    ret
138    
1391:  je      2f                                  // exponent field was 0: zero/denorm at 2 (flags are still those of the cmpw above)
140    
141    fldt    FRAME_SIZE(STACKP)          // {x}
142    pcmpeqb %xmm0,      %xmm0           // -1
143    xor     %eax,		%eax
144    cvtss2si %xmm0,     %edx            // set invalid (all-ones bit pattern is a NaN; the value in %edx is not used)
145    fucomip  %st(0), %st(0)             // test for NaN; pops x so the x87 stack is left empty
146    setp    %al                         // isnan(x)
147    addl    $0x7fffffff,   %eax         // isnan(x) ? 0x80000000 : 0x7fffffff
148    ret
149
150
1512: //zeros / denorms
152    fildll  FRAME_SIZE(STACKP)          // load in 0/denormal mantissa as integer
153    SUBP    $(32-FRAME_SIZE), STACKP    // make room for a scratch long double at (STACKP)
154    fstpt   (STACKP)                    // write it back out
155    xorl    %eax,           %eax
156    movw    8(STACKP),      %ax         // exponent word of the converted mantissa (0 when the mantissa was 0)
157    subl    $(16382+63+16383), %eax     // remove the x87 bias (16383) and the denormal scaling (16382+63)
158    movl    $0x80000000,    %edx
159    cmp     $(-16382+-63+-16383), %eax  // equal only when the exponent word read above was 0, i.e. x == 0
160    cmove   %edx,           %eax        // x == 0: result is 0x80000000
161    ADDP    $(32-FRAME_SIZE), STACKP
162
163    //set invalid if necessary
    // The integer result is reinterpreted as float bits. 0x80000000 is -0.0f, which compares
    // equal to 0, giving an all-ones mask (a NaN) that makes cvtss2si raise invalid.
    // NOTE(review): every denormal result is a small negative integer whose bit pattern looks like
    // a quiet NaN, so the compare is false and no flag is raised -- confirm over the full range.
164    xorps   %xmm1,          %xmm1
165    movd    %eax,           %xmm0
166    cmpeqss %xmm1,          %xmm0
167    cvtss2si    %xmm0,      %edx        // executed only for its flag side effect; %eax already holds the result
168    ret
169
170
171ENTRY( logbf )
// logbf: unbiased binary exponent of a single-precision argument, returned as a floating-point value
//   (pushed on the x87 stack on i386, in %xmm0 otherwise).
//   The argument is read from FRAME_SIZE(STACKP) on i386 and from %xmm0 otherwise.
//   normal x     -> exponent field - 127
//   denormal x   -> exponent of x after scaling by 2**(23+126), minus (126+23)
//   zero         -> -infinity, divide-by-zero flag raised
//   +/-infinity  -> +infinity
//   NaN          -> NaN
172    //move argument to eax
173#if defined( __i386__ )
174    movl    FRAME_SIZE(STACKP), %eax
175#else
176    movd    %xmm0,  %eax
177#endif
178
179    andl    $0x7f800000,        %eax        //expose exponent
180    addl    $0x00800000,        %eax        //add 1 to exponent, promotes NaN/Inf to 2**31
181    cmpl    $0x00800000,        %eax        //test for exceptional case
182    jle     1f                              //jump to special code for zeros, NaNs, Infs and denorms
183    sarl    $23,                %eax        //move exponent to unit precision
184    subl    $128,               %eax        //remove (bias+1)
185
186#if defined( __i386__ )    
    //bounce the integer through a stack slot so the x87 unit can convert it
187    SUBP    $4, STACKP
188    movl    %eax,   (STACKP)
189    fildl   (STACKP)
190    ADDP    $4, STACKP
191#else
192    cvtsi2ss   %eax,           %xmm0
193#endif
194    ret
195    
1961:  je  2f                                  //handle zeros/denorms at 2 (flags are still those of the cmpl above)
197
198    //Infinity returns |x|. NaN returns |x| on i386 (fabs) and is returned unchanged on the SSE path below.
199#if defined( __i386__ )
200    flds    FRAME_SIZE(STACKP)
201    fabs
202#else
203    movaps  %xmm0,  %xmm1
204    cmpeqps %xmm0, %xmm0                       // all ones where x == x (infinity), 0 for NaN
205    pslld   $31,    %xmm0                      // keep only the sign-bit position of that mask
206    andnps  %xmm1,  %xmm0                      // x & ~mask: clears the sign of infinity, leaves NaN untouched
207#endif
208    ret
209    
2102:  
211    //load |x| as if it was an int (equivalent to multiplying denorm by 2**(23+126))
212#if defined( __i386__ )
213    movl    FRAME_SIZE(STACKP),     %edx
214#else
215    movd    %xmm0,                  %edx
216#endif
217    andl    $0x7fffffff,            %edx
218    cvtsi2ss  %edx,                 %xmm1       //read the denorm/0 in as an integer
219
220    //move a = x==0 ? 1.0f: 0.0f to xmm
221    movl    $0x3f800000,            %eax
222    xorl    %ecx,                   %ecx
223    cmp     $0,                     %edx
224    cmovne  %ecx,                   %eax
225    movd    %eax,                   %xmm0       //load x == 0 ? 1.0f : 0.0f     (0.0f is to avoid inexact flag on denorms)
226
227    // do isdenorm(x) ? 0.0f / denorm*2**(23+126) : 1.0 / 0
228    divss   %xmm1,                  %xmm0       //set div/0 flag if x is zero (the quotient itself is overwritten below)
229
230    //extract the exponent of the scaled denorm
231    movd    %xmm1,                  %eax        //move the bits back to gpr, formatted as float now
232    andl    $0x7f800000,            %eax        //extract exponent
233    sarl    $23,                    %eax        //reduce to unit precision
234    subl    $(126+23+127),          %eax        //correct for new bias after int->float conversion
235
236    //convert to float
237    cvtsi2ss %eax,                  %xmm0       //logb requires its result in floating point format, so convert the unbiased exponent to float
238    movd    %xmm0,                  %edx        //and bring it back
239
240    //if( x == 0 ) return -inf
241    movl    $0xff800000,            %ecx        //bit pattern of -infinity
242    cmp     $(-126-23-127),         %eax        //equal only when the exponent field extracted above was 0, i.e. x == 0
243    cmove   %ecx,                   %edx
244
245#if defined( __i386__ )
    //bounce the float bits through a stack slot to return them on the x87 stack
246    SUBP    $4, STACKP
247    movl    %edx,  (STACKP)
248    flds    (STACKP)
249    ADDP    $4, STACKP
250#else
251    movd    %edx,                   %xmm0
252#endif
253    ret
254
255ENTRY( logb )
// logb: unbiased binary exponent of a double-precision argument, returned as a floating-point value
//   (pushed on the x87 stack on i386, in %xmm0 otherwise).
//   The argument is read from FRAME_SIZE(STACKP) on i386 and from %xmm0 otherwise.
//   normal x     -> exponent field - 1023
//   denormal x   -> exponent of x after scaling by 2**(1022+52), minus (1022+52)
//   zero         -> -infinity, divide-by-zero flag raised
//   +/-infinity  -> +infinity
//   NaN          -> NaN
256    //move argument to eax
257#if defined( __i386__ )
258    //We take this roundabout route to avoid a store forwarding stall, which costs us 8 cycles on Yonah.
259    movq    FRAME_SIZE(STACKP), %xmm0
260    psrlq   $32,    %xmm0
261    movd    %xmm0,  %eax
262#else
263    movd    %xmm0,  %rax
264    shrq    $32,    %rax
265#endif
266
267    andl    $0x7ff00000,        %eax        //expose exponent
268    addl    $0x00100000,        %eax        //add 1 to exponent, promotes NaN/Inf to 2**31
269    cmpl    $0x00100000,        %eax        //test for exceptional case
270    jle     1f                              //jump to special code for zeros, NaNs, Infs and denorms
271    sarl    $20,                %eax        //move exponent to unit precision
272    subl    $1024,              %eax        //remove (bias+1)
273
274#if defined( __i386__ )    
    //bounce the integer through a stack slot so the x87 unit can convert it
275    SUBP    $4, STACKP
276    movl    %eax,   (STACKP)
277    fildl   (STACKP)
278    ADDP    $4, STACKP
279#else
280    cvtsi2sd    %eax,          %xmm0
281#endif
282    ret
283    
2841:  je  2f                                  //handle zeros/denorms at 2 (flags are still those of the cmpl above)
285
286    //Infinity returns |x|. NaN returns |x| on i386 (fabs) and is returned unchanged on the SSE path below.
287#if defined( __i386__ )
288    fldl    FRAME_SIZE(STACKP)
289    fabs
290#else
291    movapd  %xmm0,  %xmm1
292    cmpeqpd %xmm0, %xmm0                       // all ones where x == x (infinity), 0 for NaN
293    psllq   $63,    %xmm0                      // keep only the sign-bit position of that mask
294    andnpd  %xmm1,  %xmm0                      // x & ~mask: clears the sign of infinity, leaves NaN untouched
295#endif
296    ret
297    
2982:  
    //zero/denorm case: reserve scratch space; both branches below release it before returning
299    SUBP    $(16 - FRAME_SIZE),     STACKP
300
301#if defined( __i386__ )
302    //take absolute value of x
303    movq    (16-FRAME_SIZE+FRAME_SIZE)(STACKP),             %xmm0
304    pcmpeqb %xmm1,                  %xmm1
305    psllq   $63,                    %xmm1
306    pandn   %xmm0,                  %xmm1
307    movq    %xmm1,                  (STACKP)
308    
309    //convert to double (interpreting bits as int -- same as multiplying denormal by 2**(1022+52))
310    fildll  (STACKP)                // { x * 2**(1022+52) }
311    fstpt   (STACKP)
312    
313    //load in the exponent
314    xorl    %eax,                   %eax
315    movw    8(STACKP),              %ax
316    
317    //subtract the bias
318    subl    $(1022+52+16383),       %eax
319    cmp     $(-1022-52-16383),      %eax        //equal only when the exponent word was 0, i.e. x == 0; flags stay live until the fcmovne below
320    movl    %eax,                   (STACKP)
321    
322    //deal with the div/0 flag we need to set for the 0 case
323    fld1                            // { 1 }
324    fldz                            // { 0, 1 }
325    fcmovne     %st(1), %st(0)      // x != 0: { 1, 1 };  x == 0: { 0, 1 }
326    fdivrp      %st(1), %st(0)      // set div/0 if necessary, gives either 1 or Inf
327    
328    fildl       (STACKP)            //{ denormal result, 1 or Inf }
329    fmulp                           //{ result }
330    
331    ADDP    $(16 - FRAME_SIZE),     STACKP
332    ret    
333    
334#else
335    //Fetch original value. 
336    movd    %xmm0,                  %rax
337
338	//Check to see if original value is zero
339	xorpd	%xmm1,					%xmm1
340	cmpneqsd	%xmm0,				%xmm1		// x == 0 ? 0 : -1ULL 
341
342	//take absolute value
343    salq    $1,                     %rax
344    shrq    $1,                     %rax
345
346    //convert to double (interpreting bits as int -- same as multiplying denormal by 2**(1022+52))
347    cvtsi2sd    %rax,               %xmm0
348
349	//change test result from -1 to 1.0
350	psllq	$54,					%xmm1		// x == 0 ? 0 : 0xFFC0000000000000 
351	psrlq	$2,						%xmm1		// x == 0 ? 0 : 0x3ff0000000000000
352
353    //move exponent to units position
354    movd    %xmm0,                  %rax
355    shrq    $52,                    %rax
356
357    //subtract out bias
358    subq    $(1022+52+1023),		%rax
359    
360    //convert to double precision
361    cvtsi2sd    %eax,               %xmm0
362
363	//divide result by x == 0 ? 0 : 1.0 to get correct zero result and set div/0 if x == 0
364	divsd	%xmm1,					%xmm0
365
366    ADDP    $(16 - FRAME_SIZE),     STACKP
367    ret
368    
369#endif
370
371
372ENTRY( logbl )
// logbl: unbiased binary exponent of a long double argument, returned on the x87 stack.
//   The argument is always read from the stack; after the SUBP below its mantissa is at
//   32(STACKP) and its sign/exponent word at 40(STACKP).
//   normal x     -> exponent field - 16383
//   denormal x   -> exponent of the mantissa converted as an integer, minus (16382+63)
//   zero         -> -infinity, divide-by-zero flag raised
//   +/-infinity  -> +infinity
//   NaN          -> x, returned unchanged
373    SUBP    $(32-FRAME_SIZE),       STACKP
374    
375    //load exponent
376    xorl    %eax,                   %eax
377    movw    40(STACKP),             %ax
378    
379    //take absolute value
380    andl    $0x7fff,                %eax
381    
382    //add 1 to exponent, pushes NaN/Inf into 2**15 bit
383    addl    $1,                     %eax
384    
385    //check for exceptional conditions
386    cmpw    $1,                     %ax
387    
388    //deal with exceptional conditions elsewhere
389    jle     1f
390    
391    //remove the bias+1
392    subl    $(16383+1),             %eax
393    
394    //convert to long double
395    movl    %eax,                   (STACKP)
396
397    fildl   (STACKP)
398    
399    ADDP    $(32-FRAME_SIZE),       STACKP
400    ret
401
4021:  je      2f          //deal with zero / denorm elsewhere
403
404    //load value
    // fucomi of x against itself sets ZF for both outcomes (equal and unordered), so the
    // NaN test has to be made on PF (unordered). fcmovne tests ZF and could never move here,
    // which made NaN take the fabs result; fcmovu restores the intended NaN pass-through.
405    fldt    32(STACKP)              // {x}
406    fucomi  %st(0), %st(0)          // isnan(x): PF is set only when x is NaN
407    fld     %st(0)                  // {x, x}
408    fabs                            // {|x|, x}
409    fcmovu  %st(1), %st(0)          // {result, x}   NaN: result = x;  infinity: result = |x|
410    fstp    %st(1)                  // {result}
411    ADDP    $(32-FRAME_SIZE),       STACKP
412    ret
413
4142:  //denormal/zero
415    fildll  32(STACKP)              // load in 0/denormal mantissa as integer. Fails for pseudodenormals, but they "don't happen".
416    fstpt   (STACKP)                // write it back out to stack as floating point
417    xorl    %eax,           %eax
418    movw    8(STACKP),      %ax     // read the exponent
419    subl    $(16382+63+16383), %eax // subtract out the bias
420    cmp     $(-16382+-63+-16383), %eax //check for zero; flags stay live until the fcmovne below
421
422    //convert the exponent to float
423    movl    %eax,       (STACKP)
424    fildl   (STACKP)                // { exp }
425
426    //set div/0 for zero result
427    fld1                            // { 1, exp }
428    fldz                            // { 0, 1, exp }
429    fcmovne     %st(1), %st(0)      // x != 0: { 1, 1, exp };  x == 0: { 0, 1, exp }
430    fdivrp      %st(0), %st(1)      // { 1 or Inf, exp } set div/0 if necessary, gives either 1 or Inf
431    fmulp                           // { result }
432    
433    ADDP    $(32-FRAME_SIZE), STACKP
434    ret
435    
436
437    
438