src/libm/Source/Intel/modfl.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / modfl.S
at fixPythonPipStalling 164 lines 6.5 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 *  modfl.s
  3 *
  4 *      by Ian Ollmann 
  5 *
  6 *  Copyright 2007 Apple Inc. All rights reserved.
  7 *
  8 */
  9 
 10/*
 11#include <machine/asm.h>
 12#define LOCAL_STACK_SIZE	12
 13#include "abi.h"
 14
 15#if defined( __LP64__ )
 16	#define DEST_P			%rdi
 17	#define LOAD_DEST_P
 18#else
 19	#define DEST_P			%eax
 20	#define LOAD_DEST_P		mov			SECOND_ARG_OFFSET(STACKP), DEST_P
 21#endif
 22
 23// I tried branch free code here. Alas there were so many special cases, that 2/3 of the code was patchup after the fidll instruction.
 24// I've moved some special cases { 0, +-inf, NaN} out, which simplifies things quite a bit on any path you care to follow.
 25
 26ENTRY( modfl )
 27        SUBP		$LOCAL_STACK_SIZE, STACKP
 28        
 29        LOAD_DEST_P
 30        fldt		FIRST_ARG_OFFSET(STACKP)			// {x}
 31        fld         %st(0)                              //  { x, x }
 32        fabs                                            //  { |x|, x }
 33        fld         %st(0)                              //  { |x|, |x|, x }
 34        frndint                                         //  { |i|, |x|, x }
 35        fucomi      %st(1), %st(0)                      //  { |i|, |x|, x }
 36        jz          1f
 37        
 38        //at this point we know we are a denormal or normal non-integer
 39        //the next step is to truncate the value. We've already rounded it
 40        //to an int. We just need to make sure that the right rounding direction
 41        //applied
 42        fld1                                            //  { 1, |i|, |x|, x }
 43        fldz                                            //  { 0, 1, |i|, |x|, x }
 44        fcmovnbe    %st(1), %st(0)                      //  { 0 or 1, 1, |i|, |x|, x }
 45        fstp        %st(1)                              //  { 0 or 1, |i|, |x|, x }
 46        fsubrp      %st(1), %st(0)                      //  { |i|, |x|, x }
 47
 48        //fix the sign
 49        fld         %st(0)                              //  { |i|, |i|, |x|, x }
 50        fchs                                            //  { -|i|, |i|, |x|, x }
 51        fxch        %st(2)                              //  { |x|, |i|, -|i|, x }
 52        fucomip     %st(3), %st(0)                      //  { |i|, -|i|, x }
 53        fcmovne     %st(1), %st(0)                      //  { i result, -|i|, x }
 54        fstp        %st(1)                              //  { i result, x }
 55        
 56        //get the fractional part and store the iresult
 57        fsubr       %st(0), %st(1)                      //  { i result, f result }
 58        fstpt       (DEST_P)                            //  { -|i|, f result }
 59        ADDP		$LOCAL_STACK_SIZE, STACKP
 60        ret
 61        
 621:      //special case entry for NaN, inf and integers, including zero
 63        fstp        %st(0)                              //  { |x|, x }
 64        fstp        %st(0)                              //  { x }
 65
 66        //handle integers, Inf and zero 
 67        fldz                                            //  { 0, x }
 68        fadd        %st(0), %st(1)                      //  { 0, x }  silence NaN
 69        fucomi      %st(1), %st(0)                      //  { 0, x }
 70        fchs                                            //  { -0, x }
 71        fldz                                            //  { 0, -0, x }
 72        fcmovnbe    %st(1), %st(0)                      //  { +-0, -0, x }
 73        fcmove      %st(2), %st(0)                      //  { f, -0, x }
 74        fxch        %st(2)                              //  { x, -0, f }
 75        fstpt       (DEST_P)                            //  { -0, f }
 76        fstp        %st(0)                              //  { f }
 77        ADDP		$LOCAL_STACK_SIZE, STACKP
 78        ret
 79 */       
 80
 81#include <machine/asm.h>
 82#include "abi.h"
 83
 84#if defined( __i386__ )
 85    #define RESULT_P        %edx
 86#else
 87    #define RESULT_P        %rdi
 88#endif
 89
 90ENTRY( modfl )
 91    xorl    %eax,                   %eax
 92    movw    8+FRAME_SIZE(STACKP),   %ax         //load sign + exponent of input
 93    movl    %eax,                   %ecx        //set aside sign + exponent
 94    andl    $0x7fff,                %eax        // remove sign
 95    addl    $(16384-62),            %eax
 96    cmpw    $(16383+16384-62),      %ax
 97    jl      1f
 98    
 99    //common case of 1.0 <= x < 2**64
100    movq    FRAME_SIZE( STACKP),    %xmm0
101    subl    $(16383+16384-62),      %eax
102    movl    $63,                    %edx
103    subl    %eax,                   %edx
104    movd    %edx,                   %xmm1
105#if defined( __i386__ )
106    movl    16+FRAME_SIZE( STACKP ),    RESULT_P
107#endif
108    pcmpeqb %xmm2,                  %xmm2
109    psllq   %xmm1,                  %xmm2
110    pand    %xmm2,                  %xmm0
111    movq    %xmm0,                  (RESULT_P)
112    movw    %cx,                    8(RESULT_P)
113    fldt    FRAME_SIZE(STACKP)      // { x, 0 }
114    fldt    (RESULT_P)              // { truncl(x), x }
115    fucomi  %st(1), %st(0)          // { truncl(x), x }
116    je      4f          //if x is an integer goto 4
117    
118    fsubr   %st(0), %st(1)          // { truncl(x), fract }
119    fstpt   (RESULT_P)              // { fract }
120    ret
121        
1221:
123#if defined( __i386__ )
124    movl    16+FRAME_SIZE( STACKP ),    RESULT_P
125#endif
126    jae     2f                          //Inf, NaN, big numbers go to 2
127
128    // |x| < 1.0
129    pxor    %xmm0,                  %xmm0
130    movq    %xmm0,                  (RESULT_P)
131    andl    $0x8000,                %ecx
132    movw    %cx,                    8(RESULT_P)
133    fldt    FRAME_SIZE( STACKP )
134    ret
135    
1362:  
137    // |x| >= 2**63 or NAN
138    fldz                            // { 0 }
139    fldt    FRAME_SIZE( STACKP )    // { x, 0 }
140    fucomi  %st(1), %st(0)      
141    jp      3f                      // do NaNs elsewhere
142    fstpt   ( RESULT_P)             // { 0 }
143    fchs                            // { -0 }
144    fldz                            // { 0, -0 }
145    fcmovb  %st(1), %st(0)          // { fract, -0 }
146    fstp    %st(1)
147    ret
148
1493:  //NaN                           // { x, 0 }
150    fld     %st(0)                  // { x, x, 0 }
151    fstpt   (RESULT_P)              // { x, 0 }
152    fstp    %st(1)                  // { x }s
153    ret
154
1554:  //integer                   // { truncl(x), x }
156    fstpt   (RESULT_P)          // { x }
157    fldz                        // { 0, x }
158    fucomi  %st(1), %st(0)
159    fchs                        // { -0, x }
160    fldz                        // { 0, -0, x }
161    fcmovnb %st(1), %st(0)      // { fract, -0, x }
162    fstp    %st(2)              // { -0, fract }
163    fstp    %st(0)              // { fract }
164    ret