src/libm/Source/Intel/nextafterl.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / nextafterl.S
at fixPythonPipStalling 154 lines 6.7 kB view raw
wrap content
Lubos Dolezel More progress 9y ago
a76db625
/*
 *  nextafterl.s
 *  LibmV5
 *
 *  Written by Ian Ollmann on 11/9/07.
 *  Copyright 2006 Apple Computer. All rights reserved.
 *
 */
  9
 10#include <machine/asm.h>
 11#include "abi.h"
 12
 13#if defined( __x86_64__ )
 14.align 3
 15.literal8
 16signBit:        .quad   0x8000000000000000
 17nonSignBit:     .quad   0x7fffffffffffffff
 18#endif
 19
 20.align 4
 21.const
 22
 23tiny:   .quad   0x8000000000000000, 0x1         //0x1.0p-16382
 24big:    .quad   0x8000000000000000, 0x7ffe      //0x1.0p16383
 25min:    .quad   0x1, 0x0, 0x1, 0x8000           //{ 0x1.0p-16445, -0x1.0p-16445 }
 26
 27.text
 28ENTRY(nexttowardl)
 29ENTRY( nextafterl )
 30    fldt    FRAME_SIZE( STACKP )        //  { x }
 31    fldt    16+FRAME_SIZE( STACKP )     //  { y, x }
 32    xor     AX_P,       AX_P            //  0
 33    fucomip %st(1),     %st(0)          //  { x }       if( x == y || isnan(x) || isnan(y) )
 34    setbe   %al                         //  x >= y ? 1 : 0
 35    je      3f                          //                      goto 2
 36
 37    fldz                                //  { 0, x }
 38    xor     DX_P,       DX_P            //  0
 39    fucomip %st(1),     %st(0)          //  { x }       if( x == 0 )
 40    setb    %dl                         //  0 < x ? 1 : 0
 41    je      2f                          //                      goto 1
 42
 43    xor     DX_P,       AX_P            //  (x >= y) != (0 < x) ? 1 : 0
 44    add     AX_P,       AX_P            //  (x >= y) != (0 < x) ? 2 : 0
 45
 46#if defined( __i386__ )
 47    movq    FRAME_SIZE( STACKP ),       %xmm0   // load x significand
 48    xorpd   %xmm1,                      %xmm1   //  { 0, 0 }
 49    cmpeqsd %xmm1,                      %xmm1   //  { 0, -1LL }
 50    movdqa  %xmm0,                      %xmm2   // old significand
 51    movd    %eax,                       %xmm3   // (x >= y) != (0 < x) ? 2 : 0
 52    paddq   %xmm1,                      %xmm0   // subtract 1 from significand
 53    psllq   $63,                        %xmm1   // 0x8000000000000000ULL
 54    paddq   %xmm3,                      %xmm0   // add back either 2 or 0 depending on the direction we are moving the exponent
 55
 56    //At this point, the significand might have overflowed or it might have underflowed.
 57    //If it overflowed, the leading bit was just lost due to modulo overflow. The rest of the bits
 58    //are zero (since we added 1), so all we need to do is set the leading bit again.  If it
 59    //underflowed, the bits are 0x7fffffffffffffff (since we subtracted 1) and all we need to do
 60    //is set the leading bit again. We still need to patch up the exponent, and we might have just
 61    //created a denormal. We will deal with that later.
 62    pxor    %xmm0,                      %xmm2   // set leading bit to 1 if leading bit changed (we have overflow/underflow)
 63    pand    %xmm1,                      %xmm2   // zero the non-leading bits
 64    por     %xmm2,                      %xmm0   // set leading bit again in the significand if it became unset
 65    movmskpd %xmm2,                     %edx    // put carry bit into edx
 66    movq    %xmm0,        FRAME_SIZE(STACKP)    // write out new significand
 67#else
 68    movq    FRAME_SIZE( STACKP ),       %rcx    // load significand of x
 69    movq    %rcx,                       %rdx    // old significand of x
 70    subq    $1,                         %rcx    // subtract 1 from significand of x
 71    addq    %rax,                       %rcx    // add back in either 0 or 2 depending on direction of movement
 72    xorq    %rcx,                       %rdx    // set leading bit to 1 if leading bit changed (we have overflow/underflow)
 73    andq    signBit(%rip),              %rdx    // zero all the other bits
 74    orq     %rdx,                       %rcx    // restore leading bit in the significand if it changed
 75    shrq    $63,                        %rdx    // move carry bit to low bit
 76    movq    %rcx,         FRAME_SIZE(STACKP)    // write out new significand
 77#endif
 78
 79    // fix exponent
 80    test    $1,                         %edx    // Did we change it?
 81    movzwl  8+FRAME_SIZE( STACKP ),     %edx    // Load the exponent
 82    jnz     1f
 83    
 84    // check for overflow/underflow
 850:  andl    $0x7fff,                    %edx
 86    subl    $1,                         %edx
 87    cmpl    $0x7ffe,                    %edx
 88    jae     5f
 89    
 90    fldt    FRAME_SIZE( STACKP )
 91    fstp    %st(1)
 92    ret
 93
 94    //fix exponent
 951:  subl    $1,                         %eax    // 
 96    addl    %eax,                       %edx    //  add back either 2 or 0 depending on direction of motion.
 97    movw    %dx,        8+FRAME_SIZE( STACKP )
 98    jmp     0b
 99
100
1012:  // x == 0, x != y, return copysignl( 0x1.0p-16445, y )
102    movzwl  24+FRAME_SIZE( STACKP ),    %eax    // sign + exponent of y
103    and     $0x8000,                    AX_P    // sign of y
104    shr     $11,                        AX_P    // y is negative ? 16 : 0
105    fstp    %st(0)                              //  {}
106#if defined( __x86_64__ )
107    leaq    min(%rip),                  %rcx
108    fldt    (%rcx, %rax, 1 )                    //  { copysign( 0x1.0p-16445, y ) }
109    fldt    tiny( %rip )                        //  { 0x1.0p-16382, copysign( 0x1.0p-16445, y ) }
110#else
111    call    0f
1120:  popl    %ecx
113    fldt    (min-0b)(%ecx, %eax, 1 )            //  { copysign( 0x1.0p-16445, y ) }
114    fldt    (tiny-0b)( %ecx )                   //  { 0x1.0p-16382, copysign( 0x1.0p-16445, y ) }
115#endif  
116    fmulp   %st(0), %st(0)                      //  { copysign( 0x1.0p-16445, y ) }         set underflow, inexact
117    ret
118
119
120    // x == y || isnan(x) || isnan(y)
1213:  fldt    16+FRAME_SIZE( STACKP )     // { y, x }
122    jp      4f                          //  if( isnan(x) || isnan(y) )  goto 3
123    fstp    %st(1)                      // { y }    return y
124    ret
125
126    // nan
1274:  faddp                               //  return x + y
128    ret
129  
130// overflow or underflow
1315:  je      6f                          // result is infinite, goto 6
132
133    //underflow
134    fstp    %st(0)                      // get rid of x
135#if defined( __x86_64__ )
136    fldt    tiny( %rip )                //  { 0x1.0p-16382 }
137    andq    nonSignBit(%rip),           %rcx
138    movq    %rcx,           FRAME_SIZE( STACKP )
139#else
140    call    0f
1410:  popl    %ecx
142    fldt    (tiny-0b)( %ecx )           //  { 0x1.0p-16382 }
143    pandn   %xmm0,                      %xmm2   // unset the leading bit
144    movq    %xmm2,        FRAME_SIZE(STACKP)    // write out denorm significand
145#endif  
146    fmulp   %st(0), %st(0)              // set underflow
147    fldt    FRAME_SIZE( STACKP )
148    ret    
149    
150
151// infinite
1526:  faddp   %st(0), %st(0)              // {}       add x to itself and throw away the result -- sets overflow and inexact
153    fldt    FRAME_SIZE( STACKP )
154    ret