src/libm/Source/Intel/lround.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / lround.S
at fixPythonPipStalling 217 lines 8.0 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
  2/*
  3 *	lround.s
  4 *
  5 *		by Ian Ollmann
  6 *
  7 *	Copyright (c) 2007,  Apple Inc.  All Rights Reserved.
  8 *
  9 *	Implementation of C99 lround and llround functions for i386 and x86_64.
 10 */
 11 
 12#include <machine/asm.h>
 13#include "abi.h"
 14
 15.literal8
 // Double-precision constants, given either as raw IEEE-754 bit patterns (.quad) or as decimal literals (.double).
 // NOTE(review): half, mtwo31 and twom32 are not referenced by any code visible in this file.
 16half:           .quad       0x3fe0000000000000      // 0.5
 17mtwo63:         .quad       0xc3e0000000000000      // -2**63
 18mtwo31:         .quad       0xc1e0000000000000      // -2**31
 19two52:          .quad       0x4330000000000000      // 2**52
 20twom32:         .quad       0x3df0000000000000      // 2**-32
 21implicit:       .quad       0x8000000000000000      // bit 63 only: OR-ed in as the explicit leading significand bit; also used as a sign mask via andnpd
 22cutoff:         .double     2147483647.5            // 2**31-0.5
 23mcutoff:        .double    -2147483648.5            // -2**31-0.5
 24
 25.text
 26#if defined( __x86_64__ )
 // lround / llround, x86_64: both entry labels share the code below.
 // Reads x from xmm0 and returns x rounded to the nearest integer, halfway cases away from zero, in rax.
 // Works on the raw bit pattern with integer shifts; cvttsd2si is executed for its floating-point exception flags.
 // Results produced outside the normal range:
 //      |x| < 0.5                   -> 0
 //      x >= 0x1.0p63 (incl. +inf)  -> 0x7fffffffffffffff
 //      x <= -0x1.0p63 (incl. -inf) -> 0x8000000000000000
 //      NaN                         -> 0x8000000000000000
 // Registers written: rax, rcx, rdx, flags.
 27ENTRY( lround )
 28ENTRY( llround )
 29    movd    %xmm0,                  %rax    // rax = raw bits of x
 30    movq    %rax,                   %rdx    // rdx = second copy of the bits (significand source; sign test on the overflow path)
 31    shrq    $52,                    %rax
 32    andq    $0x7ff,                 %rax    // exponent + bias
 33    subq    $0x3fe,                 %rax    // rax = exponent + 1; negative when |x| < 0.5
 34    cmpq    $64,                    %rax    // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
 35    jae     1f                              //      goto 1   (unsigned compare, so negative rax is caught too)
 36    
 37    // 0.5 <= |x| < 0x1.0p63
 38    shlq    $11,                    %rdx    // put most significant bit in leading position
 39    orq     implicit(%rip),         %rdx    // make implicit bit explicit
 40
 41    // shift value right so that the integer bit is at position 1
 42    movq    $63,                    %rcx
 43    subq    %rax,                   %rcx    // 63 - (exponent+1)
 44    shrq    %cl,                    %rdx    // bit 1 = units, bit 0 = the 0.5 bit; lower fraction bits are dropped
 45    
 46    addq    $1,                     %rdx    // add 0.5 in magnitude: round half away from zero
 47    shrq    $1,                     %rdx    // move unit bit to correct position
 48    
 49    // Fix sign
 50    movd    %xmm0,                  %rax
 51    sarq    $63,                    %rax    // rax = x < 0 ? -1 : 0
 52    movq    %rax,                   %rcx
 53    xorq    %rdx,                   %rax    // (magnitude ^ mask) - mask negates the magnitude when mask is -1
 54    subq    %rcx,                   %rax
 55    
 56    // set inexact as necessary
 57    cvttsd2si %xmm0,                %rdx    // converted value in rdx is not used; run only for the exception flag
 58    ret
 59  
 60//  |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) 
 611:  jge         2f                          // flags are still those of the cmpq above: signed >= 64 means large, inf or nan
 62
 63    // |x| < 0.5
 64    cvttsd2si   %xmm0,              %rax    // truncates to 0; sets inexact when x is nonzero
 65    ret
 66    
 67    // |x| >= 0x1.0p63 || isnan(x) 
 682:  ucomisd     %xmm0,              %xmm0    // parity flag set only when x is NaN
 69    cvttsd2si   %xmm0,              %rax    // set invalid (as necessary), prepare 0x8000000000000000
 70    jp          3f                          // NaN: return 0x8000000000000000
 71    negq        %rdx                        // rdx = raw bits of x; the negated value has its sign bit set exactly when x > 0 on this path
 72    sarq        $63,                %rdx    // rdx = x > 0 ? -1 : 0
 73    xorq        %rdx,               %rax    // x > 0: flip to 0x7fffffffffffffff; x < 0: keep 0x8000000000000000
 743:  ret
 75    
 76        
 77#else  /* __i386__ */
 78
 // llround, i386: loads the double x from the stack and returns x rounded to the nearest integer,
 // halfway cases away from zero, as a 64-bit value with the low half in eax and the high half in edx.
 // FRAME_SIZE and STACKP are defined outside this file -- presumably they address the first stack argument; confirm in abi.h.
 // Results produced outside the normal range:
 //      |x| < 0.5                   -> 0
 //      x >= 0x1.0p63 (incl. +inf)  -> 0x7fffffffffffffff, invalid set
 //      x == -0x1.0p63              -> 0x8000000000000000
 //      x <  -0x1.0p63 (incl. -inf) -> 0x8000000000000000, invalid set
 //      NaN                         -> 0x8000000000000000, invalid set
 // Registers written: eax, ecx, edx, xmm0-xmm4, xmm6, xmm7, flags.
 79ENTRY( llround )
 80    movl    4+FRAME_SIZE( STACKP ), %eax    // x.hi
 81    movsd   FRAME_SIZE( STACKP ),   %xmm0   // x
 82    andl    $0x7fffffff,            %eax    // |x|.hi
 83    subl    $0x3fe00000,            %eax    // rebias so bits 20 and up hold exponent+1; negative when |x| < 0.5
 84    call    0f                              // call/pop pair: ecx = address of label 0, base for reaching the constants
 850:  popl    %ecx
 86    cmpl    $((63+1)<<20),          %eax    // if( |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x) )
 87    jae     1f                              //      goto 1   (unsigned compare, so negative eax is caught too)
 88  
 89    // 0.5 <= |x| < 0x1.0p63  
 90    movq    (implicit-0b)(%ecx),    %xmm1   // 0x8000000000000000
 91    movapd  %xmm0,                  %xmm7   // keep an unmodified copy of x for the sign and inexact steps
 92    shrl    $20,                    %eax    // move exponent+1 to units position
 93    psllq   $11,                    %xmm0   // put the most significant bit in the leading position
 94    movl    $63,                    %edx    
 95    orpd    %xmm1,                  %xmm0   // make implicit bit explicit
 96    subl    %eax,                   %edx    // subtract exponent+1 from 63
 97    movd    %edx,                   %xmm2   // move to xmm
 98    psrlq   %xmm2,                  %xmm0   // shift units bit to units+1 position
 99    pcmpeqb %xmm3,                  %xmm3   // -1LL
100    psubq   %xmm3,                  %xmm0   // subtract -1, i.e. add 0.5 in magnitude: round half away from zero
101    psrlq   $1,                     %xmm0   // shift units bit to units position
102    
103    //fix sign
104    movsd   (two52-0b)(%ecx ),      %xmm4   // 2**52 (loaded here, consumed by the inexact step below)
105    xorpd   %xmm6,                  %xmm6   // 0
106   cmpnltsd %xmm7,                  %xmm6   // !(0 < x) ? -1LL : 0 -- same as x < 0 here, since x is neither 0 nor NaN on this path
107    pxor    %xmm6,                  %xmm0   // (magnitude ^ mask) - mask negates the magnitude when mask is -1
108    psubq   %xmm6,                  %xmm0
109
110    // move result to GPR
111    movd    %xmm0,                  %eax    // low 32 bits
112    psrlq   $32,                    %xmm0
113    movd    %xmm0,                  %edx    // high 32 bits
114        
115    //set inexact
116    andnpd  %xmm7,                  %xmm1   // |x|
117    minsd   %xmm4,                  %xmm1   //  min( |x|, 0x1.0p52 ) -- avoid spurious inexact for |x| > 0x1.0p52
118    addsd   %xmm4,                  %xmm1   //  add 0x1.0p52, set inexact (the sum itself is not used)
119        
120    ret
121
122//  |x| >= 0x1.0p63 || |x| < 0.5 || isnan(x)     
1231:  jge     2f                              // flags are still those of the cmpl above: signed >= means large, inf or nan
124
125    // |x| < 0.5
126    cvttsd2si   %xmm0,              %eax    // truncates to 0; sets inexact when x is nonzero
127    xorl        %edx,               %edx    // high half of the result = 0
128    ret
129
1302:  xorl    %eax,                   %eax    // low half = 0; kept by the paths at labels 3 and 4
131    ucomisd (mtwo63-0b)(%ecx),      %xmm0   // special case for -0x1.0p63, nan
132    je      3f                              // taken when x == -0x1.0p63 and also when x is NaN (unordered sets ZF)
133
134    //overflow
135    xorpd   %xmm1,                  %xmm1
136    cmpltsd %xmm0,                  %xmm1   // 0 < x ? all ones : 0
137    movd    %xmm1,                  %eax    // low half: 0xffffffff for positive x, 0 for negative x
138    cvttsd2si %xmm0,                %edx    // out of 32-bit range: edx = 0x80000000, sets invalid
139    xorl    %eax,                   %edx    // high half: 0x7fffffff for positive x, 0x80000000 for negative x
140    ret
141    
142// special case for -0x1.0p63
1433:  jp      4f                             //nans end up here, so get rid of them
144    movl    $0x80000000,            %edx    // exactly -2**63: return 0x8000000000000000; no conversion is executed on this path
145    xorl    %eax,                   %eax
146    ret
147
148//nan
1494:  cvttsd2si %xmm0,                %edx    // NaN: edx = 0x80000000, sets invalid; eax is already 0
150    ret
151
152    
// lround, i386: loads the double x from the stack and returns x rounded to the nearest integer,
// halfway cases away from zero, as a 32-bit value in eax.
// FRAME_SIZE and STACKP are defined outside this file -- presumably they address the first stack argument; confirm in abi.h.
// Results produced outside the normal range:
//      |x| < 0.5                        -> 0
//      x >= 0x1.0p31-0.5 (incl. +inf)   -> 0x7fffffff, invalid set
//      -0x1.0p31-0.5 < x <= -0x1.0p31   -> 0x80000000
//      x <= -0x1.0p31-0.5 (incl. -inf)  -> 0x80000000, invalid set
//      NaN                              -> 0x80000000, invalid set
// Registers written: eax, ecx, edx, xmm0-xmm3, xmm7, flags.
153ENTRY( lround )
154    movl    4+FRAME_SIZE( STACKP ), %eax    // x.hi
155    movsd   FRAME_SIZE( STACKP ),   %xmm0   // x
156    andl    $0x7fffffff,            %eax    // |x|.hi
157    subl    $0x3fe00000,            %eax    // rebias so bits 20 and up hold exponent+1; negative when |x| < 0.5
158    call    0f                              // call/pop pair: ecx = address of label 0, base for reaching the constants
1590:  popl    %ecx
160    movq    (implicit-0b)(%ecx),    %xmm1   // 0x8000000000000000
161    cmpl    $((31+1)<<20),          %eax    // if( |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) )
162    jae     1f                              //      goto 1   (unsigned compare, so negative eax is caught too)
163
164    // weed out positive overflow cases
165    ucomisd (cutoff-0b)(%ecx),      %xmm0   // if( x >= 0x1.0p31-0.5 )
166    jae     3f                              //      the rounded value would be 2**31, which does not fit in 32 bits
167  
168    // 0.5 <= |x| < 0x1.0p31  
169    movapd  %xmm0,                  %xmm7   // keep an unmodified copy of x for the inexact step
170    shrl    $20,                    %eax    // move exponent+1 to units position
171    psllq   $11,                    %xmm0   // put the most significant bit in the leading position
172    movl    $63,                    %edx    
173    orpd    %xmm1,                  %xmm0   // make implicit bit explicit
174    subl    %eax,                   %edx    // subtract exponent+1 from 63
175    movd    %edx,                   %xmm2   // move to xmm
176    psrlq   %xmm2,                  %xmm0   // shift units bit to units+1 position
177    pcmpeqb %xmm3,                  %xmm3   // -1LL
178    psubq   %xmm3,                  %xmm0   // subtract -1, i.e. add 0.5 in magnitude: round half away from zero
179    psrlq   $1,                     %xmm0   // shift units bit to units position
180    
181    //fix sign
182    movl    4+FRAME_SIZE( STACKP ), %edx    // x.hi again, for its sign bit
183    movd    %xmm0,                  %eax    // low 32 bits of the rounded magnitude
184    sarl    $31,                    %edx    // edx = x < 0 ? -1 : 0
185    xorl    %edx,                   %eax    // (magnitude ^ mask) - mask negates the magnitude when mask is -1
186    subl    %edx,                   %eax
187        
188    //set inexact
189    andnpd  %xmm7,                  %xmm1   // |x|
190    addsd   (two52-0b)(%ecx ),      %xmm1   // |x| += 0x1.0p52, set inexact (the sum itself is not used)
191    ret
192
193//  |x| >= 0x1.0p31 || |x| < 0.5 || isnan(x) 
1941:  andnpd  %xmm0,                  %xmm1   // |x| -- NOTE(review): xmm1 is not read again on any path from here
195    ucomisd (mcutoff-0b)(%ecx),     %xmm0   // x <= -0x1.0p31-0.5, or nan (unordered sets CF and ZF)
196    jbe     2f
197
198    // weed out positive overflow cases
199    ucomisd (cutoff-0b)(%ecx),      %xmm0   // if( x >= 0x1.0p31-0.5 )
200    jae     3f
201
202    // -0x1.0p31-0.5 < x <= -0x1.0p31 || |x| < 0.5
203    cvttsd2si %xmm0,                %eax    // truncation gives 0x80000000 or 0 respectively; sets inexact if x is not an integer
204    ret
205    
206// negative overflow cases and nan
2072:  pcmpeqb %xmm0,                  %xmm0   // all-ones bit pattern, which is a nan
208    cvttsd2si %xmm0,                %eax    // eax = 0x80000000, set invalid
209    ret
210
211// positive overflow cases
2123:  movl    $0x7fffffff,            %eax    // result is 0x7fffffff
213    pcmpeqb %xmm0,                  %xmm0   // all-ones bit pattern, which is a nan
214    cvttsd2si %xmm0,                %edx    // set invalid; the converted value in edx is not used
215    ret
216
217#endif