src/libm/Source/Intel/s_rint.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / s_rint.S
at fixPythonPipStalling 103 lines 5.1 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*
  2 * Written by Ian Ollmann.
  3 * Copyright © 2005 Apple Computer Inc.
  4 */
  5
  6#include <machine/asm.h>
  7
  8#include "abi.h"
  9
 10//i386 versions of these functions are in xmm_floor.c
 11//On x86_64 we can take advantage of the REX form of cvtsd2si to produce 64-bit values
 12#if defined( __LP64__ )
 13
 /*
  * lrint / llrint, LP64 build: convert the double in %xmm0 to a 64-bit
  * integer returned in %rax.  Both entry points share this one body.
  *
  * Scratch: %rdx, %xmm1.
  *
  * Inputs at or above 0x1.0p63 are clamped to 0x7fffffffffffffff.  No double
  * lies strictly between LONG_MAX and 0x1.0p63, so a single compare against
  * 0x1.0p63 is enough to detect positive overflow.
  */
ENTRY( lrint )
ENTRY( llrint )
    movl        $0x43e00000, %eax       // upper 32 bits of the double 0x1.0p63
    movd        %eax,  %xmm1            // place them in the low dword of xmm1
    psllq       $32,   %xmm1            // shift up by 32: the low double of xmm1 is now 0x1.0p63
    cmplesd     %xmm0, %xmm1            // xmm1 = ( 0x1.0p63 <= x ) ? all ones : all zeros
    cvtsd2siq   %xmm0, %rax             // rax = x converted to a 64-bit integer
    movd        %xmm1, %rdx             // rdx = the full 64-bit compare mask
    xorq        %rdx,  %rax             // mask set: overflowed result is flipped to 0x7fffffffffffffff; mask clear: rax unchanged
    ret
 24
 /*
  * lrintf / llrintf, LP64 build: convert the float in %xmm0 to a 64-bit
  * integer returned in %rax.  Both entry points share this one body.
  *
  * Scratch: %rdx, %xmm1.
  *
  * Inputs at or above 0x1.0p63f are clamped to 0x7fffffffffffffff.
  */
ENTRY( lrintf )
ENTRY( llrintf )
    movl        $0x5f000000, %eax       // bit pattern of the float 0x1.0p63f
    movd        %eax,  %xmm1            // xmm1 low float = 0x1.0p63f
    cmpless     %xmm0, %xmm1            // low dword of xmm1 = ( 0x1.0p63f <= x ) ? all ones : all zeros
    cvtss2siq   %xmm0, %rdx             // rdx = x converted to a 64-bit integer
    movd        %xmm1, %rax             // rax = compare result; the mask itself is only 32 bits wide (eax)
    cltq                                // widen the 32-bit mask in eax to 64 bits by sign extension (cltq == cdqe)
    xorq        %rdx,  %rax             // rax = converted value, flipped to 0x7fffffffffffffff when the mask is set
    ret
 35    
 36#else
 37
 /*
  * lrintf, 32-bit build: convert the float argument found at
  * FIRST_ARG_OFFSET from STACKP to a 32-bit integer returned in %eax.
  *
  * Scratch: %edx, %xmm0, %xmm1.
  *
  * Inputs at or above 0x1.0p31f are clamped to 0x7fffffff.  No float lies
  * strictly between INT_MAX and 0x1.0p31f, so one compare against 0x1.0p31f
  * catches every positive overflow.
  */
ENTRY( lrintf )
    movss       (FIRST_ARG_OFFSET)( STACKP ), %xmm0     // xmm0 = x
    movl        $0x4f000000, %eax                       // bit pattern of the float 0x1.0p31f
    movd        %eax, %xmm1                             // xmm1 low float = 0x1.0p31f
    cmpless     %xmm0, %xmm1                            // low dword of xmm1 = ( 0x1.0p31f <= x ) ? all ones : all zeros
    cvtss2si    %xmm0, %eax                             // eax = x converted to a 32-bit integer
    movd        %xmm1, %edx                             // edx = compare mask
    xorl        %edx, %eax                              // mask set: overflowed result becomes 0x7fffffff
    ret
 47    
 /*
  * lrint, 32-bit build: convert the double argument found at
  * FIRST_ARG_OFFSET from STACKP to a 32-bit integer returned in %eax.
  *
  * Scratch: %ecx, %edx, %xmm0, %xmm1.
  *
  * Overflow is detected after the fact: the conversion result is checked for
  * 0x80000000 (the overflow result), and that value is flipped to 0x7fffffff
  * only when x was positive.  A non-positive x that produces 0x80000000 is
  * left untouched.
  */
ENTRY( lrint )
    movsd       (FIRST_ARG_OFFSET)( STACKP ), %xmm0     // xmm0 = x
    xorl        %ecx, %ecx                              // ecx = 0, the "do not flip" mask used by cmovne below
    xorpd       %xmm1, %xmm1                            // xmm1 = 0.0
    cmpltsd     %xmm0, %xmm1                            // xmm1 = ( 0.0 < x ) ? all ones : all zeros
    cvtsd2si    %xmm0, %eax                             // eax = x converted to a 32-bit integer
    movd        %xmm1, %edx                             // edx = low 32 bits of the "x is positive" mask
    cmpl        $0x80000000, %eax                       // did the conversion produce the overflow result?
    cmovne      %ecx, %edx                              // no: discard the mask so the result is left as is
    xorl        %edx, %eax                              // yes and x > 0: 0x80000000 becomes 0x7fffffff
    ret
 59    
 /*
  * llrintf, 32-bit build: convert the float argument on the stack to a
  * 64-bit integer returned in %edx:%eax (high:low), using the x87 unit.
  *
  * Stack scratch (12 bytes): bytes 0-7 receive the 64-bit conversion result,
  * bytes 8-11 hold the float constant 0x1.0p63f.  The argument is addressed
  * at FIRST_ARG_OFFSET+12 to account for that 12-byte adjustment.
  * SUBP / ADDP / STACKP / FIRST_ARG_OFFSET are macros defined outside this
  * file (presumably in abi.h).
  *
  * Inputs at or above 0x1.0p63 are clamped to 0x7fffffffffffffff.
  */
ENTRY( llrintf )
    SUBP        $12, STACKP                             // reserve the scratch area
    xorl        %edx, %edx                              // edx = 0 so that setae below leaves a clean 0 / 1
    movl        $0x5f000000, 8(STACKP)                  // store the bit pattern of 0x1.0p63f

    flds        8(STACKP)                               // x87 stack: { 0x1.0p63 }
    flds        (FIRST_ARG_OFFSET+12)( STACKP )         // x87 stack: { f, 0x1.0p63 }
    fucomi      %ST(1), %ST                             // compare f against 0x1.0p63; the outcome is read by setae below
    fistpll     (STACKP)                                // store f as a 64-bit integer and pop: { 0x1.0p63 }
    fstp        %ST(0)                                  // pop the constant: { }

    movl        (STACKP), %eax                          // eax = low 32 bits of the converted value
    setae       %dl                                     // dl = 1 when f >= 0x1.0p63 (setae == setnb)
    negl        %edx                                    // turn 0 / 1 into the mask 0 / -1
    xorl        %edx, %eax                              // on overflow the low half becomes 0xffffffff
    xorl        4(STACKP), %edx                         // edx = high 32 bits, flipped to 0x7fffffff on overflow

    ADDP        $12, STACKP                             // release the scratch area
    ret
 79    
 /*
  * llrint, 32-bit build: convert the double argument on the stack to a
  * 64-bit integer returned in %edx:%eax (high:low), using the x87 unit.
  *
  * Stack scratch (12 bytes): bytes 0-7 receive the 64-bit conversion result,
  * bytes 8-11 hold the float constant 0x1.0p63f.  The argument is addressed
  * at FIRST_ARG_OFFSET+12 to account for that 12-byte adjustment.
  * SUBP / ADDP / STACKP / FIRST_ARG_OFFSET are macros defined outside this
  * file (presumably in abi.h).
  *
  * Inputs at or above 0x1.0p63 are clamped to 0x7fffffffffffffff.
  */
ENTRY( llrint )
    SUBP        $12, STACKP                             // reserve the scratch area
    xorl        %edx, %edx                              // edx = 0 so that setae below leaves a clean 0 / 1
    movl        $0x5f000000, 8(STACKP)                  // store the bit pattern of 0x1.0p63f

    flds        8(STACKP)                               // x87 stack: { 0x1.0p63 }
    fldl        (FIRST_ARG_OFFSET+12)( STACKP )         // x87 stack: { f, 0x1.0p63 }  (f loaded as a double)
    fucomi      %ST(1), %ST                             // compare f against 0x1.0p63; the outcome is read by setae below
    fistpll     (STACKP)                                // store f as a 64-bit integer and pop: { 0x1.0p63 }
    fstp        %ST(0)                                  // pop the constant: { }

    movl        (STACKP), %eax                          // eax = low 32 bits of the converted value
    setae       %dl                                     // dl = 1 when f >= 0x1.0p63 (setae == setnb)
    negl        %edx                                    // turn 0 / 1 into the mask 0 / -1
    xorl        %edx, %eax                              // on overflow the low half becomes 0xffffffff
    xorl        4(STACKP), %edx                         // edx = high 32 bits, flipped to 0x7fffffff on overflow

    ADDP        $12, STACKP                             // release the scratch area
    ret
 99    
100#endif
101
102
103