src/libm/Source/Intel/modf.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / modf.S
at fixPythonPipStalling 131 lines 3.2 kB view raw
wrap content
Lubos Dolezel More progress 9y ago
a76db625
  1
  2/*
  3 *  modf.s
  4 *
  5 *      by Steve Canon (scanon)
  6 *
  7 *  Copyright (c) 2008 Apple Inc. All Rights Reserved.
  8 *
  9 *  Rewritten by Steve Canon in December '08 to fix
 10 *  behavior of small signaling nans and to get the
 11 *  sign of a zero fractional part correct.
 12 */
 13
 14// double modf(double x, double *iptr);
 15//
 16// breaks x into integral and fractional parts, each of which has the same sign as the argument.
 17// the fractional part is returned, and iptr holds the integral part.
 18//
 19// Special Cases:
 20//
 21//		Input	Fractional Part		Integral Part
 22//		+-inf		+-0					+-inf
 23//		NaN			NaN					NaN
 24
 25#if defined __i386__
 26
  27.text
  28.align 4
  29.globl _modf
  30_modf:	// i386 entry: x is read from 4(%esp)/8(%esp), iptr from 12(%esp); the fractional part is returned on the x87 stack
  31	movl	   8(%esp),		%eax	// high word of x
  32	andl	   $0x7fffffff,	%eax	// high word of |x|
  33	subl	   $0x3ff00000,	%eax	// subtract off exponent bias; the result is negative exactly when |x| < 1.0
  34	// Build the sign-bit mask in registers (all ones, then shift) rather than loading it from memory.
  35	movsd	   4(%esp),		%xmm0	// x
  36	pcmpeqb		%xmm3,		%xmm3	// every bit set
  37	psllq	   $63,			%xmm3	// only the top bit of each quadword survives: 0x8000000000000000
  38	movapd		%xmm3,		%xmm4	// set aside signbit mask for later use
  39	andpd		%xmm0,		%xmm3	// signbit(x)
  40	// One compare classifies x: the unsigned branch below peels off both out-of-range cases at once.
  41	cmpl	   $0x03400000,	%eax	// compare to unbiased 2**52
  42	jae			1f	// unsigned: taken for |x| >= 2**52 (incl. inf/NaN) and, since a negative eax looks huge, for |x| < 1.0
  43	
  44//	1.0 <= |x| < 2**52
  45	shrl	   $20,			%eax	// unbiased exponent of x
  46	movl	   $52,			%edx
  47	subl		%eax,		%edx
  48	movd		%edx,		%xmm2	// 52 - unbiased exponent of x
  49	pcmpeqb		%xmm1,		%xmm1	// every bit set
  50	psllq		%xmm2,		%xmm1	// mask for integral bits of x (clears the low 52 - exponent bits)
  51	andpd		%xmm0,		%xmm1	// trunc(x)
  52	subsd		%xmm1,		%xmm0	// fractional part, except that sign of zero may be wrong
  53	andnpd		%xmm0,		%xmm4	// (~signbit mask) & fraction = |fractional part|
  54	orpd		%xmm3,		%xmm4	// copysign(fractional part, x)
  55	// The fraction is in an SSE register but must be returned on the x87 stack, so it goes through memory.
  56	movl	  12(%esp),		%ecx	// iptr
  57	movsd		%xmm4,	   4(%esp)	// overwrite the x argument slot with the fractional part
  58	movsd		%xmm1,	    (%ecx)	// store integral part to iptr
  59	fldl	   4(%esp)				// return fraction part on the x87 stack
  60	retl
  61	
  621:	movl	  12(%esp),		%ecx	// iptr (movl does not modify the flags set by the cmpl above)
  63	jge			2f	// signed view of the same compare: taken only for |x| >= 2**52, inf, or NaN
  64
  65//	|x| < 1.0
  66	movsd		%xmm3,		(%ecx)	// *iptr = copysign(0.0, x)
  67	fldl	   4(%esp)				// fractional part = x
  68	retl
  69	
  702:	ucomisd		%xmm0,		%xmm0	// check for NaN
  71	jp			3f	// unordered (x is NaN): skip the store below so the argument slot still holds x
  72
  73//	|x| >= 2**52
  74	movsd		%xmm3,	   4(%esp)	// fractional part = copysign(0.0, x)
  753:	movsd		%xmm0,		(%ecx)	// *iptr = x
  76	fldl	   4(%esp)	// return whatever the argument slot holds: the signed zero, or x itself on the NaN path
  77	retl
 78	
 79#else // __x86_64__
 80
  81.const
  82.align 4	// NOTE(review): _modf uses absmask as a 128-bit andpd memory operand, so absmask and one must stay adjacent and 16-byte aligned; assumes .align 4 means 2**4 bytes under this assembler -- confirm
  83absmask:	// every bit of a double except the sign bit
  84	.quad	0x7fffffffffffffff
  85one:	// bit pattern of 1.0; _modf subtracts it from the bits of |x| to remove the exponent bias
  86	.quad	0x3ff0000000000000
 87
  88.text
  89.align 4
  90.globl _modf
  91_modf:	// x86_64 entry: x is read from xmm0, iptr from rdi; the fractional part is returned in xmm0
  92	movsd	absmask(%rip),	%xmm1	// 0x7fffffffffffffff
  93	andpd		%xmm0,		%xmm1	// |x|
  94	movd		%xmm1,		%rax	// bits of |x| as an integer
  95	xorpd		%xmm0,		%xmm1	// |x| ^ x leaves only the sign bit: copysign(0.0, x)
  96	subq	one(%rip),		%rax	// subtract off exponent bias
  97	movq	   $52,			%rcx	// 52 serves as both the shift count (cl) and the exponent limit
  98	sarq		%cl,		%rax	// arithmetic shift: signed unbiased exponent of x
  99	cmpq		%rcx,		%rax	// compare exponent of x to 52
 100	jae			1f	// unsigned: taken for exponent >= 52 (incl. inf/NaN) and, since a negative rax looks huge, for |x| < 1.0
 101	
 102//	1.0 <= |x| < 2**52
 103	subq		%rax,		%rcx	// 52 - unbiased exponent of x
 104	pcmpeqb		%xmm2,		%xmm2	// every bit set
 105	movd		%rcx,		%xmm3	// shift count for psllq
 106	psllq		%xmm3,		%xmm2	// mask for integral bits of x
 107	andpd		%xmm0,		%xmm2	// trunc(x)
 108	subsd		%xmm2,		%xmm0	// fractional part, except sign of zero may be wrong
 109	andpd	absmask(%rip),	%xmm0	// |fractional part| (128-bit memory operand starting at absmask)
 110	orpd		%xmm1,		%xmm0	// copysign(fractional part, x)
 111	movsd		%xmm2,		(%rdi)	// *iptr = trunc(x)
 112	retq
 113	
 1141:	jge			2f	// flags are still those of the cmpq: signed >= 52 means |x| >= 2**52, inf, or NaN
 115	
 116//	|x| < 1.0
 117	movsd		%xmm1,		(%rdi)	// *iptr = copysign(0.0, x)
 118	retq	// xmm0 was never modified on this path, so x itself is returned as the fractional part
 119	
 1202:	ucomisd		%xmm0,		%xmm0	// check for NaN
 121	jp			3f	// unordered (x is NaN)
 122	
 123//	|x| >= 2**52
 124	movsd		%xmm0,		(%rdi)	// *iptr = x
 125	movapd		%xmm1,		%xmm0	// fractional part = copysign(0.0, x)
 126	retq
 127	
 1283:	movsd		%xmm0,		(%rdi)	// *iptr = x
 129	retq	// NaN path: xmm0 is returned unmodified, so both results are x
130
131#endif // __ARCH__