src/libm/Source/Intel/nextafter.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / nextafter.S
at fixPythonPipStalling 135 lines 3.8 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
  2
/*
 *	nextafter.S
 *
 *		by Ian Ollmann
 *
 *	Copyright (c) 2007, Apple Inc.  All Rights Reserved.
 *
 *	Implementation of C99 nextafter for __i386__ and __x86_64__.
 */
 12
 13#include <machine/asm.h>
 14#include "abi.h"
 15
 16#if defined( BUILDING_FOR_CARBONCORE_LEGACY )
 17ENTRY( nextafterd )
 18#else
 19ENTRY( nextafter )
 20#endif
 21#if defined( __i386__ )
 22	movsd	FRAME_SIZE( STACKP ),	%xmm0
 23	movsd	8+FRAME_SIZE( STACKP ),	%xmm1
 24#endif
 25
 26	ucomisd	%xmm0,					%xmm1		// if( x == y || isnan(x) || isnan(y) )
 27	je		2f									//		goto 2
 28	
 29	xorpd	%xmm2,					%xmm2		// 0.0f
 30	ucomisd %xmm0,					%xmm2		// if( x == 0 )
 31	je		1f									//		goto 1
 32
 33	// x != y. x != 0.0f. X and Y are numeric.
 34	cmpltsd	%xmm0,					%xmm1		// y < x ? -1 : 0
 35	xorpd	%xmm2,					%xmm2		// 0.0f
 36	cmpltsd	%xmm0,					%xmm2		// 0.0f < x ? -1 : 0
 37	xorpd	%xmm2,					%xmm1		// move away from zero ? -1 : 0
 38	paddq	%xmm1,					%xmm1		// move away from zero ? -2 : 0
 39	pcmpeqb %xmm2,					%xmm2		// -1
 40	psubq	%xmm2,					%xmm1		// move away from zero ? -1 : 1
 41	psubq	%xmm1,					%xmm0		// initial stab at result
 42	
 43	//check to see if we overflowed, underflowed or did other bad things
 44	movapd	%xmm0,					%xmm7
 45	psrlq	$32,					%xmm7
 46	movd	%xmm7,					%eax		// result
 47	andl	$0x7fffffff,			%eax		// |result|
 48	subl	$0x00100000,			%eax		// push denormals negative
 49	cmpl	$0x7fe00000,			%eax		// if( |result| < 0x1.0p-126 || |result| == inf || isnan( result ) )
 50	jae		4f									//		goto 4
 51	
 52#if defined( __i386__ )
 53	movsd	%xmm0,					FRAME_SIZE( STACKP )
 54	fldl	FRAME_SIZE( STACKP )
 55#endif
 56	ret
 57	
 58	
 59	
 601:	// x == 0 and y != x, so result is either 0x80000001 or 0x00000001
 61	pcmpeqb	%xmm0,					%xmm0		//	-1
 62	movdqa	%xmm0,					%xmm2		//	-1
 63	psllq	$63,					%xmm0		//	0x8000000000000000
 64	andpd	%xmm1,					%xmm0		//  signof( y )
 65	psubq	%xmm2,					%xmm0		//  signof( y ) + 1U
 66	
 67	//set inexact and underflow
 68	movapd	%xmm0,					%xmm1		//  0x8000000000000001 or 0x0000000000000001
 69	psllq	$52,					%xmm1		//	0x0010000000000000
 70	orps	%xmm0,					%xmm1		//	0x8010000000000001 or 0x0010000000000001
 71	mulsd	%xmm1,					%xmm1		//	set inexact and underflow
 72	
 73#if defined( __i386__ )
 74	movsd	%xmm0,					FRAME_SIZE( STACKP )
 75	fldl	FRAME_SIZE( STACKP )
 76#endif
 77	ret
 78	
 79	// x == y || isnan(x) || isnan(y)
 802:	jp		3f									// if( isnan(x) || isnan(y) ) goto 3
 81
 82    // x == y, return y
 83#if defined( __i386__ )
 84	movsd	%xmm1,					FRAME_SIZE( STACKP )
 85	fldl	FRAME_SIZE( STACKP )
 86#else
 87    movsd   %xmm1,                  %xmm0
 88#endif
 89	ret
 90    
 91    // nan
 923:  addsd	%xmm1,					%xmm0		// either x or y or both are nan, so add the two to silence and move to xmm0
 93#if defined( __i386__ )
 94	movsd	%xmm0,					FRAME_SIZE( STACKP )
 95	fldl	FRAME_SIZE( STACKP )
 96#endif
 97	ret
 98
 99// |result| < 0x1.0p-126 || |result| == inf || isnan( result )	
1004:	je		5f									//	if result is infinite, goto 5
101	jg		6f									//	if nan, goto 6
102
103	// denormal, set underflow and inexact
104	psrlq	$63,					%xmm2		// 1ULL
105	movdqa	%xmm2,					%xmm3		// 1ULL
106	pslld	$52,					%xmm2		// 0x0010000000000000ULL
107	por		%xmm3,					%xmm2		// 0x0010000000000001ULL
108	mulss	%xmm2,					%xmm2		// set inexact and underflow
109#if defined( __i386__ )
110	movsd	%xmm0,					FRAME_SIZE( STACKP )
111	fldl	FRAME_SIZE( STACKP )
112#endif
113	ret
114
115//	|result| is infinite
1165:	pcmpeqb	%xmm1,					%xmm1		// -1LL
117	paddq	%xmm0,					%xmm1		// copysign( max finite, result )
118	movdqa	%xmm1,					%xmm2		// copysign( max finite, result )
119	psllq	$10,					%xmm1		// copysign( 1.0f,		result )
120	addsd	%xmm2,					%xmm1		// set inexact
121	addsd	%xmm2,					%xmm2		// set overflow
122#if defined( __i386__ )
123	movsd	%xmm0,					FRAME_SIZE( STACKP )
124	fldl	FRAME_SIZE( STACKP )
125#endif
126	ret
127
128// NaN results were infinities that became NaNs, push back to infinity	
1296:	pcmpeqb	%xmm1,					%xmm1		// -1
130	paddq	%xmm1,					%xmm0		
131#if defined( __i386__ )
132	movsd	%xmm0,					FRAME_SIZE( STACKP )
133	fldl	FRAME_SIZE( STACKP )
134#endif
135	ret