src/libm/Source/Intel/nextafterf.S at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / Intel / nextafterf.S
at fixPythonPipStalling 129 lines 3.6 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1
  2
/*
 *	nextafterf.s
 *
 *		by Ian Ollmann
 *
 *	Copyright (c) 2007, Apple Inc.  All Rights Reserved.
 *
 *	Implementation of C99 nextafterf for __i386__ and __x86_64__.
 */
 12
 13#include <machine/asm.h>
 14#include "abi.h"
 15
 16
 /*
 *	float nextafterf( float x, float y )
 *
 *	Returns the float adjacent to x in the direction of y. The work is done by
 *	stepping the bit pattern of x by one as a 32-bit integer and then checking
 *	whether that step left the range of normal numbers.
 *
 *	In:		__i386__:	x at FRAME_SIZE( STACKP ), y at 4+FRAME_SIZE( STACKP ).
 *			otherwise:	x in %xmm0, y in %xmm1, used as they stand on entry.
 *	Out:	__i386__:	result loaded onto the x87 stack with flds.
 *			otherwise:	result in %xmm0.
 *	Uses:	%eax, %xmm0-%xmm3, flags. On __i386__ the stack slot that held x
 *			is overwritten with the result before it is reloaded with flds.
 *
 *	Cases:
 *		x or y is NaN				-> x + y
 *		x == y						-> y
 *		x == 0, y != x				-> smallest denormal carrying the sign of y,
 *									   underflow + inexact raised
 *		|result| < 0x1.0p-126		-> underflow + inexact raised
 *		|result| == inf				-> overflow + inexact raised
 */
ENTRY( nextafterf )
#if defined( __i386__ )
	movss	FRAME_SIZE( STACKP ),	%xmm0		// x
	movss	4+FRAME_SIZE( STACKP ),	%xmm1		// y
#endif

	// One branch covers x == y and the NaN cases: ucomiss also reports
	// "equal" for unordered operands. Label 2 tells them apart with jp.
	ucomiss	%xmm0,					%xmm1		// compare y with x
	je		2f									// if( x == y || isnan(x) || isnan(y) ) goto 2

	xorps	%xmm2,					%xmm2		// 0.0f
	ucomiss	%xmm0,					%xmm2		// compare 0.0f with x
	je		1f									// if( x == 0 ) goto 1

	// Here x != y, x != 0.0f and both are numeric.
	// Build the integer step: -1 when |x| must grow, +1 when |x| must shrink,
	// then subtract it from the bits of x.
	cmpltss	%xmm0,					%xmm1		// y < x ? -1 : 0
	xorps	%xmm2,					%xmm2		// 0.0f
	cmpltss	%xmm0,					%xmm2		// 0.0f < x ? -1 : 0
	xorps	%xmm2,					%xmm1		// move away from zero ? -1 : 0
	paddd	%xmm1,					%xmm1		// move away from zero ? -2 : 0
	pcmpeqb	%xmm2,					%xmm2		// -1 (the denormal path at 4 reuses this value)
	psubd	%xmm2,					%xmm1		// move away from zero ? -1 : 1
	psubd	%xmm1,					%xmm0		// first attempt at the result

	// Range check on |result|. Subtracting 0x00800000 wraps denormals (and
	// zero) around to large unsigned values, so a single unsigned compare
	// catches denormal, infinite and NaN results together.
	movd	%xmm0,					%eax		// result bits
	andl	$0x7fffffff,			%eax		// |result|
	subl	$0x00800000,			%eax		// push denormals negative
	cmpl	$0x7f000000,			%eax		// if( |result| < 0x1.0p-126 || |result| == inf || isnan( result ) )
	jae		4f									//		goto 4

	// Common case: the result is a normal number, no flags to raise.
#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret



1:	// x == 0 and y != x, so the result is either 0x80000001 or 0x00000001
	pcmpeqb	%xmm0,					%xmm0		// -1
	movdqa	%xmm0,					%xmm2		// -1
	pslld	$31,					%xmm0		// 0x80000000
	andps	%xmm1,					%xmm0		// sign bit of y
	psubd	%xmm2,					%xmm0		// sign bit of y, plus 1

	// Raise underflow and inexact by squaring a value just above the
	// smallest normal. Only %xmm1 is multiplied; the result stays in %xmm0.
	movaps	%xmm0,					%xmm1		// 0x80000001 or 0x00000001
	pslld	$23,					%xmm1		// 0x00800000 (the sign bit is shifted out)
	orps	%xmm0,					%xmm1		// 0x80800001 or 0x00800001
	mulss	%xmm1,					%xmm1		// set inexact and underflow

#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret

	// x == y || isnan(x) || isnan(y)
	// The flags are still those of the first ucomiss.
2:	jp		3f									// if( isnan(x) || isnan(y) ) goto 3

	// x == y: hand back y.
#if defined( __i386__ )
	movss	%xmm1,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#else
	movss	%xmm1,					%xmm0
#endif
	ret

	// x, y or both are NaN: add them so the sum, a quieted NaN, lands in %xmm0
3:	addss	%xmm1,					%xmm0
#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret

// |result| < 0x1.0p-126 || |result| == inf || isnan( result )
// The flags are still those of the cmpl against 0x7f000000.
4:	je		5f									// result is infinite
	jg		6f									// result is NaN (signed compare, so wrapped denormals fall through)

	// Denormal (or zero) result: set underflow and inexact.
	// %xmm2 still holds -1 from the pcmpeqb in the main path.
	psrld	$31,					%xmm2		// 1U
	movdqa	%xmm2,					%xmm3		// 1U
	pslld	$23,					%xmm2		// 0x00800000U
	por		%xmm3,					%xmm2		// 0x00800001U
	mulss	%xmm2,					%xmm2		// set inexact and underflow
#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret

// |result| is infinite. %xmm0 already holds the value to return; the
// arithmetic below is done on scratch registers only to raise the flags.
5:	pcmpeqb	%xmm1,					%xmm1		// -1
	paddd	%xmm0,					%xmm1		// copysign( max finite, result )
	movdqa	%xmm1,					%xmm2		// copysign( max finite, result )
	pslld	$7,						%xmm1		// 0xBFFFFF80 (about -2.0f) for either sign
	addss	%xmm2,					%xmm1		// max finite + small value: set inexact
	addss	%xmm2,					%xmm2		// max finite + max finite: set overflow
#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret

// NaN results were infinities that became NaNs, push back to infinity.
// NOTE(review): this looks unreachable. x is numeric here, and an infinite x
// with a numeric y != x can only step toward zero. Kept as a safety net;
// confirm before removing.
6:	pcmpeqb	%xmm1,					%xmm1		// -1
	paddd	%xmm1,					%xmm0		// undo the step past infinity
#if defined( __i386__ )
	movss	%xmm0,					FRAME_SIZE( STACKP )
	flds	FRAME_SIZE( STACKP )
#endif
	ret