/*
 *  nextafterl.s
 *  LibmV5
 *
 *  Written by Ian Ollmann on 11/9/07.
 *  Copyright 2006 Apple Computer. All rights reserved.
 *
 *  Syntax:  AT&T, run through the C preprocessor (note the #include / #if lines).
 *  Targets: i386 and x86_64. The macros ENTRY, FRAME_SIZE, STACKP, AX_P and DX_P
 *           are not defined in this file; presumably they come from "abi.h" and
 *           select the pointer-sized stack pointer / accumulator / data register
 *           for the target -- TODO confirm against abi.h.
 */

#include <machine/asm.h>
#include "abi.h"

// NOTE(review): both .align directives below precede the section switch, so they
// pad whatever section was current before it rather than the data that follows.
// Nothing here is loaded with an alignment-checking instruction (fldt, and/or
// with 8 byte memory operands), so this looks harmless -- confirm before moving.
#if defined( __x86_64__ )
.align 3
.literal8
signBit:        .quad       0x8000000000000000      // leading (integer) bit of an 80-bit significand
nonSignBit:     .quad       0x7fffffffffffffff      // every significand bit except the leading one
#endif

.align 4
.const

// Each long double below is a 16 byte slot: 64-bit significand, then sign+exponent word.
tiny:           .quad       0x8000000000000000,     0x1         //0x1.0p-16382
big:            .quad       0x8000000000000000,     0x7ffe      //0x1.0p16383   (not referenced in this file)
min:            .quad       0x1,    0x0,    0x1,    0x8000      //{ 0x1.0p-16445, -0x1.0p-16445 }

//-----------------------------------------------------------------------
// long double nextafterl( long double x, long double y )
// long double nexttowardl( long double x, long double y )   (same entry point)
//
// In:    x at FRAME_SIZE(STACKP), y at 16+FRAME_SIZE(STACKP); the 16-bit
//        sign+exponent word of each sits 8 bytes above its significand.
// Out:   st(0) = the representable value adjacent to x in the direction of y.
//          x == y          -> y
//          x or y is NaN   -> x + y
//          x == 0, x != y  -> copysign( 0x1.0p-16445, y ), after multiplying
//                             tiny * tiny to raise underflow and inexact
//          result has biased exponent 0      -> tiny * tiny is evaluated first
//          result has biased exponent 0x7fff -> x + x is evaluated first
// Clobb: AX_P, DX_P, flags, x87 stack; rcx on x86_64; ecx (on the exponent 0
//        and x == 0 paths) and xmm0-xmm3 on i386.
// Note:  the stack slot holding x is overwritten and reused as scratch for
//        the result.
//
// Method: step the 64-bit significand by +1 or -1 as an integer, restore the
//         explicit leading bit if the step flipped it, and in that case move
//         the exponent by the same +1 or -1.
//-----------------------------------------------------------------------
.text
ENTRY(nexttowardl)
ENTRY( nextafterl )
    fldt    FRAME_SIZE( STACKP )            // { x }
    fldt    16+FRAME_SIZE( STACKP )         // { y, x }
    xor     AX_P,       AX_P                // 0  (before the compare, so the compare flags survive)
    fucomip %st(1),     %st(0)              // { x }    compare y with x; unordered also sets ZF and PF
    setbe   %al                             // x >= y ? 1 : 0
    je      3f                              // if( x == y || isnan(x) || isnan(y) ) goto 3

    fldz                                    // { 0, x }
    xor     DX_P,       DX_P                // 0
    fucomip %st(1),     %st(0)              // { x }    compare 0 with x
    setb    %dl                             // 0 < x ? 1 : 0
    je      2f                              // if( x == 0 ) goto 2

    // The magnitude of x grows exactly when (x >= y) and (0 < x) disagree.
    xor     DX_P,       AX_P                // (x >= y) != (0 < x) ? 1 : 0
    add     AX_P,       AX_P                // (x >= y) != (0 < x) ? 2 : 0

#if defined( __i386__ )
    movq    FRAME_SIZE( STACKP ),   %xmm0   // load x significand
    xorpd   %xmm1,      %xmm1               // { 0, 0 }
    cmpeqsd %xmm1,      %xmm1               // { 0, -1LL }
    movdqa  %xmm0,      %xmm2               // old significand
    movd    %eax,       %xmm3               // (x >= y) != (0 < x) ? 2 : 0
    paddq   %xmm1,      %xmm0               // subtract 1 from significand
    psllq   $63,        %xmm1               // 0x8000000000000000ULL
    paddq   %xmm3,      %xmm0               // add back either 2 or 0, so the net step on the significand is +1 or -1

    //At this point, the significand might have overflowed or it might have underflowed.
    //If it overflowed, the leading bit was just lost due to modulo overflow. The rest of the bits
    //are zero (since we added 1), so all we need to do is set the leading bit again.  If it
    //underflowed, the bits are 0x7fffffffffffffff (since we subtracted 1) and all we need to do
    //is set the leading bit again. We still need to patch up the exponent, and we might have just
    //created a denormal. We will deal with that later.
    pxor    %xmm0,      %xmm2               // set leading bit to 1 if leading bit changed (we have overflow/underflow)
    pand    %xmm1,      %xmm2               // zero the non-leading bits
    por     %xmm2,      %xmm0               // set leading bit again in the significand if it became unset
    movmskpd %xmm2,     %edx                // put carry bit into edx (bit 0 = leading bit changed)
    movq    %xmm0,      FRAME_SIZE(STACKP)  // write out new significand
#else
    movq    FRAME_SIZE( STACKP ),   %rcx    // load significand of x
    movq    %rcx,       %rdx                // old significand of x
    subq    $1,         %rcx                // subtract 1 from significand of x
    addq    %rax,       %rcx                // add back in either 0 or 2 depending on direction of movement
    xorq    %rcx,       %rdx                // set leading bit to 1 if leading bit changed (we have overflow/underflow)
    andq    signBit(%rip),  %rdx            // zero all the other bits
    orq     %rdx,       %rcx                // restore leading bit in the significand if it changed
    shrq    $63,        %rdx                // move carry bit to low bit
    movq    %rcx,       FRAME_SIZE(STACKP)  // write out new significand
#endif

    // fix exponent
    test    $1,         %edx                // Did we change it?
    movzwl  8+FRAME_SIZE( STACKP ), %edx    // Load the exponent (sign + exponent word; mov leaves the test flags intact)
    jnz     1f                              // leading bit flipped: the exponent must move too

    // check for overflow/underflow
    // After the subtract, an unsigned value >= 0x7ffe means the biased exponent
    // was 0x7fff (compares equal) or 0 (wrapped around to 0xffffffff).
0:  andl    $0x7fff,    %edx                // drop the sign bit
    subl    $1,         %edx
    cmpl    $0x7ffe,    %edx
    jae     5f

    fldt    FRAME_SIZE( STACKP )            // { result, x }
    fstp    %st(1)                          // { result }
    ret

    //fix exponent
1:  subl    $1,         %eax                // 2 or 0 becomes +1 or -1
    addl    %eax,       %edx                // move the exponent one step in the direction of motion
    movw    %dx,        8+FRAME_SIZE( STACKP )  // write back sign + exponent
    jmp     0b


2:  // x == 0, x != y,  return copysignl( 0x1.0p-16445, y )
    movzwl  24+FRAME_SIZE( STACKP ),    %eax    // sign + exponent of y
    and     $0x8000,    AX_P                // sign of y
    shr     $11,        AX_P                // y is negative ? 16 : 0   (byte offset into min)
    fstp    %st(0)                          // {}
#if defined( __x86_64__ )
    leaq    min(%rip),  %rcx
    fldt    (%rcx, %rax, 1 )                // { copysign( 0x1.0p-16445, y ) }
    fldt    tiny( %rip )                    // { 0x1.0p-16382, copysign( 0x1.0p-16445, y ) }
#else
    call    0f                              // push the address of the next instruction ...
0:  popl    %ecx                            // ... and pop it: ecx = address of this 0 label, used as data base
    fldt    (min-0b)(%ecx, %eax, 1 )        // { copysign( 0x1.0p-16445, y ) }
    fldt    (tiny-0b)( %ecx )               // { 0x1.0p-16382, copysign( 0x1.0p-16445, y ) }
#endif
    fmulp   %st(0),     %st(0)              // { copysign( 0x1.0p-16445, y ) }  tiny * tiny, popped: set underflow, inexact
    ret


    // x == y || isnan(x) || isnan(y)
    // fldt does not touch EFLAGS, so PF from the first fucomip is still valid for jp.
3:  fldt    16+FRAME_SIZE( STACKP )         // { y, x }
    jp      4f                              // if( isnan(x) || isnan(y) ) goto 4
    fstp    %st(1)                          // { y }    return y
    ret

    // nan
4:  faddp                                   // return x + y
    ret

// overflow or underflow
// Flags still come from the cmpl at label 0: equal means the biased exponent is 0x7fff.
5:  je      6f                              // result is infinite, goto 6

    //underflow: the biased exponent of the result is 0
    fstp    %st(0)                          // get rid of x
#if defined( __x86_64__ )
    fldt    tiny( %rip )                    // { 0x1.0p-16382 }
    andq    nonSignBit(%rip),   %rcx        // unset the leading bit (rcx still holds the new significand)
    movq    %rcx,       FRAME_SIZE( STACKP )    // write out denorm significand
#else
    call    0f                              // same call/pop trick as above to find the constants
0:  popl    %ecx
    fldt    (tiny-0b)( %ecx )               // { 0x1.0p-16382 }
    pandn   %xmm0,      %xmm2               // unset the leading bit (xmm2 = ~carry mask & new significand)
    movq    %xmm2,      FRAME_SIZE(STACKP)  // write out denorm significand
#endif
    fmulp   %st(0),     %st(0)              // {}   tiny * tiny, popped: set underflow
    fldt    FRAME_SIZE( STACKP )            // { result }
    ret


// infinite
6:  faddp   %st(0),     %st(0)              // {}   add x to itself and throw away the result -- sets overflow and inexact
    fldt    FRAME_SIZE( STACKP )            // { result }
    ret