this repo has no description
at fixPythonPipStalling 84 lines 2.8 kB view raw
1// float hypotf(float x, float y) 2// float cabsf(float complex z) 3// 4// Returns sqrt(x*x + y*y), computed without spurious overflow or underflow. 5// 6// -- Stephen Canon, August 2009 7 8#if !defined __i386__ && !defined __x86_64__ 9#error This implementation is for i386 and x86_64 only 10#endif 11 12#define ABSMASK $0x7fffffff 13#define INFINITY $0x7f800000 14 15.text 16.globl _cabsf 17.globl _hypotf 18 19// Entry points ------------------------------------------------------- 20 21#if defined __i386__ 22.align 4 23_cabsf: // on i386, we can use the same code for 24_hypotf: // hypotf and cabsf, because the arguments 25 mov 4(%esp), %eax // come in at the same stack offsets 26 mov 8(%esp), %edx // 27 movss 4(%esp), %xmm0 // real at esp + 4 28 movss 8(%esp), %xmm1 // imag at esp + 8 29#else 30.align 4 // however, on x86_64, the registers used 31_cabsf: // are different. cabsf's arguments come 32 pshufd $0xfd, %xmm0, %xmm1 // in packed in xmm0. 33.align 4 34_hypotf: // 35 movd %xmm0, %eax // hypotf, on the other hand, gets x in 36 movd %xmm1, %edx // xmm0 and y in xmm1. 37#endif 38 mov ABSMASK, %ecx 39 40// Detect special cases ----------------------------------------------- 41 42 and %ecx, %eax // |x| 43 and %ecx, %edx // |y| 44 45 cmp INFINITY, %eax // we need to detect if either x or y is 46 jz L_returnAbsX // an infinity and return +infinity if so; 47 cmp INFINITY, %edx // this is because hypot(inf, NaN) needs 48 jz L_returnAbsY // to return +inf, not NaN. 49 50// Mainline ----------------------------------------------------------- 51 52 cvtss2sd %xmm0, %xmm0 // Naive computation of sqrt(x*x + y*y) 53 cvtss2sd %xmm1, %xmm1 // in double. The extra exponent range 54 mulsd %xmm0, %xmm0 // and precision protect against spurious 55 mulsd %xmm1, %xmm1 // over/underflow and the double rounding 56 addsd %xmm1, %xmm0 // is inconsequential, because hypot 57 sqrtsd %xmm0, %xmm0 // does not need to be correctly rounded. 58 cvtsd2ss %xmm0, %xmm0 // 59 60#ifdef __i386__ 61 movss %xmm0, 4(%esp) 62 flds 4(%esp) 63#endif 64 ret 65 66// Special case handling ---------------------------------------------- 67 68#ifdef __i386__ 69L_returnAbsY: 70 mov %edx, 4(%esp) 71 flds 4(%esp) 72 ret 73L_returnAbsX: 74 mov %eax, 4(%esp) 75 flds 4(%esp) 76 ret 77#else 78L_returnAbsY: 79 movd %edx, %xmm0 80 ret 81L_returnAbsX: 82 movd %eax, %xmm0 83 ret 84#endif