// float hypotf(float x, float y) // float cabsf(float complex z) // // Returns sqrt(x*x + y*y), computed without spurious overflow or underflow. // // -- Stephen Canon, August 2009 #if !defined __i386__ && !defined __x86_64__ #error This implementation is for i386 and x86_64 only #endif #define ABSMASK $0x7fffffff #define INFINITY $0x7f800000 .text .globl _cabsf .globl _hypotf // Entry points ------------------------------------------------------- #if defined __i386__ .align 4 _cabsf: // on i386, we can use the same code for _hypotf: // hypotf and cabsf, because the arguments mov 4(%esp), %eax // come in at the same stack offsets mov 8(%esp), %edx // movss 4(%esp), %xmm0 // real at esp + 4 movss 8(%esp), %xmm1 // imag at esp + 8 #else .align 4 // however, on x86_64, the registers used _cabsf: // are different. cabsf's arguments come pshufd $0xfd, %xmm0, %xmm1 // in packed in xmm0. .align 4 _hypotf: // movd %xmm0, %eax // hypotf, on the other hand, gets x in movd %xmm1, %edx // xmm0 and y in xmm1. #endif mov ABSMASK, %ecx // Detect special cases ----------------------------------------------- and %ecx, %eax // |x| and %ecx, %edx // |y| cmp INFINITY, %eax // we need to detect if either x or y is jz L_returnAbsX // an infinity and return +infinity if so; cmp INFINITY, %edx // this is because hypot(inf, NaN) needs jz L_returnAbsY // to return +inf, not NaN. // Mainline ----------------------------------------------------------- cvtss2sd %xmm0, %xmm0 // Naive computation of sqrt(x*x + y*y) cvtss2sd %xmm1, %xmm1 // in double. The extra exponent range mulsd %xmm0, %xmm0 // and precision protect against spurious mulsd %xmm1, %xmm1 // over/underflow and the double rounding addsd %xmm1, %xmm0 // is inconsequential, because hypot sqrtsd %xmm0, %xmm0 // does not need to be correctly rounded. cvtsd2ss %xmm0, %xmm0 // #ifdef __i386__ movss %xmm0, 4(%esp) flds 4(%esp) #endif ret // Special case handling ---------------------------------------------- #ifdef __i386__ L_returnAbsY: mov %edx, 4(%esp) flds 4(%esp) ret L_returnAbsX: mov %eax, 4(%esp) flds 4(%esp) ret #else L_returnAbsY: movd %edx, %xmm0 ret L_returnAbsX: movd %eax, %xmm0 ret #endif