/* tanf.s -- tanf for standard math library. Written by Eric Postpischil, June 2007. */ #define SignificandBits 24 #define ExponentBits 8 .literal8 .align 3 NegativeOne: .double -1 .const .align 4 /* Coefficients to calculate: x * (x**4 + c02*x**2 + c00) * (x**4 + c12*x**2 + c10) * (x**4 + c22*x**2 + c20) * ( c32*x**2 + c30) c02 and c12 are stored as a pair at the location below labeled c02, c00 and c10 are at c00, c22 and c23 at c22, and c20 and c30 at c20. This polynomial was designed to cover two binades (supporting an exponent fold of one bit) for tanf with its period (pi) partitioned into two intervals. The expanded polynomial was factored to create the form above convenient for calculating with SIMD. This was minimized for ULP error. */ c02: .double -0.34431297545117363561, -1.53503935362160698123 c00: .double +0.75337415977492632831, +0.92783629599735333058 c22: .double +0.91301179133375347365, +5.27860261872543679865 c20: .double +0.58807104225499649417, +3.82127246397188494330 /* ExponentFold specifies how many bits of the exponent will not be used to look up entries in the table. This must match the ExponentFold used to generate the table. The behavior for infinite inputs relies on the table contents -- if the table entries looked up for an infinite input are m0 and m1, then m0*infinity + m1*infinity should generate a NaN. This happens of either m0 or m1 is a zero or they have opposite signs. If this were not true, additional arrangements would have to be made for infinite inputs. */ #define ExponentFold 1 // Include the reduction table. See tanfMakeTable.c for more information. #include "tanfTable.s" // Rename the general registers (just to make it easier to keep track of them). #if defined __i386__ #define r0 %eax #define r1 %ecx #define r2 %edx #define r3 %ebx #define r4 %esp #define r5 %ebp #define r6 %esi #define r7 %edi #elif defined __x86_64__ #define r0 %rax #define r1 %rcx #define r2 %rdx #define r3 %rbx #define r4 %rsp #define r5 %rbp #define r6 %rsi #define r7 %rdi #else #error "Unknown architecture." #endif .text // Define symbols for tanf. #define BaseP r0 // Base address for position-independent addressing. #define xInteger r1 // The input x in an integer register. #define ki r1 // x*m0 rounded to an integer. #define p0 %xmm0 // Must be in %xmm0 for return on x86_64. #define p2 %xmm1 #define x %xmm2 #define k %xmm3 #define m1 %xmm4 #define x1 %xmm5 #define Numerator %xmm6 #if defined __i386__ // Define location of argument x. #define Argx 4(%esp) // Define how to address data. BaseP must contain the address of label 0. #define Address(label) label-0b(BaseP) #elif defined __x86_64__ // Define location of argument x. #define Argx %xmm0 // Define how to address data. #define Address(label) label(%rip) #endif /* float tanf(float x). Notes: Citations in parentheses below indicate the source of a requirement. "C" stands for ISO/IEC 9899:TC2. The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no requirements since it defers to C and requires errno behavior only if we choose to support it by arranging for "math_errhandling & MATH_ERRNO" to be non-zero, which we do not. Return value: For +/- zero, return zero with same sign (C F.9 12 and F.9.1.7). For +/- infinity, return a NaN (C F.9.1.7). For a NaN, return the same NaN (C F.9 11 and 13). Otherwise: If the rounding mode is round-to-nearest, return tangent(x) faithfully rounded. Not currently implemented: In other rounding modes, return tangent(x) possibly with slightly worse error, not necessarily honoring the rounding mode (Ali Sazegari narrowing C F.9 10). Exceptions: Raise underflow for a denormal result (C F.9 7 and Draft Standard for Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the smallest normal, underflow may or may not be raised. This is stricter than the older 754 standard. May or may not raise inexact, even if the result is exact (C F.9 8). Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite of C F.2.1) or an infinity (C F.9.1.7) but not if the input is a quiet NaN (C F.9 11). May not raise exceptions otherwise (C F.9 9). Properties: Desired to be monotonic. Not yet proven! Exhaustive testing proved this routine returns faithfully rounded results. */ .align 5 .globl _tanf _tanf: // Put x into an integer register. #if defined __i386__ mov Argx, xInteger // Put x into xInteger. #elif defined __x86_64__ movd Argx, xInteger // Put x into xInteger. #endif cvtss2sd Argx, x // Convert x to double precision. #if defined __i386__ // Get address of 0 in BaseP. call 0f // Push program counter onto stack. 0: pop BaseP // Get program counter. #elif defined __x86_64__ // Get address of reduction table in BaseP. lea ReductionTable(%rip), BaseP #endif movsd Address(NegativeOne), Numerator unpcklpd x, x // Duplicate x. /* Shift: SignificandBits-1 bits right to remove significand. ExponentFold bits right to remove unused bits of exponent. 4 bits left to multiply by size of a table entry. */ shr $SignificandBits-1+ExponentFold-4, xInteger // Clear bits other than those we are using from exponent. and $(1<