// NOTE: scraped listing of ArcSinCosDD.c (767 lines, 32 kB); web-viewer chrome removed.
/*
 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
//
// ArcSinCosDD.c
//
// Double-double Function Library
// Copyright: (c) 1995-96 by Apple Computer, Inc., all rights reserved
//
// long double asinl( long double x );
// long double acosl( long double x );
//
// Implements quad-precision (head/tail "double-double") arc sine and arc
// cosine.  Each pi-related constant below is split into a head and one or
// two tail doubles so that sums against it retain ~107+ bits of precision.

#include "math.h"
#include "fp_private.h"
#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L)
#include "DD.h"

// Square root of a double-double (x, y) that also returns extra low-order
// bits through *pextra (defined elsewhere in the library).
long double __sqrtldextra( double x, double y, double *pextra);

// Floating-point constants: (head, tail1[, tail2]) splittings of pi/2, pi/4,
// 3pi/4 and pi.  The hex comment on each line is the exact bit pattern.
static const double kPiBy2 = 1.570796326794896619231322; // 0x1.921FB54442D18p0
static const double kPiBy2Tail1 = 6.123233995736766e-17; // 0x1.1A62633145C07p-54
static const double kPiBy2Tail2 = -1.4973849048591698e-33; // 0x1.F1976B7ED8FBCp-110

static const double kPiBy4 = 7.85398163397448279e-01; // 0x1.921FB54442D18p-1
static const double kPiBy4Tail1 = 3.061616997868383018e-17; // 0x1.1A62633145C07p-55
static const double kPiBy4Tail2 = -7.486924524295849165e-34; // 0x1.F1976B7ED8FBCp-111

static const double k3PiBy4 = 2.356194490192344837; // 0x1.2D97C7F3321D2p1
static const double k3PiBy4Tail1 = 9.184850993605148438e-17; // 0x1.A79394C9E8A0Ap-54
static const double k3PiBy4Tail2 = 3.916898464750400322e-33; // 0x1.456737B06EA1Ap-108

static const double kPi = 3.141592653589793116; // 0x1.921FB54442D18p1
static const double kPiTail1 = 1.224646799147353207e-16; // 0x1.1A62633145C07p-53
static const double kPiTail2 = -2.994769809718339666e-33; // 0x1.F1976B7ED8FBCp-109

// Range-split threshold.  NOTE(review): this literal (0x1.bb67ae84a8e3ep-1)
// is slightly BELOW the correctly rounded sqrt(3)/2 = 0.86602540378443865;
// it is used only to pick an evaluation branch, so exactness appears not to
// be required — confirm before "fixing".
static const double kSqrt3By2 = 0.866025403684439; // 0x1.bb67ae84a8e3ep-1

static const int kNterms = 23;

// 0x7fc00000 is the single-precision quiet-NaN bit pattern; kNaNu.f is used
// below as the NaN result for invalid arguments.  (Float is presumably a
// bits/float union from fp_private.h — confirm.)
static const Float kNaNu = {0x7fc00000};

// Taylor-series coefficients for asin, 51 entries of (Head, Tail) double
// pairs stored as raw 32-bit halves.  Accessed through
// struct asinCoeffEntry * below.  NOTE(review): this aliasing and the
// hi/lo word order assume the original (big-endian PowerPC) platform —
// verify both before porting.
static const uint32_t asinCoeff[] __attribute__ ((aligned(8))) = {
    0x3FF00000, 0x00000000, 0x00000000, 0x00000000,
    0x3FC55555, 0x55555555, 0x3C655555, 0x55555555,
    0x3FB33333, 0x33333333, 0x3C499999, 0x9999999A,
    0x3FA6DB6D, 0xB6DB6DB7, 0xBC324924, 0x92492492,
    0x3F9F1C71, 0xC71C71C7, 0x3C1C71C7, 0x1C71C71C,
    0x3F96E8BA, 0x2E8BA2E9, 0xBC31745D, 0x1745D174,
    0x3F91C4EC, 0x4EC4EC4F, 0xBC2D89D8, 0x9D89D89E,
    0x3F8C9999, 0x9999999A, 0xBC299999, 0x9999999A,
    0x3F87A878, 0x78787878, 0x3C2E1E1E, 0x1E1E1E1E,
    0x3F83FDE5, 0x0D79435E, 0x3C2435E5, 0x0D79435E,
    0x3F812EF3, 0xCF3CF3CF, 0x3C1E79E7, 0x9E79E79E,
    0x3F7DF3BD, 0x37A6F4DF, 0xBC190B21, 0x642C8591,
    0x3F7A6863, 0xD70A3D71, 0xBC170A3D, 0x70A3D70A,
    0x3F7782DD, 0xA12F684C, 0xBC02F684, 0xBDA12F68,
    0x3F751BA3, 0x08D3DCB1, 0xBC1CB08D, 0x3DCB08D4,
    0x3F731683, 0xBDEF7BDF, 0xBBE08421, 0x08421084,
    0x3F715EE9, 0xD45D1746, 0xBC0745D1, 0x745D1746,
    0x3F6FCAF8, 0xFB6DB6DB, 0x3C0B6DB6, 0xDB6DB6DB,
    0x3F6D3D2A, 0x8E0DD67D, 0xBC0D67C8, 0xA60DD67D,
    0x3F6B026F, 0x57B13B14, 0xBC03B13B, 0x13B13B14,
    0x3F690CB7, 0x7F60C7CE, 0x3BD8F9C1, 0x8F9C18FA,
    0x3F6750DE, 0x64D7D05F, 0x3C005F41, 0x7D05F418,
    0x3F65C5F5, 0x6EFAAAAB, 0xBC055555, 0x55555555,
    0x3F6464C0, 0x950F7D47, 0xBBF882B9, 0x31057262,
    0x3F632755, 0x86C5F2F0, 0x3C04E5E0, 0xA72F0539,
    0x3F6208D3, 0x570AE5A6, 0xBC069696, 0x96969697,
    0x3F61052B, 0xC5FA960A, 0xBC05BC60, 0x9A90E7D9,
    0x3F6018F9, 0x63C229BF, 0xBBF4F209, 0x4F2094F2,
    0x3F5E82BE, 0x60D9127E, 0xBBEF7047, 0xDC11F704,
    0x3F5CF7DE, 0xA5B6E830, 0xBBF15B1E, 0x5F75270D,
    0x3F5B8D2E, 0x5667CE6C, 0x3BD0C971, 0x4FBCDA3B,
    0x3F5A3F1E, 0xF82137EE, 0x3BEC71C7, 0x1C71C71C,
    0x3F590A9F, 0x747DB95D, 0xBBE6A56A, 0x56A56A57,
    0x3F57ED07, 0x9ED4C037, 0xBBD03D22, 0x6357E16F,
    0x3F56E407, 0x90442038, 0x3BCA6F4D, 0xE9BD37A7,
    0x3F55ED9A, 0x0BD901B6, 0xBBF040E6, 0xC2B4481D,
    0x3F5507F9, 0x4C2470BD, 0x3BE56070, 0x381C0E07,
    0x3F543195, 0xBF54E5D7, 0xBBFB9E4B, 0x17E4B17E,
    0x3F53690E, 0x51F04536, 0x3BF9D974, 0x5D1745D1,
    0x3F52AD29, 0xFCD49D54, 0x3BF82FBF, 0x309B8B57,
    0x3F51FCD2, 0x5AE4A26E, 0x3BECA730, 0xFCD6E9E0,
    0x3F51570F, 0x16ECE10A, 0x3BFA9E4B, 0xF3A9A378,
    0x3F50BB02, 0x0BC1EAA0, 0x3BF8C4B0, 0x78787878,
    0x3F5027E3, 0xF7FCD8BF, 0x3BF67F63, 0x2C234F73,
    0x3F4F3A03, 0x591C2915, 0x3BE22B2B, 0xE05C0B81,
    0x3F4E3373, 0x43F5C1F5, 0x3BEEF13D, 0x589D89D9,
    0x3F4D3AF3, 0xC78CE2E4, 0x3BB8EEC6, 0x4A5294A5,
    0x3F4C4F7C, 0x88F08B5E, 0x3BD4D455, 0x26BCA1AF,
    0x3F4B701D, 0x9E1F038E, 0xBBE2BB23, 0xE09FAB8C,
    0x3F4A9BFC, 0xD93A26FD, 0x3BC437EC, 0xD445D174,
    0x3F49D253, 0x6C99619A, 0xBB9FD25B, 0x093F5DC8
};

// View of asinCoeff as (Head, Tail) double pairs.
// NOTE(review): besides declaring the tag, this also defines an unused
// file-scope object named asinCoeffEntry — likely unintentional; confirm
// before removing.
struct asinCoeffEntry {
    double Head;
    double Tail;
} asinCoeffEntry;

// asinl — quad-precision (double-double) arc sine.
// Returns asin(x) in [-pi/2, pi/2] as a head/tail pair packed into a long
// double.  NaN input returns x; |x| > 1 (head or head+tail) returns NaN;
// |x| < 2^-55 returns x unchanged (asin(x) == x to quad precision there).
// Three evaluation branches:
//   |x| <= 0.5          : direct Taylor series in x^2;
//   0.5 < |x| < ~.866   : series in arg = 1 - 2x^2, result = pi/4 - asin/2;
//   ~.866 < |x| <= 1    : identity asin(x) = pi/2 - 2 asin(sqrt((1-x)/2)).
// The rounding environment is zeroed on entry and restored before every
// return (FEGETENVD/FESETENVD).
long double asinl( long double x )
{
    double arg, argl, argsq, argsq2;
    double temp,temp2, temp3;
    double sum, sumt, suml;
    double xHead, xTail;
    double sq, sq2, sq3;
    double res, reslow, resmid, restiny, reshi, resbot, resinf;
    double h, h2, g2, g3, g4, p, q, t;
    double prod, prodlow;
    double fenv;
    DblDbl ddx;

    int i;
    uint32_t hibits;
    struct asinCoeffEntry *asinPtr = (struct asinCoeffEntry *) asinCoeff;

    FEGETENVD(fenv);
    FESETENVD(0.0);

    ddx.f = x;

    xHead = ddx.d[0];
    xTail = ddx.d[1];

    hibits = ddx.i[0] & 0x7FFFFFFFu;            // highest 32 bits as uint32_t

    // This filters most of the invalid cases
    if (hibits > 0x3ff00000u) {                 // |x| > 1.0 or NaN
        if (xHead != xHead) {                   // x is a NaN
            FESETENVD(fenv);
            return x;
        }
        else {                                  // |x| > 1.0
            ddx.d[0] = kNaNu.f;                 // NaN
            ddx.d[1] = ddx.d[0];
            FESETENVD(fenv);
            return (ddx.f);
        }
    }

    // For values of |x| very close to unity, xTail needs to be considered.
    if ((xHead > 0.0) && (xHead -1.0 + xTail > 0.0)) {  // x > 1 out of range
        ddx.d[0] = kNaNu.f;                     // NaN
        ddx.d[1] = ddx.d[0];
        FESETENVD(fenv);
        return (ddx.f);
    }
    if ((xHead < 0.0) && (xHead +1.0 + xTail < 0.0)) {  // x < -1 out of range
        ddx.d[0] = kNaNu.f;                     // NaN
        ddx.d[1] = ddx.d[0];
        FESETENVD(fenv);
        return (ddx.f);
    }

    // finite x, with |x| <= 1

    // for tiny x, ArcSin x = x
    if (hibits < 0x3c800000u) {                 // |x| < 2^(-55)
        FESETENVD(fenv);
        return x;
    }

    // finite result but not tiny

    // Argument is valid, i.e., |x| <= 1: range reduce

    if (hibits <= 0x3fe00000u) {                // |xHead| <= 0.5

        // argsq/argsq2 = head/tail of x^2 = xHead^2 + 2*xHead*xTail (+ ...)
        temp = __FMUL( (2.0*xHead), xTail );
        argsq = __FMADD( xHead, xHead, temp );  // temp + xHead*xHead
        /* argsq2 = xHead*xHead - argsq + temp + ((2.0*xHead)*xTail-temp); */
        argsq2 = __FMSUB( xHead, xHead, argsq ) + temp + __FMSUB( (2.0*xHead), xTail, temp );

        sum = asinPtr[50].Head;                 // Use Taylor Series
        sum = __FMADD( sum, argsq, asinPtr[49].Head );  // asinPtr[49].Head + sum*argsq
        sum = __FMADD( sum, argsq, asinPtr[48].Head );
        sum = __FMADD( sum, argsq, asinPtr[47].Head );
        sum = __FMADD( sum, argsq, asinPtr[46].Head );

        for (i = 45; i > kNterms+1; i--)        // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        // switch to head(sum)/tail(suml) accumulation for the low-order terms
        sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms; i++) {        // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head );
            /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
            sum = temp;
        }

        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, xHead, prod*xTail );  // mult. by arg
        temp = __FMADD( prod, xHead, temp2 );
        temp2 = __FMSUB( prod, xHead, temp ) + temp2;

        res = __FADD( xHead, temp );            // series is complete except for argument
        reslow = (xHead - res + temp);          // exact
        resmid = __FADD( xTail, temp2 );
        restiny = xTail - resmid + temp2;
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);

        ddx.d[0] = reshi;
        ddx.d[1] = resbot;
        FESETENVD(fenv);
        return (ddx.f);
    }
    else if (fabs(xHead) < kSqrt3By2) {         // Pi/6 < |result| < Pi/3

        // Use 1 - 2*(xHead,xTail)^2 as arg.
        h = __FMUL( xHead, xHead );             // careful computation of the
        h2 = __FMSUB( xHead, xHead, h );        // square of the original argument
        g2 = __FMUL( (2.0*xHead), xTail );
        g3 = __FMADD( xTail, xTail, __FMSUB( (2.0*xHead), xTail, g2 ) );
        t = __FADD( h2 , g2 );                  // sum of middle parts
        sq = __FADD( h, t );

        if (fabs(h2) > fabs(g2))                // More than 107 bits are needed,
            g4 = h2 - t + g2 + g3;              // because the square will be
        else                                    // subtracted from small constants,
            g4 = g2 - t + h2 + g3;              // causing leading-bit cancellations,
                                                // which must be filled in from the
        sq2 = __FADD( (h - sq + t), g4 );       // right
        sq3 = (h - sq + t) - sq2 + g4;          // This captures extra low-order bits
        temp = __FNMSUB( 2.0, sq, 1.0 );        // 1.0 - 2.0*sq
        temp2 = __FNMSUB( 2.0, sq2, __FNMSUB( 2.0, sq, 1.0 - temp ) );  // 1.0 - temp - 2.0*sq - 2.0*sq2
        arg = __FADD( temp, temp2 );
        argl = __FNMSUB( 2.0, sq3, temp - arg + temp2 );  // temp - arg + temp2 - 2.0*sq3
        temp = 2.0*arg*argl;
        argsq = __FMADD( arg, arg, temp );      // Square of new argument
        argsq2 = __FMSUB( arg, arg, argsq ) + temp;

        // Compute result as Pi/4 - 0.5*asin(arg,argl); compute asin with
        // the Taylor series.
        sum = asinPtr[50].Head;

        for (i = 49; i > kNterms+1; i--)        // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms; i++) {        // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head );
            /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
            sum = temp;
        }
        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, arg, prod*argl );     // mult. by arg
        temp = __FMADD( prod, arg, temp2 );
        temp2 = __FMSUB( prod, arg, temp ) + temp2;

        res = __FADD( arg, temp );              // add arg for asin(arg)
        reslow = (arg - res + temp);            // exact
        resmid = __FADD( argl, temp2 );
        restiny = argl - resmid + temp2;
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = __FADD( (res - reshi + p), (q + restiny) );  // get ready to subtract from pi/4
        resinf = (res - reshi + p) - resbot + (q + restiny);
        temp = kPiBy4;
        temp2 = kPiBy4Tail1;
        temp3 = kPiBy4Tail2;
        res = __FNMSUB( 0.5, reshi, temp );     // temp - 0.5*reshi
        reslow = __FNMSUB( 0.5, reshi, temp - res ) ;
        resmid = __FNMSUB( 0.5, resbot, temp2 );
        if (fabs(temp2) > fabs(0.5*resbot))
            /* restiny = temp2 - resmid - 0.5*resbot + temp3 - 0.5*resinf; */
            restiny = __FNMSUB( 0.5, resbot, temp2 - resmid ) + __FNMSUB( 0.5, resinf, temp3 );
        else
            /* restiny = temp2 - (0.5*resbot + resmid) + temp3 - 0.5*resinf; */
            restiny = temp2 - __FMADD( 0.5, resbot, resmid ) + __FNMSUB( 0.5, resinf, temp3 );
        p = __FADD( reslow, resmid );
        q = reslow - p + resmid;
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);

        if (xHead > 0.0) {                      // restore the sign of the input
            ddx.d[0] = reshi;
            ddx.d[1] = resbot;
        }
        else {
            ddx.d[0] = -reshi;
            ddx.d[1] = -resbot;
        }

        FESETENVD(fenv);
        return (ddx.f);
    }
    else {      // "large arguments", .866 < Abs(xHead,xTail) <= 1.0

        if ((xHead - 1.0 + xTail) == 0.0) {     // for input of 1.0, return Pi/2
            ddx.d[0] = kPiBy2;
            ddx.d[1] = kPiBy2Tail1;
            FESETENVD(fenv);
            return ddx.f;
        }
        if ((xHead + 1.0 - xTail) == 0.0) {     // for input of -1.0, return -Pi/2
            ddx.d[0] = - kPiBy2;
            ddx.d[1] = - kPiBy2Tail1;
            FESETENVD(fenv);
            return ddx.f;
        }
        if (xHead < 0.0) {                      // Use absolute value of input
            temp = -xHead;                      // to work only in the first
            temp2 = -xTail;                     // quadrant
        }
        else {                                  // For arg x > 0.5, use identity
            temp = xHead;                       // asin(x) = pi/2 - 2 asin(((1-x)/2)^.5)
            temp2 = xTail;
        }
        h = __FNMSUB( 0.5, temp, 0.5 );         // 0.5 - 0.5*temp; exact
        argsq = __FNMSUB( 0.5, temp2, h );      // h - 0.5*temp2
        argsq2 = __FNMSUB( 0.5, temp2, h - argsq );  // square of reduced arg, exact

        // NOTE(review): the low-order sqrt correction written through &temp
        // is clobbered by the Taylor loop below and never folded into
        // restiny — compare acosl, which keeps it in sqrtextra and adds it.
        // Possible (tiny) precision loss; confirm intent before changing.
        ddx.f = __sqrtldextra(argsq, argsq2 , &temp);  // The new argument

        sum = asinPtr[25].Head;                 // Taylor Series (half the terms suffice here)

        for (i = 24; i > kNterms/2+1; i--)      // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        sumt = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms/2; i++) {      // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head );
            /* suml = asinPtr[kNterms/2-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms/2-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms/2-i+1].Tail ) );
            sum = temp;
        }

        arg = ddx.d[0];
        argl = ddx.d[1];

        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, arg, prod*argl );     // multiply by arg
        temp = __FMADD( prod, arg, temp2 );
        temp2 = __FMSUB( prod, arg, temp ) + temp2;

        res = __FADD( arg, temp );              // add arg for asin(arg)
        reslow = (arg - res + temp);            // exact
        resmid = __FADD( argl, temp2 );
        restiny = argl - resmid + temp2;
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = __FADD( (res - reshi + p), (q + restiny) );  // get ready to subtract from pi/2
        resinf = (res - reshi + p) - resbot + (q + restiny);

        res = __FNMSUB( 2.0, reshi, kPiBy2 );   // kPiBy2 - 2.0*reshi
        reslow = __FNMSUB( 2.0, reshi, kPiBy2 - res );
        resmid = __FNMSUB( 2.0, resbot, kPiBy2Tail1 );

        if (kPiBy2Tail1 > fabs(2.0*resbot))
            /* restiny = kPiBy2Tail1 - resmid - (2.0*resbot) + (kPiBy2Tail2 - 2.0*resinf); */
            restiny = __FNMSUB( 2.0, resbot, kPiBy2Tail1 - resmid ) + __FNMSUB( 2.0, resinf, kPiBy2Tail2 );
        else
            /* restiny = kPiBy2Tail1 - (resmid + (2.0*resbot)) + (kPiBy2Tail2 - 2.0*resinf); */
            restiny = kPiBy2Tail1 - __FMADD( 2.0, resbot, resmid ) + __FNMSUB( 2.0, resinf, kPiBy2Tail2 );
        p = __FADD( reslow, resmid );
        q = reslow - p + resmid;
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);
        if (xHead < 0) {                        // restore the sign of the input
            ddx.d[0] = -reshi;
            ddx.d[1] = -resbot;
        }
        else {
            ddx.d[0] = reshi;
            ddx.d[1] = resbot;
        }
        FESETENVD(fenv);
        return (ddx.f);
    }
}

// acosl — quad-precision (double-double) arc cosine.
// Returns acos(x) in [0, pi] as a head/tail pair packed into a long double.
// NaN input returns x; |x| > 1 (head or head+tail) returns NaN; |x| < 2^-55
// returns pi/2 - x computed in quad precision; exact +1 returns 0 and exact
// -1 returns pi.  Same three-branch range reduction as asinl, with the asin
// series result folded into pi/2, pi/4, 3pi/4 or pi as appropriate.  The
// rounding environment is zeroed on entry and restored before every return.
long double acosl( long double x )
{
    double arg, argl, argsq, argsq2;
    double temp,temp2, temp3;
    double sum, sumt, suml;
    double xHead, xTail;
    double sq, sq2, sq3;
    double sqrtextra;
    double res, reslow, resmid, restiny, reshi, resbot, resinf;
    double h, h2, g2, g3, g4, p, q, t;
    double prod, prodlow;
    double fenv;
    DblDbl ddx;

    int i;
    uint32_t hibits;

    struct asinCoeffEntry *asinPtr = (struct asinCoeffEntry *) asinCoeff;

    FEGETENVD(fenv);
    FESETENVD(0.0);

    ddx.f = x;

    xHead = ddx.d[0];
    xTail = ddx.d[1];

    hibits = ddx.i[0] & 0x7FFFFFFFu;            // highest 32 bits as uint32_t

    // This filters most of the invalid cases (NaNs, inf and |x| > 1)

    if (hibits > 0x3ff00000u) {                 // |x| > 1.0 or NaN
        if (xHead != xHead) {                   // x is a NaN
            FESETENVD(fenv);
            return x;
        }
        else {                                  // |x| > 1.0
            ddx.d[0] = kNaNu.f;                 // NaN
            ddx.d[1] = ddx.d[0];
            FESETENVD(fenv);
            return (ddx.f);
        }
    }

    // For values of |x| very close to unity, xTail needs to be considered.

    if ((xHead > 0.0) && (xHead -1.0 + xTail > 0.0)) {  // x > 1 out of range
        ddx.d[0] = kNaNu.f;                     // NaN
        ddx.d[1] = ddx.d[0];
        FESETENVD(fenv);
        return (ddx.f);
    }

    if ((xHead < 0.0) && (xHead +1.0 + xTail < 0.0)) {  // x < -1 out of range
        ddx.d[0] = kNaNu.f;                     // NaN
        ddx.d[1] = ddx.d[0];
        FESETENVD(fenv);
        return (ddx.f);
    }

    // finite result

    // finite tiny x: acos(x) = pi/2 - x to quad precision
    if (hibits < 0x3c800000u) {                 // Tiny argument -- |x| < 2^-55
        // subnormal case underflow. Zero must be excluded.
        res = kPiBy2;
        reslow = -xHead;
        resmid = __FSUB( kPiBy2Tail1, xTail );
        restiny = kPiBy2Tail1 - resmid - xTail + kPiBy2Tail2;
        p = __FADD( reslow, resmid );
        reshi = __FADD( res, p );
        if (fabs (reslow) > fabs (resmid))      // order the residual subtraction by magnitude
            q = reslow - p + resmid;
        else
            q = resmid - p + reslow;
        resbot = (res-reshi+p) + (q+restiny);
        ddx.d[0] = __FADD( reshi, resbot );
        ddx.d[1] = (reshi - ddx.d[0]) + resbot;
        FESETENVD(fenv);
        return (ddx.f);
    }

    // Argument is valid, i.e., |x| <= 1: range reduce

    if (hibits <= 0x3fe00000u) {                // |xHead| <= 0.5
        // in power series for ASIN
        temp = __FMUL( (2.0*xHead), xTail );    // Compute acos(x) = pi/2 - asin(x)
        argsq = __FMADD( xHead, xHead, temp );
        /* argsq2 = xHead*xHead - argsq + temp + ((2.0*xHead)*xTail-temp); */
        argsq2 = __FMSUB( xHead, xHead, argsq ) + temp + __FMSUB( (2.0*xHead), xTail, temp );

        sum = asinPtr[50].Head;                 // Use Taylor Series
        sum = __FMADD( sum, argsq, asinPtr[49].Head );
        sum = __FMADD( sum, argsq, asinPtr[48].Head );
        sum = __FMADD( sum, argsq, asinPtr[47].Head );
        sum = __FMADD( sum, argsq, asinPtr[46].Head );

        for (i = 45; i >= kNterms+2; i--)       // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms; i++) {        // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head );
            /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
            sum = temp;
        }

        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, xHead, prod*xTail );  // mult. by arg
        temp = __FMADD( prod, xHead, temp2 );
        temp2 = __FMSUB( prod, xHead, temp ) + temp2;

        res = __FADD( xHead, temp );            // add argument
        reslow = (xHead - res + temp);
        resmid = __FADD( xTail, temp2 );
        restiny = xTail - resmid + temp2;
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = __FADD( (res - reshi + p), (q + restiny) );
        resinf = (res - reshi + p) - resbot + (q + restiny);

        res = __FSUB( kPiBy2, reshi );          // and subtract from Pi/2
        reslow = kPiBy2 - res - reshi;
        resmid = __FSUB( kPiBy2Tail1, resbot );

        if (kPiBy2Tail1 > fabs (resbot))        // order the residual subtraction by magnitude
            restiny = kPiBy2Tail1 - resmid - resbot + (kPiBy2Tail2 - resinf);
        else
            restiny = kPiBy2Tail1 - (resmid + resbot) + (kPiBy2Tail2 - resinf);
        p = __FADD( reslow, resmid );
        q = reslow - p + resmid;
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);

        ddx.d[0] = __FADD( reshi, resbot );
        ddx.d[1] = (reshi - ddx.d[0]) + resbot;

        FESETENVD(fenv);
        return (ddx.f);
    }


    if (fabs (xHead) < kSqrt3By2) {             // Pi/6 < |result| < Pi/3
        // Use 2*(xHead,xTail)^2 - 1 as arg.
        h = __FMUL( xHead, xHead );             // careful computation of the
        h2 = __FMSUB( xHead, xHead, h );        // square of the original argument
        g2 = __FMUL( (2.0*xHead), xTail );
        g3 = __FMADD( xTail, xTail, __FMSUB( (2.0*xHead), xTail, g2 ) );
        t = __FADD( h2 , g2 );                  // sum of middle parts
        sq = __FADD( h, t );

        if (fabs (h2) > fabs (g2))              // More than 107 bits are needed,
            g4 = h2 - t + g2 + g3;              // because the square will be
        else                                    // subtracted from small constants,
            g4 = g2 - t + h2 + g3;              // causing leading-bit cancel-
                                                // lations, which must be filled
        sq2 = __FADD( (h - sq + t), g4 );       // in from the right
        sq3 = (h - sq + t) - sq2 + g4;          // This captures extra l.o. bits
        temp = __FMSUB( 2.0, sq, 1.0 );         // 2.0*sq - 1.0
        temp2 = __FMADD( 2.0, sq2, __FMSUB( 2.0, sq, 1.0 + temp ) );  // 2.0*sq - (1.0 + temp) + 2.0*sq2
        arg = __FADD( temp, temp2 );
        argl = __FMADD( 2.0, sq3, temp - arg + temp2 );  // temp - arg + temp2 + 2.0*sq3
        temp = 2.0*arg*argl;
        argsq = __FMADD( arg, arg, temp );      // Square of new argument
        argsq2 = __FMSUB( arg, arg, argsq ) + temp;

        // Compute result as Pi/4 - 0.5*asin(arg,argl) (or 3Pi/4 + 0.5*asin
        // for negative x, below); compute asin with the Taylor series.
        sum = asinPtr[50].Head;

        for (i = 49; i >= kNterms+2; i--)       // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        sumt = __FMADD( sum, argsq, asinPtr[kNterms+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms; i++) {        // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head );
            /* suml = asinPtr[kNterms-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms-i+1].Tail ) );
            sum = temp;
        }

        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, arg, prod*argl );     // mult. by arg
        temp = __FMADD( prod, arg, temp2 );
        temp2 = __FMSUB( prod, arg, temp ) + temp2;

        res = __FADD( arg, temp );              // add arg for asin(arg)
        reslow = (arg - res + temp);            // exact
        resmid = __FADD( argl, temp2 );
        restiny = argl - resmid + temp2;
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = __FADD( (res - reshi + p), (q + restiny) );  // prepare to subtract from pi/4
        resinf = (res - reshi + p) - resbot + (q + restiny);

        if (xHead>0.0) {
            temp = kPiBy4;                      // positive argument:
            temp2 = kPiBy4Tail1;                // acos(x) = pi/4 - 0.5*asin(arg,argl)
            temp3 = kPiBy4Tail2;
        }
        else {
            temp = k3PiBy4;                     // negative argument:
            temp2 = k3PiBy4Tail1;               // acos(x) = 3pi/4 +
            temp3 = k3PiBy4Tail2;               // 0.5*asin(arg,argl)
            reshi = -reshi;
            resbot = -resbot;
            resinf = -resinf;
        }

        res = __FNMSUB( 0.5, reshi, temp );     // temp - 0.5*reshi
        reslow = __FNMSUB( 0.5, reshi, temp - res ) ;
        resmid = __FNMSUB( 0.5, resbot, temp2 );

        if (fabs (temp2) > fabs (0.5*resbot)) {
            /* restiny = temp2 - resmid - 0.5*resbot + temp3 - 0.5*resinf; */
            restiny = __FNMSUB( 0.5, resbot, temp2 - resmid ) + __FNMSUB( 0.5, resinf, temp3 );
        }
        else {
            /* restiny = temp2 - (0.5*resbot + resmid) + temp3 - 0.5*resinf; */
            restiny = temp2 - __FMADD( 0.5, resbot, resmid ) + __FNMSUB( 0.5, resinf, temp3 );
        }

        p = __FADD( reslow, resmid );
        q = reslow - p + resmid;
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);

        ddx.d[0] = __FADD( reshi, resbot );
        ddx.d[1] = (reshi - ddx.d[0]) + resbot;

        FESETENVD(fenv);
        return (ddx.f);
    }
    else {      // "large arguments", .866 < abs(xHead,xTail) <= 1.0

        if ((xHead - 1.0 + xTail) == 0.0) {     // for input of 1.0, return zero
            FESETENVD(fenv);
            return (long double) 0.0;
        }

        if ((xHead + 1.0 - xTail) == 0.0) {     // for input of -1.0, return Pi
            ddx.d[0] = kPi;
            ddx.d[1] = kPiTail1;
            FESETENVD(fenv);
            return ddx.f;
        }

        if (xHead < 0.0) {                      // Use absolute value of input
            temp = -xHead;                      // to work only in the first
            temp2 = -xTail;                     // quadrant
        }
        else {                                  // For input x > 0.5, use identity
            temp = xHead;                       // asin(x) =
            temp2 = xTail;                      // pi/2 - 2 asin(sqrt((1-x)/2))
        }

        h = __FNMSUB( 0.5, temp, 0.5 );         // 0.5 - 0.5*temp; exact
        argsq = __FNMSUB( 0.5, temp2, h );      // h - 0.5*temp2
        argsq2 = __FNMSUB( 0.5, temp2, h - argsq );  // square of reduced arg, exact

        ddx.f = __sqrtldextra(argsq , argsq2, &sqrtextra);  // the new argument; extra bits kept

        sum = asinPtr[25].Head;                 // Taylor Series (half the terms suffice here)

        for (i = 24; i >= kNterms/2+2; i--)     // high-order terms in working precision
            sum = __FMADD( sum, argsq, asinPtr[i].Head );

        sumt = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head );
        suml = __FMADD( sum, argsq, asinPtr[kNterms/2+1].Head - sumt );
        sum = sumt;

        for (i = 1; i <= kNterms/2; i++) {      // remaining terms in quad precision
            temp = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head );
            /* suml = asinPtr[kNterms/2-i+1].Head - temp + sum*argsq +
                      (asinPtr[kNterms/2-i+1].Tail + sum*argsq2 + suml*argsq); */
            suml = __FMADD( sum, argsq, asinPtr[kNterms/2-i+1].Head - temp ) +
                   __FMADD( suml, argsq, __FMADD( sum, argsq2, asinPtr[kNterms/2-i+1].Tail ) );
            sum = temp;
        }

        arg = ddx.d[0];
        argl = ddx.d[1];

        prodlow = __FMADD( suml, argsq, sum*argsq2 );   // mult. by arg^2
        prod = __FMADD( sum, argsq, prodlow );
        prodlow = __FMSUB( sum, argsq, prod ) + prodlow;
        temp2 = __FMADD( prodlow, arg, prod*argl );     // multiply by arg
        temp = __FMADD( prod, arg, temp2 );
        temp2 = __FMSUB( prod, arg, temp ) + temp2;

        res = __FADD( arg, temp );              // add arg for asin(arg)
        reslow = (arg - res + temp);            // exact
        resmid = __FADD( argl, temp2 );
        restiny = argl - resmid + temp2 + sqrtextra;    // fold in the sqrt's extra bits
        p = __FADD( reslow, resmid );           // sum of middle terms
        q = reslow - p + resmid;                // possible residual
        reshi = __FADD( res, p );
        resbot = __FADD( (res - reshi + p), (q + restiny) );

        if (xHead > 0) {                        // for first quadrant angles,
            ddx.d[0] = __FMADD( 2.0, reshi, 2.0*resbot );  // we are finished
            ddx.d[1] = __FMADD( 2.0, resbot, __FMSUB( 2.0, reshi, ddx.d[0] ) );
            FESETENVD(fenv);
            return (ddx.f);
        }

        resinf = (res - reshi +p ) - resbot + (q + restiny);  // for second quadrant need to
        res = __FNMSUB( 2.0, reshi, kPi );      // subtract from Pi
        reslow = __FNMSUB( 2.0, reshi, kPi - res );
        resmid = __FNMSUB( 2.0, resbot, kPiTail1 );

        if (kPiTail1 > fabs(2.0*resbot))
            /* restiny = kPiTail1 - resmid - (2.0*resbot) + (kPiTail2 - 2.0*resinf); */
            restiny = __FNMSUB( 2.0, resbot, kPiTail1 - resmid ) + __FNMSUB( 2.0, resinf, kPiTail2 );
        else
            /* restiny = kPiTail1 - (resmid + (2.0*resbot)) + (kPiTail2 - 2.0*resinf); */
            restiny = kPiTail1 - __FMADD( 2.0, resbot, resmid ) + __FNMSUB( 2.0, resinf, kPiTail2 );

        p = __FADD( reslow, resmid );
        q = reslow - p + resmid;
        reshi = __FADD( res, p );
        resbot = (res - reshi + p) + (q + restiny);

        ddx.d[0] = __FADD( reshi, resbot );
        ddx.d[1] = (reshi - ddx.d[0]) + resbot;

        FESETENVD(fenv);
        return (ddx.f);
    }
}
#endif