this repo has no description
at fixPythonPipStalling 254 lines 10 kB view raw
1/* 2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_LICENSE_HEADER_START@ 5 * 6 * The contents of this file constitute Original Code as defined in and 7 * are subject to the Apple Public Source License Version 1.1 (the 8 * "License"). You may not use this file except in compliance with the 9 * License. Please obtain a copy of the License at 10 * http://www.apple.com/publicsource and read it before using this file. 11 * 12 * This Original Code and all software distributed under the License are 13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER 14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the 17 * License for the specific language governing rights and limitations 18 * under the License. 19 * 20 * @APPLE_LICENSE_HEADER_END@ 21 */ 22// 23// SqrtDD.c 24// 25// Double-double Function Library 26// Copyright: � 1995-96 by Apple Computer, Inc., all rights reserved 27// 28// long double sqrtl( long double x ); 29// 30 31#include "math.h" 32#include "fp_private.h" 33#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L) 34#include "DD.h" 35 36// sqrttable[256] contains initial estimates for the high eight significand 37// bits for Sqrt(x) and 0.5 times its reciprocal. 38 39static const unsigned short sqrttable[256] = { 40 0x6b69, 0x6c68, 0x6e67, 0x6f65, 0x7064, 0x7263, 0x7361, 0x7460, 41 0x765f, 0x775d, 0x795c, 0x7a5b, 0x7b5a, 0x7d58, 0x7e57, 0x7f56, 42 0x8155, 0x8254, 0x8352, 0x8551, 0x8650, 0x874f, 0x894e, 0x8a4d, 43 0x8b4c, 0x8c4b, 0x8e4a, 0x8f48, 0x9047, 0x9246, 0x9345, 0x9444, 44 0x9543, 0x9742, 0x9841, 0x9940, 0x9a3f, 0x9c3e, 0x9d3d, 0x9e3c, 45 0x9f3c, 0xa13b, 0xa23a, 0xa339, 0xa438, 0xa637, 0xa736, 0xa835, 46 0xa934, 0xaa33, 0xac33, 0xad32, 0xae31, 0xaf30, 0xb02f, 0xb12e, 47 0xb32e, 0xb42d, 0xb52c, 0xb62b, 0xb72a, 0xb92a, 0xba29, 0xbb28, 48 0xbc27, 0xbd26, 0xbe26, 0xbf25, 0xc124, 0xc223, 0xc323, 0xc422, 49 0xc521, 0xc621, 0xc720, 0xc81f, 0xca1e, 0xcb1e, 0xcc1d, 0xcd1c, 50 0xce1c, 0xcf1b, 0xd01a, 0xd11a, 0xd219, 0xd418, 0xd518, 0xd617, 51 0xd716, 0xd816, 0xd915, 0xda14, 0xdb14, 0xdc13, 0xdd13, 0xde12, 52 0xdf11, 0xe111, 0xe210, 0xe310, 0xe40f, 0xe50e, 0xe60e, 0xe70d, 53 0xe80d, 0xe90c, 0xea0b, 0xeb0b, 0xec0a, 0xed0a, 0xee09, 0xef09, 54 0xf008, 0xf108, 0xf207, 0xf306, 0xf406, 0xf505, 0xf605, 0xf704, 55 0xf804, 0xf903, 0xfa03, 0xfb02, 0xfc02, 0xfd01, 0xfe01, 0xff00, 56 0x00ff, 0x01fd, 0x02fb, 0x03f9, 0x04f7, 0x05f5, 0x06f3, 0x07f2, 57 0x08f0, 0x09ee, 0x0aec, 0x0bea, 0x0ce9, 0x0de7, 0x0ee5, 0x0fe4, 58 0x10e2, 0x11e0, 0x12df, 0x13dd, 0x14db, 0x15da, 0x16d8, 0x17d7, 59 0x17d5, 0x18d4, 0x19d2, 0x1ad1, 0x1bcf, 0x1cce, 0x1dcc, 0x1ecb, 60 0x1fc9, 0x20c8, 0x20c6, 0x21c5, 0x22c4, 0x23c2, 0x24c1, 0x25c0, 61 0x26be, 0x27bd, 0x27bc, 0x28ba, 0x29b9, 0x2ab8, 0x2bb7, 0x2cb5, 62 0x2db4, 0x2db3, 0x2eb2, 0x2fb0, 0x30af, 0x31ae, 0x32ad, 0x33ac, 63 0x33aa, 0x34a9, 0x35a8, 0x36a7, 0x37a6, 0x37a5, 0x38a4, 0x39a3, 64 0x3aa2, 0x3ba0, 0x3c9f, 0x3c9e, 0x3d9d, 0x3e9c, 0x3f9b, 0x409a, 65 0x4099, 0x4198, 0x4297, 0x4396, 0x4495, 0x4494, 0x4593, 0x4692, 66 0x4791, 0x4890, 0x488f, 0x498e, 0x4a8d, 0x4b8c, 0x4b8c, 0x4c8b, 67 0x4d8a, 0x4e89, 0x4e88, 0x4f87, 0x5086, 0x5185, 0x5284, 0x5283, 68 0x5383, 0x5482, 0x5581, 0x5580, 0x567f, 0x577e, 0x587e, 0x587d, 69 0x597c, 0x5a7b, 0x5b7a, 0x5b79, 0x5c79, 0x5d78, 0x5d77, 0x5e76, 70 0x5f76, 0x6075, 0x6074, 0x6173, 0x6272, 0x6372, 0x6371, 0x6470, 71 0x656f, 0x656f, 0x666e, 0x676d, 0x686d, 0x686c, 0x696b, 0x6a6a 72 }; 73 74 75 76//**************************************************************************** 77// FUNCTION: long double __sqrtldextra( double x, double y, double *pextra ) 78// 79// DESCRIPTION: treats x and y as the head and tail, respectively of a long 80// double argument X. Returns the square root of X rounded to 81// long double format and sets *pextra to a double value which 82// contains a correction factor which contains about 8 extra 83// bits of precision for the square root. This extra precision 84// is used by the long double ArcCos implementation. 85// 86// The main square root refinement algorithm is that of P. 87// Markstein, IBM J. R&D, January, 1990, p.117. 88// 89// ASSUMPTIONS: x and y represent a finite positive long double in canonical 90// form and rounding direction is currently set to the default 91// (IEEE round-to-nearest) mode. 92// 93//*************************************************************************** 94 95long double __sqrtldextra( double x, double y, double *pextra ) 96{ 97 DblDbl zDD; 98 Double xD, guessD, recipD; 99 double scale, guess, recip, diff, recip2, epsilon, guessnew, gsq, guesslow; 100 double gsqlow, gg, gmid, gmidlow, recipnew, diff1, diff3, diff3x, diff3a; 101 double diff3ax, diff4, diff5, glow2, diff6, diff7, firstfix, fixup; 102 uint32_t ghead, reciphead, ival; 103 int index; 104 105 const Double scaleupD = {{0x5ff00000,0x0}}; 106 const Double scaledownD = {{0x1ff00000,0x0}}; 107 const Double adjustupD = {{0x4ff00000,0x0}}; 108 const Double adjustdownD = {{0x2ff00000,0x0}}; 109 110 xD.f = x; 111 scale = 1.0; 112 113 if (xD.i[0] < 0x07000000u) { // tiny argument 114 x *= scaleupD.f; // scale up 115 y *= scaleupD.f; 116 xD.f = x; 117 scale = adjustdownD.f; // final scale factor 118 } 119 else if (xD.i[0] > 0x7fdfffffu) { // huge argument 120 x *= scaledownD.f; // scale down away from 121 y *= scaledownD.f; // overflow threshold 122 xD.f = x; 123 scale = adjustupD.f; // final scale factor 124 } 125 126 // estimate exponents of square root and 0.5*reciprocal square root 127 ghead = ((xD.i[0] + 0x3ff00000u) >> 1) & 0x7ff00000u; 128 reciphead = 0x7fc00000u - ghead; 129 130 // initialize initial significand estimates for square root and 0.5*reciprocal 131 // from sqrttable entry indexed based upon significand of x and low exponent 132 // bit 133 index = (int)((xD.i[0] >> 13) & 0xffu); 134 ival = (uint32_t) sqrttable[index]; 135 guessD.i[1] = 0u; 136 guessD.i[0] = ghead + ((ival & 0xff00u) << 4); 137 recipD.i[0] = reciphead + ((ival & 0xffu) << 12); 138 recipD.i[1] = 0u; 139 140 guess = guessD.f; 141 recip = recipD.f; 142 143 // iterate to refine square root and its reciprocal (MAF required) 144 diff = __FNMSUB( guess, guess, x ); // x - guess*guess; 145 recip2 = recip + recip; // recip2 is approx 1.0/guess 146 guess = __FMADD( diff, recip, guess ); //guess + diff*recip; // 16-bit square root 147 epsilon = __FNMSUB( recip, guess, 0.5 ); // 0.5 - recip*guess; // begin Newton-Raphson iteration 148 diff = __FNMSUB( guess, guess, x ); // x - guess*guess; 149 recip = __FMADD( epsilon, recip2, recip ); // recip + epsilon*recip2; // 16-bit reciprocal 150 guess = __FMADD( recip, diff, guess ); // guess + recip*diff; // 32-bit square root 151 recip2 = recip + recip; 152 epsilon = __FNMSUB( recip, guess, 0.5 ); // 0.5 - recip*guess; 153 diff = __FNMSUB( guess, guess, x ) + y; // (x - guess*guess) + y; 154 recip = __FMADD( epsilon, recip2, recip ); // 32-bit reciprocal 155 guessnew = __FMADD( recip, diff, guess ); // guess + recip*diff; // 53-bit square root 156 recip2 = recip + recip; 157 gsq = __FMUL( guessnew, guessnew ); // NOTE: Departure from P. Markstein 158 // for greater accuracy. 159 guesslow = __FMADD( recip, diff, (guess - guessnew) ); // (guess - guessnew) + recip*diff; 160 gsqlow = __FMSUB( guessnew, guessnew, gsq );// guessnew*guessnew - gsq; 161 gg = guessnew + guessnew; // (guessnew,guesslow)^2 must be 162 // computed to sextuple precision 163 gmid = __FMUL( gg, guesslow ); // in order to control errors to 164 gmidlow = __FMSUB( gg, guesslow, gmid ); // gg*guesslow - gmid; // less than 1/2 ulp 165 epsilon = __FNMSUB( recip, guessnew, 0.5 ); // 0.5 - recip*guessnew; 166 recip2 = recip + recip; 167 recipnew = __FMADD( epsilon, recip2, recip ); 168 diff1 = x - gsq; // exact 169 diff3 = __FSUB( diff1, gmid ); // not necessarily exact 170 diff3x = diff1 - diff3 - gmid; // usually inexact 171 172 diff3a = __FSUB( diff3, gsqlow ); // not necessarily exact 173 diff3ax = diff3 - (diff3a + gsqlow); // exact 174 diff4 = diff3a + y; // usually inexact 175 176 diff5 = diff4 - gmidlow; // error < ulp/1024 177 glow2 = guesslow*guesslow; // error < ulp/(2^50) 178 diff6 = __FSUB( diff5, glow2 ); // diff5 - glow2; // error < ulp/1024 179 diff7 = diff6 + (diff3x + diff3ax); // error < ulp/1024 180 firstfix = recipnew*diff7; // total error < ulp/256 181 fixup = __FADD( guesslow, firstfix ); // guesslow + firstfix; 182 guess = __FADD( guessnew, fixup ); // put in canonical form 183 fixup = guessnew - guess + fixup; 184 185 zDD.d[0] = guess*scale; // scale result and extra bits; 186 zDD.d[1] = fixup*scale; 187 *pextra = ((guesslow - fixup) + firstfix)*scale; 188 return (zDD.f); // return long double square root 189} 190 191long double sqrtl( long double x ) 192{ 193 DblDbl xDD; 194 long double y; 195 double envD; 196 double extra; 197 uint32_t hibits; 198 199 xDD.f = x; 200 hibits = xDD.i[0]; // high 32 bits of head 201 202 // Non-trivial case (positive finite x) call __sqrtldextra with default 203 // rounding in effect and discard extra precision bits. 204 205 if ((xDD.d[0] != 0.0) && (hibits < 0x7ff00000u)) { 206 FEGETENVD(envD); 207 FESETENVD(0.0); 208 y = __sqrtldextra(xDD.d[0],xDD.d[1],&extra); 209 FESETENVD(envD); // restore caller's env 210 return (y); 211 } 212 213 // Separate edge cases into trivial and exceptional cases 214 215 if ((hibits < 0x80000000u) || (xDD.d[0] == 0.0) || (xDD.d[0] != xDD.d[0])) 216 return (x); // zero, +INF, or NaN 217 218 // Negative ordered arguments result in NaN; 219 xDD.i[0] = 0x7ff80000u; // return value is NaN 220 xDD.i[1] = 0u; 221 xDD.i[2] = 0x7ff80000u; 222 xDD.i[3] = 0u; 223 return (xDD.f); // return NaN 224} 225 226#include "math.h" 227long double cbrtl( long double x ) 228{ 229 DblDbl xDD; 230 double arg, guess; 231 long double ldguess, ldguess2, sqldguess; 232 const long double oneThird = 0.3333333333333333333333333333333L; 233 234 xDD.f = x; 235 236 arg = xDD.d[0]; 237 238 if (arg != arg || arg == 0.0 || arg == INFINITY || -arg == INFINITY) 239 return x; 240 241 guess = cbrt(arg); 242 243 xDD.d[0] = guess; 244 xDD.d[1] = 0.0; 245 246 ldguess = xDD.f; 247 248 sqldguess = ldguess * ldguess; 249 ldguess2 = ldguess + ldguess; 250 251 return (ldguess2 + x/sqldguess)*oneThird; 252} 253 254#endif