this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22//
23// SqrtDD.c
24//
25// Double-double Function Library
26// Copyright: � 1995-96 by Apple Computer, Inc., all rights reserved
27//
28// long double sqrtl( long double x );
29//
30
31#include "math.h"
32#include "fp_private.h"
33#if (__WANT_LONG_DOUBLE_FORMAT__ - 0L == 128L)
34#include "DD.h"
35
36// sqrttable[256] contains initial estimates for the high eight significand
37// bits for Sqrt(x) and 0.5 times its reciprocal.
38
39static const unsigned short sqrttable[256] = {
40 0x6b69, 0x6c68, 0x6e67, 0x6f65, 0x7064, 0x7263, 0x7361, 0x7460,
41 0x765f, 0x775d, 0x795c, 0x7a5b, 0x7b5a, 0x7d58, 0x7e57, 0x7f56,
42 0x8155, 0x8254, 0x8352, 0x8551, 0x8650, 0x874f, 0x894e, 0x8a4d,
43 0x8b4c, 0x8c4b, 0x8e4a, 0x8f48, 0x9047, 0x9246, 0x9345, 0x9444,
44 0x9543, 0x9742, 0x9841, 0x9940, 0x9a3f, 0x9c3e, 0x9d3d, 0x9e3c,
45 0x9f3c, 0xa13b, 0xa23a, 0xa339, 0xa438, 0xa637, 0xa736, 0xa835,
46 0xa934, 0xaa33, 0xac33, 0xad32, 0xae31, 0xaf30, 0xb02f, 0xb12e,
47 0xb32e, 0xb42d, 0xb52c, 0xb62b, 0xb72a, 0xb92a, 0xba29, 0xbb28,
48 0xbc27, 0xbd26, 0xbe26, 0xbf25, 0xc124, 0xc223, 0xc323, 0xc422,
49 0xc521, 0xc621, 0xc720, 0xc81f, 0xca1e, 0xcb1e, 0xcc1d, 0xcd1c,
50 0xce1c, 0xcf1b, 0xd01a, 0xd11a, 0xd219, 0xd418, 0xd518, 0xd617,
51 0xd716, 0xd816, 0xd915, 0xda14, 0xdb14, 0xdc13, 0xdd13, 0xde12,
52 0xdf11, 0xe111, 0xe210, 0xe310, 0xe40f, 0xe50e, 0xe60e, 0xe70d,
53 0xe80d, 0xe90c, 0xea0b, 0xeb0b, 0xec0a, 0xed0a, 0xee09, 0xef09,
54 0xf008, 0xf108, 0xf207, 0xf306, 0xf406, 0xf505, 0xf605, 0xf704,
55 0xf804, 0xf903, 0xfa03, 0xfb02, 0xfc02, 0xfd01, 0xfe01, 0xff00,
56 0x00ff, 0x01fd, 0x02fb, 0x03f9, 0x04f7, 0x05f5, 0x06f3, 0x07f2,
57 0x08f0, 0x09ee, 0x0aec, 0x0bea, 0x0ce9, 0x0de7, 0x0ee5, 0x0fe4,
58 0x10e2, 0x11e0, 0x12df, 0x13dd, 0x14db, 0x15da, 0x16d8, 0x17d7,
59 0x17d5, 0x18d4, 0x19d2, 0x1ad1, 0x1bcf, 0x1cce, 0x1dcc, 0x1ecb,
60 0x1fc9, 0x20c8, 0x20c6, 0x21c5, 0x22c4, 0x23c2, 0x24c1, 0x25c0,
61 0x26be, 0x27bd, 0x27bc, 0x28ba, 0x29b9, 0x2ab8, 0x2bb7, 0x2cb5,
62 0x2db4, 0x2db3, 0x2eb2, 0x2fb0, 0x30af, 0x31ae, 0x32ad, 0x33ac,
63 0x33aa, 0x34a9, 0x35a8, 0x36a7, 0x37a6, 0x37a5, 0x38a4, 0x39a3,
64 0x3aa2, 0x3ba0, 0x3c9f, 0x3c9e, 0x3d9d, 0x3e9c, 0x3f9b, 0x409a,
65 0x4099, 0x4198, 0x4297, 0x4396, 0x4495, 0x4494, 0x4593, 0x4692,
66 0x4791, 0x4890, 0x488f, 0x498e, 0x4a8d, 0x4b8c, 0x4b8c, 0x4c8b,
67 0x4d8a, 0x4e89, 0x4e88, 0x4f87, 0x5086, 0x5185, 0x5284, 0x5283,
68 0x5383, 0x5482, 0x5581, 0x5580, 0x567f, 0x577e, 0x587e, 0x587d,
69 0x597c, 0x5a7b, 0x5b7a, 0x5b79, 0x5c79, 0x5d78, 0x5d77, 0x5e76,
70 0x5f76, 0x6075, 0x6074, 0x6173, 0x6272, 0x6372, 0x6371, 0x6470,
71 0x656f, 0x656f, 0x666e, 0x676d, 0x686d, 0x686c, 0x696b, 0x6a6a
72 };
73
74
75
76//****************************************************************************
77// FUNCTION: long double __sqrtldextra( double x, double y, double *pextra )
78//
79// DESCRIPTION: treats x and y as the head and tail, respectively of a long
80// double argument X. Returns the square root of X rounded to
81// long double format and sets *pextra to a double value which
82// contains a correction factor which contains about 8 extra
83// bits of precision for the square root. This extra precision
84// is used by the long double ArcCos implementation.
85//
86// The main square root refinement algorithm is that of P.
87// Markstein, IBM J. R&D, January, 1990, p.117.
88//
89// ASSUMPTIONS: x and y represent a finite positive long double in canonical
90// form and rounding direction is currently set to the default
91// (IEEE round-to-nearest) mode.
92//
93//***************************************************************************
94
95long double __sqrtldextra( double x, double y, double *pextra )
96{
97 DblDbl zDD;
98 Double xD, guessD, recipD;
99 double scale, guess, recip, diff, recip2, epsilon, guessnew, gsq, guesslow;
100 double gsqlow, gg, gmid, gmidlow, recipnew, diff1, diff3, diff3x, diff3a;
101 double diff3ax, diff4, diff5, glow2, diff6, diff7, firstfix, fixup;
102 uint32_t ghead, reciphead, ival;
103 int index;
104
105 const Double scaleupD = {{0x5ff00000,0x0}};
106 const Double scaledownD = {{0x1ff00000,0x0}};
107 const Double adjustupD = {{0x4ff00000,0x0}};
108 const Double adjustdownD = {{0x2ff00000,0x0}};
109
110 xD.f = x;
111 scale = 1.0;
112
113 if (xD.i[0] < 0x07000000u) { // tiny argument
114 x *= scaleupD.f; // scale up
115 y *= scaleupD.f;
116 xD.f = x;
117 scale = adjustdownD.f; // final scale factor
118 }
119 else if (xD.i[0] > 0x7fdfffffu) { // huge argument
120 x *= scaledownD.f; // scale down away from
121 y *= scaledownD.f; // overflow threshold
122 xD.f = x;
123 scale = adjustupD.f; // final scale factor
124 }
125
126 // estimate exponents of square root and 0.5*reciprocal square root
127 ghead = ((xD.i[0] + 0x3ff00000u) >> 1) & 0x7ff00000u;
128 reciphead = 0x7fc00000u - ghead;
129
130 // initialize initial significand estimates for square root and 0.5*reciprocal
131 // from sqrttable entry indexed based upon significand of x and low exponent
132 // bit
133 index = (int)((xD.i[0] >> 13) & 0xffu);
134 ival = (uint32_t) sqrttable[index];
135 guessD.i[1] = 0u;
136 guessD.i[0] = ghead + ((ival & 0xff00u) << 4);
137 recipD.i[0] = reciphead + ((ival & 0xffu) << 12);
138 recipD.i[1] = 0u;
139
140 guess = guessD.f;
141 recip = recipD.f;
142
143 // iterate to refine square root and its reciprocal (MAF required)
144 diff = __FNMSUB( guess, guess, x ); // x - guess*guess;
145 recip2 = recip + recip; // recip2 is approx 1.0/guess
146 guess = __FMADD( diff, recip, guess ); //guess + diff*recip; // 16-bit square root
147 epsilon = __FNMSUB( recip, guess, 0.5 ); // 0.5 - recip*guess; // begin Newton-Raphson iteration
148 diff = __FNMSUB( guess, guess, x ); // x - guess*guess;
149 recip = __FMADD( epsilon, recip2, recip ); // recip + epsilon*recip2; // 16-bit reciprocal
150 guess = __FMADD( recip, diff, guess ); // guess + recip*diff; // 32-bit square root
151 recip2 = recip + recip;
152 epsilon = __FNMSUB( recip, guess, 0.5 ); // 0.5 - recip*guess;
153 diff = __FNMSUB( guess, guess, x ) + y; // (x - guess*guess) + y;
154 recip = __FMADD( epsilon, recip2, recip ); // 32-bit reciprocal
155 guessnew = __FMADD( recip, diff, guess ); // guess + recip*diff; // 53-bit square root
156 recip2 = recip + recip;
157 gsq = __FMUL( guessnew, guessnew ); // NOTE: Departure from P. Markstein
158 // for greater accuracy.
159 guesslow = __FMADD( recip, diff, (guess - guessnew) ); // (guess - guessnew) + recip*diff;
160 gsqlow = __FMSUB( guessnew, guessnew, gsq );// guessnew*guessnew - gsq;
161 gg = guessnew + guessnew; // (guessnew,guesslow)^2 must be
162 // computed to sextuple precision
163 gmid = __FMUL( gg, guesslow ); // in order to control errors to
164 gmidlow = __FMSUB( gg, guesslow, gmid ); // gg*guesslow - gmid; // less than 1/2 ulp
165 epsilon = __FNMSUB( recip, guessnew, 0.5 ); // 0.5 - recip*guessnew;
166 recip2 = recip + recip;
167 recipnew = __FMADD( epsilon, recip2, recip );
168 diff1 = x - gsq; // exact
169 diff3 = __FSUB( diff1, gmid ); // not necessarily exact
170 diff3x = diff1 - diff3 - gmid; // usually inexact
171
172 diff3a = __FSUB( diff3, gsqlow ); // not necessarily exact
173 diff3ax = diff3 - (diff3a + gsqlow); // exact
174 diff4 = diff3a + y; // usually inexact
175
176 diff5 = diff4 - gmidlow; // error < ulp/1024
177 glow2 = guesslow*guesslow; // error < ulp/(2^50)
178 diff6 = __FSUB( diff5, glow2 ); // diff5 - glow2; // error < ulp/1024
179 diff7 = diff6 + (diff3x + diff3ax); // error < ulp/1024
180 firstfix = recipnew*diff7; // total error < ulp/256
181 fixup = __FADD( guesslow, firstfix ); // guesslow + firstfix;
182 guess = __FADD( guessnew, fixup ); // put in canonical form
183 fixup = guessnew - guess + fixup;
184
185 zDD.d[0] = guess*scale; // scale result and extra bits;
186 zDD.d[1] = fixup*scale;
187 *pextra = ((guesslow - fixup) + firstfix)*scale;
188 return (zDD.f); // return long double square root
189}
190
191long double sqrtl( long double x )
192{
193 DblDbl xDD;
194 long double y;
195 double envD;
196 double extra;
197 uint32_t hibits;
198
199 xDD.f = x;
200 hibits = xDD.i[0]; // high 32 bits of head
201
202 // Non-trivial case (positive finite x) call __sqrtldextra with default
203 // rounding in effect and discard extra precision bits.
204
205 if ((xDD.d[0] != 0.0) && (hibits < 0x7ff00000u)) {
206 FEGETENVD(envD);
207 FESETENVD(0.0);
208 y = __sqrtldextra(xDD.d[0],xDD.d[1],&extra);
209 FESETENVD(envD); // restore caller's env
210 return (y);
211 }
212
213 // Separate edge cases into trivial and exceptional cases
214
215 if ((hibits < 0x80000000u) || (xDD.d[0] == 0.0) || (xDD.d[0] != xDD.d[0]))
216 return (x); // zero, +INF, or NaN
217
218 // Negative ordered arguments result in NaN;
219 xDD.i[0] = 0x7ff80000u; // return value is NaN
220 xDD.i[1] = 0u;
221 xDD.i[2] = 0x7ff80000u;
222 xDD.i[3] = 0u;
223 return (xDD.f); // return NaN
224}
225
226#include "math.h"
227long double cbrtl( long double x )
228{
229 DblDbl xDD;
230 double arg, guess;
231 long double ldguess, ldguess2, sqldguess;
232 const long double oneThird = 0.3333333333333333333333333333333L;
233
234 xDD.f = x;
235
236 arg = xDD.d[0];
237
238 if (arg != arg || arg == 0.0 || arg == INFINITY || -arg == INFINITY)
239 return x;
240
241 guess = cbrt(arg);
242
243 xDD.d[0] = guess;
244 xDD.d[1] = 0.0;
245
246 ldguess = xDD.f;
247
248 sqldguess = ldguess * ldguess;
249 ldguess2 = ldguess + ldguess;
250
251 return (ldguess2 + x/sqldguess)*oneThird;
252}
253
254#endif