this repo has no description
1/* This is an implementation of acos. It is written in standard C except:
2
3 double is expected be an IEEE 754 double-precision implementation.
4
5 "volatile" is used to attempt to force certain floating-point
6 operations to occur at run time (to generate exceptions that might not
7 be generated if the operations are performed at compile time).
8
9 long double is used for extended precision if it has at least 64 bits
10 of precision. Otherwise, double is used with some cumbersome routines
11 to provide multiple precision arithmetic, resulting in precise and
12 accurate but slow code. Other techniques, such as dividing the work
13 into smaller intervals, were tried briefly but did not yield
14 satisfactory solutions in the time allowed.
15
16 If the long double issues are worked out, it should be good enough to serve
17 as the libm acos with tolerable performance.
18*/
19
20
21// Include math.h to ensure we match the declarations.
22#include <math.h>
23
24// Include float.h so we can check the characteristics of the long double type.
25#include <float.h>
26#if 64 <= LDBL_MANT_DIG
27 #define UseLongDouble 1
28#endif
29
30
31/* Declare certain constants volatile to force the compiler to access them
32 when we reference them. This in turn forces arithmetic operations on them
33 to be performed at run time (or as if at run time). We use such operations
34 to generate invalid or inexact.
35*/
36static volatile const double Infinity = INFINITY;
37static volatile const double Tiny = 0x1p-1022;
38
39
40#if defined __STDC__ && 199901L <= __STDC_VERSION__ && !defined __GNUC__
41 // GCC does not currently support FENV_ACCESS. Maybe someday.
42 #pragma STDC FENV_ACCESS ON
43#endif
44
45
46#if !UseLongDouble
47
48
49 /* If we are not using long double arithmetic, define some subroutines
50 to provided extended precision using double arithmetic.
51
52 These routines are derived from CR-LIBM: A Library of correctly
53 rounded elementary functions in double-precision by Catherine
54 Daramy-Loirat, David Defour, Florent de Dinechin, Matthieu Gallet,
55 Nicolas Gast, Jean-Michel Muller.
56
57 The digits in the names indicate the precisions of the arguments and
58 results. For example, Add112 adds two numbers expressed with one
59 double each and returns a number expressed with two doubles.
60 */
61
62
63 // double2 represents a number equal to d0 + d1, with |d1| <= 1/2 ULP(d0).
64 typedef struct { double d0, d1; } double2;
65
66
67 // Return a + b given |b| < |a|.
68 static inline double2 Add112RightSmaller(double a, double b)
69 {
70 double d0 = a + b, d1 = b - (d0 - a);
71 return (double2) { d0, d1 };
72 }
73
74
75 // Return a * b, given |a|, |b| < 2**970.
76 static inline double2 Mul112(double a, double b)
77 {
78 static const double c = 0x1p27 + 1;
79
80 double
81 ap = a * c, bp = b * c,
82 a0 = (a-ap)+ap, b0 = (b-bp)+bp,
83 a1 = a - a0, b1 = b - b0,
84 d0 = a * b,
85 d1 = a0*b0 - d0 + a0*b1 + a1*b0 + a1*b1;
86 return (double2) { d0, d1 };
87 }
88
89
90 // Return a + b with relative error below 2**-103 given |b| < |a|.
91 static inline double2 Add212RightSmaller(double2 a, double b)
92 {
93 double
94 c0 = a.d0 + b,
95 c1 = a.d0 - c0 + b + a.d1,
96 d0 = c0 + c1,
97 d1 = c0 - d0 + c1;
98 return (double2) { d0, d1 };
99 }
100
101
102 /* Return a - b with relative error below 2**-103 and then rounded to a
103 double given |b| < |a|.
104 */
105 static inline double Sub211RightSmaller(double2 a, double b)
106 {
107 double
108 c0 = a.d0 - b,
109 c1 = a.d0 - c0 - b + a.d1,
110 d0 = c0 + c1;
111 return d0;
112 }
113
114
115 /* Return a - b with relative error below 2**-103 and then rounded to
116 double given |b| < |a|.
117 */
118 static inline double Sub221RightSmaller(double2 a, double2 b)
119 {
120 double
121 c0 = a.d0 - b.d0,
122 c1 = a.d0 - c0 - b.d0 - b.d1 + a.d1,
123 d0 = c0 + c1;
124 return d0;
125 }
126
127
128 /* Return approximately a * b - 1 given |a|, |b| < 2**970 and a * b is
129 very near 1.
130 */
131 static inline double Mul121Special(double a, double2 b)
132 {
133 static const double c = 0x1p27 + 1;
134
135 double
136 ap = a * c, bp = b.d0 * c,
137 a0 = (a-ap)+ap, b0 = (b.d0-bp)+bp,
138 a1 = a - a0, b1 = b.d0 - b0,
139 m1 = a0*b0 - 1 + a0*b1 + a1*b0 + a1*b1 + a*b.d1;
140 return m1;
141 }
142
143
144 // Return approximately a * b given |a|, |b| < 2**970.
145 static inline double Mul221(double2 a, double2 b)
146 {
147 static const double c = 0x1p27 + 1;
148
149 double
150 ap = a.d0 * c, bp = b.d0 * c,
151 a0 = (a.d0-ap)+ap, b0 = (b.d0-bp)+bp,
152 a1 = a.d0 - a0, b1 = b.d0 - b0,
153 m0 = a.d0 * b.d0,
154 m1 = a0*b0 - m0 + a0*b1 + a1*b0 + a1*b1 + (a.d0*b.d1 + a.d1*b.d0),
155 d0 = m0 + m1;
156 return d0;
157 }
158
159
160#endif // !UseLongDouble
161
162
163/* double acos(double x).
164
165 (This routine appears below, following some subroutines.)
166
167 Notes:
168
169 Citations in parentheses below indicate the source of a requirement.
170
171 "C" stands for ISO/IEC 9899:TC2.
172
173 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
174 requirements since it defers to C and requires errno behavior only if
175 we choose to support it by arranging for "math_errhandling &
176 MATH_ERRNO" to be non-zero, which we do not.
177
178 Return value:
179
180 For arccosine of 1, return +0 (C F.9.1.1).
181
182 For 1 < |x| (including infinity), return NaN (C F.9.1.1).
183
184 For a NaN, return the same NaN (C F.9 11 and 13). (If the NaN is a
185 signalling NaN, we return the "same" NaN quieted.)
186
187 Otherwise:
188
189 If the rounding mode is round-to-nearest, return arccosine(x)
190 faithfully rounded. This is not proven but seems likely.
191 Generally, the largest source of errors is the evaluation of the
192 polynomial using double precision. Some analysis might bound this
193 and prove faithful rounding. The largest observed error is .922
194 ULP.
195
196 Return a value in [0, pi] (C 7.12.4.1 3).
197
198 Not implemented: In other rounding modes, return arccosine(x)
199 possibly with slightly worse error, not necessarily honoring the
200 rounding mode (Ali Sazegari narrowing C F.9 10).
201
202 Exceptions:
203
204 May or may not raise inexact, even if the result is exact (C F.9 8).
205
206 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
207 of C 4.2.1) or 1 < |x| (including infinity) (C F.9.1.1) but not if the
208 input is a quiet NaN (C F.9 11).
209
210 May not raise exceptions otherwise (C F.9 9).
211
212 Properties:
213
214 Not yet proven: Monotonic.
215*/
216
217
218// Return arccosine(x) given |x| <= .4, with the same properties as acos.
219static double Center(double x)
220{
221 static const double
222 HalfPi = 0x3.243f6a8885a308d313198a2e03707344ap-1;
223
224 /* If x is small, generate inexact and return pi/2. We must do this
225 for very small x to avoid underflow when x is squared.
226 */
227 if (-0x1.8d313198a2e03p-53 <= x && x <= +0x1.8d313198a2e03p-53)
228 return HalfPi + Tiny;
229
230 static const double p03 = + .1666666666666251331848183;
231 static const double p05 = + .7500000000967090522908427e-1;
232 static const double p07 = + .4464285630020156622713320e-1;
233 static const double p09 = + .3038198238851575770651788e-1;
234 static const double p11 = + .2237115216935265224962544e-1;
235 static const double p13 = + .1736953298172084894468665e-1;
236 static const double p15 = + .1378527665685754961528021e-1;
237 static const double p17 = + .1277870997666947910124296e-1;
238 static const double p19 = + .4673473145155259234911049e-2;
239 static const double p21 = + .1951350766744288383625404e-1;
240
241 // Square x.
242 double x2 = x * x;
243
244 return HalfPi - (((((((((((
245 + p21) * x2
246 + p19) * x2
247 + p17) * x2
248 + p15) * x2
249 + p13) * x2
250 + p11) * x2
251 + p09) * x2
252 + p07) * x2
253 + p05) * x2
254 + p03) * x2 * x + x);
255}
256
257
258// Return arccosine(x) given .4 <= |x| <= .6, with the same properties as acos.
259static double Gap(double x)
260{
261 static const double p03 = + .1666666544260252354339083;
262 static const double p05 = + .7500058936188719422797382e-1;
263 static const double p07 = + .4462999611462664666589096e-1;
264 static const double p09 = + .3054999576148835435598555e-1;
265 static const double p11 = + .2090953485621966528477317e-1;
266 static const double p13 = + .2626916834046217573905021e-1;
267 static const double p15 = - .2496419961469848084029243e-1;
268 static const double p17 = + .1336320190979444903198404;
269 static const double p19 = - .2609082745402891409913617;
270 static const double p21 = + .4154485118940996442799104;
271 static const double p23 = - .3718481677216955169129325;
272 static const double p25 = + .1791132167840254903934055;
273
274 // Square x.
275 double x2 = x * x;
276
277 double poly = ((((((((((((
278 + p25) * x2
279 + p23) * x2
280 + p21) * x2
281 + p19) * x2
282 + p17) * x2
283 + p15) * x2
284 + p13) * x2
285 + p11) * x2
286 + p09) * x2
287 + p07) * x2
288 + p05) * x2
289 + p03) * x2 * x;
290
291 #if UseLongDouble
292 static const long double
293 HalfPi = 0x3.243f6a8885a308d313198a2e03707344ap-1L;
294 return HalfPi - (poly + (long double) x);
295 #else // #if UseLongDouble
296 static const double2
297 HalfPi = { 0x1.921fb54442d18p+0, 0x1.1a62633145c07p-54 };
298 return Sub221RightSmaller(HalfPi, Add112RightSmaller(x, poly));
299 #endif // #if UseLongDouble
300}
301
302
303// Return arccosine(x) given +.6 < x, with the same properties as acos.
304static double pTail(double x)
305{
306 if (1 <= x)
307 return 1 == x
308 // If x is 1, return zero.
309 ? 0
310 // If x is outside the domain, generate invalid and return NaN.
311 : Infinity - Infinity;
312
313 static const double p01 = - .2145900291823555067724496;
314 static const double p02 = + .8895931658903454714161991e-1;
315 static const double p03 = - .5037781062999805015401690e-1;
316 static const double p04 = + .3235271184788013959507217e-1;
317 static const double p05 = - .2125492340970560944012545e-1;
318 static const double p06 = + .1307107321829037349021838e-1;
319 static const double p07 = - .6921689208385164161272068e-2;
320 static const double p08 = + .2912114685670939037614086e-2;
321 static const double p09 = - .8899459104279910976564839e-3;
322 static const double p10 = + .1730883544880830573920551e-3;
323 static const double p11 = - .1594866672026418356538789e-4;
324
325 double t0 = (((((((((((
326 + p11) * x
327 + p10) * x
328 + p09) * x
329 + p08) * x
330 + p07) * x
331 + p06) * x
332 + p05) * x
333 + p04) * x
334 + p03) * x
335 + p02) * x
336 + p01) * x;
337
338 #if UseLongDouble
339 static const long double p00 = +1.5707956046853235350824205L;
340 return sqrtl(1-x) * (t0 + p00);
341 #else // #if UseLongDouble
342 static const double2
343 p00 = { 0x1.921fa926d2f24p0, +0x1.b4a23d0ecbb40p-59 };
344 /* p00.d1 might not be needed. However, omitting it brings the
345 sampled error to .872 ULP. We would need to prove this is okay.
346 */
347
348 // Estimate square root to double precision.
349 double e = 1 / sqrt(1-x);
350
351 // Refine estimate using Newton-Raphson.
352 double2 ex = Mul112(e, 1-x);
353 double e2x = Mul121Special(e, ex);
354 double2 t1 = Add212RightSmaller(ex, ex.d0 * -.5 * e2x);
355
356 // Return sqrt(1-x) * (t0 + p00).
357 return Mul221(t1, Add212RightSmaller(p00, t0));
358 #endif // #if UseLongDouble
359}
360
361
362// Return arccosine(x) given x < -.6, with the same properties as acos.
363static double nTail(double x)
364{
365 if (x <= -1)
366 return -1 == x
367 // If x is -1, generate inexact and return pi rounded toward zero.
368 ? 0x3.243f6a8885a308d313198a2e03707344ap0 + Tiny
369 // If x is outside the domain, generate invalid and return NaN.
370 : Infinity - Infinity;
371
372 static const double p00 = +1.5707956513160834076561054;
373 static const double p01 = + .2145907003920708442108238;
374 static const double p02 = + .8896369437915166409934895e-1;
375 static const double p03 = + .5039488847935731213671556e-1;
376 static const double p04 = + .3239698582040400391437898e-1;
377 static const double p05 = + .2133501549935443220662813e-1;
378 static const double p06 = + .1317423797769298396461497e-1;
379 static const double p07 = + .7016307696008088925432394e-2;
380 static const double p08 = + .2972670140131377611481662e-2;
381 static const double p09 = + .9157019394367251664320071e-3;
382 static const double p10 = + .1796407754831532447333023e-3;
383 static const double p11 = + .1670402962434266380655447e-4;
384
385 double poly = sqrt(1+x) * ((((((((((((
386 + p11) * x
387 + p10) * x
388 + p09) * x
389 + p08) * x
390 + p07) * x
391 + p06) * x
392 + p05) * x
393 + p04) * x
394 + p03) * x
395 + p02) * x
396 + p01) * x
397 + p00);
398
399 #if UseLongDouble
400 static const long double Pi = 0x3.243f6a8885a308d313198a2e03707344ap0L;
401 return Pi - poly;
402 #else // #if UseLongDouble
403 static const double2
404 Pi = { 0x1.921fb54442d18p+1, 0x1.1a62633145c07p-53 };
405 return Sub211RightSmaller(Pi, poly);
406 #endif // #if UseLongDouble
407}
408
409
410// See documentation above.
411double acos(double x)
412{
413 if (x < -.4)
414 if (x < -.6)
415 return nTail(x);
416 else
417 return Gap(x);
418 else if (x <= +.4)
419 return Center(x);
420 else
421 if (x <= +.6)
422 return Gap(x);
423 else
424 return pTail(x);
425}