this repo has no description
1/* sincostan.c -- sin, cos, and tan for standard math library.
2
3 These are preliminary versions of sin, cos, and tan written to provide
4 argument reduction using the correct value of pi, rather than pi rounded to
5 66-bits as used by the Intel trigonometric instructions. Versions to
6 enhance accuracy and speed may follow.
7*/
8
9
10#include <math.h>
11
12
13// Describe the destination floating-point type.
14#define SignificandBits 53
15#define ExponentBits 11
16
17
18/* Define a structure for accessing the internal components of a double.
19
20 s.exponent is the raw exponent field.
21
22 s.significand2 is the least two significant bits of the significand field.
23
24 The other fields are unused.
25*/
26typedef union
27{
28 double d;
29 struct
30 {
31 #if defined __BIG_ENDIAN__
32 unsigned int sign : 1;
33 unsigned int exponent : ExponentBits;
34 unsigned int significand0 : SignificandBits-1-32;
35 unsigned int significand1 : 30;
36 unsigned int significand2 : 2;
37 #else // defined __BIG_ENDIAN__
38 unsigned int significand2 : 2;
39 unsigned int significand1 : 30;
40 unsigned int significand0 : SignificandBits-1-32;
41 unsigned int exponent : ExponentBits;
42 unsigned int sign : 1;
43 #endif // defined __BIG_ENDIAN__
44 } s;
45} Double;
46
47
48/* Each of the following numbers is a strict upper bound on some technique
49 for calculating sin, cos, or tan. That is, the technique is known to apply
50 when |x| < bound, where x is the function argument. (ReduceFull also
51 requires that \x| be greater than some amount around 0x1p-622, to avoid
52 underflow or loss of accuracy.)
53
54 If |x| < BoundDenormal, x is denormal or zero.
55
56 If |x| < BoundTrivialSin, x is the correctly rounded sine of x.
57
58 If |x| < BoundTrivialCos, 1 is the correctly rounded cosine of x.
59
60 If |x| < BoundTrivialTan, x is the correctly rounded tangent of x.
61
62 If |x| < BoundPolynomial, sinp(x) or cosp(x) may be used for sin(x) or
63 cos(x), with no argument reduction.
64
65 If |x| < BoundMedium, ReduceMedium may be used to reduce x.
66
67 If |x| < BoundFull, ReduceFull may be used to reduce x.
68
69 Otherwise, x is an infinity or a NaN.
70*/
71#define BoundDenormal 0x1p-1022
72#define BoundTrivialSin 0x1.7137449123ef7p-26
73#define BoundTrivialCos 0x1.6A09E667F3BCDp-27
74#define BoundTrivialTan 0x1.250BFE1B082F5p-26
75#define BoundPolynomial 0x3.243f6a8885a31p-2
76#define BoundMedium 0x1.000013be57a40p19
77#define BoundFull INFINITY
78
79
80/* Below, we define methods to test whether a function argument, x, is within
81 bounds suitable for a techique for implementing the function. Several
82 methods are defined, using natural floating-point comparisons or using
83 integer comparisons on 32 or 64 bits of the floating-point representation.
84 Implementations may choose whichever method is best for performance.
85
86 To use, first prepare the key used for checking bounds:
87
88 BoundKeyType xk = Key(x);
89
90 That is done once at the start of a routine. Then, as many times as
91 desired, bounds are checked:
92
93 if (Within(xk, bound))
94 ...
95
96 The "bound" must be one of the names defined above, such as BoundMedium.
97
98 If Within(xk, bound) evaluates to true, then |x| < bound. The converse is
99 not necessarily true; if Within(xk, bound) is false, |x| might be less than
100 bound. This is because some implementations of Within do not check all
101 bits of xk and bound. They are intended only as fast screens that get the
102 majority of values. However, if floor(log[2](|x|)) < floor(log[2](bound)),
103 Within(xk, bound) is guaranteed to be true -- its evaluation includes at
104 least all of the floating-point exponent.
105*/
106#if defined __i386__ // (Add any other desired architectures.)
107
108 /* This method uses integer comparisons with the most significant 32 bits
109 of the IEEE-754 representation of the floating-point number.
110 */
111
112 #include <inttypes.h>
113
114 typedef uint32_t BoundKeyType;
115
116 typedef union
117 {
118 double d;
119 #if defined __BIG_ENDIAN__
120 struct { uint32_t key, notkey; } s;
121 #else
122 struct { uint32_t notkey, key; } s;
123 #endif
124 } DoubleForBound;
125
126 // Prepare constants containing the bounds in our desired form.
127 #define DefineBound(bound) \
128 static const DoubleForBound Double##bound = { bound };
129 DefineBound(BoundDenormal)
130 DefineBound(BoundTrivialSin)
131 DefineBound(BoundTrivialCos)
132 DefineBound(BoundTrivialTan)
133 DefineBound(BoundPolynomial)
134 DefineBound(BoundMedium)
135 DefineBound(BoundFull)
136
137 // Get the key for |x|.
138 static BoundKeyType Key(double x)
139 {
140 const DoubleForBound X = { x };
141 BoundKeyType t = X.s.key;
142 return t & 0x7fffffff;
143 }
144
145 #define Within(xk, bound) ((xk) < Double##bound.s.key)
146
147#elif defined __x86_64__ // (Add any other desired architectures.)
148
149 /* This method uses integer comparisons with all 64 bits of the IEEE-754
150 representation of the floating-point number.
151 */
152
153 #include <inttypes.h>
154
155 typedef uint64_t BoundKeyType;
156
157 typedef union
158 {
159 double d;
160 uint64_t key;
161 } DoubleForBound;
162
163 // Prepare constants containing the bounds in our desired form.
164 #define DefineBound(bound) \
165 static const DoubleForBound Double##bound = { bound };
166 DefineBound(BoundDenormal)
167 DefineBound(BoundTrivialSin)
168 DefineBound(BoundTrivialCos)
169 DefineBound(BoundTrivialTan)
170 DefineBound(BoundPolynomial)
171 DefineBound(BoundMedium)
172 DefineBound(BoundFull)
173
174 // Get the key for |x|.
175 static BoundKeyType Key(double x)
176 {
177 const DoubleForBound X = { x };
178 BoundKeyType t = X.key;
179 return t & 0x7fffffffffffffff;
180 }
181
182 #define Within(xk, bound) ((xk) < Double##bound.key)
183
184#else
185
186 // If no other method is selected, use regular floating-point comparison.
187
188 typedef double BoundKeyType;
189
190 #define DefineBound(bound)
191
192 // Get the key for |x|.
193 static BoundKeyType Key(double x)
194 {
195 #if defined __i386__ || defined __x86_64__
196 // On Intel, just clear the sign bit.
197 double xk = -0.;
198 __asm__("andnpd %[x], %[xk]" : [xk] "+x" (xk) : [x] "x" (x));
199 return xk;
200 #elif 1
201 return x < 0 ? -x : +x;
202 #else
203 return fabs(x);
204 #endif
205 }
206
207 #define Within(xk, bound) ((xk) < bound)
208
209#endif
210
211
212/* Several argument-reduction routines follow. Each has this specification:
213
214 static void Reduce<Name>(double *xp, int *a, double x).
215
216 Input:
217
218 x is a number to be reduced modulo pi/2. Each routine has
219 qualifications on what values it supports for x. Generally, x must not
220 be a NaN or infinity.
221
222 Output:
223
224 *xp is set to a residue of x modulo pi/2. Mostly, -pi/4 <= x <= +pi/4,
225 but *xp may be outside this interval by as much as 3.24128e-11.
226
227 *a is set to the arc of the circle x is in. 0 <= *a < 4.
228
229 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
230 integer k. This holds even if x is outside [-pi/4, pi/4]; *a matches
231 *xp.
232
233 This means all the reduction routines are returning *xp in radians.
234 ReduceFull goes through steps in which it has period-fraction units (0 to 1
235 corresponding to 0 to pi/2 radians), so the restoration of radians is
236 forced and introduces additional rounding error and execution time. We
237 might want to do something about that.
238
239 Since *xp is a single double, it is insufficient to represent the residue
240 precisely enough to compute a faithfully rounded function. For that, we
241 will have to return extended precision, perhaps a long double or perhaps
242 two doubles.
243*/
244
245
246/* static void ReduceFull(double *xp, int *a, double x).
247
248 Input:
249
250 x is a number to be reduced modulo pi/2. This routine requires 1 <=
251 |x|. (The actual limit is something smaller, perhaps around 0x1p-622,
252 but 1 suffices.)
253
254 Output:
255
256 *xp is set to x modulo pi/2.
257
258 *a is set to the arc of the circle x is in. 0 <= *a < 4.
259
260 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
261 integer k.
262
263 This routine was adapted from the _sin function in double_fns.h (which
264 implements vvsin).
265*/
266static void ReduceFull(double *xp, int *a, double x)
267{
268 // The rows are 8-bit shifts of 27-bit windows on 2/pi * 0x1p400.
269 static const double TwoOverPiWithOffset[4][45] =
270 {
271 {
272 +0x0.0000000p+0, +0x1.45f3070p+399, -0x1.1b1bbe8p+372,
273 -0x1.6b01ec8p+345, +0x1.5f47d50p+318, -0x1.6447e48p+291,
274 -0x1.3ad4ce0p+263, +0x1.e21c820p+237, +0x1.fe51640p+208,
275 -0x1.5085110p+182, +0x1.586dca0p+154, -0x1.c8e2df0p+129,
276 +0x1.374b800p+102, +0x1.924bbb0p+74, -0x1.f62e6e0p+48,
277 +0x1.cfe1df0p+20, -0x1.38d3b58p-6, -0x1.63045e0p-34,
278 +0x1.1afa980p-63, -0x1.44bb7b0p-88, -0x1.6638fe0p-116,
279 +0x1.ad17e00p-142, -0x1.bec66e0p-168, -0x1.4e33e58p-195,
280 +0x1.9cfa4e0p-223, +0x1.08bf178p-249, -0x1.036be40p-279,
281 +0x1.8ffc4c0p-303, -0x1.0fd3000p-339, -0x1.fc04340p-358,
282 -0x1.dce94c0p-385, +0x1.4da3ee0p-413, -0x1.64c0980p-439,
283 -0x1.b069ec8p-465, -0x1.1617380p-493, -0x1.32c3400p-521,
284 -0x1.5d28ae0p-548, +0x1.eeb1fb0p-574, -0x1.a0e8500p-604,
285 +0x1.e839cf8p-627, +0x1.e294a48p-654, +0x1.d4d7f68p-681,
286 +0x1.fb11f90p-708, -0x1.517bd50p-735, +0x1.9823800p-767
287 },
288 {
289 +0x0.0000000p+0, +0x1.45f3000p+399, +0x1.b727240p+377,
290 -0x1.f56b020p+353, +0x1.3abe900p+325, -0x1.5964480p+299,
291 +0x1.b6c52b0p+271, +0x1.93c4390p+244, +0x1.07f9480p+214,
292 -0x1.38a8428p+191, -0x1.0ea7920p+162, -0x1.b7238c0p+135,
293 +0x1.09374b8p+110, +0x1.924c000p+74, -0x1.15f62e8p+56,
294 +0x1.21cfe20p+28, -0x1.0a71a70p+1, -0x1.acb1820p-25,
295 -0x1.77dca08p-52, -0x1.68a25d8p-79, -0x1.ec598e0p-106,
296 -0x1.fb29740p-133, -0x1.037d8d0p-161, +0x1.1d63980p-188,
297 +0x1.a99cfa0p-215, +0x1.3908bf0p-241, +0x1.77bf250p-269,
298 +0x1.d8ffc80p-299, -0x1.a000880p-322, +0x1.6603fc0p-350,
299 -0x1.a1dce90p-377, -0x1.2fac970p-403, -0x1.2593040p-433,
300 +0x1.9e4f960p-457, +0x1.36e9e90p-485, -0x1.c099620p-512,
301 +0x1.7fa8b60p-538, -0x1.5b08a70p-565, -0x1.41a0e80p-596,
302 -0x1.30be300p-622, -0x1.821d6b8p-646, +0x1.25d4d80p-673,
303 -0x1.2813b80p-702, -0x1.ca8be00p-730, +0x1.580cc10p-754
304 },
305 {
306 +0x0.0000000p+0, +0x1.4600000p+399, -0x1.9f246c0p+386,
307 -0x1.bbead60p+360, -0x1.ec54100p+329, -0x1.c159648p+307,
308 +0x1.c0db628p+280, +0x1.5993c40p+252, +0x1.c820ff0p+225,
309 +0x1.458eaf0p+198, +0x1.ebbc560p+172, +0x1.b7246e0p+144,
310 +0x1.d2126f0p+117, -0x1.a3ff370p+91, +0x1.2eea0a0p+64,
311 -0x1.736f180p+37, -0x1.e214e40p+8, +0x1.62534e8p-17,
312 -0x1.177dc80p-48, -0x1.0568a28p-71, +0x1.1213a68p-98,
313 -0x1.c7eca60p-127, +0x1.7df9040p-154, +0x1.cc8eb20p-179,
314 -0x1.9f2b318p-206, -0x1.6c6f780p-237, +0x1.f8bbdf8p-260,
315 +0x1.283b200p-288, -0x1.da00080p-318, -0x1.fa67f00p-344,
316 -0x1.0d0ee80p-372, +0x1.6b414e0p-397, -0x1.7049650p-423,
317 +0x1.fb3c9f0p-450, +0x1.6136ea0p-477, -0x1.7381320p-505,
318 -0x1.8680578p-530, +0x1.aea4f78p-557, -0x1.38141a0p-584,
319 -0x1.d098600p-613, +0x1.ce7de28p-638, +0x1.4a4baa0p-666,
320 -0x1.404a050p-692, +0x1.1f8d5d0p-720, +0x1.0ac0680p-749
321 },
322 {
323 +0x0.0000000p+0, +0x1.8000000p+399, -0x1.d067c90p+396,
324 -0x1.b1bbeb0p+368, +0x1.4fe13b0p+341, -0x1.05c1598p+315,
325 +0x1.bb81b70p+287, -0x1.d6a66c0p+260, -0x1.de37df0p+233,
326 -0x1.ae9c400p+200, -0x1.4214438p+180, -0x1.4f246e0p+153,
327 +0x1.b8e9090p+126, +0x1.ba5c010p+99, -0x1.b6d1160p+72,
328 +0x1.3a32440p+43, -0x1.80f10a0p+17, -0x1.c69dac8p-9,
329 -0x1.8c11780p-36, +0x1.1afa978p-63, -0x1.12edec8p-90,
330 +0x1.338e050p-117, -0x1.4ba0818p-144, -0x1.f633718p-171,
331 +0x1.8e60d50p-198, -0x1.8c16c70p-225, +0x1.17e2f00p-254,
332 -0x1.036be28p-279, +0x1.ff89800p-308, -0x1.0fd3400p-339,
333 +0x1.fde5e00p-365, +0x1.18b5a10p-388, -0x1.64b8248p-414,
334 -0x1.9302618p-441, -0x1.834f648p-468, -0x1.6173820p-497,
335 +0x1.9a797f8p-522, +0x1.45aea50p-549, -0x1.14e0500p-578,
336 -0x1.a0e84c0p-604, -0x1.7c63040p-631, -0x1.d6b5b40p-658,
337 -0x1.59404a0p-684, -0x1.3b81cc0p-714, +0x1.7421580p-738
338 }
339 };
340
341 Double X = { x };
342
343 /* Set ec to the unbiased exponent minus 33. Why 33? I do not know.
344 This was in the vvsin code in double_fns.h.
345 */
346 int ec = X.s.exponent - (1023+33);
347
348 // Set k to ceiling(ec / 27) and m to residue.
349 int k = (ec + 26) * (607*4) >> 16;
350 int m = 27*k - ec;
351
352 // offset is used to select a row in the reduction table. See below.
353 int offset = m >> 3;
354
355 /* The reduction table, TwoOverPiWithOffset, contains bits of 2/pi.
356 First, all entries are scaled by 2**400 to avoid overflow/underflow
357 issues. Each entry contains 27 bits of 2/pi, except in the first two
358 columns. The first column contains zeroes because no reduction is done
359 for numbers that are already small. The entries in the second row
360 start before (at a bit with higher value) the leading bit of 2/pi, so
361 they contain some leading zeroes. The sign bits form part of the 27
362 bits represented.
363
364 Each row forms a contiguous string of bits of 2/pi. That is, adding
365 all the entries with sufficient precision yields a single bit string
366 representing 2/pi. The rows differ in their starting points; row 0
367 begins with the leading bit of 2/pi, and rows 1, 2, and 3 begin 8, 16,
368 and 24 bits before that.
369 */
370
371 /* Scale x to avoid overflow in Dekker split. This is compensated for
372 in the entries in TwoOverPiWithOffset.
373 */
374 x *= 0x1p-400;
375
376 /* Use Dekker's algorithm to split x into 26 bits and 27 bits. This
377 requires round-to-nearest mode.
378 */
379 double xDekker = x * (0x1p27 + 1);
380 double x0 = xDekker - (xDekker - x);
381 double x1 = x - x0;
382
383 // Get address of starting point in table.
384 const double *p0 = &TwoOverPiWithOffset[offset][k];
385
386 // Get table entries.
387 const double fp0 = p0[0];
388 const double fp1 = p0[1];
389 const double fp2 = p0[2];
390 const double fp3 = p0[3];
391
392 // Get high bits of x * f, where f is the part of 2/pi we are using.
393 const double f0 = x1 * fp0 + fp1 * x0;
394
395 double f = x1 * fp1 + fp2 * x0;
396
397 // Combine to do integer work.
398 const double fi = f0 + f;
399
400 static const double IntegerBias = 0x1.8p52;
401
402 // Force the integer bits into a specific position.
403 Double Fi = { fi + IntegerBias };
404 /* |fi| is less than 0x1p36, so fi + IntegerBias is well within
405 [0x1p52, 0x1p53), so it has a known exponent, and the bits with
406 weight 2 and 1 are the least significant bits in its significand.
407
408 We know |fi| is less than 0x1p36 because it is the sum of x1 * fp0,
409 fp1 * x0, and something small in f. x0 has the same exponent as
410 x. Say 2**e <= |x| < 2**(e+1). (That is the original x, before we
411 scaled it by 0x1p-400.) The table entry we look up for fp0 has
412 magnitude less than 2**(400+27-27*ceiling((e-33)/27)), and fp1 is
413 less, bounded by 2**(400-27*ceiling((e-33)/27)). Including the
414 scaling in x, fp1 * x0 is less than 2**(e+1-400) *
415 2**(400-27*ceiling((e-33)/27)) <= 2**34. Similarly, fp0 * x1 is
416 less than 2**34, so their sum is less than 2**35.
417 */
418
419 // Get the two least significant integer bits.
420 *a = Fi.s.significand2;
421
422 double fint = Fi.d - IntegerBias;
423
424 const double fp4 = p0[4];
425 const double fp5 = p0[5];
426 const double fp6 = p0[6];
427
428 f = f0 - fint + f;
429 f += x1 * fp2 + fp3 * x0;
430 f += x1 * fp3 + fp4 * x0;
431 f += x1 * fp4 + fp5 * x0;
432 f += x1 * fp5 + fp6 * x0;
433
434 // Convert to radians by multiplying by pi/2.
435 *xp = f * 0x3.243F6A8885A3p-1;
436}
437
438
439/* static void ReduceMedium(double *xp, int *a, double x).
440
441 Input:
442
443 x is a number to be reduced modulo pi/2. This routine requires |x| <=
444 X. X is described below.
445
446 Output:
447
448 *xp is set to x modulo pi/2.
449
450 *a is set to the arc of the circle x is in. 0 <= *a < 4.
451
452 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some
453 integer k.
454
455 Nomenclature:
456
457 p is the period, pi/2.
458
459 X is the maximum value x may have. X is 0x1.000013be57a3fp19.
460
461 InversePeriod is 1/p, rounded to the nearest double, ties to even.
462
463 n is x * InversePeriod, rounded to the nearest double, ties to even,
464 and then rounded to the nearest integer, ties to even.
465
466 N is the maximum value n may have. N is X * InversePeriod, rounded to
467 the nearest double, ties to even, and then rounded to the nearest
468 integer, ties to even. N is 333773.
469
470 Notes:
471
472 In comments in this routine, unquoted expressions are mathematical and
473 quoted expressions are floating-point. Thus, "a + b" refers to the
474 floating-point operation of addition, including rounding.
475
476 "n * Period[0]" is exact for all |n| <= N. It is inexact for n = N+1,
477 so this is the source of the bound on x. X is the greatest value for
478 which |n| <= N. (It is happenstance that Period[0] limits x; if the
479 period were different, its bits might result in Period[1] causing the
480 limit. n has some other roles in the errors in this routine that might
481 limit x when used with other periods or different precisions.)
482
483 The difference between InversePeriod and 1/p can cause n to differ from
484 the ideal value when x is near a multiple of p. The result is that
485 instead of producing a result inside the target interval [-p/2, p/2], a
486 result slightly outside the interval is produced. The result may be as
487 much as 3.24128e-11 outside the interval. The polynomial and the
488 polynomial evaluation must be satisfactory over this extended interval.
489*/
490static void ReduceMedium(double *xp, int *a, double x)
491{
492 static const double InversePeriod =
493 2 / 0x3.243f6a8885a308d313198a2e03707344ap0;
494
495 /* Period is an extended-precision representation of the reduction period.
496 Each element except the last has the property that multiplication by
497 any integer with magnitude at most N is exact.
498 */
499 static const double Period[] = {
500 +0x1.921FB54440000p-0,
501 +0x1.68C234C4C0000p-39,
502 +0x1.98A2E03707345p-77
503 };
504
505 // Estimate x / p and round to nearest integer.
506 double n = x * InversePeriod + 0x1.8p52 - 0x1.8p52;
507
508 // Record which arc of the circle x lies in.
509 *a = (int) n & 3;
510
511 // This is exact, per design of Period.
512 double np0 = n * Period[0];
513
514 // This is exact, per design of Period.
515 double np1 = n * Period[1];
516
517 /* The error here is at most 1/2 ULP(n * Period[2]), which is
518 1/2 ULP(n * +0x1.98A2E03707345p-77) = ULP(n * +0x1.98A2E03707345p-78).
519
520 n <= N = 333773, so an absolute bound on that is ULP(333773 *
521 +0x1.98A2E03707345p-78) = 0x1p-111, but we need a bound dependent on n
522 to partition a proof below.
523
524 This is called error (a).
525 */
526 double np2 = n * Period[2];
527
528 /* Set x00 to x - n * Period[0]. np0 is very near x, so this is exact,
529 per David Goldberg, _What Every Computer Scientist Should Know About
530 Floating-Point Arithmetic_, Theorem 11.
531
532 The nomenclature used here is:
533
534 x0 is x - n * (Period[0]),
535 x1 is x - n * (Period[0] + Period[1]), and
536 x2 is x - n * (Period[0] + Period[1] + Period[2]).
537
538 x00 is the first and only "word" of an extended precision
539 representation of x0.
540
541 x10 is the first "word" of x1; it has the most significant bits.
542 x11 is the next "word" of x1; it has the following bits.
543 */
544 double x00 = x - np0;
545
546 /* Subtract n * Period[1] from x0 to produce x1. These operations yield
547 an exact result, per Knuth, _The Art of Computer Programming_ 2, second
548 edition, page 221, section 4.2.2, Theorem C, in the sense that x10 +
549 x11 will exactly equal x0 - n * Period[1]. The first statement puts
550 the high bits in x10, and the second statement determines what was
551 rounded away in x10 and puts it in x11.
552
553 Knuth assumes |np1| <= |x00|. This is often the case, but perhaps
554 cancellation has reduced the magnitude of x00. Hoever, if |x00| <
555 |np1|, this arithmetic is exact, because x00's least significant bit is
556 at least as large as the ULP of Period[1].
557 */
558 double x10 = x00 - np1;
559 double x11 = x00 - x10 - np1;
560
561 /* This comment demonstrates the rounding error in the following
562 statement, "t = x11 - np2", is tiny relative to x10.
563
564 This is called error (b).
565
566 Let LSB(x) be the weight of the least significant bit set in a
567 floating-point number x. E.g, LSB(1.25) is 1/4, although ULP(1.25) is
568 2**-52.
569
570 Here, x10 and x11 are each multiples of LSB(Period[1]). If |x10| <
571 2**53 LSB(Period[1]), then x11 is zero, because all the bits of x10 +
572 x11 fit into x10. In that case, "x11 - np2" is exact.
573
574 Otherwise, 2**53 LSB(Period[1]) <= |x10|, so 2 LSB(Period[1]) <=
575 ULP(x10). LSB(Period[1]) = 0x1p-73, so 0x1p-72 <= ULP(x10).
576
577 By construction of x10 and x11, |x11| <= 1/2 ULP(x10).
578
579 |np2| <= |"n * Period[2]"| <= |n * Period[2] * (1+2**-53)| = 333733 *
580 0x1.98A2E03707345p-77 * (1+2**-53) < 0x1.0426p-58.
581
582 |"x11 - np2"| <= 2*max(|x11|, |np2|) <= 2*max(1/2 ULP(x10),
583 0x1.0426p-58) = max(ULP(x10), 0x1.0426p-57). The rounding error in
584 "x11 - np2" is at most 2**-53 times this, so it is at most max(2**-53
585 ULP(x10), 0x1p-110). Since ULP(x10) is at least 0x1p-72, the rounding
586 error is at most 0x1p-38 ULP(x10).
587 */
588 double t = x11 - np2;
589
590 /* Add t to x1 to produce x2. As before, we could use Knuth's technique
591 to produce an exact sum, x20 + x21 = x10 + t. For now, we only return
592 x20. When we want extended precision, we could return x21 as well.
593 (If so, be careful to show that |t| <= |x10|, as required for Knuth's
594 technique, or otherwise show the arithmetic is sufficiently accurate.)
595
596 The difference between x20 and x10 + t is called error (c). It is at
597 most 1/2 ULP(x20).
598 */
599 double x20 = x10 + t;
600 // double x21 = x10 - x20 + t;
601
602 *xp = x20;
603
604 /* Having subtracted n * (Period[0] + Period[1] + Period[2]) from x, we
605 have an error caused by the difference between p and Period[0] +
606 Period[1] + Period[2], which is less than 0x1.6fdb2p-131. So the error
607 is less than n * 0x1.6fdb2p-131.
608
609 n <= N = 333773, so an absolute bound on that is 333773 *
610 0x1.6fdb2p-131 < 0x1.d45fp-113, but we need a bound dependent on n to
611 partition a proof below.
612
613 This is called error (d).
614
615 Our reduction has four errors:
616
617 (a), which is at most ULP(n * +0x1.98A2E03707345p-78) <= 0x1p-111.
618 (b), which is at most 0x1p-38 ULP(x10).
619 (c), which is at most 1/2 ULP(x20).
620 (d), which is at most n * 0x1.6fdb2p-131 < 0x1.d45fp-113.
621
622 From the discussion of (b), if x11 is not zero, then 0x1p-72 <=
623 ULP(x10). Then x10 dominates the result, because np2 is so small that
624 subtracting it from x1 to produce x2 cannot reduce the exponent by more
625 than one. So the errors (a), (b), and (d) are tiny relative to x10,
626 and the total error is very nearly (c), 1/2 ULP(x20).
627
628 However, if x11 is zero, then (b) is zero, and we only need to consider
629 (a), (c), and (d). We partition this into two cases.
630
631 Case 0: |x| < 0x1p12.
632
633 The double-precision floating-point number closest to a multiple of
634 pi/2 in that interval is 0x1.6c6cbc45dc8dep6 (according to Maple
635 code from Muller, _Elementary Functions_), and it is about
636 0x1.6d61b58c99c43p-60 away from a multiple of pi/2.
637
638 n is at most 0x1p12 * InversePi, rounded to a double, so n < 2608.
639 Then:
640
641 (a) is at most ULP(2608 * +0x1.98A2E03707345p-78) = 0x1p-119.
642 (b) is zero.
643 (c) is at most 1/2 ULP(x20).
644 (d) is at most 2608 * 0x1.6fdb2p-131 < 0x1p-119.
645
646 The final result might be as low as about 0x1.6d6p-60. That has an
647 ULP of 0x1p-113, so (a) and (d) are small relative to it, and the
648 total error is nearly (c), 1/2 ULP(x20).
649
650 Case 1: 0x1p12 <= |x|.
651
652 We still have |x| <= 0x1.000013be57a3fp19. The double-precision
653 floating-point number closest to a multiple of pi/2 in that set is
654 0x1.6c6cbc45dc8dep11 (Muller again), and it is about
655 0x1.6d61b58c99c43p-55 away from a multiple of pi/2.
656
657 Then:
658
659 (a) is at most ULP(333773 * +0x1.98A2E03707345p-78) = 0x1p-111.
660 (b) is zero.
661 (c) is at most 1/2 ULP(x20).
662 (d) is at most 333773 * 0x1.6fdb2p-131 < 0x1.d45fp-113.
663
664 The final result might be as low as about 0x1.6d6p-55. That has an
665 ULP of 0x1p-108, so (a) and (d) are small relative to it, and the
666 total error is nearly (c), 1/2 ULP(x20).
667
668 Therefore, the total error is never much more than 1/2 ULP of the value
669 returned in *xp.
670 */
671}
672
673
674/* static double sinp(double r)
675
676 Return sine(r) using only a polynomial, no reduction.
677
678 Input:
679
680 r, with |r| < pi/4 + 3.24128e-11.
681
682 Output:
683
684 Approximately sine(r) is returned.
685*/
686static double sinp(double r)
687{
688 double rr = r * r;
689
690 /* Derived from Cephes Math Library Release 2.8: June, 2000. Maple's
691 infnorm routine says this polynomial is within .0688 ULP of
692 sine(r) for |r| < pi/4 + 3.24128e-11.
693 */
694 return r + r * rr * ((((((
695 + 1.58962301576546568060E-10) * rr
696 - 2.50507477628578072866E-8 ) * rr
697 + 2.75573136213857245213E-6 ) * rr
698 - 1.98412698295895385996E-4 ) * rr
699 + 8.33333333332211858878E-3 ) * rr
700 - 1.66666666666666307295E-1 );
701}
702
703
704/* static double cosp(double r)
705
706 Return cosine(r) using only a polynomial, no reduction.
707
708 Input:
709
710 r, with |r| < pi/4 + 3.24128e-11.
711
712 Output:
713
714 Approximately cosine(r) is returned.
715*/
716static double cosp(double r)
717{
718 double rr = r * r;
719
720 /* Derived from Cephes Math Library Release 2.8: June, 2000. Maple's
721 infnorm routine says this polynomial is within .00709 ULP of
722 cosine(r) for |r| < pi/4 + 3.24128e-11.
723 */
724 return (((((((
725 - 1.13585365213876817300E-11) * rr
726 + 2.08757008419747316778E-9 ) * rr
727 - 2.75573141792967388112E-7 ) * rr
728 + 2.48015872888517045348E-5 ) * rr
729 - 1.38888888888730564116E-3 ) * rr
730 + 4.16666666666665929218E-2 ) * rr
731 - .5 ) * rr
732 + 1;
733}
734
735
736/* double sin(double x).
737
738 Notes:
739
740 Citations in parentheses below indicate the source of a requirement.
741
742 "C" stands for ISO/IEC 9899:TC2.
743
744 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
745 requirements since it defers to C and requires errno behavior only if
746 we choose to support it by arranging for "math_errhandling &
747 MATH_ERRNO" to be non-zero, which we do not.
748
749 Return value:
750
751 For +/- zero, return zero with same sign (C F.9 12 and F.9.1.6).
752
753 For +/- infinity, return a NaN (C F.9.1.6).
754
755 For a NaN, return the same NaN (C F.9 11 and 13).
756
757 Otherwise:
758
759 If the rounding mode is round-to-nearest, return sine(x) within a
760 few ULP. The maximum error of this routine is not precisely
761 known. The maximum error of the reduction might be around 3 ULP,
762 although this is partly a guess. The polynomials have small
763 errors. The polynomial evaluation might have an error under 1
764 ULP. So the worst error for this routine might be under 4 ULP.
765
766 Not currently implemented: In other rounding modes, return sine(x)
767 possibly with slightly worse error, not necessarily honoring the
768 rounding mode (Ali Sazegari narrowing C F.9 10).
769
770 All results are in [-1, 1].
771
772 Exceptions:
773
774 Raise underflow for a denormal result (C F.9 7 and Draft Standard for
775 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
776 smallest normal, underflow may or may not be raised. This is stricter
777 than the older 754 standard.
778
779 May or may not raise inexact, even if the result is exact (C F.9 8).
780
781 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
782 of C 4.2.1) or an infinity (C F.9.1.6) but not if the input is a quiet
783 NaN (C F.9 11).
784
785 May not raise exceptions otherwise (C F.9 9).
786
787 Properties:
788
789 Desired to be monotonic. Not yet proven!
790*/
791double sin(double x)
792{
793 // Get |x| in form used by the bound-check operations for a key.
794 BoundKeyType xk = Key(x);
795
796 double r;
797 int j0;
798
799 /* Handle denormal numbers and zero here to generate underflow for
800 denormals numbers and to avoid generating inexact for zero.
801 */
802 if (Within(xk, BoundDenormal))
803 return x * (1 - 0x1p-53);
804
805 /* For small numbers, we can return x for sine(x). Unfortunately,
806 adding this check slows down the other cases, which may be more
807 frequent.
808 */
809 else if (Within(xk, BoundTrivialSin))
810 {
811 /* Make t0 volatile to force compiler to fetch it at run-time
812 rather than optimizing away the multiplication.
813 */
814 static volatile const double t0 = 1/3.;
815
816 /* Get t0 once. If we wrote "t0*t0", the compiler would load it
817 twice, since it is volatile.
818 */
819 const double t1 = t0;
820
821 /* Perform a multiplication and pass it into an assembly construct to
822 prevent the compiler from knowing we do not use the result and
823 optimizing away the multiplication.
824 */
825 __asm__("" : : "X" (t1*t1));
826
827 // Return the floating-point number nearest sine(x), which is x.
828 return x;
829 }
830
831 // If |x| is small, no reduction is necessary.
832 else if (Within(xk, BoundPolynomial))
833 r = x, j0 = 0;
834 /* The interval for this could be enlarged by examining the sine
835 polynomial and figuring out how big r can get before the error is
836 too large.
837 */
838
839 // If |x| is medium, we can use a fast reduction routine.
840 else if (Within(xk, BoundMedium))
841 ReduceMedium(&r, &j0, x);
842
843 // Otherwise, we need the full, slow reduction routine.
844 else if (Within(xk, BoundFull))
845 ReduceFull(&r, &j0, x);
846
847 else
848 return x-x;
849
850 switch (j0)
851 {
852 default:
853 case 0: return +sinp(r);
854 case 1: return +cosp(r);
855 case 2: return -sinp(r);
856 case 3: return -cosp(r);
857 }
858}
859
860
861/* double cos(double x).
862
863 Notes:
864
865 Citations in parentheses below indicate the source of a requirement.
866
867 "C" stands for ISO/IEC 9899:TC2.
868
869 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
870 requirements since it defers to C and requires errno behavior only if
871 we choose to support it by arranging for "math_errhandling &
872 MATH_ERRNO" to be non-zero, which we do not.
873
874 Return value:
875
876 For +/- infinity, return a NaN (C F.9.1.6).
877
878 For a NaN, return the same NaN (C F.9 11 and 13).
879
880 Otherwise:
881
882 If the rounding mode is round-to-nearest, return cosine(x) within a
883 few ULP. The maximum error of this routine is not precisely
884 known. The maximum error of the reduction might be around 3 ULP,
885 although this is partly a guess. The polynomials have small
886 errors. The polynomial evaluation might have an error under 1
887 ULP. So the worst error for this routine might be under 4 ULP.
888
889 Not currently implemented: In other rounding modes, return
890 cosine(x) possibly with slightly worse error, not necessarily
891 honoring the rounding mode (Ali Sazegari narrowing C F.9 10).
892
893 All results are in [-1, 1].
894
895 Exceptions:
896
897 Raise underflow for a denormal result (C F.9 7 and Draft Standard for
898 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
899 smallest normal, underflow may or may not be raised. This is stricter
900 than the older 754 standard.
901
902 May or may not raise inexact, even if the result is exact (C F.9 8).
903
904 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
905 of C 4.2.1) or an infinity (C F.9.1.5) but not if the input is a quiet
906 NaN (C F.9 11).
907
908 May not raise exceptions otherwise (C F.9 9).
909
910 Properties:
911
912 Desired to be monotonic. Not yet proven!
913*/
914double cos(double x)
915{
916 // Get |x| in form used by the bound-check operations for a key.
917 BoundKeyType xk = Key(x);
918
919 double r;
920 int j0;
921
922 // Avoid generating inexact for zero.
923 if (x == 0)
924 return 1;
925
926 /* For small numbers, we can return 1 for cosine(x). Unfortunately,
927 adding this check slows down the other cases, which may be more
928 frequent. It greatly speeds up cases where |x| < 0x1p-492, by
929 avoiding arithmetic with denormals.
930 */
931 else if (Within(xk, BoundTrivialCos))
932 {
933 /* Make t0 volatile to force compiler to fetch it at run-time
934 rather than optimizing away the multiplication.
935 */
936 static volatile const double t0 = 1/3.;
937
938 /* Get t0 once. If we wrote "t0*t0", the compiler would load it
939 twice, since it is volatile.
940 */
941 const double t1 = t0;
942
943 /* Perform a multiplication and pass it into an assembly construct to
944 prevent the compiler from knowing we do not use the result and
945 optimizing away the multiplication.
946 */
947 __asm__("" : : "X" (t1*t1));
948
949 // Return the floating-point number nearest cosine(x), which is 1.
950 return 1;
951 }
952
953 // If |x| is small, no reduction is necessary.
954 else if (Within(xk, BoundPolynomial))
955 r = x, j0 = 0;
956 /* The interval for this could be enlarged by examining the cosine
957 polynomial and figuring out how big r can get before the error is
958 too large.
959 */
960
961 // If |x| is medium, we can use a fast reduction routine.
962 else if (Within(xk, BoundMedium))
963 ReduceMedium(&r, &j0, x);
964
965 // Otherwise, we need the full, slow reduction routine.
966 else if (Within(xk, BoundFull))
967 ReduceFull(&r, &j0, x);
968
969 else
970 return x-x;
971
972 switch (j0)
973 {
974 default:
975 case 0: return +cosp(r);
976 case 1: return -sinp(r);
977 case 2: return -cosp(r);
978 case 3: return +sinp(r);
979 }
980}
981
982
983/* double tan(double x).
984
985 Notes:
986
987 Citations in parentheses below indicate the source of a requirement.
988
989 "C" stands for ISO/IEC 9899:TC2.
990
991 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
992 requirements since it defers to C and requires errno behavior only if
993 we choose to support it by arranging for "math_errhandling &
994 MATH_ERRNO" to be non-zero, which we do not.
995
996 Return value:
997
998 For +/- zero, return zero with same sign (C F.9 12 and F.9.1.7).
999
1000 For +/- infinity, return a NaN (C F.9.1.7).
1001
1002 For a NaN, return the same NaN (C F.9 11 and 13).
1003
1004 Otherwise:
1005
1006 If the rounding mode is round-to-nearest, return tangent(x) within
1007 a few ULP. The maximum error of this routine is not precisely
1008 known. The maximum error of the reduction might be around 3 ULP,
1009 although this is partly a guess. The polynomials have small
1010 errors. The polynomial evaluation might have an error under 1
1011 ULP. How the final division affects error has not been considered
1012 yet. 4.55 ULP has been observed.
1013
1014 Not currently implemented: In other rounding modes, return
1015 tangent(x) possibly with slightly worse error, not necessarily
1016 honoring the rounding mode (Ali Sazegari narrowing C F.9 10).
1017
1018 Exceptions:
1019
1020 Raise underflow for a denormal result (C F.9 7 and Draft Standard for
1021 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the
1022 smallest normal, underflow may or may not be raised. This is stricter
1023 than the older 754 standard.
1024
1025 May or may not raise inexact, even if the result is exact (C F.9 8).
1026
1027 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
1028 of C 4.2.1) or an infinity (C F.9.1.7) but not if the input is a quiet
1029 NaN (C F.9 11).
1030
1031 May not raise exceptions otherwise (C F.9 9).
1032
1033 Properties:
1034
1035 Desired to be monotonic. Not yet proven!
1036*/
1037double tan(double x)
1038{
1039 // Get |x| in form used by the bound-check operations for a key.
1040 BoundKeyType xk = Key(x);
1041
1042 double r;
1043 int j0;
1044
1045 /* Handle denormal numbers and zero here to generate underflow for
1046 denormals numbers and to avoid generating inexact for zero.
1047 */
1048 if (Within(xk, BoundDenormal))
1049 return x * (1 - 0x1p-53);
1050
1051 /* For small numbers, we can return x for tangent(x). Unfortunately,
1052 adding this check slows down the other cases, which may be more
1053 frequent.
1054 */
1055 else if (Within(xk, BoundTrivialTan))
1056 {
1057 /* Make t0 volatile to force compiler to fetch it at run-time
1058 rather than optimizing away the multiplication.
1059 */
1060 static volatile const double t0 = 1/3.;
1061
1062 /* Get t0 once. If we wrote "t0*t0", the compiler would load it
1063 twice, since it is volatile.
1064 */
1065 const double t1 = t0;
1066
1067 /* Perform a multiplication and pass it into an assembly construct to
1068 prevent the compiler from knowing we do not use the result and
1069 optimizing it away.
1070 */
1071 __asm__("" : : "X" (t1*t1));
1072
1073 // Return the floating-point number nearest tangent(x), which is x.
1074 return x;
1075 }
1076
1077 // If |x| is small, no reduction is necessary.
1078 else if (Within(xk, BoundPolynomial))
1079 r = x, j0 = 0;
1080 /* The interval for this could be enlarged by examining the
1081 polynomials and figuring out how big r can get before the error is
1082 too large.
1083 */
1084
1085 // If |x| is medium, we can use a fast reduction routine.
1086 else if (Within(xk, BoundMedium))
1087 ReduceMedium(&r, &j0, x);
1088
1089 // Otherwise, we need the full, slow reduction routine.
1090 else if (Within(xk, BoundFull))
1091 ReduceFull(&r, &j0, x);
1092
1093 else
1094 return x-x;
1095
1096 switch (j0)
1097 {
1098 default:
1099 case 0: case 2: return + sinp(r) / cosp(r);
1100 case 1: case 3: return - cosp(r) / sinp(r);
1101 }
1102}