this repo has no description
at fixPythonPipStalling 1102 lines 36 kB view raw
1/* sincostan.c -- sin, cos, and tan for standard math library. 2 3 These are preliminary versions of sin, cos, and tan written to provide 4 argument reduction using the correct value of pi, rather than pi rounded to 5 66-bits as used by the Intel trigonometric instructions. Versions to 6 enhance accuracy and speed may follow. 7*/ 8 9 10#include <math.h> 11 12 13// Describe the destination floating-point type. 14#define SignificandBits 53 15#define ExponentBits 11 16 17 18/* Define a structure for accessing the internal components of a double. 19 20 s.exponent is the raw exponent field. 21 22 s.significand2 is the least two significant bits of the significand field. 23 24 The other fields are unused. 25*/ 26typedef union 27{ 28 double d; 29 struct 30 { 31 #if defined __BIG_ENDIAN__ 32 unsigned int sign : 1; 33 unsigned int exponent : ExponentBits; 34 unsigned int significand0 : SignificandBits-1-32; 35 unsigned int significand1 : 30; 36 unsigned int significand2 : 2; 37 #else // defined __BIG_ENDIAN__ 38 unsigned int significand2 : 2; 39 unsigned int significand1 : 30; 40 unsigned int significand0 : SignificandBits-1-32; 41 unsigned int exponent : ExponentBits; 42 unsigned int sign : 1; 43 #endif // defined __BIG_ENDIAN__ 44 } s; 45} Double; 46 47 48/* Each of the following numbers is a strict upper bound on some technique 49 for calculating sin, cos, or tan. That is, the technique is known to apply 50 when |x| < bound, where x is the function argument. (ReduceFull also 51 requires that \x| be greater than some amount around 0x1p-622, to avoid 52 underflow or loss of accuracy.) 53 54 If |x| < BoundDenormal, x is denormal or zero. 55 56 If |x| < BoundTrivialSin, x is the correctly rounded sine of x. 57 58 If |x| < BoundTrivialCos, 1 is the correctly rounded cosine of x. 59 60 If |x| < BoundTrivialTan, x is the correctly rounded tangent of x. 61 62 If |x| < BoundPolynomial, sinp(x) or cosp(x) may be used for sin(x) or 63 cos(x), with no argument reduction. 64 65 If |x| < BoundMedium, ReduceMedium may be used to reduce x. 66 67 If |x| < BoundFull, ReduceFull may be used to reduce x. 68 69 Otherwise, x is an infinity or a NaN. 70*/ 71#define BoundDenormal 0x1p-1022 72#define BoundTrivialSin 0x1.7137449123ef7p-26 73#define BoundTrivialCos 0x1.6A09E667F3BCDp-27 74#define BoundTrivialTan 0x1.250BFE1B082F5p-26 75#define BoundPolynomial 0x3.243f6a8885a31p-2 76#define BoundMedium 0x1.000013be57a40p19 77#define BoundFull INFINITY 78 79 80/* Below, we define methods to test whether a function argument, x, is within 81 bounds suitable for a techique for implementing the function. Several 82 methods are defined, using natural floating-point comparisons or using 83 integer comparisons on 32 or 64 bits of the floating-point representation. 84 Implementations may choose whichever method is best for performance. 85 86 To use, first prepare the key used for checking bounds: 87 88 BoundKeyType xk = Key(x); 89 90 That is done once at the start of a routine. Then, as many times as 91 desired, bounds are checked: 92 93 if (Within(xk, bound)) 94 ... 95 96 The "bound" must be one of the names defined above, such as BoundMedium. 97 98 If Within(xk, bound) evaluates to true, then |x| < bound. The converse is 99 not necessarily true; if Within(xk, bound) is false, |x| might be less than 100 bound. This is because some implementations of Within do not check all 101 bits of xk and bound. They are intended only as fast screens that get the 102 majority of values. However, if floor(log[2](|x|)) < floor(log[2](bound)), 103 Within(xk, bound) is guaranteed to be true -- its evaluation includes at 104 least all of the floating-point exponent. 105*/ 106#if defined __i386__ // (Add any other desired architectures.) 107 108 /* This method uses integer comparisons with the most significant 32 bits 109 of the IEEE-754 representation of the floating-point number. 110 */ 111 112 #include <inttypes.h> 113 114 typedef uint32_t BoundKeyType; 115 116 typedef union 117 { 118 double d; 119 #if defined __BIG_ENDIAN__ 120 struct { uint32_t key, notkey; } s; 121 #else 122 struct { uint32_t notkey, key; } s; 123 #endif 124 } DoubleForBound; 125 126 // Prepare constants containing the bounds in our desired form. 127 #define DefineBound(bound) \ 128 static const DoubleForBound Double##bound = { bound }; 129 DefineBound(BoundDenormal) 130 DefineBound(BoundTrivialSin) 131 DefineBound(BoundTrivialCos) 132 DefineBound(BoundTrivialTan) 133 DefineBound(BoundPolynomial) 134 DefineBound(BoundMedium) 135 DefineBound(BoundFull) 136 137 // Get the key for |x|. 138 static BoundKeyType Key(double x) 139 { 140 const DoubleForBound X = { x }; 141 BoundKeyType t = X.s.key; 142 return t & 0x7fffffff; 143 } 144 145 #define Within(xk, bound) ((xk) < Double##bound.s.key) 146 147#elif defined __x86_64__ // (Add any other desired architectures.) 148 149 /* This method uses integer comparisons with all 64 bits of the IEEE-754 150 representation of the floating-point number. 151 */ 152 153 #include <inttypes.h> 154 155 typedef uint64_t BoundKeyType; 156 157 typedef union 158 { 159 double d; 160 uint64_t key; 161 } DoubleForBound; 162 163 // Prepare constants containing the bounds in our desired form. 164 #define DefineBound(bound) \ 165 static const DoubleForBound Double##bound = { bound }; 166 DefineBound(BoundDenormal) 167 DefineBound(BoundTrivialSin) 168 DefineBound(BoundTrivialCos) 169 DefineBound(BoundTrivialTan) 170 DefineBound(BoundPolynomial) 171 DefineBound(BoundMedium) 172 DefineBound(BoundFull) 173 174 // Get the key for |x|. 175 static BoundKeyType Key(double x) 176 { 177 const DoubleForBound X = { x }; 178 BoundKeyType t = X.key; 179 return t & 0x7fffffffffffffff; 180 } 181 182 #define Within(xk, bound) ((xk) < Double##bound.key) 183 184#else 185 186 // If no other method is selected, use regular floating-point comparison. 187 188 typedef double BoundKeyType; 189 190 #define DefineBound(bound) 191 192 // Get the key for |x|. 193 static BoundKeyType Key(double x) 194 { 195 #if defined __i386__ || defined __x86_64__ 196 // On Intel, just clear the sign bit. 197 double xk = -0.; 198 __asm__("andnpd %[x], %[xk]" : [xk] "+x" (xk) : [x] "x" (x)); 199 return xk; 200 #elif 1 201 return x < 0 ? -x : +x; 202 #else 203 return fabs(x); 204 #endif 205 } 206 207 #define Within(xk, bound) ((xk) < bound) 208 209#endif 210 211 212/* Several argument-reduction routines follow. Each has this specification: 213 214 static void Reduce<Name>(double *xp, int *a, double x). 215 216 Input: 217 218 x is a number to be reduced modulo pi/2. Each routine has 219 qualifications on what values it supports for x. Generally, x must not 220 be a NaN or infinity. 221 222 Output: 223 224 *xp is set to a residue of x modulo pi/2. Mostly, -pi/4 <= x <= +pi/4, 225 but *xp may be outside this interval by as much as 3.24128e-11. 226 227 *a is set to the arc of the circle x is in. 0 <= *a < 4. 228 229 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some 230 integer k. This holds even if x is outside [-pi/4, pi/4]; *a matches 231 *xp. 232 233 This means all the reduction routines are returning *xp in radians. 234 ReduceFull goes through steps in which it has period-fraction units (0 to 1 235 corresponding to 0 to pi/2 radians), so the restoration of radians is 236 forced and introduces additional rounding error and execution time. We 237 might want to do something about that. 238 239 Since *xp is a single double, it is insufficient to represent the residue 240 precisely enough to compute a faithfully rounded function. For that, we 241 will have to return extended precision, perhaps a long double or perhaps 242 two doubles. 243*/ 244 245 246/* static void ReduceFull(double *xp, int *a, double x). 247 248 Input: 249 250 x is a number to be reduced modulo pi/2. This routine requires 1 <= 251 |x|. (The actual limit is something smaller, perhaps around 0x1p-622, 252 but 1 suffices.) 253 254 Output: 255 256 *xp is set to x modulo pi/2. 257 258 *a is set to the arc of the circle x is in. 0 <= *a < 4. 259 260 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some 261 integer k. 262 263 This routine was adapted from the _sin function in double_fns.h (which 264 implements vvsin). 265*/ 266static void ReduceFull(double *xp, int *a, double x) 267{ 268 // The rows are 8-bit shifts of 27-bit windows on 2/pi * 0x1p400. 269 static const double TwoOverPiWithOffset[4][45] = 270 { 271 { 272 +0x0.0000000p+0, +0x1.45f3070p+399, -0x1.1b1bbe8p+372, 273 -0x1.6b01ec8p+345, +0x1.5f47d50p+318, -0x1.6447e48p+291, 274 -0x1.3ad4ce0p+263, +0x1.e21c820p+237, +0x1.fe51640p+208, 275 -0x1.5085110p+182, +0x1.586dca0p+154, -0x1.c8e2df0p+129, 276 +0x1.374b800p+102, +0x1.924bbb0p+74, -0x1.f62e6e0p+48, 277 +0x1.cfe1df0p+20, -0x1.38d3b58p-6, -0x1.63045e0p-34, 278 +0x1.1afa980p-63, -0x1.44bb7b0p-88, -0x1.6638fe0p-116, 279 +0x1.ad17e00p-142, -0x1.bec66e0p-168, -0x1.4e33e58p-195, 280 +0x1.9cfa4e0p-223, +0x1.08bf178p-249, -0x1.036be40p-279, 281 +0x1.8ffc4c0p-303, -0x1.0fd3000p-339, -0x1.fc04340p-358, 282 -0x1.dce94c0p-385, +0x1.4da3ee0p-413, -0x1.64c0980p-439, 283 -0x1.b069ec8p-465, -0x1.1617380p-493, -0x1.32c3400p-521, 284 -0x1.5d28ae0p-548, +0x1.eeb1fb0p-574, -0x1.a0e8500p-604, 285 +0x1.e839cf8p-627, +0x1.e294a48p-654, +0x1.d4d7f68p-681, 286 +0x1.fb11f90p-708, -0x1.517bd50p-735, +0x1.9823800p-767 287 }, 288 { 289 +0x0.0000000p+0, +0x1.45f3000p+399, +0x1.b727240p+377, 290 -0x1.f56b020p+353, +0x1.3abe900p+325, -0x1.5964480p+299, 291 +0x1.b6c52b0p+271, +0x1.93c4390p+244, +0x1.07f9480p+214, 292 -0x1.38a8428p+191, -0x1.0ea7920p+162, -0x1.b7238c0p+135, 293 +0x1.09374b8p+110, +0x1.924c000p+74, -0x1.15f62e8p+56, 294 +0x1.21cfe20p+28, -0x1.0a71a70p+1, -0x1.acb1820p-25, 295 -0x1.77dca08p-52, -0x1.68a25d8p-79, -0x1.ec598e0p-106, 296 -0x1.fb29740p-133, -0x1.037d8d0p-161, +0x1.1d63980p-188, 297 +0x1.a99cfa0p-215, +0x1.3908bf0p-241, +0x1.77bf250p-269, 298 +0x1.d8ffc80p-299, -0x1.a000880p-322, +0x1.6603fc0p-350, 299 -0x1.a1dce90p-377, -0x1.2fac970p-403, -0x1.2593040p-433, 300 +0x1.9e4f960p-457, +0x1.36e9e90p-485, -0x1.c099620p-512, 301 +0x1.7fa8b60p-538, -0x1.5b08a70p-565, -0x1.41a0e80p-596, 302 -0x1.30be300p-622, -0x1.821d6b8p-646, +0x1.25d4d80p-673, 303 -0x1.2813b80p-702, -0x1.ca8be00p-730, +0x1.580cc10p-754 304 }, 305 { 306 +0x0.0000000p+0, +0x1.4600000p+399, -0x1.9f246c0p+386, 307 -0x1.bbead60p+360, -0x1.ec54100p+329, -0x1.c159648p+307, 308 +0x1.c0db628p+280, +0x1.5993c40p+252, +0x1.c820ff0p+225, 309 +0x1.458eaf0p+198, +0x1.ebbc560p+172, +0x1.b7246e0p+144, 310 +0x1.d2126f0p+117, -0x1.a3ff370p+91, +0x1.2eea0a0p+64, 311 -0x1.736f180p+37, -0x1.e214e40p+8, +0x1.62534e8p-17, 312 -0x1.177dc80p-48, -0x1.0568a28p-71, +0x1.1213a68p-98, 313 -0x1.c7eca60p-127, +0x1.7df9040p-154, +0x1.cc8eb20p-179, 314 -0x1.9f2b318p-206, -0x1.6c6f780p-237, +0x1.f8bbdf8p-260, 315 +0x1.283b200p-288, -0x1.da00080p-318, -0x1.fa67f00p-344, 316 -0x1.0d0ee80p-372, +0x1.6b414e0p-397, -0x1.7049650p-423, 317 +0x1.fb3c9f0p-450, +0x1.6136ea0p-477, -0x1.7381320p-505, 318 -0x1.8680578p-530, +0x1.aea4f78p-557, -0x1.38141a0p-584, 319 -0x1.d098600p-613, +0x1.ce7de28p-638, +0x1.4a4baa0p-666, 320 -0x1.404a050p-692, +0x1.1f8d5d0p-720, +0x1.0ac0680p-749 321 }, 322 { 323 +0x0.0000000p+0, +0x1.8000000p+399, -0x1.d067c90p+396, 324 -0x1.b1bbeb0p+368, +0x1.4fe13b0p+341, -0x1.05c1598p+315, 325 +0x1.bb81b70p+287, -0x1.d6a66c0p+260, -0x1.de37df0p+233, 326 -0x1.ae9c400p+200, -0x1.4214438p+180, -0x1.4f246e0p+153, 327 +0x1.b8e9090p+126, +0x1.ba5c010p+99, -0x1.b6d1160p+72, 328 +0x1.3a32440p+43, -0x1.80f10a0p+17, -0x1.c69dac8p-9, 329 -0x1.8c11780p-36, +0x1.1afa978p-63, -0x1.12edec8p-90, 330 +0x1.338e050p-117, -0x1.4ba0818p-144, -0x1.f633718p-171, 331 +0x1.8e60d50p-198, -0x1.8c16c70p-225, +0x1.17e2f00p-254, 332 -0x1.036be28p-279, +0x1.ff89800p-308, -0x1.0fd3400p-339, 333 +0x1.fde5e00p-365, +0x1.18b5a10p-388, -0x1.64b8248p-414, 334 -0x1.9302618p-441, -0x1.834f648p-468, -0x1.6173820p-497, 335 +0x1.9a797f8p-522, +0x1.45aea50p-549, -0x1.14e0500p-578, 336 -0x1.a0e84c0p-604, -0x1.7c63040p-631, -0x1.d6b5b40p-658, 337 -0x1.59404a0p-684, -0x1.3b81cc0p-714, +0x1.7421580p-738 338 } 339 }; 340 341 Double X = { x }; 342 343 /* Set ec to the unbiased exponent minus 33. Why 33? I do not know. 344 This was in the vvsin code in double_fns.h. 345 */ 346 int ec = X.s.exponent - (1023+33); 347 348 // Set k to ceiling(ec / 27) and m to residue. 349 int k = (ec + 26) * (607*4) >> 16; 350 int m = 27*k - ec; 351 352 // offset is used to select a row in the reduction table. See below. 353 int offset = m >> 3; 354 355 /* The reduction table, TwoOverPiWithOffset, contains bits of 2/pi. 356 First, all entries are scaled by 2**400 to avoid overflow/underflow 357 issues. Each entry contains 27 bits of 2/pi, except in the first two 358 columns. The first column contains zeroes because no reduction is done 359 for numbers that are already small. The entries in the second row 360 start before (at a bit with higher value) the leading bit of 2/pi, so 361 they contain some leading zeroes. The sign bits form part of the 27 362 bits represented. 363 364 Each row forms a contiguous string of bits of 2/pi. That is, adding 365 all the entries with sufficient precision yields a single bit string 366 representing 2/pi. The rows differ in their starting points; row 0 367 begins with the leading bit of 2/pi, and rows 1, 2, and 3 begin 8, 16, 368 and 24 bits before that. 369 */ 370 371 /* Scale x to avoid overflow in Dekker split. This is compensated for 372 in the entries in TwoOverPiWithOffset. 373 */ 374 x *= 0x1p-400; 375 376 /* Use Dekker's algorithm to split x into 26 bits and 27 bits. This 377 requires round-to-nearest mode. 378 */ 379 double xDekker = x * (0x1p27 + 1); 380 double x0 = xDekker - (xDekker - x); 381 double x1 = x - x0; 382 383 // Get address of starting point in table. 384 const double *p0 = &TwoOverPiWithOffset[offset][k]; 385 386 // Get table entries. 387 const double fp0 = p0[0]; 388 const double fp1 = p0[1]; 389 const double fp2 = p0[2]; 390 const double fp3 = p0[3]; 391 392 // Get high bits of x * f, where f is the part of 2/pi we are using. 393 const double f0 = x1 * fp0 + fp1 * x0; 394 395 double f = x1 * fp1 + fp2 * x0; 396 397 // Combine to do integer work. 398 const double fi = f0 + f; 399 400 static const double IntegerBias = 0x1.8p52; 401 402 // Force the integer bits into a specific position. 403 Double Fi = { fi + IntegerBias }; 404 /* |fi| is less than 0x1p36, so fi + IntegerBias is well within 405 [0x1p52, 0x1p53), so it has a known exponent, and the bits with 406 weight 2 and 1 are the least significant bits in its significand. 407 408 We know |fi| is less than 0x1p36 because it is the sum of x1 * fp0, 409 fp1 * x0, and something small in f. x0 has the same exponent as 410 x. Say 2**e <= |x| < 2**(e+1). (That is the original x, before we 411 scaled it by 0x1p-400.) The table entry we look up for fp0 has 412 magnitude less than 2**(400+27-27*ceiling((e-33)/27)), and fp1 is 413 less, bounded by 2**(400-27*ceiling((e-33)/27)). Including the 414 scaling in x, fp1 * x0 is less than 2**(e+1-400) * 415 2**(400-27*ceiling((e-33)/27)) <= 2**34. Similarly, fp0 * x1 is 416 less than 2**34, so their sum is less than 2**35. 417 */ 418 419 // Get the two least significant integer bits. 420 *a = Fi.s.significand2; 421 422 double fint = Fi.d - IntegerBias; 423 424 const double fp4 = p0[4]; 425 const double fp5 = p0[5]; 426 const double fp6 = p0[6]; 427 428 f = f0 - fint + f; 429 f += x1 * fp2 + fp3 * x0; 430 f += x1 * fp3 + fp4 * x0; 431 f += x1 * fp4 + fp5 * x0; 432 f += x1 * fp5 + fp6 * x0; 433 434 // Convert to radians by multiplying by pi/2. 435 *xp = f * 0x3.243F6A8885A3p-1; 436} 437 438 439/* static void ReduceMedium(double *xp, int *a, double x). 440 441 Input: 442 443 x is a number to be reduced modulo pi/2. This routine requires |x| <= 444 X. X is described below. 445 446 Output: 447 448 *xp is set to x modulo pi/2. 449 450 *a is set to the arc of the circle x is in. 0 <= *a < 4. 451 452 On output, x is approximately k * 2*pi + *a * pi/2 + *xp for some 453 integer k. 454 455 Nomenclature: 456 457 p is the period, pi/2. 458 459 X is the maximum value x may have. X is 0x1.000013be57a3fp19. 460 461 InversePeriod is 1/p, rounded to the nearest double, ties to even. 462 463 n is x * InversePeriod, rounded to the nearest double, ties to even, 464 and then rounded to the nearest integer, ties to even. 465 466 N is the maximum value n may have. N is X * InversePeriod, rounded to 467 the nearest double, ties to even, and then rounded to the nearest 468 integer, ties to even. N is 333773. 469 470 Notes: 471 472 In comments in this routine, unquoted expressions are mathematical and 473 quoted expressions are floating-point. Thus, "a + b" refers to the 474 floating-point operation of addition, including rounding. 475 476 "n * Period[0]" is exact for all |n| <= N. It is inexact for n = N+1, 477 so this is the source of the bound on x. X is the greatest value for 478 which |n| <= N. (It is happenstance that Period[0] limits x; if the 479 period were different, its bits might result in Period[1] causing the 480 limit. n has some other roles in the errors in this routine that might 481 limit x when used with other periods or different precisions.) 482 483 The difference between InversePeriod and 1/p can cause n to differ from 484 the ideal value when x is near a multiple of p. The result is that 485 instead of producing a result inside the target interval [-p/2, p/2], a 486 result slightly outside the interval is produced. The result may be as 487 much as 3.24128e-11 outside the interval. The polynomial and the 488 polynomial evaluation must be satisfactory over this extended interval. 489*/ 490static void ReduceMedium(double *xp, int *a, double x) 491{ 492 static const double InversePeriod = 493 2 / 0x3.243f6a8885a308d313198a2e03707344ap0; 494 495 /* Period is an extended-precision representation of the reduction period. 496 Each element except the last has the property that multiplication by 497 any integer with magnitude at most N is exact. 498 */ 499 static const double Period[] = { 500 +0x1.921FB54440000p-0, 501 +0x1.68C234C4C0000p-39, 502 +0x1.98A2E03707345p-77 503 }; 504 505 // Estimate x / p and round to nearest integer. 506 double n = x * InversePeriod + 0x1.8p52 - 0x1.8p52; 507 508 // Record which arc of the circle x lies in. 509 *a = (int) n & 3; 510 511 // This is exact, per design of Period. 512 double np0 = n * Period[0]; 513 514 // This is exact, per design of Period. 515 double np1 = n * Period[1]; 516 517 /* The error here is at most 1/2 ULP(n * Period[2]), which is 518 1/2 ULP(n * +0x1.98A2E03707345p-77) = ULP(n * +0x1.98A2E03707345p-78). 519 520 n <= N = 333773, so an absolute bound on that is ULP(333773 * 521 +0x1.98A2E03707345p-78) = 0x1p-111, but we need a bound dependent on n 522 to partition a proof below. 523 524 This is called error (a). 525 */ 526 double np2 = n * Period[2]; 527 528 /* Set x00 to x - n * Period[0]. np0 is very near x, so this is exact, 529 per David Goldberg, _What Every Computer Scientist Should Know About 530 Floating-Point Arithmetic_, Theorem 11. 531 532 The nomenclature used here is: 533 534 x0 is x - n * (Period[0]), 535 x1 is x - n * (Period[0] + Period[1]), and 536 x2 is x - n * (Period[0] + Period[1] + Period[2]). 537 538 x00 is the first and only "word" of an extended precision 539 representation of x0. 540 541 x10 is the first "word" of x1; it has the most significant bits. 542 x11 is the next "word" of x1; it has the following bits. 543 */ 544 double x00 = x - np0; 545 546 /* Subtract n * Period[1] from x0 to produce x1. These operations yield 547 an exact result, per Knuth, _The Art of Computer Programming_ 2, second 548 edition, page 221, section 4.2.2, Theorem C, in the sense that x10 + 549 x11 will exactly equal x0 - n * Period[1]. The first statement puts 550 the high bits in x10, and the second statement determines what was 551 rounded away in x10 and puts it in x11. 552 553 Knuth assumes |np1| <= |x00|. This is often the case, but perhaps 554 cancellation has reduced the magnitude of x00. Hoever, if |x00| < 555 |np1|, this arithmetic is exact, because x00's least significant bit is 556 at least as large as the ULP of Period[1]. 557 */ 558 double x10 = x00 - np1; 559 double x11 = x00 - x10 - np1; 560 561 /* This comment demonstrates the rounding error in the following 562 statement, "t = x11 - np2", is tiny relative to x10. 563 564 This is called error (b). 565 566 Let LSB(x) be the weight of the least significant bit set in a 567 floating-point number x. E.g, LSB(1.25) is 1/4, although ULP(1.25) is 568 2**-52. 569 570 Here, x10 and x11 are each multiples of LSB(Period[1]). If |x10| < 571 2**53 LSB(Period[1]), then x11 is zero, because all the bits of x10 + 572 x11 fit into x10. In that case, "x11 - np2" is exact. 573 574 Otherwise, 2**53 LSB(Period[1]) <= |x10|, so 2 LSB(Period[1]) <= 575 ULP(x10). LSB(Period[1]) = 0x1p-73, so 0x1p-72 <= ULP(x10). 576 577 By construction of x10 and x11, |x11| <= 1/2 ULP(x10). 578 579 |np2| <= |"n * Period[2]"| <= |n * Period[2] * (1+2**-53)| = 333733 * 580 0x1.98A2E03707345p-77 * (1+2**-53) < 0x1.0426p-58. 581 582 |"x11 - np2"| <= 2*max(|x11|, |np2|) <= 2*max(1/2 ULP(x10), 583 0x1.0426p-58) = max(ULP(x10), 0x1.0426p-57). The rounding error in 584 "x11 - np2" is at most 2**-53 times this, so it is at most max(2**-53 585 ULP(x10), 0x1p-110). Since ULP(x10) is at least 0x1p-72, the rounding 586 error is at most 0x1p-38 ULP(x10). 587 */ 588 double t = x11 - np2; 589 590 /* Add t to x1 to produce x2. As before, we could use Knuth's technique 591 to produce an exact sum, x20 + x21 = x10 + t. For now, we only return 592 x20. When we want extended precision, we could return x21 as well. 593 (If so, be careful to show that |t| <= |x10|, as required for Knuth's 594 technique, or otherwise show the arithmetic is sufficiently accurate.) 595 596 The difference between x20 and x10 + t is called error (c). It is at 597 most 1/2 ULP(x20). 598 */ 599 double x20 = x10 + t; 600 // double x21 = x10 - x20 + t; 601 602 *xp = x20; 603 604 /* Having subtracted n * (Period[0] + Period[1] + Period[2]) from x, we 605 have an error caused by the difference between p and Period[0] + 606 Period[1] + Period[2], which is less than 0x1.6fdb2p-131. So the error 607 is less than n * 0x1.6fdb2p-131. 608 609 n <= N = 333773, so an absolute bound on that is 333773 * 610 0x1.6fdb2p-131 < 0x1.d45fp-113, but we need a bound dependent on n to 611 partition a proof below. 612 613 This is called error (d). 614 615 Our reduction has four errors: 616 617 (a), which is at most ULP(n * +0x1.98A2E03707345p-78) <= 0x1p-111. 618 (b), which is at most 0x1p-38 ULP(x10). 619 (c), which is at most 1/2 ULP(x20). 620 (d), which is at most n * 0x1.6fdb2p-131 < 0x1.d45fp-113. 621 622 From the discussion of (b), if x11 is not zero, then 0x1p-72 <= 623 ULP(x10). Then x10 dominates the result, because np2 is so small that 624 subtracting it from x1 to produce x2 cannot reduce the exponent by more 625 than one. So the errors (a), (b), and (d) are tiny relative to x10, 626 and the total error is very nearly (c), 1/2 ULP(x20). 627 628 However, if x11 is zero, then (b) is zero, and we only need to consider 629 (a), (c), and (d). We partition this into two cases. 630 631 Case 0: |x| < 0x1p12. 632 633 The double-precision floating-point number closest to a multiple of 634 pi/2 in that interval is 0x1.6c6cbc45dc8dep6 (according to Maple 635 code from Muller, _Elementary Functions_), and it is about 636 0x1.6d61b58c99c43p-60 away from a multiple of pi/2. 637 638 n is at most 0x1p12 * InversePi, rounded to a double, so n < 2608. 639 Then: 640 641 (a) is at most ULP(2608 * +0x1.98A2E03707345p-78) = 0x1p-119. 642 (b) is zero. 643 (c) is at most 1/2 ULP(x20). 644 (d) is at most 2608 * 0x1.6fdb2p-131 < 0x1p-119. 645 646 The final result might be as low as about 0x1.6d6p-60. That has an 647 ULP of 0x1p-113, so (a) and (d) are small relative to it, and the 648 total error is nearly (c), 1/2 ULP(x20). 649 650 Case 1: 0x1p12 <= |x|. 651 652 We still have |x| <= 0x1.000013be57a3fp19. The double-precision 653 floating-point number closest to a multiple of pi/2 in that set is 654 0x1.6c6cbc45dc8dep11 (Muller again), and it is about 655 0x1.6d61b58c99c43p-55 away from a multiple of pi/2. 656 657 Then: 658 659 (a) is at most ULP(333773 * +0x1.98A2E03707345p-78) = 0x1p-111. 660 (b) is zero. 661 (c) is at most 1/2 ULP(x20). 662 (d) is at most 333773 * 0x1.6fdb2p-131 < 0x1.d45fp-113. 663 664 The final result might be as low as about 0x1.6d6p-55. That has an 665 ULP of 0x1p-108, so (a) and (d) are small relative to it, and the 666 total error is nearly (c), 1/2 ULP(x20). 667 668 Therefore, the total error is never much more than 1/2 ULP of the value 669 returned in *xp. 670 */ 671} 672 673 674/* static double sinp(double r) 675 676 Return sine(r) using only a polynomial, no reduction. 677 678 Input: 679 680 r, with |r| < pi/4 + 3.24128e-11. 681 682 Output: 683 684 Approximately sine(r) is returned. 685*/ 686static double sinp(double r) 687{ 688 double rr = r * r; 689 690 /* Derived from Cephes Math Library Release 2.8: June, 2000. Maple's 691 infnorm routine says this polynomial is within .0688 ULP of 692 sine(r) for |r| < pi/4 + 3.24128e-11. 693 */ 694 return r + r * rr * (((((( 695 + 1.58962301576546568060E-10) * rr 696 - 2.50507477628578072866E-8 ) * rr 697 + 2.75573136213857245213E-6 ) * rr 698 - 1.98412698295895385996E-4 ) * rr 699 + 8.33333333332211858878E-3 ) * rr 700 - 1.66666666666666307295E-1 ); 701} 702 703 704/* static double cosp(double r) 705 706 Return cosine(r) using only a polynomial, no reduction. 707 708 Input: 709 710 r, with |r| < pi/4 + 3.24128e-11. 711 712 Output: 713 714 Approximately cosine(r) is returned. 715*/ 716static double cosp(double r) 717{ 718 double rr = r * r; 719 720 /* Derived from Cephes Math Library Release 2.8: June, 2000. Maple's 721 infnorm routine says this polynomial is within .00709 ULP of 722 cosine(r) for |r| < pi/4 + 3.24128e-11. 723 */ 724 return ((((((( 725 - 1.13585365213876817300E-11) * rr 726 + 2.08757008419747316778E-9 ) * rr 727 - 2.75573141792967388112E-7 ) * rr 728 + 2.48015872888517045348E-5 ) * rr 729 - 1.38888888888730564116E-3 ) * rr 730 + 4.16666666666665929218E-2 ) * rr 731 - .5 ) * rr 732 + 1; 733} 734 735 736/* double sin(double x). 737 738 Notes: 739 740 Citations in parentheses below indicate the source of a requirement. 741 742 "C" stands for ISO/IEC 9899:TC2. 743 744 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no 745 requirements since it defers to C and requires errno behavior only if 746 we choose to support it by arranging for "math_errhandling & 747 MATH_ERRNO" to be non-zero, which we do not. 748 749 Return value: 750 751 For +/- zero, return zero with same sign (C F.9 12 and F.9.1.6). 752 753 For +/- infinity, return a NaN (C F.9.1.6). 754 755 For a NaN, return the same NaN (C F.9 11 and 13). 756 757 Otherwise: 758 759 If the rounding mode is round-to-nearest, return sine(x) within a 760 few ULP. The maximum error of this routine is not precisely 761 known. The maximum error of the reduction might be around 3 ULP, 762 although this is partly a guess. The polynomials have small 763 errors. The polynomial evaluation might have an error under 1 764 ULP. So the worst error for this routine might be under 4 ULP. 765 766 Not currently implemented: In other rounding modes, return sine(x) 767 possibly with slightly worse error, not necessarily honoring the 768 rounding mode (Ali Sazegari narrowing C F.9 10). 769 770 All results are in [-1, 1]. 771 772 Exceptions: 773 774 Raise underflow for a denormal result (C F.9 7 and Draft Standard for 775 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the 776 smallest normal, underflow may or may not be raised. This is stricter 777 than the older 754 standard. 778 779 May or may not raise inexact, even if the result is exact (C F.9 8). 780 781 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite 782 of C 4.2.1) or an infinity (C F.9.1.6) but not if the input is a quiet 783 NaN (C F.9 11). 784 785 May not raise exceptions otherwise (C F.9 9). 786 787 Properties: 788 789 Desired to be monotonic. Not yet proven! 790*/ 791double sin(double x) 792{ 793 // Get |x| in form used by the bound-check operations for a key. 794 BoundKeyType xk = Key(x); 795 796 double r; 797 int j0; 798 799 /* Handle denormal numbers and zero here to generate underflow for 800 denormals numbers and to avoid generating inexact for zero. 801 */ 802 if (Within(xk, BoundDenormal)) 803 return x * (1 - 0x1p-53); 804 805 /* For small numbers, we can return x for sine(x). Unfortunately, 806 adding this check slows down the other cases, which may be more 807 frequent. 808 */ 809 else if (Within(xk, BoundTrivialSin)) 810 { 811 /* Make t0 volatile to force compiler to fetch it at run-time 812 rather than optimizing away the multiplication. 813 */ 814 static volatile const double t0 = 1/3.; 815 816 /* Get t0 once. If we wrote "t0*t0", the compiler would load it 817 twice, since it is volatile. 818 */ 819 const double t1 = t0; 820 821 /* Perform a multiplication and pass it into an assembly construct to 822 prevent the compiler from knowing we do not use the result and 823 optimizing away the multiplication. 824 */ 825 __asm__("" : : "X" (t1*t1)); 826 827 // Return the floating-point number nearest sine(x), which is x. 828 return x; 829 } 830 831 // If |x| is small, no reduction is necessary. 832 else if (Within(xk, BoundPolynomial)) 833 r = x, j0 = 0; 834 /* The interval for this could be enlarged by examining the sine 835 polynomial and figuring out how big r can get before the error is 836 too large. 837 */ 838 839 // If |x| is medium, we can use a fast reduction routine. 840 else if (Within(xk, BoundMedium)) 841 ReduceMedium(&r, &j0, x); 842 843 // Otherwise, we need the full, slow reduction routine. 844 else if (Within(xk, BoundFull)) 845 ReduceFull(&r, &j0, x); 846 847 else 848 return x-x; 849 850 switch (j0) 851 { 852 default: 853 case 0: return +sinp(r); 854 case 1: return +cosp(r); 855 case 2: return -sinp(r); 856 case 3: return -cosp(r); 857 } 858} 859 860 861/* double cos(double x). 862 863 Notes: 864 865 Citations in parentheses below indicate the source of a requirement. 866 867 "C" stands for ISO/IEC 9899:TC2. 868 869 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no 870 requirements since it defers to C and requires errno behavior only if 871 we choose to support it by arranging for "math_errhandling & 872 MATH_ERRNO" to be non-zero, which we do not. 873 874 Return value: 875 876 For +/- infinity, return a NaN (C F.9.1.6). 877 878 For a NaN, return the same NaN (C F.9 11 and 13). 879 880 Otherwise: 881 882 If the rounding mode is round-to-nearest, return cosine(x) within a 883 few ULP. The maximum error of this routine is not precisely 884 known. The maximum error of the reduction might be around 3 ULP, 885 although this is partly a guess. The polynomials have small 886 errors. The polynomial evaluation might have an error under 1 887 ULP. So the worst error for this routine might be under 4 ULP. 888 889 Not currently implemented: In other rounding modes, return 890 cosine(x) possibly with slightly worse error, not necessarily 891 honoring the rounding mode (Ali Sazegari narrowing C F.9 10). 892 893 All results are in [-1, 1]. 894 895 Exceptions: 896 897 Raise underflow for a denormal result (C F.9 7 and Draft Standard for 898 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the 899 smallest normal, underflow may or may not be raised. This is stricter 900 than the older 754 standard. 901 902 May or may not raise inexact, even if the result is exact (C F.9 8). 903 904 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite 905 of C 4.2.1) or an infinity (C F.9.1.5) but not if the input is a quiet 906 NaN (C F.9 11). 907 908 May not raise exceptions otherwise (C F.9 9). 909 910 Properties: 911 912 Desired to be monotonic. Not yet proven! 913*/ 914double cos(double x) 915{ 916 // Get |x| in form used by the bound-check operations for a key. 917 BoundKeyType xk = Key(x); 918 919 double r; 920 int j0; 921 922 // Avoid generating inexact for zero. 923 if (x == 0) 924 return 1; 925 926 /* For small numbers, we can return 1 for cosine(x). Unfortunately, 927 adding this check slows down the other cases, which may be more 928 frequent. It greatly speeds up cases where |x| < 0x1p-492, by 929 avoiding arithmetic with denormals. 930 */ 931 else if (Within(xk, BoundTrivialCos)) 932 { 933 /* Make t0 volatile to force compiler to fetch it at run-time 934 rather than optimizing away the multiplication. 935 */ 936 static volatile const double t0 = 1/3.; 937 938 /* Get t0 once. If we wrote "t0*t0", the compiler would load it 939 twice, since it is volatile. 940 */ 941 const double t1 = t0; 942 943 /* Perform a multiplication and pass it into an assembly construct to 944 prevent the compiler from knowing we do not use the result and 945 optimizing away the multiplication. 946 */ 947 __asm__("" : : "X" (t1*t1)); 948 949 // Return the floating-point number nearest cosine(x), which is 1. 950 return 1; 951 } 952 953 // If |x| is small, no reduction is necessary. 954 else if (Within(xk, BoundPolynomial)) 955 r = x, j0 = 0; 956 /* The interval for this could be enlarged by examining the cosine 957 polynomial and figuring out how big r can get before the error is 958 too large. 959 */ 960 961 // If |x| is medium, we can use a fast reduction routine. 962 else if (Within(xk, BoundMedium)) 963 ReduceMedium(&r, &j0, x); 964 965 // Otherwise, we need the full, slow reduction routine. 966 else if (Within(xk, BoundFull)) 967 ReduceFull(&r, &j0, x); 968 969 else 970 return x-x; 971 972 switch (j0) 973 { 974 default: 975 case 0: return +cosp(r); 976 case 1: return -sinp(r); 977 case 2: return -cosp(r); 978 case 3: return +sinp(r); 979 } 980} 981 982 983/* double tan(double x). 984 985 Notes: 986 987 Citations in parentheses below indicate the source of a requirement. 988 989 "C" stands for ISO/IEC 9899:TC2. 990 991 The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no 992 requirements since it defers to C and requires errno behavior only if 993 we choose to support it by arranging for "math_errhandling & 994 MATH_ERRNO" to be non-zero, which we do not. 995 996 Return value: 997 998 For +/- zero, return zero with same sign (C F.9 12 and F.9.1.7). 999 1000 For +/- infinity, return a NaN (C F.9.1.7). 1001 1002 For a NaN, return the same NaN (C F.9 11 and 13). 1003 1004 Otherwise: 1005 1006 If the rounding mode is round-to-nearest, return tangent(x) within 1007 a few ULP. The maximum error of this routine is not precisely 1008 known. The maximum error of the reduction might be around 3 ULP, 1009 although this is partly a guess. The polynomials have small 1010 errors. The polynomial evaluation might have an error under 1 1011 ULP. How the final division affects error has not been considered 1012 yet. 4.55 ULP has been observed. 1013 1014 Not currently implemented: In other rounding modes, return 1015 tangent(x) possibly with slightly worse error, not necessarily 1016 honoring the rounding mode (Ali Sazegari narrowing C F.9 10). 1017 1018 Exceptions: 1019 1020 Raise underflow for a denormal result (C F.9 7 and Draft Standard for 1021 Floating-Point Arithmetic P754 Draft 1.2.5 9.5). If the input is the 1022 smallest normal, underflow may or may not be raised. This is stricter 1023 than the older 754 standard. 1024 1025 May or may not raise inexact, even if the result is exact (C F.9 8). 1026 1027 Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite 1028 of C 4.2.1) or an infinity (C F.9.1.7) but not if the input is a quiet 1029 NaN (C F.9 11). 1030 1031 May not raise exceptions otherwise (C F.9 9). 1032 1033 Properties: 1034 1035 Desired to be monotonic. Not yet proven! 1036*/ 1037double tan(double x) 1038{ 1039 // Get |x| in form used by the bound-check operations for a key. 1040 BoundKeyType xk = Key(x); 1041 1042 double r; 1043 int j0; 1044 1045 /* Handle denormal numbers and zero here to generate underflow for 1046 denormals numbers and to avoid generating inexact for zero. 1047 */ 1048 if (Within(xk, BoundDenormal)) 1049 return x * (1 - 0x1p-53); 1050 1051 /* For small numbers, we can return x for tangent(x). Unfortunately, 1052 adding this check slows down the other cases, which may be more 1053 frequent. 1054 */ 1055 else if (Within(xk, BoundTrivialTan)) 1056 { 1057 /* Make t0 volatile to force compiler to fetch it at run-time 1058 rather than optimizing away the multiplication. 1059 */ 1060 static volatile const double t0 = 1/3.; 1061 1062 /* Get t0 once. If we wrote "t0*t0", the compiler would load it 1063 twice, since it is volatile. 1064 */ 1065 const double t1 = t0; 1066 1067 /* Perform a multiplication and pass it into an assembly construct to 1068 prevent the compiler from knowing we do not use the result and 1069 optimizing it away. 1070 */ 1071 __asm__("" : : "X" (t1*t1)); 1072 1073 // Return the floating-point number nearest tangent(x), which is x. 1074 return x; 1075 } 1076 1077 // If |x| is small, no reduction is necessary. 1078 else if (Within(xk, BoundPolynomial)) 1079 r = x, j0 = 0; 1080 /* The interval for this could be enlarged by examining the 1081 polynomials and figuring out how big r can get before the error is 1082 too large. 1083 */ 1084 1085 // If |x| is medium, we can use a fast reduction routine. 1086 else if (Within(xk, BoundMedium)) 1087 ReduceMedium(&r, &j0, x); 1088 1089 // Otherwise, we need the full, slow reduction routine. 1090 else if (Within(xk, BoundFull)) 1091 ReduceFull(&r, &j0, x); 1092 1093 else 1094 return x-x; 1095 1096 switch (j0) 1097 { 1098 default: 1099 case 0: case 2: return + sinp(r) / cosp(r); 1100 case 1: case 3: return - cosp(r) / sinp(r); 1101 } 1102}