this repo has no description
at fixPythonPipStalling 843 lines 42 kB view raw
1/* 2 * atanhf.s 3 * 4 * by Stephen Canon 5 * 6 * Copyright (c) 2007, Apple Inc. All rights reserved 7 * 8 * Single precision implementation of C99 math library function atanhf for the OS X i386 and x86_64 abis. 9 * 10 */ 11 12#include <machine/asm.h> 13#include "abi.h" 14 15.const 16.align 4 17// Polynomial coefficients used for the 7/8 < |x| < 1 case. These are addressed by offset from big_table, 18// so take care if you move things around. 19 20 .quad 0x3fc00400e807c4f5, 0xbf82304e7c64d56d // c4 21 .quad 0xbff5533310026652, 0x4002a85c54ffbc1c // c3/c4 22 .quad 0x3fcfffffaa90af21, 0xbfb0005bce0fa090 // c2 23 .quad 0xc0000000244fd260, 0x400fff47b17904a4 // c1/c2 24 25/********************************************************* 26void gen_big_table() { 27 union { double d; uint64_t x; } constant; 28 29 int i,p,q; 30 for (i=0; i<256; ++i) { 31 double a = (double)i / 256.0; 32 constant.d = -0.5*log1p(a); printf("\t\t\t\t.quad 0x%016llx,", constant.x); 33 constant.d = 1.0 / (1.0 + a); printf(" 0x%016llx", constant.x); 34 if (!i) { 35 printf("\t// a = 0\n"); 36 } else { 37 p = i; q = 256; 38 while (!(p % 2)) { 39 p = p/2; q = q/2; 40 } 41 printf("\t// a = %d/%d\n", p, q); 42 } 43 } 44} 45*********************************************************/ 46 47 // -0.5*log1p(a) 1/(1+a) 48big_table: .quad 0x8000000000000000, 0x3ff0000000000000 // a = 0 49 .quad 0xbf5ff00aa2b10bc0, 0x3fefe01fe01fe020 // a = 1/256 50 .quad 0xbf6fe02a6b106789, 0x3fefc07f01fc07f0 // a = 1/128 51 .quad 0xbf77dc475f810a77, 0x3fefa11caa01fa12 // a = 3/256 52 .quad 0xbf7fc0a8b0fc03e4, 0x3fef81f81f81f820 // a = 1/64 53 .quad 0xbf83cea44346a575, 0x3fef6310aca0dbb5 // a = 5/256 54 .quad 0xbf87b91b07d5b11b, 0x3fef44659e4a4271 // a = 3/128 55 .quad 0xbf8b9fc027af9198, 0x3fef25f644230ab5 // a = 7/256 56 .quad 0xbf8f829b0e783300, 0x3fef07c1f07c1f08 // a = 1/32 57 .quad 0xbf91b0d98923d980, 0x3feee9c7f8458e02 // a = 9/256 58 .quad 0xbf939e87b9febd60, 0x3feecc07b301ecc0 // a = 5/128 59 .quad 0xbf958a5bafc8e4d5, 0x3feeae807aba01eb // a = 11/256 60 .quad 0xbf977458f632dcfc, 0x3fee9131abf0b767 // a = 3/64 61 .quad 0xbf995c830ec8e3eb, 0x3fee741aa59750e4 // a = 13/256 62 .quad 0xbf9b42dd711971bf, 0x3fee573ac901e574 // a = 7/128 63 .quad 0xbf9d276b8adb0b52, 0x3fee3a9179dc1a73 // a = 15/256 64 .quad 0xbf9f0a30c01162a6, 0x3fee1e1e1e1e1e1e // a = 1/16 65 .quad 0xbfa075983598e471, 0x3fee01e01e01e01e // a = 17/256 66 .quad 0xbfa16536eea37ae1, 0x3fede5d6e3f8868a // a = 9/128 67 .quad 0xbfa253f62f0a1417, 0x3fedca01dca01dca // a = 19/256 68 .quad 0xbfa341d7961bd1d1, 0x3fedae6076b981db // a = 5/64 69 .quad 0xbfa42edcbea646f0, 0x3fed92f2231e7f8a // a = 21/256 70 .quad 0xbfa51b073f06183f, 0x3fed77b654b82c34 // a = 11/128 71 .quad 0xbfa60658a93750c4, 0x3fed5cac807572b2 // a = 23/256 72 .quad 0xbfa6f0d28ae56b4c, 0x3fed41d41d41d41d // a = 3/32 73 .quad 0xbfa7da766d7b12cc, 0x3fed272ca3fc5b1a // a = 25/256 74 .quad 0xbfa8c345d6319b21, 0x3fed0cb58f6ec074 // a = 13/128 75 .quad 0xbfa9ab42462033ad, 0x3fecf26e5c44bfc6 // a = 27/256 76 .quad 0xbfaa926d3a4ad563, 0x3fecd85689039b0b // a = 7/64 77 .quad 0xbfab78c82bb0eda1, 0x3fecbe6d9601cbe7 // a = 29/256 78 .quad 0xbfac5e548f5bc743, 0x3feca4b3055ee191 // a = 15/128 79 .quad 0xbfad4313d66cb35d, 0x3fec8b265afb8a42 // a = 31/256 80 .quad 0xbfae27076e2af2e6, 0x3fec71c71c71c71c // a = 1/8 81 .quad 0xbfaf0a30c01162a6, 0x3fec5894d10d4986 // a = 33/256 82 .quad 0xbfafec9131dbeabb, 0x3fec3f8f01c3f8f0 // a = 17/128 83 .quad 0xbfb0671512ca596e, 0x3fec26b5392ea01c // a = 35/256 84 .quad 0xbfb0d77e7cd08e59, 0x3fec0e070381c0e0 // a = 9/64 85 .quad 0xbfb14785846742ac, 0x3febf583ee868d8b // a = 37/256 86 .quad 0xbfb1b72ad52f67a0, 0x3febdd2b899406f7 // a = 19/128 87 .quad 0xbfb2266f190a5acc, 0x3febc4fd65883e7b // a = 39/256 88 .quad 0xbfb29552f81ff523, 0x3febacf914c1bad0 // a = 5/32 89 .quad 0xbfb303d718e47fd3, 0x3feb951e2b18ff23 // a = 41/256 90 .quad 0xbfb371fc201e8f74, 0x3feb7d6c3dda338b // a = 21/128 91 .quad 0xbfb3dfc2b0ecc62a, 0x3feb65e2e3beee05 // a = 43/256 92 .quad 0xbfb44d2b6ccb7d1e, 0x3feb4e81b4e81b4f // a = 11/64 93 .quad 0xbfb4ba36f39a55e5, 0x3feb37484ad806ce // a = 45/256 94 .quad 0xbfb526e5e3a1b438, 0x3feb2036406c80d9 // a = 23/128 95 .quad 0xbfb59338d9982086, 0x3feb094b31d922a4 // a = 47/256 96 .quad 0xbfb5ff3070a793d4, 0x3feaf286bca1af28 // a = 3/16 97 .quad 0xbfb66acd4272ad51, 0x3feadbe87f94905e // a = 49/256 98 .quad 0xbfb6d60fe719d21d, 0x3feac5701ac5701b // a = 25/128 99 .quad 0xbfb740f8f54037a5, 0x3feaaf1d2f87ebfd // a = 51/256 100 .quad 0xbfb7ab890210d909, 0x3fea98ef606a63be // a = 13/64 101 .quad 0xbfb815c0a14357eb, 0x3fea82e65130e159 // a = 53/256 102 .quad 0xbfb87fa06520c911, 0x3fea6d01a6d01a6d // a = 27/128 103 .quad 0xbfb8e928de886d41, 0x3fea574107688a4a // a = 55/256 104 .quad 0xbfb9525a9cf456b4, 0x3fea41a41a41a41a // a = 7/32 105 .quad 0xbfb9bb362e7dfb83, 0x3fea2c2a87c51ca0 // a = 57/256 106 .quad 0xbfba23bc1fe2b563, 0x3fea16d3f97a4b02 // a = 29/128 107 .quad 0xbfba8becfc882f19, 0x3fea01a01a01a01a // a = 59/256 108 .quad 0xbfbaf3c94e80bff3, 0x3fe9ec8e951033d9 // a = 15/64 109 .quad 0xbfbb5b519e8fb5a4, 0x3fe9d79f176b682d // a = 61/256 110 .quad 0xbfbbc286742d8cd6, 0x3fe9c2d14ee4a102 // a = 31/128 111 .quad 0xbfbc2968558c18c1, 0x3fe9ae24ea5510da // a = 63/256 112 .quad 0xbfbc8ff7c79a9a22, 0x3fe999999999999a // a = 1/4 113 .quad 0xbfbcf6354e09c5dc, 0x3fe9852f0d8ec0ff // a = 65/256 114 .quad 0xbfbd5c216b4fbb91, 0x3fe970e4f80cb872 // a = 33/128 115 .quad 0xbfbdc1bca0abec7d, 0x3fe95cbb0be377ae // a = 67/256 116 .quad 0xbfbe27076e2af2e6, 0x3fe948b0fcd6e9e0 // a = 17/64 117 .quad 0xbfbe8c0252aa5a60, 0x3fe934c67f9b2ce6 // a = 69/256 118 .quad 0xbfbef0adcbdc5936, 0x3fe920fb49d0e229 // a = 35/128 119 .quad 0xbfbf550a564b7b37, 0x3fe90d4f120190d5 // a = 71/256 120 .quad 0xbfbfb9186d5e3e2b, 0x3fe8f9c18f9c18fa // a = 9/32 121 .quad 0xbfc00e6c45ad501d, 0x3fe8e6527af1373f // a = 73/256 122 .quad 0xbfc0402594b4d041, 0x3fe8d3018d3018d3 // a = 37/128 123 .quad 0xbfc071b85fcd590d, 0x3fe8bfce8062ff3a // a = 75/256 124 .quad 0xbfc0a324e27390e3, 0x3fe8acb90f6bf3aa // a = 19/64 125 .quad 0xbfc0d46b579ab74b, 0x3fe899c0f601899c // a = 77/256 126 .quad 0xbfc1058bf9ae4ad5, 0x3fe886e5f0abb04a // a = 39/128 127 .quad 0xbfc136870293a8b0, 0x3fe87427bcc092b9 // a = 79/256 128 .quad 0xbfc1675cababa60e, 0x3fe8618618618618 // a = 5/16 129 .quad 0xbfc1980d2dd4236f, 0x3fe84f00c2780614 // a = 81/256 130 .quad 0xbfc1c898c16999fb, 0x3fe83c977ab2bedd // a = 41/128 131 .quad 0xbfc1f8ff9e48a2f3, 0x3fe82a4a0182a4a0 // a = 83/256 132 .quad 0xbfc22941fbcf7966, 0x3fe8181818181818 // a = 21/64 133 .quad 0xbfc2596010df763a, 0x3fe8060180601806 // a = 85/256 134 .quad 0xbfc2895a13de86a3, 0x3fe7f405fd017f40 // a = 43/128 135 .quad 0xbfc2b9303ab89d25, 0x3fe7e225515a4f1d // a = 87/256 136 .quad 0xbfc2e8e2bae11d31, 0x3fe7d05f417d05f4 // a = 11/32 137 .quad 0xbfc31871c9544185, 0x3fe7beb3922e017c // a = 89/256 138 .quad 0xbfc347dd9a987d55, 0x3fe7ad2208e0ecc3 // a = 45/128 139 .quad 0xbfc3772662bfd85b, 0x3fe79baa6bb6398b // a = 91/256 140 .quad 0xbfc3a64c556945ea, 0x3fe78a4c8178a4c8 // a = 23/64 141 .quad 0xbfc3d54fa5c1f710, 0x3fe77908119ac60d // a = 93/256 142 .quad 0xbfc404308686a7e4, 0x3fe767dce434a9b1 // a = 47/128 143 .quad 0xbfc432ef2a04e814, 0x3fe756cac201756d // a = 95/256 144 .quad 0xbfc4618bc21c5ec2, 0x3fe745d1745d1746 // a = 3/8 145 .quad 0xbfc49006804009d1, 0x3fe734f0c541fe8d // a = 97/256 146 .quad 0xbfc4be5f957778a1, 0x3fe724287f46debc // a = 49/128 147 .quad 0xbfc4ec973260026a, 0x3fe713786d9c7c09 // a = 99/256 148 .quad 0xbfc51aad872df82d, 0x3fe702e05c0b8170 // a = 25/64 149 .quad 0xbfc548a2c3add263, 0x3fe6f26016f26017 // a = 101/256 150 .quad 0xbfc5767717455a6c, 0x3fe6e1f76b4337c7 // a = 51/128 151 .quad 0xbfc5a42ab0f4cfe2, 0x3fe6d1a62681c861 // a = 103/256 152 .quad 0xbfc5d1bdbf5809ca, 0x3fe6c16c16c16c17 // a = 13/32 153 .quad 0xbfc5ff3070a793d4, 0x3fe6b1490aa31a3d // a = 105/256 154 .quad 0xbfc62c82f2b9c795, 0x3fe6a13cd1537290 // a = 53/128 155 .quad 0xbfc659b57303e1f3, 0x3fe691473a88d0c0 // a = 107/256 156 .quad 0xbfc686c81e9b14af, 0x3fe6816816816817 // a = 27/64 157 .quad 0xbfc6b3bb2235943e, 0x3fe6719f3601671a // a = 109/256 158 .quad 0xbfc6e08eaa2ba1e4, 0x3fe661ec6a5122f9 // a = 55/128 159 .quad 0xbfc70d42e2789236, 0x3fe6524f853b4aa3 // a = 111/256 160 .quad 0xbfc739d7f6bbd007, 0x3fe642c8590b2164 // a = 7/16 161 .quad 0xbfc7664e1239dbcf, 0x3fe63356b88ac0de // a = 113/256 162 .quad 0xbfc792a55fdd47a2, 0x3fe623fa77016240 // a = 57/128 163 .quad 0xbfc7bede0a37afc0, 0x3fe614b36831ae94 // a = 115/256 164 .quad 0xbfc7eaf83b82afc3, 0x3fe6058160581606 // a = 29/64 165 .quad 0xbfc816f41da0d496, 0x3fe5f66434292dfc // a = 117/256 166 .quad 0xbfc842d1da1e8b17, 0x3fe5e75bb8d015e7 // a = 59/128 167 .quad 0xbfc86e919a330ba0, 0x3fe5d867c3ece2a5 // a = 119/256 168 .quad 0xbfc89a3386c1425b, 0x3fe5c9882b931057 // a = 15/32 169 .quad 0xbfc8c5b7c858b48b, 0x3fe5babcc647fa91 // a = 121/256 170 .quad 0xbfc8f11e873662c7, 0x3fe5ac056b015ac0 // a = 61/128 171 .quad 0xbfc91c67eb45a83e, 0x3fe59d61f123ccaa // a = 123/256 172 .quad 0xbfc947941c2116fb, 0x3fe58ed2308158ed // a = 31/64 173 .quad 0xbfc972a341135158, 0x3fe5805601580560 // a = 125/256 174 .quad 0xbfc99d958117e08b, 0x3fe571ed3c506b3a // a = 63/128 175 .quad 0xbfc9c86b02dc0863, 0x3fe56397ba7c52e2 // a = 127/256 176 .quad 0xbfc9f323ecbf984c, 0x3fe5555555555555 // a = 1/2 177 .quad 0xbfca1dc064d5b995, 0x3fe54725e6bb82fe // a = 129/256 178 .quad 0xbfca484090e5bb0a, 0x3fe5390948f40feb // a = 65/128 179 .quad 0xbfca72a4966bd9ea, 0x3fe52aff56a8054b // a = 131/256 180 .quad 0xbfca9cec9a9a084a, 0x3fe51d07eae2f815 // a = 33/64 181 .quad 0xbfcac718c258b0e4, 0x3fe50f22e111c4c5 // a = 133/256 182 .quad 0xbfcaf1293247786b, 0x3fe5015015015015 // a = 67/128 183 .quad 0xbfcb1b1e0ebdfc5b, 0x3fe4f38f62dd4c9b // a = 135/256 184 .quad 0xbfcb44f77bcc8f63, 0x3fe4e5e0a72f0539 // a = 17/32 185 .quad 0xbfcb6eb59d3cf35e, 0x3fe4d843bedc2c4c // a = 137/256 186 .quad 0xbfcb9858969310fb, 0x3fe4cab88725af6e // a = 69/128 187 .quad 0xbfcbc1e08b0dad0a, 0x3fe4bd3edda68fe1 // a = 139/256 188 .quad 0xbfcbeb4d9da71b7c, 0x3fe4afd6a052bf5b // a = 35/64 189 .quad 0xbfcc149ff115f027, 0x3fe4a27fad76014a // a = 141/256 190 .quad 0xbfcc3dd7a7cdad4d, 0x3fe49539e3b2d067 // a = 71/128 191 .quad 0xbfcc66f4e3ff6ff8, 0x3fe4880522014880 // a = 143/256 192 .quad 0xbfcc8ff7c79a9a22, 0x3fe47ae147ae147b // a = 9/16 193 .quad 0xbfccb8e0744d7aca, 0x3fe46dce34596066 // a = 145/256 194 .quad 0xbfcce1af0b85f3eb, 0x3fe460cbc7f5cf9a // a = 73/128 195 .quad 0xbfcd0a63ae721e64, 0x3fe453d9e2c776ca // a = 147/256 196 .quad 0xbfcd32fe7e00ebd5, 0x3fe446f86562d9fb // a = 37/64 197 .quad 0xbfcd5b7f9ae2c684, 0x3fe43a2730abee4d // a = 149/256 198 .quad 0xbfcd83e7258a2f3e, 0x3fe42d6625d51f87 // a = 75/128 199 .quad 0xbfcdac353e2c5954, 0x3fe420b5265e5951 // a = 151/256 200 .quad 0xbfcdd46a04c1c4a1, 0x3fe4141414141414 // a = 19/32 201 .quad 0xbfcdfc859906d5b5, 0x3fe40782d10e6566 // a = 153/256 202 .quad 0xbfce24881a7c6c26, 0x3fe3fb013fb013fb // a = 77/128 203 .quad 0xbfce4c71a8687704, 0x3fe3ee8f42a5af07 // a = 155/256 204 .quad 0xbfce744261d68788, 0x3fe3e22cbce4a902 // a = 39/64 205 .quad 0xbfce9bfa659861f5, 0x3fe3d5d991aa75c6 // a = 157/256 206 .quad 0xbfcec399d2468cc0, 0x3fe3c995a47babe7 // a = 79/128 207 .quad 0xbfceeb20c640ddf4, 0x3fe3bd60d9232955 // a = 159/256 208 .quad 0xbfcf128f5faf06ed, 0x3fe3b13b13b13b14 // a = 5/8 209 .quad 0xbfcf39e5bc811e5c, 0x3fe3a524387ac822 // a = 161/256 210 .quad 0xbfcf6123fa7028ac, 0x3fe3991c2c187f63 // a = 81/128 211 .quad 0xbfcf884a36fe9ec2, 0x3fe38d22d366088e // a = 163/256 212 .quad 0xbfcfaf588f78f31f, 0x3fe3813813813814 // a = 41/64 213 .quad 0xbfcfd64f20f61572, 0x3fe3755bd1c945ee // a = 165/256 214 .quad 0xbfcffd2e0857f498, 0x3fe3698df3de0748 // a = 83/128 215 .quad 0xbfd011fab125ff8a, 0x3fe35dce5f9f2af8 // a = 167/256 216 .quad 0xbfd02552a5a5d0ff, 0x3fe3521cfb2b78c1 // a = 21/32 217 .quad 0xbfd0389eefce633b, 0x3fe34679ace01346 // a = 169/256 218 .quad 0xbfd04bdf9da926d2, 0x3fe33ae45b57bcb2 // a = 85/128 219 .quad 0xbfd05f14bd26459c, 0x3fe32f5ced6a1dfa // a = 171/256 220 .quad 0xbfd0723e5c1cdf40, 0x3fe323e34a2b10bf // a = 43/64 221 .quad 0xbfd0855c884b450e, 0x3fe3187758e9ebb6 // a = 173/256 222 .quad 0xbfd0986f4f573521, 0x3fe30d190130d190 // a = 87/128 223 .quad 0xbfd0ab76bece14d2, 0x3fe301c82ac40260 // a = 175/256 224 .quad 0xbfd0be72e4252a83, 0x3fe2f684bda12f68 // a = 11/16 225 .quad 0xbfd0d163ccb9d6b8, 0x3fe2eb4ea1fed14b // a = 177/256 226 .quad 0xbfd0e44985d1cc8c, 0x3fe2e025c04b8097 // a = 89/128 227 .quad 0xbfd0f7241c9b497d, 0x3fe2d50a012d50a0 // a = 179/256 228 .quad 0xbfd109f39e2d4c97, 0x3fe2c9fb4d812ca0 // a = 45/64 229 .quad 0xbfd11cb81787ccf8, 0x3fe2bef98e5a3711 // a = 181/256 230 .quad 0xbfd12f719593efbc, 0x3fe2b404ad012b40 // a = 91/128 231 .quad 0xbfd1422025243d45, 0x3fe2a91c92f3c105 // a = 183/256 232 .quad 0xbfd154c3d2f4d5ea, 0x3fe29e4129e4129e // a = 23/32 233 .quad 0xbfd1675cababa60e, 0x3fe293725bb804a5 // a = 185/256 234 .quad 0xbfd179eabbd899a1, 0x3fe288b01288b013 // a = 93/128 235 .quad 0xbfd18c6e0ff5cf06, 0x3fe27dfa38a1ce4d // a = 187/256 236 .quad 0xbfd19ee6b467c96f, 0x3fe27350b8812735 // a = 47/64 237 .quad 0xbfd1b154b57da29f, 0x3fe268b37cd60127 // a = 189/256 238 .quad 0xbfd1c3b81f713c25, 0x3fe25e22708092f1 // a = 95/128 239 .quad 0xbfd1d610fe677003, 0x3fe2539d7e9177b2 // a = 191/256 240 .quad 0xbfd1e85f5e7040d0, 0x3fe2492492492492 // a = 3/4 241 .quad 0xbfd1faa34b87094c, 0x3fe23eb79717605b // a = 193/256 242 .quad 0xbfd20cdcd192ab6e, 0x3fe23456789abcdf // a = 97/128 243 .quad 0xbfd21f0bfc65beec, 0x3fe22a0122a0122a // a = 195/256 244 .quad 0xbfd23130d7bebf43, 0x3fe21fb78121fb78 // a = 49/64 245 .quad 0xbfd2434b6f483934, 0x3fe21579804855e6 // a = 197/256 246 .quad 0xbfd2555bce98f7cb, 0x3fe20b470c67c0d9 // a = 99/128 247 .quad 0xbfd26762013430e0, 0x3fe2012012012012 // a = 199/256 248 .quad 0xbfd2795e1289b11b, 0x3fe1f7047dc11f70 // a = 25/32 249 .quad 0xbfd28b500df60783, 0x3fe1ecf43c7fb84c // a = 201/256 250 .quad 0xbfd29d37fec2b08b, 0x3fe1e2ef3b3fb874 // a = 101/128 251 .quad 0xbfd2af15f02640ad, 0x3fe1d8f5672e4abd // a = 203/256 252 .quad 0xbfd2c0e9ed448e8c, 0x3fe1cf06ada2811d // a = 51/64 253 .quad 0xbfd2d2b4012edc9e, 0x3fe1c522fc1ce059 // a = 205/256 254 .quad 0xbfd2e47436e40268, 0x3fe1bb4a4046ed29 // a = 103/128 255 .quad 0xbfd2f62a99509546, 0x3fe1b17c67f2bae3 // a = 207/256 256 .quad 0xbfd307d7334f10be, 0x3fe1a7b9611a7b96 // a = 13/16 257 .quad 0xbfd3197a0fa7fe6a, 0x3fe19e0119e0119e // a = 209/256 258 .quad 0xbfd32b1339121d71, 0x3fe19453808ca29c // a = 105/128 259 .quad 0xbfd33ca2ba328995, 0x3fe18ab083902bdb // a = 211/256 260 .quad 0xbfd34e289d9ce1d3, 0x3fe1811811811812 // a = 53/64 261 .quad 0xbfd35fa4edd36ea0, 0x3fe1778a191bd684 // a = 213/256 262 .quad 0xbfd37117b54747b6, 0x3fe16e0689427379 // a = 107/128 263 .quad 0xbfd38280fe58797f, 0x3fe1648d50fc3201 // a = 215/256 264 .quad 0xbfd393e0d3562a1a, 0x3fe15b1e5f75270d // a = 27/32 265 .quad 0xbfd3a5373e7ebdfa, 0x3fe151b9a3fdd5c9 // a = 217/256 266 .quad 0xbfd3b68449fffc23, 0x3fe1485f0e0acd3b // a = 109/128 267 .quad 0xbfd3c7c7fff73206, 0x3fe13f0e8d344724 // a = 219/256 268 .quad 0xbfd3d9026a7156fb, 0x3fe135c81135c811 // a = 55/64 269 .quad 0xbfd3ea33936b2f5c, 0x3fe12c8b89edc0ac // a = 221/256 270 .quad 0xbfd3fb5b84d16f42, 0x3fe12358e75d3033 // a = 111/128 271 .quad 0xbfd40c7a4880dce9, 0x3fe11a3019a74826 // a = 223/256 272 .quad 0xbfd41d8fe84672ae, 0x3fe1111111111111 // a = 7/8 273 .quad 0xbfd42e9c6ddf80bf, 0x3fe107fbbe011080 // a = 225/256 274 .quad 0xbfd43f9fe2f9ce67, 0x3fe0fef010fef011 // a = 113/128 275 .quad 0xbfd4509a5133bb0a, 0x3fe0f5edfab325a2 // a = 227/256 276 .quad 0xbfd4618bc21c5ec2, 0x3fe0ecf56be69c90 // a = 57/64 277 .quad 0xbfd472743f33aaad, 0x3fe0e40655826011 // a = 229/256 278 .quad 0xbfd48353d1ea88df, 0x3fe0db20a88f4696 // a = 115/128 279 .quad 0xbfd4942a83a2fc07, 0x3fe0d24456359e3a // a = 231/256 280 .quad 0xbfd4a4f85db03ebb, 0x3fe0c9714fbcda3b // a = 29/32 281 .quad 0xbfd4b5bd6956e274, 0x3fe0c0a7868b4171 // a = 233/256 282 .quad 0xbfd4c679afccee3a, 0x3fe0b7e6ec259dc8 // a = 117/128 283 .quad 0xbfd4d72d3a39fd00, 0x3fe0af2f722eecb5 // a = 235/256 284 .quad 0xbfd4e7d811b75bb1, 0x3fe0a6810a6810a7 // a = 59/64 285 .quad 0xbfd4f87a3f5026e9, 0x3fe09ddba6af8360 // a = 237/256 286 .quad 0xbfd50913cc01686b, 0x3fe0953f39010954 // a = 119/128 287 .quad 0xbfd519a4c0ba3446, 0x3fe08cabb37565e2 // a = 239/256 288 .quad 0xbfd52a2d265bc5ab, 0x3fe0842108421084 // a = 15/16 289 .quad 0xbfd53aad05b99b7d, 0x3fe07b9f29b8eae2 // a = 241/256 290 .quad 0xbfd54b2467999498, 0x3fe073260a47f7c6 // a = 121/128 291 .quad 0xbfd55b9354b40bcd, 0x3fe06ab59c7912fb // a = 243/256 292 .quad 0xbfd56bf9d5b3f399, 0x3fe0624dd2f1a9fc // a = 61/64 293 .quad 0xbfd57c57f336f191, 0x3fe059eea0727586 // a = 245/256 294 .quad 0xbfd58cadb5cd7989, 0x3fe05197f7d73404 // a = 123/128 295 .quad 0xbfd59cfb25fae87e, 0x3fe04949cc1664c5 // a = 247/256 296 .quad 0xbfd5ad404c359f2d, 0x3fe0410410410410 // a = 31/32 297 .quad 0xbfd5bd7d30e71c73, 0x3fe038c6b78247fc // a = 249/256 298 .quad 0xbfd5cdb1dc6c1765, 0x3fe03091b51f5e1a // a = 125/128 299 .quad 0xbfd5ddde57149923, 0x3fe02864fc7729e9 // a = 251/256 300 .quad 0xbfd5ee02a9241675, 0x3fe0204081020408 // a = 63/64 301 .quad 0xbfd5fe1edad18919, 0x3fe0182436517a37 // a = 253/256 302 .quad 0xbfd60e32f44788d9, 0x3fe0101010101010 // a = 127/128 303 .quad 0xbfd61e3efda46467, 0x3fe0080402010080 // a = 255/256 304 305 306 307/******************************************************** 308void gen_small_table() { 309 union { double d; uint64_t x; } constant; 310 311 int i,p,q; 312 for (i=0; i<256; ++i) { 313 double a = (double)i / 256.0; 314 constant.d = atanh(a); printf("\t\t\t\t.quad 0x%016llx,", constant.x); 315 constant.d = 1.0 / (1.0 - a*a); printf(" 0x%016llx", constant.x); 316 if (!i) { 317 printf("\t// a = 0\n"); 318 } else { 319 p = i; q = 256; 320 while (!(p % 2)) { 321 p = p/2; q = q/2; 322 } 323 printf("\t// a = %d/%d\n", p, q); 324 } 325 } 326} 327*********************************************************/ 328 329 // atanh(hi) 1/(1-hi^2) 330small_table: .quad 0x0000000000000000, 0x3ff0000000000000 // a = 0 331 .quad 0x3f7000055558888b, 0x3ff0001000100010 // a = 1/256 332 .quad 0x3f8000155588891b, 0x3ff0004001000400 // a = 1/128 333 .quad 0x3f8800480184d690, 0x3ff0009005102d92 // a = 3/256 334 .quad 0x3f9000555888ad1c, 0x3ff0010010010010 // a = 1/64 335 .quad 0x3f9400a6b46f591b, 0x3ff001902713d0ef // a = 5/256 336 .quad 0x3f980120184f3ded, 0x3ff00240510b659a // a = 3/128 337 .quad 0x3f9c01c989e21e45, 0x3ff00310962cbe90 // a = 7/256 338 .quad 0x3fa001558891aee2, 0x3ff0040100401004 // a = 1/32 339 .quad 0x3fa201e65c5878df, 0x3ff005119a91e82a // a = 9/256 340 .quad 0x3fa4029b471650a5, 0x3ff0064271f48383 // a = 5/128 341 .quad 0x3fa60378514ed016, 0x3ff0079394c14f5f // a = 11/256 342 .quad 0x3fa8048185694820, 0x3ff0090512da9af7 // a = 3/64 343 .quad 0x3faa05baefe1fa74, 0x3ff00a96fdad7784 // a = 13/256 344 .quad 0x3fac07289f7b9245, 0x3ff00c496833c7a5 // a = 7/128 345 .quad 0x3fae08cea570e1e7, 0x3ff00e1c66f67ea5 // a = 15/256 346 .quad 0x3fb005588ad375ad, 0x3ff0101010101010 // a = 1/16 347 .quad 0x3fb1066a036f9cec, 0x3ff012247b2f1021 // a = 17/256 348 .quad 0x3fb2079dc9754942, 0x3ff01459c19905ab // a = 9/128 349 .quad 0x3fb308f5eb6e013e, 0x3ff016affe2d6e10 // a = 19/256 350 .quad 0x3fb40a74799e283e, 0x3ff019274d68f3fd // a = 5/64 351 .quad 0x3fb50c1b861eec5e, 0x3ff01bbfcd68d99f // a = 21/256 352 .quad 0x3fb60ded24f86c90, 0x3ff01e799dee9716 // a = 11/128 353 .quad 0x3fb70feb6c3c1a5a, 0x3ff02154e063adfb // a = 23/256 354 .quad 0x3fb81218741f5a6a, 0x3ff02451b7ddb2d2 // a = 3/32 355 .quad 0x3fb9147657166782, 0x3ff0277049228d5b // a = 25/256 356 .quad 0x3fba170731ef7b1e, 0x3ff02ab0baacf0ac // a = 13/128 357 .quad 0x3fbb19cd23ee3f68, 0x3ff02e1334b10c2c // a = 27/256 358 .quad 0x3fbc1cca4ee78e02, 0x3ff03197e121767b // a = 7/64 359 .quad 0x3fbd2000d75d7f60, 0x3ff0353eebb45366 // a = 29/256 360 .quad 0x3fbe2372e49bce55, 0x3ff0390881e8b620 // a = 15/128 361 .quad 0x3fbf2722a0d493b5, 0x3ff03cf4d30c4100 // a = 31/256 362 .quad 0x3fc015891c9eaef7, 0x3ff0410410410410 // a = 1/8 363 .quad 0x3fc097a1ef16543f, 0x3ff045366c839bda // a = 33/256 364 .quad 0x3fc119dce19bdbaf, 0x3ff0498c1cb191d7 // a = 17/128 365 .quad 0x3fc19c3b0fa86d54, 0x3ff04e0557900010 // a = 35/256 366 .quad 0x3fc21ebd96730f38, 0x3ff052a255d27987 // a = 9/64 367 .quad 0x3fc2a165950035bc, 0x3ff0576352223903 // a = 37/256 368 .quad 0x3fc324342c318e81, 0x3ff05c488925980e // a = 19/128 369 .quad 0x3fc3a72a7ed6082c, 0x3ff061523987cfea // a = 39/256 370 .quad 0x3fc42a49b1ba196a, 0x3ff06680a4010668 // a = 5/32 371 .quad 0x3fc4ad92ebb84987, 0x3ff06bd40b5ea891 // a = 41/256 372 .quad 0x3fc5310755c9fd18, 0x3ff0714cb48c1542 // a = 21/128 373 .quad 0x3fc5b4a81b18894f, 0x3ff076eae69b99da // a = 43/256 374 .quad 0x3fc63876690e9070, 0x3ff07caeeacfc334 // a = 11/64 375 .quad 0x3fc6bc736f69aa39, 0x3ff082990ca50557 // a = 45/256 376 .quad 0x3fc740a0604c5adb, 0x3ff088a999dbbc40 // a = 23/128 377 .quad 0x3fc7c4fe70505b75, 0x3ff08ee0e282885b // a = 47/256 378 .quad 0x3fc8498ed69936dc, 0x3ff0953f39010954 // a = 3/16 379 .quad 0x3fc8ce52cce73dc7, 0x3ff09bc4f222fa0b // a = 49/256 380 .quad 0x3fc9534b8faad565, 0x3ff0a2726523b088 // a = 25/128 381 .quad 0x3fc9d87a5e18238b, 0x3ff0a947ebba04fd // a = 51/256 382 .quad 0x3fca5de07a3b1bc2, 0x3ff0b045e224a2f9 // a = 13/64 383 .quad 0x3fcae37f290bf096, 0x3ff0b76ca736c81a // a = 53/256 384 .quad 0x3fcb6957b283ec92, 0x3ff0bebc9c657399 // a = 27/128 385 .quad 0x3fcbef6b61b2b693, 0x3ff0c63625d50a60 // a = 55/256 386 .quad 0x3fcc75bb84d40518, 0x3ff0cdd9aa677344 // a = 7/32 387 .quad 0x3fccfc496d65c453, 0x3ff0d5a793caaf5c // a = 57/256 388 .quad 0x3fcd8316703eb314, 0x3ff0dda04e87f26e // a = 29/128 389 .quad 0x3fce0a23e5a57a70, 0x3ff0e5c44a133fbe // a = 59/256 390 .quad 0x3fce917329684475, 0x3ff0ee13f8db8f93 // a = 15/64 391 .quad 0x3fcf19059af4d646, 0x3ff0f68fd05b8216 // a = 61/256 392 .quad 0x3fcfa0dc9d7131ff, 0x3ff0ff38492aa44b // a = 31/128 393 .quad 0x3fd0147ccbea629a, 0x3ff1080ddf0f4c2c // a = 63/256 394 .quad 0x3fd058aefa811451, 0x3ff1111111111111 // a = 1/4 395 .quad 0x3fd09d0591f0bb21, 0x3ff11a42618be5dd // a = 65/256 396 .quad 0x3fd0e1814bbd9d56, 0x3ff123a25643da93 // a = 33/128 397 .quad 0x3fd12622e38a03ab, 0x3ff12d3178798b4c // a = 67/256 398 .quad 0x3fd16aeb1724557c, 0x3ff136f054ff42a4 // a = 17/64 399 .quad 0x3fd1afdaa6958afb, 0x3ff140df7c4ed62d // a = 69/256 400 .quad 0x3fd1f4f2542ff85b, 0x3ff14aff82a0438d // a = 35/128 401 .quad 0x3fd23a32e49e74ec, 0x3ff1555100011555 // a = 71/256 402 .quad 0x3fd27f9d1ef3e177, 0x3ff15fd4906c96f1 // a = 9/32 403 .quad 0x3fd2c531ccbb110c, 0x3ff16a8ad3e4df4c // a = 73/256 404 .quad 0x3fd30af1ba0717b8, 0x3ff175746e8cba40 // a = 37/128 405 .quad 0x3fd350ddb58402ab, 0x3ff1809208c27917 // a = 75/256 406 .quad 0x3fd396f69087fd7c, 0x3ff18be44f3bb2f6 // a = 19/64 407 .quad 0x3fd3dd3d1f24e85f, 0x3ff1976bf321fe4a // a = 77/256 408 .quad 0x3fd423b2383a6343, 0x3ff1a329aa30acca // a = 39/128 409 .quad 0x3fd46a56b58851f7, 0x3ff1af1e2ed3940c // a = 79/256 410 .quad 0x3fd4b12b73c1dd95, 0x3ff1bb4a4046ed29 // a = 5/16 411 .quad 0x3fd4f83152a0f7b5, 0x3ff1c7aea2b8565d // a = 81/256 412 .quad 0x3fd53f6934fa63f8, 0x3ff1d44c1f69021b // a = 41/128 413 .quad 0x3fd586d400d24cbe, 0x3ff1e12384d11f8a // a = 83/256 414 .quad 0x3fd5ce729f71680a, 0x3ff1ee35a6c48900 // a = 21/64 415 .quad 0x3fd61645fd7ab1ba, 0x3ff1fb835e98c5a2 // a = 85/256 416 .quad 0x3fd65e4f0b01c08e, 0x3ff2090d8b4c6bdc // a = 43/128 417 .quad 0x3fd6a68ebba1bb84, 0x3ff216d511aff336 // a = 87/256 418 .quad 0x3fd6ef060694f581, 0x3ff224dadc900489 // a = 11/32 419 .quad 0x3fd737b5e6cd3547, 0x3ff2331fdce15884 // a = 89/256 420 .quad 0x3fd7809f5b0cb028, 0x3ff241a509ee3506 // a = 45/128 421 .quad 0x3fd7c9c365ffbdfc, 0x3ff2506b61859acc // a = 91/256 422 .quad 0x3fd813230e574d5a, 0x3ff25f73e82c35af // a = 23/64 423 .quad 0x3fd85cbf5ee41f2a, 0x3ff26ebfa94f2298 // a = 93/256 424 .quad 0x3fd8a69966b2d128, 0x3ff27e4fb7789f5c // a = 47/128 425 .quad 0x3fd8f0b23928bf15, 0x3ff28e252c86b994 // a = 95/256 426 .quad 0x3fd93b0aee21c2c8, 0x3ff29e4129e4129e // a = 3/8 427 .quad 0x3fd985a4a20edba2, 0x3ff2aea4d8c2d024 // a = 97/256 428 .quad 0x3fd9d0807615c643, 0x3ff2bf516a59d19c // a = 49/128 429 .quad 0x3fda1b9f90318dcb, 0x3ff2d04818244483 // a = 99/256 430 .quad 0x3fda67031b542059, 0x3ff2e18a2423b269 // a = 25/64 431 .quad 0x3fdab2ac4788f0dc, 0x3ff2f318d924a53c // a = 101/256 432 .quad 0x3fdafe9c4a18b0e3, 0x3ff304f58b05ffd0 // a = 51/128 433 .quad 0x3fdb4ad45dae2d59, 0x3ff3172197032a26 // a = 103/256 434 .quad 0x3fdb9755c27c59e0, 0x3ff3299e6401329a // a = 13/32 435 .quad 0x3fdbe421be6596cb, 0x3ff33c6d62df06fc // a = 105/256 436 .quad 0x3fdc31399d243e72, 0x3ff34f900ec8ea4b // a = 53/128 437 .quad 0x3fdc7e9eb074870b, 0x3ff36307ed8f4df6 // a = 107/256 438 .quad 0x3fdccc52503fc6fe, 0x3ff376d69001376d // a = 27/64 439 .quad 0x3fdd1a55dac92a26, 0x3ff38afd924a5d42 // a = 109/256 440 .quad 0x3fdd68aab4dbe74b, 0x3ff39f7e9c55292e // a = 55/128 441 .quad 0x3fddb75249fb05b3, 0x3ff3b45b6230cf21 // a = 111/256 442 .quad 0x3fde064e0c92c396, 0x3ff3c995a47babe7 // a = 7/16 443 .quad 0x3fde559f762baeee, 0x3ff3df2f30d22100 // a = 113/256 444 .quad 0x3fdea548079f8314, 0x3ff3f529e2422615 // a = 57/128 445 .quad 0x3fdef549494fde6b, 0x3ff40b87a1c3cbdf // a = 115/256 446 .quad 0x3fdf45a4cb5ee468, 0x3ff4224a66b6ef90 // a = 29/64 447 .quad 0x3fdf965c25e9e132, 0x3ff4397437666199 // a = 117/256 448 .quad 0x3fdfe770f9460541, 0x3ff451072990c667 // a = 59/128 449 .quad 0x3fe01c72771fa832, 0x3ff4690562f77bee // a = 119/256 450 .quad 0x3fe0455cdb2ce279, 0x3ff4817119f3d325 // a = 15/32 451 .quad 0x3fe06e78860a7e8c, 0x3ff49a4c9612f15e // a = 121/256 452 .quad 0x3fe097c659991ec1, 0x3ff4b39a30b8b264 // a = 61/128 453 .quad 0x3fe0c1473c7e911c, 0x3ff4cd5c55c9e98b // a = 123/256 454 .quad 0x3fe0eafc1a4b81ea, 0x3ff4e795845e65bf // a = 31/64 455 .quad 0x3fe114e5e3a29a89, 0x3ff502484f7b2291 // a = 125/256 456 .quad 0x3fe13f058e611d13, 0x3ff51d775ed516de // a = 63/128 457 .quad 0x3fe1695c15c90ea9, 0x3ff539256f9d18b1 // a = 127/256 458 .quad 0x3fe193ea7aad030b, 0x3ff5555555555555 // a = 1/2 459 .quad 0x3fe1beb1c39d9d1a, 0x3ff57209fab0e4c8 // a = 129/256 460 .quad 0x3fe1e9b2fd18d91c, 0x3ff58f46627e080b // a = 65/128 461 .quad 0x3fe214ef39bb369d, 0x3ff5ad0da89bab4f // a = 131/256 462 .quad 0x3fe240679272d92c, 0x3ff5cb6302face89 // a = 33/64 463 .quad 0x3fe26c1d26b4b850, 0x3ff5ea49c2ac81d8 // a = 133/256 464 .quad 0x3fe298111cb3f8ad, 0x3ff609c554fd2e40 // a = 67/128 465 .quad 0x3fe2c444a19b89a5, 0x3ff629d9449defb6 // a = 135/256 466 .quad 0x3fe2f0b8e9ca2471, 0x3ff64a893adcd25f // a = 17/32 467 .quad 0x3fe31d6f3110cb49, 0x3ff66bd900ecd324 // a = 137/256 468 .quad 0x3fe34a68baf3e921, 0x3ff68dcc813e92e9 // a = 69/128 469 .quad 0x3fe377a6d2ef3448, 0x3ff6b067c8eabc0a // a = 139/256 470 .quad 0x3fe3a52accbc786f, 0x3ff6d3af092f2b6d // a = 35/64 471 .quad 0x3fe3d2f6049d6eb3, 0x3ff6f7a69900016f // a = 141/256 472 .quad 0x3fe40109dfa8ccb5, 0x3ff71c52f6add38a // a = 71/128 473 .quad 0x3fe42f67cc1ab64c, 0x3ff741b8c9a24d98 // a = 143/256 474 .quad 0x3fe45e1141a8c010, 0x3ff767dce434a9b1 // a = 9/16 475 .quad 0x3fe48d07c1d9b3ea, 0x3ff78ec445977f4f // a = 145/256 476 .quad 0x3fe4bc4cd8614bf6, 0x3ff7b6741be18685 // a = 73/128 477 .quad 0x3fe4ebe21b801b3d, 0x3ff7def1c6330a52 // a = 147/256 478 .quad 0x3fe51bc92c67dfa3, 0x3ff80842d6f9e5e7 // a = 37/64 479 .quad 0x3fe54c03b7a47bf0, 0x3ff8326d16560c53 // a = 149/256 480 .quad 0x3fe57c937589dd3f, 0x3ff85d7684a0c0a3 // a = 75/128 481 .quad 0x3fe5ad7a2aa7137d, 0x3ff889655d18ce68 // a = 151/256 482 .quad 0x3fe5deb9a83ee95c, 0x3ff8b64018b64019 // a = 19/32 483 .quad 0x3fe61053ccc64d7b, 0x3ff8e40d7128425c // a = 153/256 484 .quad 0x3fe6424a8468e3d0, 0x3ff912d464001913 // a = 77/128 485 .quad 0x3fe6749fc9941cbb, 0x3ff9429c360c45bd // a = 155/256 486 .quad 0x3fe6a755a5893549, 0x3ff9736c76e73ebc // a = 39/64 487 .quad 0x3fe6da6e30f68b8e, 0x3ff9a54d04bd5ccc // a = 157/256 488 .quad 0x3fe70deb9498b947, 0x3ff9d846104df033 // a = 79/128 489 .quad 0x3fe741d009e3ef5a, 0x3ffa0c60212bc26a // a = 159/256 490 .quad 0x3fe7761ddbb61598, 0x3ffa41a41a41a41a // a = 5/8 491 .quad 0x3fe7aad767123bc7, 0x3ffa781b3ea00af6 // a = 161/256 492 .quad 0x3fe7dfff1be5f383, 0x3ffaafcf36993030 // a = 81/128 493 .quad 0x3fe815977dd935af, 0x3ffae8ca15319829 // a = 163/256 494 .quad 0x3fe84ba3252982b5, 0x3ffb23165deb6f69 // a = 41/64 495 .quad 0x3fe88224bf90fa20, 0x3ffb5ebf0af3b992 // a = 165/256 496 .quad 0x3fe8b91f113a34a3, 0x3ffb9bcf93b8ede9 // a = 83/128 497 .quad 0x3fe8f094f5c1bba9, 0x3ffbda53f3f34c2e // a = 167/256 498 .quad 0x3fe9288961460abe, 0x3ffc1a58b327f576 // a = 21/32 499 .quad 0x3fe960ff61871a12, 0x3ffc5beaecb0a99a // a = 169/256 500 .quad 0x3fe999fa1f16860c, 0x3ffc9f185852f521 // a = 85/128 501 .quad 0x3fe9d37cde997e64, 0x3ffce3ef53729f97 // a = 171/256 502 .quad 0x3fea0d8b021dc00c, 0x3ffd2a7eeaec4a48 // a = 43/64 503 .quad 0x3fea48280a82f83e, 0x3ffd72d6e5a66e97 // a = 173/256 504 .quad 0x3fea835798fa0cba, 0x3ffdbd07cfe84d5e // a = 87/128 505 .quad 0x3feabf1d709be5fb, 0x3ffe09230787ea78 // a = 175/256 506 .quad 0x3feafb7d78197bed, 0x3ffe573ac901e574 // a = 11/16 507 .quad 0x3feb387bbb870d4d, 0x3ffea7623d8fe82c // a = 177/256 508 .quad 0x3feb761c6e44955e, 0x3ffef9ad8a548440 // a = 89/128 509 .quad 0x3febb463ed05c38d, 0x3fff4e31e0b5b7e6 // a = 179/256 510 .quad 0x3febf356bffbedc0, 0x3fffa5059001fa50 // a = 45/64 511 .quad 0x3fec32f99d24b0c8, 0x3ffffe40187ea913 // a = 181/256 512 .quad 0x3fec73516ac03329, 0x40002cfd200102d0 // a = 91/128 513 .quad 0x3fecb46341f246c6, 0x40005c27141f5891 // a = 183/256 514 .quad 0x3fecf6347191f5b5, 0x40008cabb37565e2 // a = 23/32 515 .quad 0x3fed38ca812b5fa0, 0x4000be998ff8ce1a // a = 185/256 516 .quad 0x3fed7c2b3438300b, 0x4000f20010f20011 // a = 93/128 517 .quad 0x3fedc05c8d936455, 0x400126ef8270fab7 // a = 187/256 518 .quad 0x3fee0564d32d9391, 0x40015d79261f33f6 // a = 47/64 519 .quad 0x3fee4b4a92077457, 0x400195af45931bd8 // a = 189/256 520 .quad 0x3fee9214a278f736, 0x4001cfa5464e21e2 // a = 95/128 521 .quad 0x3feed9ca2ccbf9e9, 0x40020b6fbf932b06 // a = 191/256 522 .quad 0x3fef2272ae325a57, 0x4002492492492492 // a = 3/4 523 .quad 0x3fef6c15fe200bf7, 0x400288db0323f01c // a = 193/256 524 .quad 0x3fefb6bc5412c9c8, 0x4002caabd7556820 // a = 97/128 525 .quad 0x3ff0013726e90b80, 0x40030eb17410dc80 // a = 195/256 526 .quad 0x3ff0279a7b19be98, 0x4003550801355080 // a = 49/64 527 .quad 0x3ff04e8ce6382f3a, 0x40039dcd8f7e31ca // a = 197/256 528 .quad 0x3ff07613660e2537, 0x4003e92242a773b1 // a = 99/128 529 .quad 0x3ff09e333ae64b65, 0x4004372880014373 // a = 199/256 530 .quad 0x3ff0c6f1ec420786, 0x4004880522014880 // a = 25/32 531 .quad 0x3ff0f0554dfbf7da, 0x4004dbdfb17409a8 // a = 201/256 532 .quad 0x3ff11a6385e30cf6, 0x400532e2a5092677 // a = 101/128 533 .quad 0x3ff1452311dbc75f, 0x40058d3ba8114219 // a = 203/256 534 .quad 0x3ff1709ace96ee39, 0x4005eb1be9658b37 // a = 51/64 535 .quad 0x3ff19cd1feef2a07, 0x40064cb87397b01f // a = 205/256 536 .quad 0x3ff1c9d0540158c9, 0x4006b24a8fb6f220 // a = 103/128 537 .quad 0x3ff1f79df6163ed8, 0x40071c10342d5c96 // a = 207/256 538 .quad 0x3ff226438e777c99, 0x40078a4c8178a4c8 // a = 13/16 539 .quad 0x3ff255ca524d82c3, 0x4007fd484ecf128b // a = 209/256 540 .quad 0x3ff2863c0ea8b8cf, 0x40087552c91cb5b8 // a = 105/128 541 .quad 0x3ff2b7a335dd4c9f, 0x4008f2c227337192 // a = 211/256 542 .quad 0x3ff2ea0aee5f5810, 0x400975f4768d3a48 // a = 53/64 543 .quad 0x3ff31d7f23546375, 0x4009ff5084a080c5 // a = 213/256 544 .quad 0x3ff3520c9718089f, 0x400a8f46e989d487 // a = 107/128 545 .quad 0x3ff387c0f7fbe5f1, 0x400b265339bb9f3a // a = 215/256 546 .quad 0x3ff3beaaf7978c58, 0x400bc4fd65883e7b // a = 27/32 547 .quad 0x3ff3f6da650c1586, 0x400c6bdb4ec15ed3 // a = 217/256 548 .quad 0x3ff430604ab1313f, 0x400d1b929e6308d0 // a = 109/128 549 .quad 0x3ff46b4f0fb778a3, 0x400dd4dae66843b4 // a = 219/256 550 .quad 0x3ff4a7ba9e66a99b, 0x400e98801e98801f // a = 55/64 551 .quad 0x3ff4e5b88fbf4bf0, 0x400f67658e7f8c33 // a = 221/256 552 .quad 0x3ff525605d6fc1b9, 0x401021449d84e212 // a = 111/128 553 .quad 0x3ff566cb9b3ef248, 0x40109583f9d88406 // a = 223/256 554 // a = 7/8 555 556.literal8 557.align 3 558one_256th: .quad 0x3f70000000000000 // 1/256 559one: .quad 0x3ff0000000000000 560one_plus_eps: .quad 0x3ff0000000000001 // 1 + ulp 561one_third: .quad 0x3fd5555555555555 562one_fifth: .quad 0x3fc999999999999a 563minimax_c: .quad 0x3ff0a3d70a3d70a4 // = 1.04 564third_over_c: .quad 0x3fd4834834834834 // = 1/3.12 565a_mask: .quad 0x7ffff00000000000ULL // top 8 bits of mantissa 566exp_mask: .quad 0x7ff0000000000000ULL // exponent bits / +Inf 567neglog2_2: .quad 0xbfd62e42fefa39ef // -ln(2)/2 568 569.literal4 570.align 2 571f256: .long 0x43800000 // 256.0f 572 573.text 574#if defined( __x86_64__ ) 575 #define RELATIVE_ADDR( _a) (_a)( %rip ) 576 #define INDEX %r8 577#elif defined( __i386__ ) 578 #define RELATIVE_ADDR( _a) (_a)-atanhf_body( CX_P ) 579 #define INDEX %edi 580.align 4 581atanhf_pic: 582 movl (%esp), %ecx // copy address of local_addr to %ecx 583 ret 584#else 585 #error arch not supported 586#endif 587 588ENTRY( atanhf ) 589#if defined( __i386__ ) 590 movl FRAME_SIZE( STACKP ), %eax 591 movss FRAME_SIZE( STACKP ), %xmm0 592 calll atanhf_pic 593atanhf_body: 594#else 595 movd %xmm0, %eax 596#endif 597 andl $0x7fffffff, %eax 598 movd %eax, %xmm1 // xmm1 <-- |x| 599 subl $0x3d000000, %eax // |x| - 1/32 as integers 600 cmpl $0x02600000, %eax // if (|x| < 1/32 or |x| >= 7/8 or isnan(x)) 601 jae 2f // goto 2 602 603 /* 604 (Set with tab = 4 spaces) 605 606 For 1/32 < x < 7/8: 607 608 Compute a = floor(x*256), set hi = a/256. a is an integer between 0 and 223. hi is an approximation to 609 x, with 610 611 (1) 0 < (x - a) < 1/256 612 613 We base our approximation on the identity 614 615 1 + x 616 (2) atanh(x) = 1/2 log ------- = 1/2 ( log(1+x) - log(1-x) ) 617 1 - x 618 619 Expand 1x as follows: 620 621 (1 + x) = (1 + hi)(1 - even + odd) (1 - x) = (1 - hi)(1 - even - odd) 622 623 and solve for b,c to get: 624 625 x - a 626 c = --------- b = -ac 627 1 - a^2 628 629 Substituting into (2), we get: 630 631 atanh(x) = 1/2 ( log(1 + hi) + log(1 + even + odd) - log(1 - hi) - log(1 + even - odd) ) 632 633 (3) = 1/2 atanh(hi) + 1/2 ( log(1 + even + odd) - log(1 + even - odd) ) 634 635 We can store atanh(hi), and also the 1/(1-hi^2) factor used to compute odd, in a lookup table indexed on a. 636 A fairly straighforward analysis using (1) and the fact that a < 7/8 shows that 637 638 |even|, |odd| < 1/60 639 640 So a 4th-order taylor series will approximate the second term of (3) to very nearly the desired accuracy. 641 When we compute the series, some really truly wonderful cancellation takes place, leaving us with a beautiful 642 approximation: 643 644 1/2 (log(1+even+odd) - log(1+even-odd)) = odd - odd*even + odd*even^2 + 1/3 odd^3 - odd*even^3 - odd^3*even 645 = odd((1 - even)(1 + even^2) + (1/3 - even)odd^2) 646 647 Unfortunately, this is not *quite* accurate enough - it results in errors of approximately .75 ulps. So we 648 tweak the approximation ever so slightly to achieve the necessary accuracy: 649 650 1/2 (log(1+even+odd) - log(1+even-odd)) = odd((1 - even)(1 + even^2) + 1.04(1/3.12 - even)odd^2) 651 652 Putting this all together, we get 653 654 (4) atanh(x) = atanh(a) + odd((1 - even)(1 + even^2) + (1/3 - 1.04 even)odd^2) + (< .51 ulp) 655 656 - Stephen Canon, July 2007 657 658 */ 659 660 xorps %xmm1, %xmm0 // xmm0 <-- signbit(x) 661 movaps %xmm1, %xmm2 662 mulss RELATIVE_ADDR(f256), %xmm1 // xmm1 <-- x * 256.0 663 cvtss2sd %xmm2, %xmm2 // xmm2 <-- (double)x 664 cvttss2si %xmm1, AX_P // trunc(x * 256.0) 665 cvtsi2sd %eax, %xmm1 // xmm1 <-- (double)trunc(x * 256.0) 666 667 shl $4, AX_P // stride of table is 16 bytes 668 mulsd RELATIVE_ADDR(one_256th), %xmm1 // xmm1 <-- hi = trunc(x * 256.0) / 256.0 669 lea RELATIVE_ADDR(small_table),DX_P 670 subsd %xmm1, %xmm2 // xmm2 <-- x - hi 671 movsd (DX_P,AX_P,1), %xmm7 // xmm7 <-- atanh(a) 672 mulsd 8(DX_P,AX_P,1), %xmm2 // xmm2 <-- odd 673 movsd RELATIVE_ADDR(minimax_c), %xmm3 // xmm3 <-- 1.04 674 movsd RELATIVE_ADDR(third_over_c),%xmm5 // xmm5 <-- 1/3.12 675 movsd RELATIVE_ADDR(one), %xmm6 // xmm6 <-- 1.0 676 mulsd %xmm2, %xmm1 // xmm1 <-- -even 677 mulsd %xmm2, %xmm3 // xmm3 <-- 1.04*odd 678 movapd %xmm1, %xmm4 // xmm4 <-- -even 679 mulsd %xmm1, %xmm1 // xmm1 <-- even*even 680 addsd %xmm4, %xmm5 // xmm5 <-- 1/3.12 - even 681 mulsd %xmm2, %xmm3 // xmm3 <-- 1.04*odd*odd 682 addsd %xmm6, %xmm4 // xmm4 <-- 1.0 - even 683 addsd %xmm6, %xmm1 // xmm1 <-- 1 + even*even 684 mulsd %xmm3, %xmm5 // xmm5 <-- (1/3 - 1.04even)*odd^2 685 mulsd %xmm1, %xmm4 // xmm4 <-- (1.0 - even)(1.0 + even^2) 686 addsd %xmm5, %xmm4 // xmm4 <-- (1.0 - even)(1.0 + even^2) + (1/3 - 1.04even)*odd^2 687 mulsd %xmm2, %xmm4 // xmm4 <-- odd*(1.0 - even)(1.0 + even^2) + (1/3 - 1.04even)*odd^2 688 addsd %xmm7, %xmm4 // xmm4 <-- atanh(x) 689 cvtsd2ss %xmm4, %xmm1 690 orps %xmm1, %xmm0 // signbit 691#if defined( __i386__ ) 692 movss %xmm0, FRAME_SIZE( STACKP ) 693 flds FRAME_SIZE( STACKP ) 694#endif 695 ret 696 6972: 698 cmpl $0x02800000, %eax // if (|x| < 1/32 or |x| >= 1 or isnan(x)) 699 jae 3f // goto 3 700 701 /* 702 (Set with tab = 4 spaces) 703 704 For 7/8 <= |x| < 1: 705 706 On this range, we use an approximation of the form: 707 708 (1) atanh(x) = -1/2 log((1-x)/2) - minimax polynomial in (1-x) 709 710 The second term of (1) is just a straightforward 4-term minimax polynomial. To compute the first term, consider 711 712 (1-x) = mantissa * 2^exponent 713 714 with 1.0 <= mantissa < 2. Set a = floor(256(mantissa - 1)), and let hi = a/256 and lo = (mantissa-(1+hi))/(1+hi). 715 Then: 716 717 (2) -1/2 log((1-x)/2) = -ln2/2 * (exponent-1) - 1/2 log(1+hi) - 1/2 log(1+lo) 718 719 The first term can be computed directly; the second term is looked up in the table indexed on a, and the final 720 term is approximated via another minimax polynomial. 721 722 - Stephen Canon, July 2007 723 724 */ 725 726 xorps %xmm1, %xmm0 // xmm0 <-- signbit(x) 727 cvtss2sd %xmm1, %xmm1 // xmm1 <-- (double)|x| 728 movsd RELATIVE_ADDR(one), %xmm2 // xmm2 <-- 1.0 729 movsd RELATIVE_ADDR(exp_mask), %xmm3 // xmm3 <-- 0x7ff0000000000000ULL 730 movapd %xmm2, %xmm4 // xmm4 <-- 1.0 731 subsd %xmm1, %xmm2 // xmm2 <-- 1.0 - x 732 movsd RELATIVE_ADDR(a_mask), %xmm5 // xmm5 <-- 0x7ffff00000000000ULL 733 andnpd %xmm2, %xmm3 734 movapd %xmm3, %xmm1 735 orpd %xmm4, %xmm3 // xmm3 <-- mantissa(1-x) 736 andpd %xmm3, %xmm5 // xmm5 <-- 1 + hi 737 738 movlhps %xmm2, %xmm3 739 740 subsd %xmm5, %xmm3 // xmm3 <-- mantissa(1-x) - (1+hi) 741 lea RELATIVE_ADDR(big_table), DX_P 742 psrlq $52-(8+4), %xmm1 // xmm1 <-- 8 bit integer a | 4 bit junk 743 movd %xmm1, %eax 744 and $0xff0, AX_P 745 mulsd 8(DX_P,AX_P,1), %xmm3 // xmm3 <-- u = [ lo , 1-x ] 746 movsd (DX_P,AX_P,1), %xmm1 // xmm1 <-- -1/2 log(1+hi) 747 748 movapd %xmm3, %xmm4 // xmm4 <-- u 749 mulpd %xmm3, %xmm3 // xmm3 <-- uu 750 movapd -48(DX_P), %xmm6 // xmm6 <-- c3/c4 751 movapd -32(DX_P), %xmm5 // xmm5 <-- c2 752 753 psrlq $52, %xmm2 // xmm2 <-- biased exponent of (1-x) 754 movd %xmm2, %eax 755 subl $0x400, %eax // unbiased exponent - 1 756 cvtsi2sd %eax, %xmm2 // xmm2 <-- unbiased exponent - 1 757 mulsd RELATIVE_ADDR(neglog2_2), %xmm2 // xmm2 <-- -ln2/2 * (exponent-1) 758 addsd %xmm2, %xmm1 // xmm1 <-- -ln2/2 * (exponent-1) - 1/2 log(1+hi) 759 760 mulpd %xmm4, %xmm6 // xmm6 <-- c3/c4 u 761 mulpd %xmm4, %xmm5 // xmm5 <-- c2 u 762 addpd -16(DX_P), %xmm4 // xmm4 <-- c1/c2 + u 763 addpd %xmm3, %xmm6 // xmm6 <-- c3/c4 u + uu 764 mulpd -64(DX_P), %xmm3 // xmm3 <-- c4uu 765 mulpd %xmm5, %xmm4 // xmm4 <-- c1 u + c2 u 766 mulpd %xmm3, %xmm6 // xmm3 <-- c3 uuu + c4 uuuu 767 768 // Now add up all the terms 769 addsd %xmm1, %xmm4 770 addpd %xmm6, %xmm4 771 movhlps %xmm4, %xmm1 772 addsd %xmm4, %xmm1 773 774 // Convert to single precision 775 cvtsd2ss %xmm1, %xmm1 776 orps %xmm1, %xmm0 // signbit 777#if defined( __i386__ ) 778 movss %xmm0, FRAME_SIZE( STACKP ) 779 flds FRAME_SIZE( STACKP ) 780#endif 781 ret 782 7833: // if (|x| >= 1 or isnan(x)) 784 jge 5f // goto 5 785 786 // Now we know |x| < 1/32 787 addl $0x3d000000, %eax // |x| 788 cmpl $0x38000000, %eax // if (|x| <= 2^-13) 789 jle 4f // goto 4 790 791 cvtss2sd %xmm0, %xmm1 792 movapd %xmm1, %xmm2 793 mulsd %xmm2, %xmm2 // xmm2 <-- x*x 794 movsd RELATIVE_ADDR(one_fifth), %xmm3 795 mulsd %xmm2, %xmm3 // xmm3 <-- .2xx 796 addsd RELATIVE_ADDR(one_third), %xmm3 // xmm3 <-- .333 + .2xx 797 mulsd %xmm3, %xmm2 // xmm2 <-- .333xx + .2xxxx 798 mulsd %xmm1, %xmm2 // xmm2 <-- .333xxx + .2xxxxx 799 addsd %xmm2, %xmm1 // xmm1 <-- x + .333xxx + .2xxxxx 800 cvtsd2ss %xmm1, %xmm0 801 802#if defined( __i386__ ) 803 movss %xmm0, FRAME_SIZE( STACKP ) 804 flds FRAME_SIZE( STACKP ) 805#endif 806 ret 807 8084: // very small case, just multiply by (1+ulp) and return 809 cvtss2sd %xmm0, %xmm1 810 mulsd RELATIVE_ADDR(one_plus_eps),%xmm1 // xmm1 <-- x * (1 + ulp) for rounding and inexact. 811 cvtsd2ss %xmm1, %xmm0 812#if defined( __i386__ ) 813 movss %xmm0, FRAME_SIZE( STACKP ) 814 flds FRAME_SIZE( STACKP ) 815#endif 816 ret 817 818 8195: // if x = 1 820 je 7f // goto 7 821 ucomiss %xmm0, %xmm0 // if isnan(x) 822 jp 6f // goto 6 823 824 pcmpeqb %xmm0, %xmm0 825 pslld $23, %xmm0 8266: 827 movaps %xmm0, %xmm1 828 subss %xmm1, %xmm0 829#if defined( __i386__ ) 830 movss %xmm0, FRAME_SIZE( STACKP ) 831 flds FRAME_SIZE( STACKP ) 832#endif 833 ret 834 8357: 836 xorps %xmm1, %xmm1 // xmm1 <-- 0 837 divss %xmm1, %xmm0 // xmm0 <-- 1/0 = inf, div by zero 838#if defined( __i386__ ) 839 movss %xmm0, FRAME_SIZE( STACKP ) 840 flds FRAME_SIZE( STACKP ) 841#endif 842 ret 843