this repo has no description
at fixPythonPipStalling 922 lines 48 kB view raw
1/* 2 * xmm_exp.c 3 * xmmLibm 4 * 5 * Created by Ian Ollmann, Ph.D. on 7/14/05. 6 * Copyright � 2005 Apple Computer, Inc. All rights reserved. 7 * 8 * Constants from original typing by Earl Killian of MIPS on March 23rd, 1992. 9 * Converted from pairs of 32-bit hexadecimal words to C99 notation, by Ian Ollmann 10 * 11 * Algorithm from Peter Tang: 12 * 13 * ACM Transactions on Mathematical Software, Vol 16, 4, December 1990, pp 378-400 14 * 15 * 16 */ 17 18#include "xmmLibm_prefix.h" 19 20#include "math.h" 21#include "fenv.h" 22 23//Functions implemented here 24static inline xDouble _xlog( xDouble x ) ALWAYS_INLINE; 25 26static const double A1[2] = { 27 0x1.5555555550286p-4, //0.0833333333330391334837 28 0x1.999A0BC712416p-7 //0.0125000531680985842164 29 }; 30 31static const double A2[4] = { 32 0x1.55555555554E6p-4, //0.083333333333331788273 33 0x1.9999999BAC6D4p-7, //0.0125000000037717509671 34 0x1.2492307F1519Fp-9, //0.00223213998791944824227 35 0x1.C8034C85DFFF0p-12 //0.000434887777707614574252 36 }; 37 38static const double C_lead[129] = { 39 0x0, // 0 40 0x1.FE02A6B100000p-8, //0.00778214044203195953742 41 0x1.FC0A8B0FC0000p-7, //0.0155041865359635266941 42 0x1.7B91B07D60000p-6, //0.0231670592816044518258 43 0x1.F829B0E780000p-6, //0.0307716586667083902285 44 0x1.39E87B9FE8000p-5, //0.0383188643020275776507 45 0x1.77458F6330000p-5, //0.0458095360313564015087 46 0x1.B42DD71198000p-5, //0.0532445145188376045553 47 0x1.F0A30C0118000p-5, //0.060624621816486978787 48 0x1.16536EEA38000p-4, //0.0679506619085259444546 49 0x1.341D7961BC000p-4, //0.075223421237524235039 50 0x1.51B073F060000p-4, //0.0824436692109884461388 51 0x1.6F0D28AE58000p-4, //0.0896121586897606903221 52 0x1.8C345D6318000p-4, //0.096729626458454731619 53 0x1.A926D3A4AC000p-4, //0.10379679368156757846 54 0x1.C5E548F5BC000p-4, //0.110814366340264314204 55 0x1.E27076E2B0000p-4, //0.117783035656430001836 56 0x1.FEC9131DC0000p-4, //0.12470347850103280507 57 0x1.0D77E7CD08000p-3, //0.131576357788617315236 58 0x1.1B72AD52F6000p-3, //0.138402322859064952354 59 0x1.29552F8200000p-3, //0.145182009844575077295 60 0x1.371FC201E8000p-3, //0.151916042025732167531 61 0x1.44D2B6CCB8000p-3, //0.158605030176659056451 62 0x1.526E5E3A1C000p-3, //0.165249572895390883787 63 0x1.5FF3070A7A000p-3, //0.171850256926745714736 64 0x1.6D60FE719E000p-3, //0.178407657472916980623 65 0x1.7AB890210E000p-3, //0.184922338494061477832 66 0x1.87FA06520C000p-3, //0.191394852999565046048 67 0x1.9525A9CF46000p-3, //0.197825743329985925811 68 0x1.A23BC1FE2C000p-3, //0.204215541428766300669 69 0x1.AF3C94E80C000p-3, //0.210564769107350002741 70 0x1.BC286742D8000p-3, //0.216873938300523150247 71 0x1.C8FF7C79AA000p-3, //0.223143551314251453732 72 0x1.D5C216B4FC000p-3, //0.229374101064877322642 73 0x1.E27076E2B0000p-3, //0.235566071312860003673 74 0x1.EF0ADCBDC6000p-3, //0.241719936887193398434 75 0x1.FB9186D5E4000p-3, //0.247836163904594286578 76 0x1.0402594B4D000p-2, //0.253915209980959843961 77 0x1.0A324E2739000p-2, //0.259957524436913445243 78 0x1.1058BF9AE5000p-2, //0.265963548497211377253 79 0x1.1675CABABA000p-2, //0.271933715483555715764 80 0x1.1C898C169A000p-2, //0.277868451003541849786 81 0x1.22941FBCF8000p-2, //0.283768173130738432519 82 0x1.2895A13DE8000p-2, //0.289633292582948342897 83 0x1.2E8E2BAE12000p-2, //0.295464212893875810551 84 0x1.347DD9A988000p-2, //0.301261330578199704178 85 0x1.3A64C55694000p-2, //0.307025035294827830512 86 0x1.404308686A000p-2, //0.312755710003784770379 87 0x1.4618BC21C6000p-2, //0.318453731118552241242 88 0x1.4BE5F95778000p-2, //0.324119468654316733591 89 0x1.51AAD872E0000p-2, //0.329753286372579168528 90 0x1.5767717456000p-2, //0.335355541921217081835 91 0x1.5D1BDBF581000p-2, //0.340926586970681455568 92 0x1.62C82F2B9C000p-2, //0.346466767346100823488 93 0x1.686C81E9B1000p-2, //0.351976423157111639739 94 0x1.6E08EAA2BA000p-2, //0.357455888921776931966 95 0x1.739D7F6BBD000p-2, //0.362905493689368086052 96 0x1.792A55FDD4000p-2, //0.368325561158599157352 97 0x1.7EAF83B82B000p-2, //0.373716409793587445165 98 0x1.842D1DA1E9000p-2, //0.379078352935039220029 99 0x1.89A3386C14000p-2, //0.384411698910298582632 100 0x1.8F11E87366000p-2, //0.3897167511399857176 101 0x1.947941C211000p-2, //0.394993808240769794793 102 0x1.99D958117E000p-2, //0.400243164127005002229 103 0x1.9F323ECBFA000p-2, //0.405465108108273852849 104 0x1.A484090E5C000p-2, //0.410659924985338875558 105 0x1.A9CEC9A9A1000p-2, //0.415827895143820569501 106 0x1.AF12932478000p-2, //0.420969294644237379543 107 0x1.B44F77BCC9000p-2, //0.426084395310908803367 108 0x1.B985896931000p-2, //0.43117346481835738814 109 0x1.BEB4D9DA72000p-2, //0.436236766774982243078 110 0x1.C3DD7A7CDB000p-2, //0.441274560804913562606 111 0x1.C8FF7C79AA000p-2, //0.446287102628502907464 112 0x1.CE1AF0B85F000p-2, //0.451274644139402880683 113 0x1.D32FE7E00F000p-2, //0.456237433481646803557 114 0x1.D83E7258A3000p-2, //0.461175715122180918115 115 0x1.DD46A04C1C000p-2, //0.466089729924533457961 116 0x1.E24881A7C7000p-2, //0.47097971521884574031 117 0x1.E744261D68000p-2, //0.475845904869856894948 118 0x1.EC399D2469000p-2, //0.480688529345798087888 119 0x1.F128F5FAF0000p-2, //0.485507815781602403149 120 0x1.F6123FA703000p-2, //0.490303988045297955978 121 0x1.FAF588F78F000p-2, //0.495077266797807169496 122 0x1.FFD2E0857F000p-2, //0.499827869556384030147 123 0x1.02552A5A5D000p-1, //0.50455601075236700126 124 0x1.04BDF9DA92800p-1, //0.509261901789841431309 125 0x1.0723E5C1CE000p-1, //0.513945751102255599108 126 0x1.0986F4F573800p-1, //0.518607764208127264283 127 0x1.0BE72E4252800p-1, //0.52324814376447648101 128 0x1.0E44985D1D000p-1, //0.527867089620940532768 129 0x1.109F39E2D5000p-1, //0.532464798869568767259 130 0x1.12F719593F000p-1, //0.537041465896891168086 131 0x1.154C3D2F4D800p-1, //0.541597282432803694974 132 0x1.179EABBD89800p-1, //0.546132437598089381936 133 0x1.19EE6B467C800p-1, //0.550647117952621556469 134 0x1.1C3B81F714000p-1, //0.555141507540611200966 135 0x1.1E85F5E704000p-1, //0.559615787935399566777 136 0x1.20CDCD192A800p-1, //0.564070138284705535625 137 0x1.23130D7BEC000p-1, //0.568504735352689749561 138 0x1.2555BCE98F800p-1, //0.572919753561791367247 139 0x1.2795E1289B000p-1, //0.57731536503479219391 140 0x1.29D37FEC2B000p-1, //0.581691739634607074549 141 0x1.2C0E9ED449000p-1, //0.586049045003619539784 142 0x1.2E47436E40000p-1, //0.590387446602107957006 143 0x1.307D7334F1000p-1, //0.594707107746671681525 144 0x1.32B1339122000p-1, //0.599008189646156097297 145 0x1.34E289D9CE000p-1, //0.603290851438032404985 146 0x1.37117B5474800p-1, //0.607555250224550036364 147 0x1.393E0D3562800p-1, //0.611801541105933210929 148 0x1.3B6844A000000p-1, //0.616029877215623855591 149 0x1.3D9026A715800p-1, //0.620240409751886545564 150 0x1.3FB5B84D17000p-1, //0.624433288011914555682 151 0x1.41D8FE8467000p-1, //0.628608659422297932906 152 0x1.43F9FE2F9D000p-1, //0.632766669571083184564 153 0x1.4618BC21C6000p-1, //0.636907462237104482483 154 0x1.48353D1EA8800p-1, //0.641031179420906482846 155 0x1.4A4F85DB04000p-1, //0.645137961373620782979 156 0x1.4C679AFCCF000p-1, //0.649227946625160257099 157 0x1.4E7D811B75800p-1, //0.653301272012640765752 158 0x1.50913CC016800p-1, //0.657358072708348117885 159 0x1.52A2D265BC800p-1, //0.661398482245431296178 160 0x1.54B2467999800p-1, //0.665422632545187298092 161 0x1.56BF9D5B3F000p-1, //0.669430653942526987521 162 0x1.58CADB5CD7800p-1, //0.673422675212123067467 163 0x1.5AD404C35A000p-1, //0.677398823591829568613 164 0x1.5CDB1DC6C1800p-1, //0.681359224807920327294 165 0x1.5EE02A9241800p-1, //0.685304003098963221419 166 0x1.60E32F4478800p-1, //0.689233281238784911693 167 0x1.62E42FEFA3800p-1, //0.693147180559890330187 168 }; 169 170static const double C_trail[129] = { 171 0.0, // 0 172 0x1.9E23F0DDA40E4p-46, //2.29894100462035112076e-14 173 0x1.F1E7CF6D3A69Cp-50, //1.72745674997061065553e-15 174 -0x1.3B955B602ACE4p-44, //-7.00735970431003565857e-14 175 0x1.980267C7E09E4p-45, //4.52981425779092882775e-14 176 0x1.EAFD480AD9015p-44, //1.09021543022033016421e-13 177 -0x1.181DCE586AF09p-44, //-6.21983419947579227529e-14 178 -0x1.C827AE5D6704Cp-46, //-2.53216894311744497863e-14 179 -0x1.D599E83368E91p-45, //-5.21362063913650408235e-14 180 -0x1.47C5E768FA309p-46, //-1.81950600301688149235e-14 181 0x1.1D09299837610p-44, //6.32906595872454402199e-14 182 0x1.83F69278E686Ap-44, //8.61451293608781447223e-14 183 -0x1.4B4641B664613p-44, //-7.35577021943502867846e-14 184 0x1.B20F5ACB42A66p-44, //9.63806765855227740728e-14 185 0x1.563650BD22A9Cp-44, //7.59863659719414144342e-14 186 0x1.D0C57585FBE06p-46, //2.57999912830699022513e-14 187 -0x1.A342C2AF0003Cp-45, //-4.65472974759844472568e-14 188 -0x1.54555D1AE6607p-44, //-7.55692068745133691756e-14 189 0x1.CB2CD2EE2F482p-44, //1.01957352237084734958e-13 190 0x1.E80A41811A396p-45, //5.41833313790089940464e-14 191 -0x1.5B967F4471DFCp-44, //-7.71800133682809851086e-14 192 0x1.EE8779B2D8ABCp-44, //1.09807540998552379211e-13 193 -0x1.70CC16135783Cp-46, //-2.04723578004619553937e-14 194 -0x1.790BA37FC5238p-44, //-8.37209109923591205585e-14 195 -0x1.8586F183BEBF2p-44, //-8.64923960721207091374e-14 196 -0x1.BC6E557134767p-44, //-9.86835038673494943912e-14 197 -0x1.BDB9072534A58p-45, //-4.94851676612509959777e-14 198 0x1.22120401202FCp-44, //6.44085615069689207389e-14 199 -0x1.297137D9F158Fp-44, //-6.60454487708238395939e-14 200 -0x1.539CD91DC9F0Bp-44, //-7.54091651195618882501e-14 201 -0x1.A4E633FCD9066p-52, //-3.65071888317905767114e-16 202 0x1.9AC53F39D121Cp-44, //9.12093724991498410553e-14 203 -0x1.7794F689F8434p-45, //-4.16979658452719528642e-14 204 -0x1.1BA91BBCA681Bp-45, //-3.14926506519148377243e-14 205 -0x1.A342C2AF0003Cp-44, //-9.30945949519688945136e-14 206 -0x1.B26B79C86AF24p-45, //-4.82302894299408858477e-14 207 -0x1.D572AAB993C87p-47, //-1.30297971733086634357e-14 208 0x1.036B89EF42D7Fp-48, //3.60017673263733462441e-15 209 0x1.C6BEE7EF4030Ep-47, //1.26217293988853160748e-14 210 -0x1.4AB9D817D52CDp-44, //-7.34359136986779711761e-14 211 0x1.8380E731F55C4p-44, //8.60430677280873279668e-14 212 -0x1.81410E5C62AFFp-44, //-8.55436000656632193091e-14 213 -0x1.A6976F5EB0963p-44, //-9.38341722366369999987e-14 214 0x1.A8D7AD24C13F0p-44, //9.43339818951269030846e-14 215 -0x1.67B1E99B72BD8p-45, //-3.99341638438784391272e-14 216 -0x1.5594DD4C58092p-45, //-3.7923164802093146798e-14 217 0x1.7A71CBCD735D0p-44, //8.40315630479242455758e-14 218 0x1.F8EF43049F7D3p-44, //1.1211800740360981983e-13 219 -0x1.3D82F484C84CCp-46, //-1.76254313121726620573e-14 220 -0x1.D7C92CD9AD824p-44, //-1.04757500587765412913e-13 221 -0x1.F4BD8DB0A7CC1p-44, //-1.11186713895593226425e-13 222 -0x1.64EAD9524D7CAp-44, //-7.92515783138655870267e-14 223 -0x1.8D6BDC9C7C238p-44, //-8.8245263321256400121e-14 224 0x1.E54BDBD7C8A98p-44, //1.07757430375726404546e-13 225 0x1.2BB110AF84054p-44, //6.65449164332479482515e-14 226 0x1.E38C139318D71p-46, //2.68422602858563731995e-14 227 0x1.A7389314FEB50p-52, //3.67085697163493829481e-16 228 0x1.E89F057691FEAp-44, //1.08495696229679121506e-13 229 -0x1.E4DA62D0C25ADp-49, //-3.36434401382552911606e-15 230 -0x1.3A2DB13AE687Cp-44, //-6.97616377035377091917e-14 231 0x1.2D5AD38C40882p-45, //3.3457102695440823738e-14 232 0x1.63BF0BB4EAB4Cp-45, //3.949577025210288071e-14 233 0x1.BEAE9337451F4p-44, //9.91833135258393974771e-14 234 0x1.1597525DD88F0p-47, //7.70470078193964863175e-15 235 -0x1.ED03525CA2643p-44, //-1.09470871366066397592e-13 236 -0x1.3D7500D6523C5p-44, //-7.04896239210974659112e-14 237 -0x1.ED9CADEC02B43p-44, //-1.09603887929539904968e-13 238 -0x1.E53BB31EED7A9p-44, //-1.07743414616095792458e-13 239 -0x1.3AE68224AA2CEp-47, //-8.74024251107295313295e-15 240 0x1.F6B31F629F11Ep-47, //1.39527194700992522593e-14 241 -0x1.21021E78B2151p-44, //-6.41727287881571093141e-14 242 -0x1.5946261F5A42Bp-45, //-3.83331165923754571982e-14 243 -0x1.7794F689F8434p-44, //-8.33959316905439057284e-14 244 0x1.F5BDBE95E5568p-45, //5.57044620824077391343e-14 245 -0x1.0AA7884DCD050p-44, //-5.92091761359114736879e-14 246 -0x1.835F5D48BA26Dp-47, //-1.07517471912360339462e-14 247 0x1.282FB989A9274p-44, //6.57665976858006147528e-14 248 -0x1.ECF1A1385D356p-45, //-5.47277630185806178777e-14 249 0x1.E1F8DF68DBCF3p-44, //1.07019317621142549209e-13 250 -0x1.9FF45188D6065p-45, //-4.61802117788209510607e-14 251 0x1.BB2CD720EC44Cp-44, //9.84046527823262695501e-14 252 -0x1.D4E7AEA4F0D25p-44, //-1.04117827384293371195e-13 253 0x1.8F6CD7D9F2754p-45, //4.43451018828153751026e-14 254 0x1.261565F40D932p-44, //6.52996738757825591006e-14 255 0x1.FD8D38D2BAFDDp-46, //2.8285798609067893876e-14 256 -0x1.2D9A033EFF74Ep-45, //-3.34845053941249831574e-14 257 -0x1.7F6350D38EDDDp-46, //-2.12823065872096837292e-14 258 -0x1.6FA37012B5806p-44, //-8.16321296891503919914e-14 259 0x1.415B4C4BDD99Fp-44, //7.13555066011812146304e-14 260 -0x1.BA048A8D10B4Bp-44, //-9.81476542529858082436e-14 261 -0x1.B4810E09B27A4p-44, //-9.69233849737002813365e-14 262 -0x1.0EB3FB7398E0Cp-47, //-7.51351912898166894415e-15 263 -0x1.0B2B38662E34Dp-44, //-5.93233971574446149215e-14 264 0x1.A0BFC60E6FA08p-45, //4.62684463909612350375e-14 265 0x1.6ECC5CBDD7782p-45, //4.07227907088846767786e-14 266 -0x1.EDA1B58389902p-44, //-1.09608250460592783688e-13 267 0x1.A07BD8B34BE7Cp-46, //2.3119493838005377632e-14 268 0x1.B6C9A81E87BAEp-44, //9.74304462767037064935e-14 269 -0x1.7AFA4392F1BA7p-46, //-2.10374825114449422873e-14 270 -0x1.A61FDE292977Ep-48, //-5.85815401264202219115e-15 271 0x1.1AEB783F3DB97p-45, //3.14104080050449590607e-14 272 0x1.1590B9AD974BAp-46, //1.54079711890856738893e-14 273 -0x1.74468563CE45Dp-45, //-4.13308801481084566682e-14 274 0x1.34202A10C3491p-44, //6.84176364159146659095e-14 275 0x1.7C3F6B2143EADp-46, //2.11079891578422983965e-14 276 -0x1.4766FD54A4C27p-44, //-7.26979150253512871478e-14 277 0x1.D316EB92D885Dp-45, //5.18573553063418286042e-14 278 -0x1.28E88BF6DEEC9p-47, //-8.24086314983113070392e-15 279 0x1.0CD4E221301B7p-44, //5.96926009653846962309e-14 280 -0x1.EEA838909F3D3p-44, //-1.0983594325438429833e-13 281 -0x1.055BFBD9C2F53p-45, //-2.90167125533596631915e-14 282 -0x1.7B4962C55F46Bp-46, //-2.10546393306435734475e-14 283 0x1.5732325E617A3p-44, //7.6204838231893709023e-14 284 -0x1.98858D84649F1p-45, //-4.53550186996774688761e-14 285 -0x1.3D82F484C84CCp-45, //-3.52508626243453241145e-14 286 0x1.BEE7ABD176604p-46, //2.48082091251967673736e-14 287 -0x1.44FDD840B8591p-45, //-3.60813136042255739798e-14 288 -0x1.C64E971322CE8p-45, //-5.04382083563449091526e-14 289 0x1.D84E584C2B22Cp-44, //1.04873006903856996874e-13 290 0x1.AD2F2CE96C2D6p-47, //1.19122567102055698791e-14 291 -0x1.2A88C41BA8752p-44, //-6.62879179039074743232e-14 292 -0x1.B42B755EBA5E1p-44, //-9.68491419671810783613e-14 293 0x1.CCA08E310B9B2p-44, //1.02279777907416200886e-13 294 0x1.893092F25D931p-45, //4.36528304869414810551e-14 295 -0x1.A609ACAAB41FCp-46, //-2.34278036379790696248e-14 296 -0x1.36E612387451Fp-46, //-1.72583456150091690167e-14 297 -0x1.8A8F29F6A02DCp-45, //-4.38048746232309815355e-14 298 0x1.B194F912B416Ap-46, //2.40686318405286681994e-14 299 0x1.EF35793C76730p-45, //5.49792301870837115524e-14 300 }; 301 302 303static const double T1 = 0x1.E0FABFBC702A3p-1; //0.939413062813475696622 // e^(-1/16) 304static const double T2 = 0x1.1082B577D34EEp+0; //1.06449445891785954288 // e^(+1/16) 305static const double tiny = 0x0.0000000000001p-1022; //4.94065645841246544177e-324 306static const xDouble plusinf = { INFINITY, 0.0 }; 307static const xDouble smallestNormal = { 0x1.0p-1022, 0 }; 308static const double denormBias = 0.0; 309static const xDouble one = { 1.0, 0.0 }; 310static const double d128 = 128.0; 311static const double d0_0078125 = 0.0078125; 312static const xSInt64 bias[2] = { {2045, 0}, {1023, 0} }; 313static const xSInt64 * const biasp = bias + 1; 314static const double mOne = -1.0; 315static const double two = 2.0; 316static const xSInt64 logNaN = { 0x7ff8000000000024LL, 0 }; 317static const xSInt64 logNaNf = { 0x7ff8000480000000LL, 0 }; 318 319//x must be finite, non-NaN, positive 320static inline xDouble mantissa( xDouble x, xDouble *exponent ) ALWAYS_INLINE; 321static inline xDouble mantissa( xDouble x, xDouble *exponent ) 322{ 323 324 xDouble mantissa = _mm_andnot_pd( plusinf, x ); // x & 0x800FFFFFFFFFFFFF (note: always positive) 325 xDouble isDenormal = _mm_cmplt_sdm( x, (double*) &smallestNormal ); // -1LL if denormal, 0 otherwise 326 xDouble denormExp = _mm_and_pd( one, isDenormal ); // 1.0 if denormal, 0.0 otherwise 327 int biasSelector = _mm_cvtsi128_si32( (__m128i) isDenormal ); // -1 if denormal, 0 otherwise 328 x = _mm_or_pd( x, denormExp ); // or in 1.0 as exponent if denormal 329 x = _mm_sub_sd( x, denormExp ); // subtract away 1.0 as exponent if denormal 330 mantissa = _mm_andnot_pd( plusinf, x ); // get the mantissa of x 331 xUInt64 iExp = (__m128i) _mm_and_pd( x, plusinf ); // extract the exponent of x 332 iExp = _mm_srli_epi64( iExp, 52 ); // right shift to make an integer 333 iExp = _mm_sub_epi64( iExp, biasp[ biasSelector ] ); // subtract away either 1023 or the denormal bias 334 *exponent = _mm_cvtepi32_pd( iExp ); // convert to double 335 return _mm_or_pd( mantissa, one ); // set the mantissa to have a range 1.0 <= mantissa < 2.0 336} 337 338static inline xDouble _xlog( xDouble x ) 339{ 340 static const double half = 0.5; 341 xDouble Y, F, f, f1, L_lead, L_trail, R, u, u1, u2, v, q, g, m; 342 int j; 343 344 xDouble xLET1 = _mm_cmple_sdm( x, &T1 ); 345 xDouble xGET2 = _mm_cmpge_sdm( x, &T2 ); 346 if( _mm_istrue_sd( _mm_or_pd( xLET1, xGET2 ) ) ) 347 { 348 Y = mantissa( x, &m ); 349 j = _mm_cvttsd_si32( _mm_add_sdm( _mm_mul_sdm( Y, &d128 ), &half) ); 350 F = _mm_mul_sdm( _mm_cvtsi32_sd( Y, j) , &d0_0078125 ); 351 j -= 128; 352 f = _mm_sub_sd( Y, F ); 353 L_lead = _mm_add_sdm( _mm_mul_sdm( m, &C_lead[128] ), &C_lead[j] ); 354 L_trail = _mm_add_sdm( _mm_mul_sdm( m, &C_trail[128] ), &C_trail[j] ); 355 u = _mm_div_sd( _mm_add_sd( f, f), _mm_add_sd( Y, F ) ); 356 v = _mm_mul_sd( u, u ); 357 q = _mm_add_sdm( _mm_mul_sdm( v, &A1[1] ), &A1[0] ); 358 q = _mm_mul_sd( q, v ); 359 q = _mm_mul_sd( q, u ); 360 R = _mm_add_sd( L_lead, _mm_add_sd( u , _mm_add_sd( q, L_trail ) ) ); 361 } 362 else 363 { 364 xDouble xEQone = _mm_cmpeq_sdm( x, (double*) &one ); 365 366 f = _mm_sub_sdm( x, (double*) &one ); 367 g = _mm_div_sd( one, _mm_add_sdm( f, &two ) ); 368 u = _mm_mul_sd( f, g ); u = _mm_add_sd( u, u ); //u = 2.0 * (f * g) 369 v = _mm_mul_sd( u, u ); 370 q = _mm_add_sdm( _mm_mul_sdm( v, &A2[3] ), &A2[2] ); 371 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[1] ); 372 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[0] ); 373 q = _mm_mul_sd( _mm_mul_sd( q, v ), u ); 374 u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) ); 375 f1 = _mm_cvtss_sd( f, _mm_cvtsd_ss( (xFloat) f, f ) ); 376 377 // u2 = ( ( 2 * ( f - u1 ) - u1 * f1 ) - u1 * ( f - f1 ) ) * g; 378 xDouble t0 = _mm_sub_sd( f, u1 ); //f - u1 379 xDouble t1 = _mm_sub_sd( f, f1 ); //f - f1 380 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1 381 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1) 382 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 ) 383 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1 384 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1) 385 u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g 386 R = _mm_add_sd( u2, q ); 387 R = _mm_add_sd( R, u1 ); 388 389 R = _mm_andnot_pd( xEQone, R ); 390 } 391 return R; 392} 393 394/* 395static inline double _xlog2( double x ) ALWAYS_INLINE; 396static inline double _xlog2( double x ) 397{ 398 static const double conversion = 1.0 / 6.9314718055994530942E-1; 399 xDouble r = _xlog( DOUBLE_2_XDOUBLE( x ) ); 400 r = _mm_mul_sdm( r, &conversion ); 401 return XDOUBLE_2_DOUBLE( r ); 402} 403*/ 404 405#pragma mark - 406#pragma mark log2 407 408#if defined( BUILDING_FOR_CARBONCORE_LEGACY ) 409 410#warning goofy use of scaled log() to do log2() 411double log2( double x ) 412{ 413 static const double conversion = 1.0 / 6.9314718055994530942E-1; 414 xDouble xd = DOUBLE_2_XDOUBLE( x ); 415 416 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 417 xDouble safeX = _mm_andnot_pd( xIsNaN, xd ); 418 419 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD ); 420 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf ); 421 422 // mainline: 0 < x < inf 423 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) { 424 xd = _xlog(xd); 425 xd = _mm_mul_sdm(xd, &conversion); 426 } 427 428 // edge cases 429 else { 430 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) ) 431 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 432 else { 433 xDouble inf = plusinf; 434 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 435 inf = _mm_andnot_pd( xIsNaN, inf ); 436 inf = _mm_andnot_pd( isInf, inf ); 437 438 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD ); 439 440 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 441 xd = _mm_add_sd( xd, inf ); 442 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN ); 443 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 444 } 445 } 446 447 // return 448 return XDOUBLE_2_DOUBLE( xd ); 449} 450 451#else 452 453/* 454float log2f( float x ) 455{ 456 //cheesy fallback on double, that probably fails to get the edge cases right. 457 static const double conversion = 1.0 / 6.9314718055994530942E-1; 458 xDouble xd = FLOAT_2_XDOUBLE( x ); 459 460 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 461 xDouble safeX = _mm_andnot_pd( xIsNaN, xd ); 462 463 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD ); 464 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf ); 465 466 // mainline: 0 < x < inf 467 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) { 468 xd = _xlog(xd); 469 xd = _mm_mul_sdm(xd, &conversion); 470 } 471 472 // edge cases 473 else { 474 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) ) 475 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 476 else { 477 xDouble inf = plusinf; 478 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 479 inf = _mm_andnot_pd( xIsNaN, inf ); 480 inf = _mm_andnot_pd( isInf, inf ); 481 482 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD ); 483 484 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 485 xd = _mm_add_sd( xd, inf ); 486 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf ); 487 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 488 } 489 } 490 491 // return 492 return XDOUBLE_2_FLOAT( xd ); 493} 494*/ 495 496#pragma mark - 497#pragma mark log 498 499double log ( double x ) 500{ 501 xDouble xd = DOUBLE_2_XDOUBLE( x ); 502 503 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 504 xDouble safeX = _mm_andnot_pd( xIsNaN, xd ); 505 506 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD ); 507 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf ); 508 509 // mainline: 0 < x < inf 510 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) 511 xd = _xlog(xd); 512 513 // edge cases 514 else { 515 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) ) 516 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 517 else { 518 xDouble inf = plusinf; 519 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 520 inf = _mm_andnot_pd( xIsNaN, inf ); 521 inf = _mm_andnot_pd( isInf, inf ); 522 523 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD ); 524 525 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 526 xd = _mm_add_sd( xd, inf ); 527 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN ); 528 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 529 } 530 } 531 532 // return 533 return XDOUBLE_2_DOUBLE( xd ); 534} 535 536/* 537float logf( float x ) 538{ 539 //cheesy fallback on double, that probably fails to get the edge cases right. 540 xDouble xd = FLOAT_2_XDOUBLE( x ); 541 542 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 543 xDouble safeX = _mm_andnot_pd( xIsNaN, xd ); 544 545 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD ); 546 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf ); 547 548 // mainline: 0 < x < inf 549 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) 550 xd = _xlog(xd); 551 552 // edge cases 553 else { 554 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) ) 555 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 556 else { 557 xDouble inf = plusinf; 558 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 559 inf = _mm_andnot_pd( xIsNaN, inf ); 560 inf = _mm_andnot_pd( isInf, inf ); 561 562 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD ); 563 564 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 565 xd = _mm_add_sd( xd, inf ); 566 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf ); 567 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 568 } 569 } 570 571 // return 572 return XDOUBLE_2_FLOAT( xd ); 573} 574*/ 575 576#pragma mark - 577#pragma mark log1p 578 579static inline xDouble _xlog1p( xDouble x ) ALWAYS_INLINE; 580 581static const double T1p = -0x1.F0540438FD5C4p-5; //-0.0605869371865242201114 // e^(-1/16) - 1 582static const double T2p = 0x1.082B577D34ED8p-4; //0.0644944589178594318568; // e^(+1/16) - 1 583static const double T4p = 0x1.0000000000000p-53; //1.11022302462515654042e-16; // 2^(-53) 584 585static inline xDouble _xlog1p( xDouble x ) 586{ 587 static const double mTwo = -2.0; 588 static const double x52 = 52.0; 589 static const double eight = 8.0; 590 static const double oneEighth = 0.125; 591 static const double twom1 = 0x1.0p-1; 592 static const double twom9 = 0x1.0p-9; 593 594 xDouble Y, F, f, f1, L_lead, L_trail, R, u, u1, u2, v, q, g, m; 595 xSInt64 im, imc; 596 int j; 597 598 xDouble xLEt1p = _mm_cmple_sdm( x, &T1p ); 599 xDouble xGEt2p = _mm_cmpge_sdm( x, &T2p ); 600 xDouble fabsX = _mm_andnot_pd( minusZeroD, x ); 601 xDouble bt = _mm_or_pd( xLEt1p, xGEt2p ); 602 603 if( _mm_istrue_sd( bt ) ) // x <= T1p || x >= T2p 604 { 605/* Y = mantissa(x + 1.0, &m); // m = logb(x+1); Y = scalb(x+1, -m); 606 j = (int)(128.0*Y + 0.5); 607 F = j*0.0078125; 608 j -= 128; 609 610 if ( m <= -2 ) 611 f = Y - F; 612 else if ( m <= 52 ) 613 f = ( scalb ( 1.0, - m ) - F ) + scalb ( x, - m ); 614 else 615 f = ( scalb ( x, - m ) - F ) + scalb ( 1.0, - m ); 616*/ 617 618 //We can simplify a number of ways here: 619 // 1) x + 1.0 cannot give us a denormal result. 620 // 2) (x+1.0) * 2**-8 cannot underfow. 621 // 3) In this particular case, x+1.0 is also always positive, so certain hacks 622 // that fail with negative numbers work here. 623 // 624 //Lets do this a different way. Don't reduce Y to a mantissa section until later. 625 //Just use x + 1.0 instead. We can extract out the top 8 bits with rounding by 626 //adding the exponent of (x+1) * 2-8 to x+1. Use a mask to trim off the remaining 627 //mantissa bits below the first 7. This is okay for the overflow case where the rounding 628 //value triggers a promotion in the exponent, since the leading bits in the mantissa are 629 //all zeros in this case, so using the wrong mask isn't a problem. Note that at this case, 630 //F and Y are scaled larger than the old code by 2**m. This means we can do the calculation 631 //without scalb above, and just use 1.0 or x where scalb ( 1.0, - m ) and scalb ( x, - m ) 632 //appear. 633 634 double fparts[4]; 635 int index; 636 Y = _mm_sub_sdm( x, &mOne ); 637 im = _mm_srli_epi64( (xSInt64) Y, 52 ); //Grab the exponent before we destroy it. Y is always positive, so we need only shift right 638 R = _mm_mul_sdm( Y, &twom9 ); //rounding value 639 imc = _mm_sub_epi64( im, bias[1] ); //m as an integer (eponent - 1023) 640 im = _mm_add_epi64( im, (xSInt64) bt ); //subtract 1 from im, to correct for 0.5 multiplication to follow 641 Y = _mm_mul_sdm( Y, &twom1 ); //divide Y by two so that the F calculation doesn't overflow to infinity. This cannot underflow. 642 R = _mm_and_pd( R, plusinf ); //trim off mantissa of the rounding value 643 m = _mm_cvtepi32_pd( imc ); //-m as a double 644 F = _mm_add_sd( Y, R ); //Round Y up to give F 645 x = _mm_mul_sdm( x, &twom1 ); //convert x to 0.5x (exact, unless x is denorm, in which case, it does not influence the answer) 646 F = (xDouble) _mm_srli_epi64( (xSInt64) F, 52-7 ); //truncate F 647 im = _mm_slli_epi64( im, 7 ); 648 im = _mm_sub_epi64( (xSInt64) F, im ); 649 F = (xDouble) _mm_slli_epi64( (xSInt64) F, 52-7 ); 650 j = _mm_cvtsi128_si32( im ); 651 652 //F now has the mantissa we want. Both Y and F are currently too large by 2**(m-1) 653 fparts[0] = 0.0; 654 fparts[1] = 0.5; 655 _mm_store_sd( &fparts[2], x ); 656 _mm_store_sd( &fparts[3], Y ); 657 xDouble mLEmtwo = _mm_cmple_sdm( m, &mTwo ); 658 xDouble mLE52 = _mm_cmple_sdm( m, &x52 ); 659 index = _mm_cvtsi128_si32( _mm_add_epi64( (xSInt64) mLEmtwo, (xSInt64) mLE52 ) ); //-2 for m <= -2, -1 for -2 < m <=52, 0 otherwise 660 661 f = _mm_sub_sdm( F, fparts + 2 + index ); // f = { F - 0, F - 0.5, F - x } 662 f = _mm_sub_sdm( f, fparts + 1 - index ); // f = { F - 0 - Y, F - 0.5 - x, F - x - 0.5 } 663 664 //f currently has the wrong exponent ( off by 2**(m-1), and the wrong sign ) 665 //However, the rest of the code looks like this: 666 // L_lead = m * C_lead[128].d + C_lead[j].d; 667 // L_trail = m * C_trail[128].d + C_trail[j].d; 668 // u = ( 2.0 * f ) / ( Y + F ); 669 // v = u * u; 670 // q = u * v * ( A1[0].d + v * A1[1].d ); 671 // R = L_lead + ( u + ( q + L_trail ) ); 672 // 673 // f is only used in one place and there it is divided by Y + F, which also are 674 // off by 2**(m-1), so it cancels out, presuming the intermediate terms don't overflow 675 // The maximum possible value of F is exactly 2**1023, and Y must be slightly less than that, so the 676 // denomenator cannot overflow. f is considerably smaller than F, so 2 * f cannot overflow. 677 L_lead = _mm_add_sdm( _mm_mul_sdm( m, C_lead + 128 ), C_lead + j ); 678 L_trail = _mm_add_sdm( _mm_mul_sdm( m, C_trail + 128 ), C_trail + j); 679 Y = _mm_add_sd( Y, F ); //Y + F, too large by 2**(m-1) 680 f = _mm_mul_sdm( f, &mTwo ); //2 * f, sign is correct now, too large by 2**(m-1) 681 u = _mm_div_sd( f, Y ); //u is correct 682 v = _mm_mul_sd( u, u ); 683 q = _mm_add_sdm( _mm_mul_sdm( v, A1 + 1 ), A1 ); 684 v = _mm_mul_sd( v, u ); 685 q = _mm_mul_sd( q, v ); 686 R = _mm_add_sd( q, L_trail ); 687 R = _mm_add_sd( R, u ); 688 R = _mm_add_sd( R, L_lead ); 689 } 690 else if( _mm_istrue_sd( _mm_cmple_sdm( fabsX, &T4p ) ) ) 691 { 692 xDouble xEQzero = _mm_cmpeq_sdm( x, (double*) &minusZeroD ); 693 xDouble ittyBit = _mm_load_sd( &tiny ); 694 ittyBit = _mm_andnot_pd( xEQzero, ittyBit ); 695 //R = ( 8.0 * x - tiny.d ) / 8.0; 696 R = _mm_mul_sdm( x, &eight ); 697 R = _mm_sub_sd( R, ittyBit ); 698 R = _mm_mul_sdm( R, &oneEighth ); 699 700 R = _mm_sel_pd( R, x, xEQzero ); //make sure the x == 0 case is zero 701 } 702 else 703 { 704 // g = 1.0 / ( 2.0 + x ); 705 g = _mm_div_sd( one, _mm_sub_sdm( x, &mTwo ) ); 706 // u = 2 * ( x * g ); 707 u = _mm_mul_sd( x, g ); 708 u = _mm_add_sd( u, u ); 709 // v = u * u; 710 v = _mm_mul_sd( u, u ); 711 // q = u * v * ( A2[0].d + v * ( A2[1].d + v * ( A2[2].d + v * A2[3].d ) ) ); 712 q = _mm_add_sdm( _mm_mul_sdm( v, A2 + 3 ), A2 + 2 ); 713 q = _mm_add_sdm( _mm_mul_sd( q, v ), A2 + 1 ); 714 q = _mm_add_sdm( _mm_mul_sd( q, v ), A2 + 0 ); 715 v = _mm_mul_sd( v, u ); 716 q = _mm_mul_sd( q, v ); 717 // u1 = ( float ) u; 718 u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) ); 719 // f1 = ( float ) x; 720 f1 = _mm_cvtss_sd( x, _mm_cvtsd_ss( (xFloat) x, x ) ); 721 // u2 = ( ( 2 * ( x - u1 ) - u1 * f1 ) - u1 * ( x - f1 ) ) * g; 722 xDouble t0 = _mm_sub_sd( x, u1 ); //f - u1 723 xDouble t1 = _mm_sub_sd( x, f1 ); //f - f1 724 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1 725 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1) 726 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 ) 727 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1 728 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1) 729 u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g 730 731 // R = u1 + ( u2 + q ); 732 R = _mm_add_sd( u2, q ); 733 R = _mm_add_sd( R, u1 ); 734 } 735 736 return R; 737} 738 739double log1p ( double x ) { 740 xDouble xd = DOUBLE_2_XDOUBLE( x ); 741 742 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 743 if( _mm_istrue_sd( xIsNaN ) ) 744 return XDOUBLE_2_DOUBLE( (xDouble)logNaN ); 745 746 xDouble xGTminusOne = _mm_cmpgt_sdm( xd, &mOne ); 747 xDouble xLTinfinity = _mm_cmplt_sdm( xd, (double*) &plusinf ); 748 749 // mainline: -1 < x < inf 750 if( _mm_istrue_sd( _mm_and_pd( xGTminusOne, xLTinfinity ) ) ) { 751 xd = _xlog1p( xd ); 752 } 753 754 // edge cases 755 else { 756 if( _mm_istrue_sd( _mm_cmpeq_sdm( xd, &mOne ) ) ) 757 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 758 else { 759 xDouble inf = plusinf; 760 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 761 inf = _mm_andnot_pd( xIsNaN, inf ); 762 inf = _mm_andnot_pd( isInf, inf ); 763 764 xDouble xLTzero = _mm_cmplt_sdm( xd, (double*) &minusZeroD ); 765 766 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 767 xd = _mm_add_sd( xd, inf ); 768 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN ); 769 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 770 } 771 } 772 773 // return 774 return XDOUBLE_2_DOUBLE( xd ); 775} 776 777/* 778float log1pf( float x ) 779{ //cheesy fallback on double, that probably fails to get the edge cases right. 780 xDouble xd = FLOAT_2_XDOUBLE( x ); 781 782 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd ); 783 if( _mm_istrue_sd( xIsNaN ) ) 784 return XDOUBLE_2_FLOAT( (xDouble)logNaNf ); 785 786 xDouble xGTminusOne = _mm_cmpgt_sdm( xd, &mOne ); 787 xDouble xLTinfinity = _mm_cmplt_sdm( xd, (double*) &plusinf ); 788 789 // mainline: -1 < x < inf 790 if( _mm_istrue_sd( _mm_and_pd( xGTminusOne, xLTinfinity ) ) ) { 791 xd = _xlog1p( xd ); 792 } 793 794 // edge cases 795 else { 796 if( _mm_istrue_sd( _mm_cmpeq_sdm( xd, &mOne ) ) ) 797 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 798 else { 799 xDouble inf = plusinf; 800 xDouble isInf = _mm_cmpeq_sd( xd, inf ); 801 inf = _mm_andnot_pd( xIsNaN, inf ); 802 inf = _mm_andnot_pd( isInf, inf ); 803 804 xDouble xLTzero = _mm_cmplt_sdm( xd, (double*) &minusZeroD ); 805 806 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf 807 xd = _mm_add_sd( xd, inf ); 808 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf ); 809 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0 810 } 811 } 812 813 // return 814 return XDOUBLE_2_FLOAT( xd ); 815} 816*/ 817 818#pragma mark - 819#pragma mark log10 820 821static inline double _xlog10( double ) ALWAYS_INLINE; 822 823/* static xUInt64 LOGORITHMIC_NAN = { 0x7FF8000000000000ULL, 0 }; [sic!] */ 824 825static inline double _xlog10( double x ) 826{ 827 static const double half = 0.5; 828 static const double log10e = 0.434294481903251827651128918916605082; // log_10(e) 829 xDouble xx = DOUBLE_2_XDOUBLE( x ); 830 xDouble xIsNaN = _mm_cmpunord_sd( xx, xx ); 831 xDouble safeX = _mm_andnot_pd( xIsNaN, xx ); 832 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD ); 833 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf ); 834 xDouble R; 835 836 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) // x > 0 && x < inf 837 { 838 if( (x <= T1) || x >= T2 ) 839 { 840 xDouble m; 841 xDouble Y = mantissa( xx, &m ); 842 int j = _mm_cvttsd_si32( _mm_add_sdm( _mm_mul_sdm( Y, &d128 ), &half )); 843 xDouble F = _mm_mul_sdm( _mm_cvtsi32_sd( Y, j) , &d0_0078125 ); 844 j -= 128; 845 xDouble f = _mm_sub_sd( Y, F ); 846 xDouble L_lead = _mm_add_sdm( _mm_mul_sdm( m, &C_lead[128] ), &C_lead[j] ); 847 xDouble L_trail = _mm_add_sdm( _mm_mul_sdm( m, &C_trail[128] ), &C_trail[j] ); 848 xDouble u = _mm_div_sd( _mm_add_sd( f, f), _mm_add_sd( Y, F ) ); 849 xDouble v = _mm_mul_sd( u, u ); 850 xDouble q = _mm_add_sdm( _mm_mul_sdm( v, &A1[1] ), &A1[0] ); 851 q = _mm_mul_sd( q, v ); 852 q = _mm_mul_sd( q, u ); 853 R = _mm_add_sd( L_lead, _mm_add_sd( u , _mm_add_sd( q, L_trail ) ) ); 854 R = _mm_mul_sdm( R, &log10e ); 855 } 856 else 857 { 858 xDouble xEQone = _mm_cmpeq_sdm( xx, (double*) &one ); 859 860 xDouble f = _mm_sub_sdm( xx, (double*) &one ); 861 xDouble g = _mm_div_sd( one, _mm_add_sdm( f, &two ) ); 862 xDouble u = _mm_mul_sd( f, g ); u = _mm_add_sd( u, u ); //u = 2.0 * (f * g) 863 xDouble v = _mm_mul_sd( u, u ); 864 xDouble q = _mm_add_sdm( _mm_mul_sdm( v, &A2[3] ), &A2[2] ); 865 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[1] ); 866 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[0] ); 867 q = _mm_mul_sd( _mm_mul_sd( q, v ), u ); 868 xDouble u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) ); 869 xDouble f1 = _mm_cvtss_sd( f, _mm_cvtsd_ss( (xFloat) f, f ) ); 870 871 // u2 = ( ( 2 * ( f - u1 ) - u1 * f1 ) - u1 * ( f - f1 ) ) * g; 872 xDouble t0 = _mm_sub_sd( f, u1 ); //f - u1 873 xDouble t1 = _mm_sub_sd( f, f1 ); //f - f1 874 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1 875 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1) 876 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 ) 877 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1 878 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1) 879 xDouble u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g 880 R = _mm_add_sd( u2, q ); 881 R = _mm_add_sd( R, u1 ); 882 R = _mm_mul_sdm( R, &log10e ); 883 R = _mm_andnot_pd( xEQone, R ); 884 } 885 } 886 else 887 { //edge cases 888 if( _mm_istrue_sd( _mm_cmpeq_sd( xx, minusZeroD ) ) ) 889 R = _mm_div_sd( _mm_load_sd( &two ), minusZeroD ); 890 else 891 { 892 xDouble inf = plusinf; 893 xDouble isInf = _mm_cmpeq_sd( xx, inf ); 894 inf = _mm_andnot_pd( xIsNaN, inf ); 895 inf = _mm_andnot_pd( isInf, inf ); 896 897 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD ); 898 899 R = _mm_sub_sd( xx, inf ); //silence NaN, set finite values to -Inf 900 R = _mm_add_sd( R, inf ); 901 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN ); 902 R = _mm_add_sd( xLTzero, R ); //set to NaN if x < 0 903 } 904 905 } 906 907 return XDOUBLE_2_DOUBLE( R ); 908} 909 910double log10 ( double x ) 911{ 912 return _xlog10( x ); 913} 914 915/* 916float log10f( float x ) 917{//cheesy fallback on double, that probably fails to get the edge cases right. 918 return (float) _xlog10( (double) x ); 919} 920*/ 921 922#endif /* defined( BUILDING_FOR_CARBONCORE_LEGACY ) */