this repo has no description
1/*
2 * xmm_exp.c
3 * xmmLibm
4 *
5 * Created by Ian Ollmann, Ph.D. on 7/14/05.
6 * Copyright � 2005 Apple Computer, Inc. All rights reserved.
7 *
8 * Constants from original typing by Earl Killian of MIPS on March 23rd, 1992.
9 * Converted from pairs of 32-bit hexadecimal words to C99 notation, by Ian Ollmann
10 *
11 * Algorithm from Peter Tang:
12 *
13 * ACM Transactions on Mathematical Software, Vol 16, 4, December 1990, pp 378-400
14 *
15 *
16 */
17
18#include "xmmLibm_prefix.h"
19
20#include "math.h"
21#include "fenv.h"
22
23//Functions implemented here
24static inline xDouble _xlog( xDouble x ) ALWAYS_INLINE;
25
26static const double A1[2] = {
27 0x1.5555555550286p-4, //0.0833333333330391334837
28 0x1.999A0BC712416p-7 //0.0125000531680985842164
29 };
30
31static const double A2[4] = {
32 0x1.55555555554E6p-4, //0.083333333333331788273
33 0x1.9999999BAC6D4p-7, //0.0125000000037717509671
34 0x1.2492307F1519Fp-9, //0.00223213998791944824227
35 0x1.C8034C85DFFF0p-12 //0.000434887777707614574252
36 };
37
38static const double C_lead[129] = {
39 0x0, // 0
40 0x1.FE02A6B100000p-8, //0.00778214044203195953742
41 0x1.FC0A8B0FC0000p-7, //0.0155041865359635266941
42 0x1.7B91B07D60000p-6, //0.0231670592816044518258
43 0x1.F829B0E780000p-6, //0.0307716586667083902285
44 0x1.39E87B9FE8000p-5, //0.0383188643020275776507
45 0x1.77458F6330000p-5, //0.0458095360313564015087
46 0x1.B42DD71198000p-5, //0.0532445145188376045553
47 0x1.F0A30C0118000p-5, //0.060624621816486978787
48 0x1.16536EEA38000p-4, //0.0679506619085259444546
49 0x1.341D7961BC000p-4, //0.075223421237524235039
50 0x1.51B073F060000p-4, //0.0824436692109884461388
51 0x1.6F0D28AE58000p-4, //0.0896121586897606903221
52 0x1.8C345D6318000p-4, //0.096729626458454731619
53 0x1.A926D3A4AC000p-4, //0.10379679368156757846
54 0x1.C5E548F5BC000p-4, //0.110814366340264314204
55 0x1.E27076E2B0000p-4, //0.117783035656430001836
56 0x1.FEC9131DC0000p-4, //0.12470347850103280507
57 0x1.0D77E7CD08000p-3, //0.131576357788617315236
58 0x1.1B72AD52F6000p-3, //0.138402322859064952354
59 0x1.29552F8200000p-3, //0.145182009844575077295
60 0x1.371FC201E8000p-3, //0.151916042025732167531
61 0x1.44D2B6CCB8000p-3, //0.158605030176659056451
62 0x1.526E5E3A1C000p-3, //0.165249572895390883787
63 0x1.5FF3070A7A000p-3, //0.171850256926745714736
64 0x1.6D60FE719E000p-3, //0.178407657472916980623
65 0x1.7AB890210E000p-3, //0.184922338494061477832
66 0x1.87FA06520C000p-3, //0.191394852999565046048
67 0x1.9525A9CF46000p-3, //0.197825743329985925811
68 0x1.A23BC1FE2C000p-3, //0.204215541428766300669
69 0x1.AF3C94E80C000p-3, //0.210564769107350002741
70 0x1.BC286742D8000p-3, //0.216873938300523150247
71 0x1.C8FF7C79AA000p-3, //0.223143551314251453732
72 0x1.D5C216B4FC000p-3, //0.229374101064877322642
73 0x1.E27076E2B0000p-3, //0.235566071312860003673
74 0x1.EF0ADCBDC6000p-3, //0.241719936887193398434
75 0x1.FB9186D5E4000p-3, //0.247836163904594286578
76 0x1.0402594B4D000p-2, //0.253915209980959843961
77 0x1.0A324E2739000p-2, //0.259957524436913445243
78 0x1.1058BF9AE5000p-2, //0.265963548497211377253
79 0x1.1675CABABA000p-2, //0.271933715483555715764
80 0x1.1C898C169A000p-2, //0.277868451003541849786
81 0x1.22941FBCF8000p-2, //0.283768173130738432519
82 0x1.2895A13DE8000p-2, //0.289633292582948342897
83 0x1.2E8E2BAE12000p-2, //0.295464212893875810551
84 0x1.347DD9A988000p-2, //0.301261330578199704178
85 0x1.3A64C55694000p-2, //0.307025035294827830512
86 0x1.404308686A000p-2, //0.312755710003784770379
87 0x1.4618BC21C6000p-2, //0.318453731118552241242
88 0x1.4BE5F95778000p-2, //0.324119468654316733591
89 0x1.51AAD872E0000p-2, //0.329753286372579168528
90 0x1.5767717456000p-2, //0.335355541921217081835
91 0x1.5D1BDBF581000p-2, //0.340926586970681455568
92 0x1.62C82F2B9C000p-2, //0.346466767346100823488
93 0x1.686C81E9B1000p-2, //0.351976423157111639739
94 0x1.6E08EAA2BA000p-2, //0.357455888921776931966
95 0x1.739D7F6BBD000p-2, //0.362905493689368086052
96 0x1.792A55FDD4000p-2, //0.368325561158599157352
97 0x1.7EAF83B82B000p-2, //0.373716409793587445165
98 0x1.842D1DA1E9000p-2, //0.379078352935039220029
99 0x1.89A3386C14000p-2, //0.384411698910298582632
100 0x1.8F11E87366000p-2, //0.3897167511399857176
101 0x1.947941C211000p-2, //0.394993808240769794793
102 0x1.99D958117E000p-2, //0.400243164127005002229
103 0x1.9F323ECBFA000p-2, //0.405465108108273852849
104 0x1.A484090E5C000p-2, //0.410659924985338875558
105 0x1.A9CEC9A9A1000p-2, //0.415827895143820569501
106 0x1.AF12932478000p-2, //0.420969294644237379543
107 0x1.B44F77BCC9000p-2, //0.426084395310908803367
108 0x1.B985896931000p-2, //0.43117346481835738814
109 0x1.BEB4D9DA72000p-2, //0.436236766774982243078
110 0x1.C3DD7A7CDB000p-2, //0.441274560804913562606
111 0x1.C8FF7C79AA000p-2, //0.446287102628502907464
112 0x1.CE1AF0B85F000p-2, //0.451274644139402880683
113 0x1.D32FE7E00F000p-2, //0.456237433481646803557
114 0x1.D83E7258A3000p-2, //0.461175715122180918115
115 0x1.DD46A04C1C000p-2, //0.466089729924533457961
116 0x1.E24881A7C7000p-2, //0.47097971521884574031
117 0x1.E744261D68000p-2, //0.475845904869856894948
118 0x1.EC399D2469000p-2, //0.480688529345798087888
119 0x1.F128F5FAF0000p-2, //0.485507815781602403149
120 0x1.F6123FA703000p-2, //0.490303988045297955978
121 0x1.FAF588F78F000p-2, //0.495077266797807169496
122 0x1.FFD2E0857F000p-2, //0.499827869556384030147
123 0x1.02552A5A5D000p-1, //0.50455601075236700126
124 0x1.04BDF9DA92800p-1, //0.509261901789841431309
125 0x1.0723E5C1CE000p-1, //0.513945751102255599108
126 0x1.0986F4F573800p-1, //0.518607764208127264283
127 0x1.0BE72E4252800p-1, //0.52324814376447648101
128 0x1.0E44985D1D000p-1, //0.527867089620940532768
129 0x1.109F39E2D5000p-1, //0.532464798869568767259
130 0x1.12F719593F000p-1, //0.537041465896891168086
131 0x1.154C3D2F4D800p-1, //0.541597282432803694974
132 0x1.179EABBD89800p-1, //0.546132437598089381936
133 0x1.19EE6B467C800p-1, //0.550647117952621556469
134 0x1.1C3B81F714000p-1, //0.555141507540611200966
135 0x1.1E85F5E704000p-1, //0.559615787935399566777
136 0x1.20CDCD192A800p-1, //0.564070138284705535625
137 0x1.23130D7BEC000p-1, //0.568504735352689749561
138 0x1.2555BCE98F800p-1, //0.572919753561791367247
139 0x1.2795E1289B000p-1, //0.57731536503479219391
140 0x1.29D37FEC2B000p-1, //0.581691739634607074549
141 0x1.2C0E9ED449000p-1, //0.586049045003619539784
142 0x1.2E47436E40000p-1, //0.590387446602107957006
143 0x1.307D7334F1000p-1, //0.594707107746671681525
144 0x1.32B1339122000p-1, //0.599008189646156097297
145 0x1.34E289D9CE000p-1, //0.603290851438032404985
146 0x1.37117B5474800p-1, //0.607555250224550036364
147 0x1.393E0D3562800p-1, //0.611801541105933210929
148 0x1.3B6844A000000p-1, //0.616029877215623855591
149 0x1.3D9026A715800p-1, //0.620240409751886545564
150 0x1.3FB5B84D17000p-1, //0.624433288011914555682
151 0x1.41D8FE8467000p-1, //0.628608659422297932906
152 0x1.43F9FE2F9D000p-1, //0.632766669571083184564
153 0x1.4618BC21C6000p-1, //0.636907462237104482483
154 0x1.48353D1EA8800p-1, //0.641031179420906482846
155 0x1.4A4F85DB04000p-1, //0.645137961373620782979
156 0x1.4C679AFCCF000p-1, //0.649227946625160257099
157 0x1.4E7D811B75800p-1, //0.653301272012640765752
158 0x1.50913CC016800p-1, //0.657358072708348117885
159 0x1.52A2D265BC800p-1, //0.661398482245431296178
160 0x1.54B2467999800p-1, //0.665422632545187298092
161 0x1.56BF9D5B3F000p-1, //0.669430653942526987521
162 0x1.58CADB5CD7800p-1, //0.673422675212123067467
163 0x1.5AD404C35A000p-1, //0.677398823591829568613
164 0x1.5CDB1DC6C1800p-1, //0.681359224807920327294
165 0x1.5EE02A9241800p-1, //0.685304003098963221419
166 0x1.60E32F4478800p-1, //0.689233281238784911693
167 0x1.62E42FEFA3800p-1, //0.693147180559890330187
168 };
169
170static const double C_trail[129] = {
171 0.0, // 0
172 0x1.9E23F0DDA40E4p-46, //2.29894100462035112076e-14
173 0x1.F1E7CF6D3A69Cp-50, //1.72745674997061065553e-15
174 -0x1.3B955B602ACE4p-44, //-7.00735970431003565857e-14
175 0x1.980267C7E09E4p-45, //4.52981425779092882775e-14
176 0x1.EAFD480AD9015p-44, //1.09021543022033016421e-13
177 -0x1.181DCE586AF09p-44, //-6.21983419947579227529e-14
178 -0x1.C827AE5D6704Cp-46, //-2.53216894311744497863e-14
179 -0x1.D599E83368E91p-45, //-5.21362063913650408235e-14
180 -0x1.47C5E768FA309p-46, //-1.81950600301688149235e-14
181 0x1.1D09299837610p-44, //6.32906595872454402199e-14
182 0x1.83F69278E686Ap-44, //8.61451293608781447223e-14
183 -0x1.4B4641B664613p-44, //-7.35577021943502867846e-14
184 0x1.B20F5ACB42A66p-44, //9.63806765855227740728e-14
185 0x1.563650BD22A9Cp-44, //7.59863659719414144342e-14
186 0x1.D0C57585FBE06p-46, //2.57999912830699022513e-14
187 -0x1.A342C2AF0003Cp-45, //-4.65472974759844472568e-14
188 -0x1.54555D1AE6607p-44, //-7.55692068745133691756e-14
189 0x1.CB2CD2EE2F482p-44, //1.01957352237084734958e-13
190 0x1.E80A41811A396p-45, //5.41833313790089940464e-14
191 -0x1.5B967F4471DFCp-44, //-7.71800133682809851086e-14
192 0x1.EE8779B2D8ABCp-44, //1.09807540998552379211e-13
193 -0x1.70CC16135783Cp-46, //-2.04723578004619553937e-14
194 -0x1.790BA37FC5238p-44, //-8.37209109923591205585e-14
195 -0x1.8586F183BEBF2p-44, //-8.64923960721207091374e-14
196 -0x1.BC6E557134767p-44, //-9.86835038673494943912e-14
197 -0x1.BDB9072534A58p-45, //-4.94851676612509959777e-14
198 0x1.22120401202FCp-44, //6.44085615069689207389e-14
199 -0x1.297137D9F158Fp-44, //-6.60454487708238395939e-14
200 -0x1.539CD91DC9F0Bp-44, //-7.54091651195618882501e-14
201 -0x1.A4E633FCD9066p-52, //-3.65071888317905767114e-16
202 0x1.9AC53F39D121Cp-44, //9.12093724991498410553e-14
203 -0x1.7794F689F8434p-45, //-4.16979658452719528642e-14
204 -0x1.1BA91BBCA681Bp-45, //-3.14926506519148377243e-14
205 -0x1.A342C2AF0003Cp-44, //-9.30945949519688945136e-14
206 -0x1.B26B79C86AF24p-45, //-4.82302894299408858477e-14
207 -0x1.D572AAB993C87p-47, //-1.30297971733086634357e-14
208 0x1.036B89EF42D7Fp-48, //3.60017673263733462441e-15
209 0x1.C6BEE7EF4030Ep-47, //1.26217293988853160748e-14
210 -0x1.4AB9D817D52CDp-44, //-7.34359136986779711761e-14
211 0x1.8380E731F55C4p-44, //8.60430677280873279668e-14
212 -0x1.81410E5C62AFFp-44, //-8.55436000656632193091e-14
213 -0x1.A6976F5EB0963p-44, //-9.38341722366369999987e-14
214 0x1.A8D7AD24C13F0p-44, //9.43339818951269030846e-14
215 -0x1.67B1E99B72BD8p-45, //-3.99341638438784391272e-14
216 -0x1.5594DD4C58092p-45, //-3.7923164802093146798e-14
217 0x1.7A71CBCD735D0p-44, //8.40315630479242455758e-14
218 0x1.F8EF43049F7D3p-44, //1.1211800740360981983e-13
219 -0x1.3D82F484C84CCp-46, //-1.76254313121726620573e-14
220 -0x1.D7C92CD9AD824p-44, //-1.04757500587765412913e-13
221 -0x1.F4BD8DB0A7CC1p-44, //-1.11186713895593226425e-13
222 -0x1.64EAD9524D7CAp-44, //-7.92515783138655870267e-14
223 -0x1.8D6BDC9C7C238p-44, //-8.8245263321256400121e-14
224 0x1.E54BDBD7C8A98p-44, //1.07757430375726404546e-13
225 0x1.2BB110AF84054p-44, //6.65449164332479482515e-14
226 0x1.E38C139318D71p-46, //2.68422602858563731995e-14
227 0x1.A7389314FEB50p-52, //3.67085697163493829481e-16
228 0x1.E89F057691FEAp-44, //1.08495696229679121506e-13
229 -0x1.E4DA62D0C25ADp-49, //-3.36434401382552911606e-15
230 -0x1.3A2DB13AE687Cp-44, //-6.97616377035377091917e-14
231 0x1.2D5AD38C40882p-45, //3.3457102695440823738e-14
232 0x1.63BF0BB4EAB4Cp-45, //3.949577025210288071e-14
233 0x1.BEAE9337451F4p-44, //9.91833135258393974771e-14
234 0x1.1597525DD88F0p-47, //7.70470078193964863175e-15
235 -0x1.ED03525CA2643p-44, //-1.09470871366066397592e-13
236 -0x1.3D7500D6523C5p-44, //-7.04896239210974659112e-14
237 -0x1.ED9CADEC02B43p-44, //-1.09603887929539904968e-13
238 -0x1.E53BB31EED7A9p-44, //-1.07743414616095792458e-13
239 -0x1.3AE68224AA2CEp-47, //-8.74024251107295313295e-15
240 0x1.F6B31F629F11Ep-47, //1.39527194700992522593e-14
241 -0x1.21021E78B2151p-44, //-6.41727287881571093141e-14
242 -0x1.5946261F5A42Bp-45, //-3.83331165923754571982e-14
243 -0x1.7794F689F8434p-44, //-8.33959316905439057284e-14
244 0x1.F5BDBE95E5568p-45, //5.57044620824077391343e-14
245 -0x1.0AA7884DCD050p-44, //-5.92091761359114736879e-14
246 -0x1.835F5D48BA26Dp-47, //-1.07517471912360339462e-14
247 0x1.282FB989A9274p-44, //6.57665976858006147528e-14
248 -0x1.ECF1A1385D356p-45, //-5.47277630185806178777e-14
249 0x1.E1F8DF68DBCF3p-44, //1.07019317621142549209e-13
250 -0x1.9FF45188D6065p-45, //-4.61802117788209510607e-14
251 0x1.BB2CD720EC44Cp-44, //9.84046527823262695501e-14
252 -0x1.D4E7AEA4F0D25p-44, //-1.04117827384293371195e-13
253 0x1.8F6CD7D9F2754p-45, //4.43451018828153751026e-14
254 0x1.261565F40D932p-44, //6.52996738757825591006e-14
255 0x1.FD8D38D2BAFDDp-46, //2.8285798609067893876e-14
256 -0x1.2D9A033EFF74Ep-45, //-3.34845053941249831574e-14
257 -0x1.7F6350D38EDDDp-46, //-2.12823065872096837292e-14
258 -0x1.6FA37012B5806p-44, //-8.16321296891503919914e-14
259 0x1.415B4C4BDD99Fp-44, //7.13555066011812146304e-14
260 -0x1.BA048A8D10B4Bp-44, //-9.81476542529858082436e-14
261 -0x1.B4810E09B27A4p-44, //-9.69233849737002813365e-14
262 -0x1.0EB3FB7398E0Cp-47, //-7.51351912898166894415e-15
263 -0x1.0B2B38662E34Dp-44, //-5.93233971574446149215e-14
264 0x1.A0BFC60E6FA08p-45, //4.62684463909612350375e-14
265 0x1.6ECC5CBDD7782p-45, //4.07227907088846767786e-14
266 -0x1.EDA1B58389902p-44, //-1.09608250460592783688e-13
267 0x1.A07BD8B34BE7Cp-46, //2.3119493838005377632e-14
268 0x1.B6C9A81E87BAEp-44, //9.74304462767037064935e-14
269 -0x1.7AFA4392F1BA7p-46, //-2.10374825114449422873e-14
270 -0x1.A61FDE292977Ep-48, //-5.85815401264202219115e-15
271 0x1.1AEB783F3DB97p-45, //3.14104080050449590607e-14
272 0x1.1590B9AD974BAp-46, //1.54079711890856738893e-14
273 -0x1.74468563CE45Dp-45, //-4.13308801481084566682e-14
274 0x1.34202A10C3491p-44, //6.84176364159146659095e-14
275 0x1.7C3F6B2143EADp-46, //2.11079891578422983965e-14
276 -0x1.4766FD54A4C27p-44, //-7.26979150253512871478e-14
277 0x1.D316EB92D885Dp-45, //5.18573553063418286042e-14
278 -0x1.28E88BF6DEEC9p-47, //-8.24086314983113070392e-15
279 0x1.0CD4E221301B7p-44, //5.96926009653846962309e-14
280 -0x1.EEA838909F3D3p-44, //-1.0983594325438429833e-13
281 -0x1.055BFBD9C2F53p-45, //-2.90167125533596631915e-14
282 -0x1.7B4962C55F46Bp-46, //-2.10546393306435734475e-14
283 0x1.5732325E617A3p-44, //7.6204838231893709023e-14
284 -0x1.98858D84649F1p-45, //-4.53550186996774688761e-14
285 -0x1.3D82F484C84CCp-45, //-3.52508626243453241145e-14
286 0x1.BEE7ABD176604p-46, //2.48082091251967673736e-14
287 -0x1.44FDD840B8591p-45, //-3.60813136042255739798e-14
288 -0x1.C64E971322CE8p-45, //-5.04382083563449091526e-14
289 0x1.D84E584C2B22Cp-44, //1.04873006903856996874e-13
290 0x1.AD2F2CE96C2D6p-47, //1.19122567102055698791e-14
291 -0x1.2A88C41BA8752p-44, //-6.62879179039074743232e-14
292 -0x1.B42B755EBA5E1p-44, //-9.68491419671810783613e-14
293 0x1.CCA08E310B9B2p-44, //1.02279777907416200886e-13
294 0x1.893092F25D931p-45, //4.36528304869414810551e-14
295 -0x1.A609ACAAB41FCp-46, //-2.34278036379790696248e-14
296 -0x1.36E612387451Fp-46, //-1.72583456150091690167e-14
297 -0x1.8A8F29F6A02DCp-45, //-4.38048746232309815355e-14
298 0x1.B194F912B416Ap-46, //2.40686318405286681994e-14
299 0x1.EF35793C76730p-45, //5.49792301870837115524e-14
300 };
301
302
303static const double T1 = 0x1.E0FABFBC702A3p-1; //0.939413062813475696622 // e^(-1/16)
304static const double T2 = 0x1.1082B577D34EEp+0; //1.06449445891785954288 // e^(+1/16)
305static const double tiny = 0x0.0000000000001p-1022; //4.94065645841246544177e-324
306static const xDouble plusinf = { INFINITY, 0.0 };
307static const xDouble smallestNormal = { 0x1.0p-1022, 0 };
308static const double denormBias = 0.0;
309static const xDouble one = { 1.0, 0.0 };
310static const double d128 = 128.0;
311static const double d0_0078125 = 0.0078125;
312static const xSInt64 bias[2] = { {2045, 0}, {1023, 0} };
313static const xSInt64 * const biasp = bias + 1;
314static const double mOne = -1.0;
315static const double two = 2.0;
316static const xSInt64 logNaN = { 0x7ff8000000000024LL, 0 };
317static const xSInt64 logNaNf = { 0x7ff8000480000000LL, 0 };
318
319//x must be finite, non-NaN, positive
320static inline xDouble mantissa( xDouble x, xDouble *exponent ) ALWAYS_INLINE;
321static inline xDouble mantissa( xDouble x, xDouble *exponent )
322{
323
324 xDouble mantissa = _mm_andnot_pd( plusinf, x ); // x & 0x800FFFFFFFFFFFFF (note: always positive)
325 xDouble isDenormal = _mm_cmplt_sdm( x, (double*) &smallestNormal ); // -1LL if denormal, 0 otherwise
326 xDouble denormExp = _mm_and_pd( one, isDenormal ); // 1.0 if denormal, 0.0 otherwise
327 int biasSelector = _mm_cvtsi128_si32( (__m128i) isDenormal ); // -1 if denormal, 0 otherwise
328 x = _mm_or_pd( x, denormExp ); // or in 1.0 as exponent if denormal
329 x = _mm_sub_sd( x, denormExp ); // subtract away 1.0 as exponent if denormal
330 mantissa = _mm_andnot_pd( plusinf, x ); // get the mantissa of x
331 xUInt64 iExp = (__m128i) _mm_and_pd( x, plusinf ); // extract the exponent of x
332 iExp = _mm_srli_epi64( iExp, 52 ); // right shift to make an integer
333 iExp = _mm_sub_epi64( iExp, biasp[ biasSelector ] ); // subtract away either 1023 or the denormal bias
334 *exponent = _mm_cvtepi32_pd( iExp ); // convert to double
335 return _mm_or_pd( mantissa, one ); // set the mantissa to have a range 1.0 <= mantissa < 2.0
336}
337
338static inline xDouble _xlog( xDouble x )
339{
340 static const double half = 0.5;
341 xDouble Y, F, f, f1, L_lead, L_trail, R, u, u1, u2, v, q, g, m;
342 int j;
343
344 xDouble xLET1 = _mm_cmple_sdm( x, &T1 );
345 xDouble xGET2 = _mm_cmpge_sdm( x, &T2 );
346 if( _mm_istrue_sd( _mm_or_pd( xLET1, xGET2 ) ) )
347 {
348 Y = mantissa( x, &m );
349 j = _mm_cvttsd_si32( _mm_add_sdm( _mm_mul_sdm( Y, &d128 ), &half) );
350 F = _mm_mul_sdm( _mm_cvtsi32_sd( Y, j) , &d0_0078125 );
351 j -= 128;
352 f = _mm_sub_sd( Y, F );
353 L_lead = _mm_add_sdm( _mm_mul_sdm( m, &C_lead[128] ), &C_lead[j] );
354 L_trail = _mm_add_sdm( _mm_mul_sdm( m, &C_trail[128] ), &C_trail[j] );
355 u = _mm_div_sd( _mm_add_sd( f, f), _mm_add_sd( Y, F ) );
356 v = _mm_mul_sd( u, u );
357 q = _mm_add_sdm( _mm_mul_sdm( v, &A1[1] ), &A1[0] );
358 q = _mm_mul_sd( q, v );
359 q = _mm_mul_sd( q, u );
360 R = _mm_add_sd( L_lead, _mm_add_sd( u , _mm_add_sd( q, L_trail ) ) );
361 }
362 else
363 {
364 xDouble xEQone = _mm_cmpeq_sdm( x, (double*) &one );
365
366 f = _mm_sub_sdm( x, (double*) &one );
367 g = _mm_div_sd( one, _mm_add_sdm( f, &two ) );
368 u = _mm_mul_sd( f, g ); u = _mm_add_sd( u, u ); //u = 2.0 * (f * g)
369 v = _mm_mul_sd( u, u );
370 q = _mm_add_sdm( _mm_mul_sdm( v, &A2[3] ), &A2[2] );
371 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[1] );
372 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[0] );
373 q = _mm_mul_sd( _mm_mul_sd( q, v ), u );
374 u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) );
375 f1 = _mm_cvtss_sd( f, _mm_cvtsd_ss( (xFloat) f, f ) );
376
377 // u2 = ( ( 2 * ( f - u1 ) - u1 * f1 ) - u1 * ( f - f1 ) ) * g;
378 xDouble t0 = _mm_sub_sd( f, u1 ); //f - u1
379 xDouble t1 = _mm_sub_sd( f, f1 ); //f - f1
380 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1
381 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1)
382 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 )
383 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1
384 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1)
385 u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g
386 R = _mm_add_sd( u2, q );
387 R = _mm_add_sd( R, u1 );
388
389 R = _mm_andnot_pd( xEQone, R );
390 }
391 return R;
392}
393
394/*
395static inline double _xlog2( double x ) ALWAYS_INLINE;
396static inline double _xlog2( double x )
397{
398 static const double conversion = 1.0 / 6.9314718055994530942E-1;
399 xDouble r = _xlog( DOUBLE_2_XDOUBLE( x ) );
400 r = _mm_mul_sdm( r, &conversion );
401 return XDOUBLE_2_DOUBLE( r );
402}
403*/
404
405#pragma mark -
406#pragma mark log2
407
408#if defined( BUILDING_FOR_CARBONCORE_LEGACY )
409
410#warning goofy use of scaled log() to do log2()
411double log2( double x )
412{
413 static const double conversion = 1.0 / 6.9314718055994530942E-1;
414 xDouble xd = DOUBLE_2_XDOUBLE( x );
415
416 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
417 xDouble safeX = _mm_andnot_pd( xIsNaN, xd );
418
419 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD );
420 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf );
421
422 // mainline: 0 < x < inf
423 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) {
424 xd = _xlog(xd);
425 xd = _mm_mul_sdm(xd, &conversion);
426 }
427
428 // edge cases
429 else {
430 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) )
431 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
432 else {
433 xDouble inf = plusinf;
434 xDouble isInf = _mm_cmpeq_sd( xd, inf );
435 inf = _mm_andnot_pd( xIsNaN, inf );
436 inf = _mm_andnot_pd( isInf, inf );
437
438 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD );
439
440 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
441 xd = _mm_add_sd( xd, inf );
442 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN );
443 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
444 }
445 }
446
447 // return
448 return XDOUBLE_2_DOUBLE( xd );
449}
450
451#else
452
453/*
454float log2f( float x )
455{
456 //cheesy fallback on double, that probably fails to get the edge cases right.
457 static const double conversion = 1.0 / 6.9314718055994530942E-1;
458 xDouble xd = FLOAT_2_XDOUBLE( x );
459
460 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
461 xDouble safeX = _mm_andnot_pd( xIsNaN, xd );
462
463 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD );
464 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf );
465
466 // mainline: 0 < x < inf
467 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) {
468 xd = _xlog(xd);
469 xd = _mm_mul_sdm(xd, &conversion);
470 }
471
472 // edge cases
473 else {
474 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) )
475 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
476 else {
477 xDouble inf = plusinf;
478 xDouble isInf = _mm_cmpeq_sd( xd, inf );
479 inf = _mm_andnot_pd( xIsNaN, inf );
480 inf = _mm_andnot_pd( isInf, inf );
481
482 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD );
483
484 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
485 xd = _mm_add_sd( xd, inf );
486 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf );
487 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
488 }
489 }
490
491 // return
492 return XDOUBLE_2_FLOAT( xd );
493}
494*/
495
496#pragma mark -
497#pragma mark log
498
499double log ( double x )
500{
501 xDouble xd = DOUBLE_2_XDOUBLE( x );
502
503 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
504 xDouble safeX = _mm_andnot_pd( xIsNaN, xd );
505
506 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD );
507 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf );
508
509 // mainline: 0 < x < inf
510 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) )
511 xd = _xlog(xd);
512
513 // edge cases
514 else {
515 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) )
516 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
517 else {
518 xDouble inf = plusinf;
519 xDouble isInf = _mm_cmpeq_sd( xd, inf );
520 inf = _mm_andnot_pd( xIsNaN, inf );
521 inf = _mm_andnot_pd( isInf, inf );
522
523 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD );
524
525 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
526 xd = _mm_add_sd( xd, inf );
527 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN );
528 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
529 }
530 }
531
532 // return
533 return XDOUBLE_2_DOUBLE( xd );
534}
535
536/*
537float logf( float x )
538{
539 //cheesy fallback on double, that probably fails to get the edge cases right.
540 xDouble xd = FLOAT_2_XDOUBLE( x );
541
542 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
543 xDouble safeX = _mm_andnot_pd( xIsNaN, xd );
544
545 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD );
546 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf );
547
548 // mainline: 0 < x < inf
549 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) )
550 xd = _xlog(xd);
551
552 // edge cases
553 else {
554 if( _mm_istrue_sd( _mm_cmpeq_sd( xd, minusZeroD ) ) )
555 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
556 else {
557 xDouble inf = plusinf;
558 xDouble isInf = _mm_cmpeq_sd( xd, inf );
559 inf = _mm_andnot_pd( xIsNaN, inf );
560 inf = _mm_andnot_pd( isInf, inf );
561
562 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD );
563
564 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
565 xd = _mm_add_sd( xd, inf );
566 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf );
567 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
568 }
569 }
570
571 // return
572 return XDOUBLE_2_FLOAT( xd );
573}
574*/
575
576#pragma mark -
577#pragma mark log1p
578
579static inline xDouble _xlog1p( xDouble x ) ALWAYS_INLINE;
580
581static const double T1p = -0x1.F0540438FD5C4p-5; //-0.0605869371865242201114 // e^(-1/16) - 1
582static const double T2p = 0x1.082B577D34ED8p-4; //0.0644944589178594318568; // e^(+1/16) - 1
583static const double T4p = 0x1.0000000000000p-53; //1.11022302462515654042e-16; // 2^(-53)
584
585static inline xDouble _xlog1p( xDouble x )
586{
587 static const double mTwo = -2.0;
588 static const double x52 = 52.0;
589 static const double eight = 8.0;
590 static const double oneEighth = 0.125;
591 static const double twom1 = 0x1.0p-1;
592 static const double twom9 = 0x1.0p-9;
593
594 xDouble Y, F, f, f1, L_lead, L_trail, R, u, u1, u2, v, q, g, m;
595 xSInt64 im, imc;
596 int j;
597
598 xDouble xLEt1p = _mm_cmple_sdm( x, &T1p );
599 xDouble xGEt2p = _mm_cmpge_sdm( x, &T2p );
600 xDouble fabsX = _mm_andnot_pd( minusZeroD, x );
601 xDouble bt = _mm_or_pd( xLEt1p, xGEt2p );
602
603 if( _mm_istrue_sd( bt ) ) // x <= T1p || x >= T2p
604 {
605/* Y = mantissa(x + 1.0, &m); // m = logb(x+1); Y = scalb(x+1, -m);
606 j = (int)(128.0*Y + 0.5);
607 F = j*0.0078125;
608 j -= 128;
609
610 if ( m <= -2 )
611 f = Y - F;
612 else if ( m <= 52 )
613 f = ( scalb ( 1.0, - m ) - F ) + scalb ( x, - m );
614 else
615 f = ( scalb ( x, - m ) - F ) + scalb ( 1.0, - m );
616*/
617
618 //We can simplify a number of ways here:
619 // 1) x + 1.0 cannot give us a denormal result.
620 // 2) (x+1.0) * 2**-8 cannot underfow.
621 // 3) In this particular case, x+1.0 is also always positive, so certain hacks
622 // that fail with negative numbers work here.
623 //
624 //Lets do this a different way. Don't reduce Y to a mantissa section until later.
625 //Just use x + 1.0 instead. We can extract out the top 8 bits with rounding by
626 //adding the exponent of (x+1) * 2-8 to x+1. Use a mask to trim off the remaining
627 //mantissa bits below the first 7. This is okay for the overflow case where the rounding
628 //value triggers a promotion in the exponent, since the leading bits in the mantissa are
629 //all zeros in this case, so using the wrong mask isn't a problem. Note that at this case,
630 //F and Y are scaled larger than the old code by 2**m. This means we can do the calculation
631 //without scalb above, and just use 1.0 or x where scalb ( 1.0, - m ) and scalb ( x, - m )
632 //appear.
633
634 double fparts[4];
635 int index;
636 Y = _mm_sub_sdm( x, &mOne );
637 im = _mm_srli_epi64( (xSInt64) Y, 52 ); //Grab the exponent before we destroy it. Y is always positive, so we need only shift right
638 R = _mm_mul_sdm( Y, &twom9 ); //rounding value
639 imc = _mm_sub_epi64( im, bias[1] ); //m as an integer (eponent - 1023)
640 im = _mm_add_epi64( im, (xSInt64) bt ); //subtract 1 from im, to correct for 0.5 multiplication to follow
641 Y = _mm_mul_sdm( Y, &twom1 ); //divide Y by two so that the F calculation doesn't overflow to infinity. This cannot underflow.
642 R = _mm_and_pd( R, plusinf ); //trim off mantissa of the rounding value
643 m = _mm_cvtepi32_pd( imc ); //-m as a double
644 F = _mm_add_sd( Y, R ); //Round Y up to give F
645 x = _mm_mul_sdm( x, &twom1 ); //convert x to 0.5x (exact, unless x is denorm, in which case, it does not influence the answer)
646 F = (xDouble) _mm_srli_epi64( (xSInt64) F, 52-7 ); //truncate F
647 im = _mm_slli_epi64( im, 7 );
648 im = _mm_sub_epi64( (xSInt64) F, im );
649 F = (xDouble) _mm_slli_epi64( (xSInt64) F, 52-7 );
650 j = _mm_cvtsi128_si32( im );
651
652 //F now has the mantissa we want. Both Y and F are currently too large by 2**(m-1)
653 fparts[0] = 0.0;
654 fparts[1] = 0.5;
655 _mm_store_sd( &fparts[2], x );
656 _mm_store_sd( &fparts[3], Y );
657 xDouble mLEmtwo = _mm_cmple_sdm( m, &mTwo );
658 xDouble mLE52 = _mm_cmple_sdm( m, &x52 );
659 index = _mm_cvtsi128_si32( _mm_add_epi64( (xSInt64) mLEmtwo, (xSInt64) mLE52 ) ); //-2 for m <= -2, -1 for -2 < m <=52, 0 otherwise
660
661 f = _mm_sub_sdm( F, fparts + 2 + index ); // f = { F - 0, F - 0.5, F - x }
662 f = _mm_sub_sdm( f, fparts + 1 - index ); // f = { F - 0 - Y, F - 0.5 - x, F - x - 0.5 }
663
664 //f currently has the wrong exponent ( off by 2**(m-1), and the wrong sign )
665 //However, the rest of the code looks like this:
666 // L_lead = m * C_lead[128].d + C_lead[j].d;
667 // L_trail = m * C_trail[128].d + C_trail[j].d;
668 // u = ( 2.0 * f ) / ( Y + F );
669 // v = u * u;
670 // q = u * v * ( A1[0].d + v * A1[1].d );
671 // R = L_lead + ( u + ( q + L_trail ) );
672 //
673 // f is only used in one place and there it is divided by Y + F, which also are
674 // off by 2**(m-1), so it cancels out, presuming the intermediate terms don't overflow
675 // The maximum possible value of F is exactly 2**1023, and Y must be slightly less than that, so the
676 // denomenator cannot overflow. f is considerably smaller than F, so 2 * f cannot overflow.
677 L_lead = _mm_add_sdm( _mm_mul_sdm( m, C_lead + 128 ), C_lead + j );
678 L_trail = _mm_add_sdm( _mm_mul_sdm( m, C_trail + 128 ), C_trail + j);
679 Y = _mm_add_sd( Y, F ); //Y + F, too large by 2**(m-1)
680 f = _mm_mul_sdm( f, &mTwo ); //2 * f, sign is correct now, too large by 2**(m-1)
681 u = _mm_div_sd( f, Y ); //u is correct
682 v = _mm_mul_sd( u, u );
683 q = _mm_add_sdm( _mm_mul_sdm( v, A1 + 1 ), A1 );
684 v = _mm_mul_sd( v, u );
685 q = _mm_mul_sd( q, v );
686 R = _mm_add_sd( q, L_trail );
687 R = _mm_add_sd( R, u );
688 R = _mm_add_sd( R, L_lead );
689 }
690 else if( _mm_istrue_sd( _mm_cmple_sdm( fabsX, &T4p ) ) )
691 {
692 xDouble xEQzero = _mm_cmpeq_sdm( x, (double*) &minusZeroD );
693 xDouble ittyBit = _mm_load_sd( &tiny );
694 ittyBit = _mm_andnot_pd( xEQzero, ittyBit );
695 //R = ( 8.0 * x - tiny.d ) / 8.0;
696 R = _mm_mul_sdm( x, &eight );
697 R = _mm_sub_sd( R, ittyBit );
698 R = _mm_mul_sdm( R, &oneEighth );
699
700 R = _mm_sel_pd( R, x, xEQzero ); //make sure the x == 0 case is zero
701 }
702 else
703 {
704 // g = 1.0 / ( 2.0 + x );
705 g = _mm_div_sd( one, _mm_sub_sdm( x, &mTwo ) );
706 // u = 2 * ( x * g );
707 u = _mm_mul_sd( x, g );
708 u = _mm_add_sd( u, u );
709 // v = u * u;
710 v = _mm_mul_sd( u, u );
711 // q = u * v * ( A2[0].d + v * ( A2[1].d + v * ( A2[2].d + v * A2[3].d ) ) );
712 q = _mm_add_sdm( _mm_mul_sdm( v, A2 + 3 ), A2 + 2 );
713 q = _mm_add_sdm( _mm_mul_sd( q, v ), A2 + 1 );
714 q = _mm_add_sdm( _mm_mul_sd( q, v ), A2 + 0 );
715 v = _mm_mul_sd( v, u );
716 q = _mm_mul_sd( q, v );
717 // u1 = ( float ) u;
718 u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) );
719 // f1 = ( float ) x;
720 f1 = _mm_cvtss_sd( x, _mm_cvtsd_ss( (xFloat) x, x ) );
721 // u2 = ( ( 2 * ( x - u1 ) - u1 * f1 ) - u1 * ( x - f1 ) ) * g;
722 xDouble t0 = _mm_sub_sd( x, u1 ); //f - u1
723 xDouble t1 = _mm_sub_sd( x, f1 ); //f - f1
724 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1
725 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1)
726 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 )
727 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1
728 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1)
729 u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g
730
731 // R = u1 + ( u2 + q );
732 R = _mm_add_sd( u2, q );
733 R = _mm_add_sd( R, u1 );
734 }
735
736 return R;
737}
738
739double log1p ( double x ) {
740 xDouble xd = DOUBLE_2_XDOUBLE( x );
741
742 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
743 if( _mm_istrue_sd( xIsNaN ) )
744 return XDOUBLE_2_DOUBLE( (xDouble)logNaN );
745
746 xDouble xGTminusOne = _mm_cmpgt_sdm( xd, &mOne );
747 xDouble xLTinfinity = _mm_cmplt_sdm( xd, (double*) &plusinf );
748
749 // mainline: -1 < x < inf
750 if( _mm_istrue_sd( _mm_and_pd( xGTminusOne, xLTinfinity ) ) ) {
751 xd = _xlog1p( xd );
752 }
753
754 // edge cases
755 else {
756 if( _mm_istrue_sd( _mm_cmpeq_sdm( xd, &mOne ) ) )
757 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
758 else {
759 xDouble inf = plusinf;
760 xDouble isInf = _mm_cmpeq_sd( xd, inf );
761 inf = _mm_andnot_pd( xIsNaN, inf );
762 inf = _mm_andnot_pd( isInf, inf );
763
764 xDouble xLTzero = _mm_cmplt_sdm( xd, (double*) &minusZeroD );
765
766 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
767 xd = _mm_add_sd( xd, inf );
768 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN );
769 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
770 }
771 }
772
773 // return
774 return XDOUBLE_2_DOUBLE( xd );
775}
776
777/*
778float log1pf( float x )
779{ //cheesy fallback on double, that probably fails to get the edge cases right.
780 xDouble xd = FLOAT_2_XDOUBLE( x );
781
782 xDouble xIsNaN = _mm_cmpunord_sd( xd, xd );
783 if( _mm_istrue_sd( xIsNaN ) )
784 return XDOUBLE_2_FLOAT( (xDouble)logNaNf );
785
786 xDouble xGTminusOne = _mm_cmpgt_sdm( xd, &mOne );
787 xDouble xLTinfinity = _mm_cmplt_sdm( xd, (double*) &plusinf );
788
789 // mainline: -1 < x < inf
790 if( _mm_istrue_sd( _mm_and_pd( xGTminusOne, xLTinfinity ) ) ) {
791 xd = _xlog1p( xd );
792 }
793
794 // edge cases
795 else {
796 if( _mm_istrue_sd( _mm_cmpeq_sdm( xd, &mOne ) ) )
797 xd = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
798 else {
799 xDouble inf = plusinf;
800 xDouble isInf = _mm_cmpeq_sd( xd, inf );
801 inf = _mm_andnot_pd( xIsNaN, inf );
802 inf = _mm_andnot_pd( isInf, inf );
803
804 xDouble xLTzero = _mm_cmplt_sdm( xd, (double*) &minusZeroD );
805
806 xd = _mm_sub_sd( xd, inf ); //silence NaN, set finite values to -Inf
807 xd = _mm_add_sd( xd, inf );
808 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaNf );
809 xd = _mm_add_sd( xLTzero, xd ); //set to NaN if x < 0
810 }
811 }
812
813 // return
814 return XDOUBLE_2_FLOAT( xd );
815}
816*/
817
818#pragma mark -
819#pragma mark log10
820
821static inline double _xlog10( double ) ALWAYS_INLINE;
822
823/* static xUInt64 LOGORITHMIC_NAN = { 0x7FF8000000000000ULL, 0 }; [sic!] */
824
825static inline double _xlog10( double x )
826{
827 static const double half = 0.5;
828 static const double log10e = 0.434294481903251827651128918916605082; // log_10(e)
829 xDouble xx = DOUBLE_2_XDOUBLE( x );
830 xDouble xIsNaN = _mm_cmpunord_sd( xx, xx );
831 xDouble safeX = _mm_andnot_pd( xIsNaN, xx );
832 xDouble xGTzero = _mm_cmpgt_sdm( safeX, (double*) &minusZeroD );
833 xDouble xLTinf = _mm_cmplt_sdm( safeX, (double*) &plusinf );
834 xDouble R;
835
836 if( _mm_istrue_sd( _mm_and_pd( xGTzero, xLTinf ) ) ) // x > 0 && x < inf
837 {
838 if( (x <= T1) || x >= T2 )
839 {
840 xDouble m;
841 xDouble Y = mantissa( xx, &m );
842 int j = _mm_cvttsd_si32( _mm_add_sdm( _mm_mul_sdm( Y, &d128 ), &half ));
843 xDouble F = _mm_mul_sdm( _mm_cvtsi32_sd( Y, j) , &d0_0078125 );
844 j -= 128;
845 xDouble f = _mm_sub_sd( Y, F );
846 xDouble L_lead = _mm_add_sdm( _mm_mul_sdm( m, &C_lead[128] ), &C_lead[j] );
847 xDouble L_trail = _mm_add_sdm( _mm_mul_sdm( m, &C_trail[128] ), &C_trail[j] );
848 xDouble u = _mm_div_sd( _mm_add_sd( f, f), _mm_add_sd( Y, F ) );
849 xDouble v = _mm_mul_sd( u, u );
850 xDouble q = _mm_add_sdm( _mm_mul_sdm( v, &A1[1] ), &A1[0] );
851 q = _mm_mul_sd( q, v );
852 q = _mm_mul_sd( q, u );
853 R = _mm_add_sd( L_lead, _mm_add_sd( u , _mm_add_sd( q, L_trail ) ) );
854 R = _mm_mul_sdm( R, &log10e );
855 }
856 else
857 {
858 xDouble xEQone = _mm_cmpeq_sdm( xx, (double*) &one );
859
860 xDouble f = _mm_sub_sdm( xx, (double*) &one );
861 xDouble g = _mm_div_sd( one, _mm_add_sdm( f, &two ) );
862 xDouble u = _mm_mul_sd( f, g ); u = _mm_add_sd( u, u ); //u = 2.0 * (f * g)
863 xDouble v = _mm_mul_sd( u, u );
864 xDouble q = _mm_add_sdm( _mm_mul_sdm( v, &A2[3] ), &A2[2] );
865 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[1] );
866 q = _mm_add_sdm( _mm_mul_sd( q, v ), &A2[0] );
867 q = _mm_mul_sd( _mm_mul_sd( q, v ), u );
868 xDouble u1 = _mm_cvtss_sd( u, _mm_cvtsd_ss( (xFloat) u, u ) );
869 xDouble f1 = _mm_cvtss_sd( f, _mm_cvtsd_ss( (xFloat) f, f ) );
870
871 // u2 = ( ( 2 * ( f - u1 ) - u1 * f1 ) - u1 * ( f - f1 ) ) * g;
872 xDouble t0 = _mm_sub_sd( f, u1 ); //f - u1
873 xDouble t1 = _mm_sub_sd( f, f1 ); //f - f1
874 xDouble t2 = _mm_mul_sd( u1, f1 ); //u1 * f1
875 t0 = _mm_add_sd( t0, t0 ); //2 * (f- u1)
876 t1 = _mm_mul_sd( t1, u1 ); //u1 * ( f - f1 )
877 t0 = _mm_sub_sd( t0, t2 ); //2 * (f - u1) - u1 * f1
878 t0 = _mm_sub_sd( t0, t1 ); //(2 * (f - u1) - u1 * f1) - u1 * (f- f1)
879 xDouble u2 = _mm_mul_sd( t0, g ); //((2 * (f - u1) - u1 * f1) - u1 * (f- f1)) * g
880 R = _mm_add_sd( u2, q );
881 R = _mm_add_sd( R, u1 );
882 R = _mm_mul_sdm( R, &log10e );
883 R = _mm_andnot_pd( xEQone, R );
884 }
885 }
886 else
887 { //edge cases
888 if( _mm_istrue_sd( _mm_cmpeq_sd( xx, minusZeroD ) ) )
889 R = _mm_div_sd( _mm_load_sd( &two ), minusZeroD );
890 else
891 {
892 xDouble inf = plusinf;
893 xDouble isInf = _mm_cmpeq_sd( xx, inf );
894 inf = _mm_andnot_pd( xIsNaN, inf );
895 inf = _mm_andnot_pd( isInf, inf );
896
897 xDouble xLTzero = _mm_cmplt_sdm( safeX, (double*) &minusZeroD );
898
899 R = _mm_sub_sd( xx, inf ); //silence NaN, set finite values to -Inf
900 R = _mm_add_sd( R, inf );
901 xLTzero = _mm_and_pd( xLTzero, (xDouble) logNaN );
902 R = _mm_add_sd( xLTzero, R ); //set to NaN if x < 0
903 }
904
905 }
906
907 return XDOUBLE_2_DOUBLE( R );
908}
909
910double log10 ( double x )
911{
912 return _xlog10( x );
913}
914
915/*
916float log10f( float x )
917{//cheesy fallback on double, that probably fails to get the edge cases right.
918 return (float) _xlog10( (double) x );
919}
920*/
921
922#endif /* defined( BUILDING_FOR_CARBONCORE_LEGACY ) */