src/libm/Source/ARM/tanf.h at fixPythonPipStalling

overby.me / darling-nix
fork atom
this repo has no description
fork atom
darling-nix / src / libm / Source / ARM / tanf.h
at fixPythonPipStalling 315 lines 13 kB view raw
wrap content
Lubos Dolezel Restructured source tree to prepare for merge with the "darling" repo 10y ago
f228ae16
  1/*	This is an implementation of tanf.  It is written in standard C except that
  2	float must have an IEEE 754 single-precision implementation, and its
  3	exponent field is accessed via a union and a struct containing bit fields.
  4	It should be good enough to serve as the libm tanf with tolerable
  5	performance.  (The exponent field access could be replaced with ilogbf to
  6	make this a standard C routine, but additional branches would be required
  7	and performance would be impaired.)
  8
  9	When this file is compiled, a definition must be supplied for the
 10	preprocessor symbol Name.  This both becomes the name of the routine and
 11	specifies the function to perform.  The only supported name is:
 12
 13		tanf	Tangent function.
 14*/
 15
 16
 17// Define constants for preprocessor conditionals.
 18#define	Tangent	1
 19
 20// Temporarily define symbols so we can use preprocessor tests.
 21#define	tanf			Tangent
 22
 23#if Name == Tangent
 24	#define	Function	Tangent
 25#else
 26	#error "Unknown function."
 27#endif
 28
 29// Remove temporary definitions.
 30#undef	tanf
 31
 32#define	ExponentFold	1
 33
 34
 35// Include math.h to ensure we match the declarations of tanf.
 36#include <math.h>
 37
 38
 39// Describe the destination floating-point type.
 40#define	SignificandBits	24
 41#define	ExponentBits	8
 42
 43
 44/*	float Name(float x).
 45
 46	Notes:
 47
 48		Citations in parentheses below indicate the source of a requirement.
 49
 50		"C" stands for ISO/IEC 9899:TC2.
 51
 52		The Open Group specification (IEEE Std 1003.1, 2004 edition) adds no
 53		requirements since it defers to C and requires errno behavior only if
 54		we choose to support it by arranging for "math_errhandling &
 55		MATH_ERRNO" to be non-zero, which we do not.
 56
 57	Return value:
 58
 59		For tangent of +/- zero, return zero with same sign (C F.9 12 and
 60		F.9.1.7).
 61
 62		For +/- infinity, return NaN (C F.9.1.7).
 63
 64		For a NaN, return the same NaN (C F.9 11 and 13).
 65
 66		Otherwise:
 67
 68			If the rounding mode is round-to-nearest, return tangent(x)
 69			faithfully rounded.
 70		  
 71			Not implemented:  In other rounding modes, return tangent(x)
 72			possibly with slightly worse error, not necessarily honoring the
 73			rounding mode (Ali Sazegari narrowing C F.9 10).
 74
 75	Exceptions:
 76
 77		Raise underflow for a denormal result (C F.9 7 and Draft Standard for
 78		Floating-Point Arithmetic P754 Draft 1.2.5 9.5).  If the input is the
 79		smallest normal, underflow may or may not be raised.  This is stricter
 80		than the older 754 standard.
 81
 82		May or may not raise inexact, even if the result is exact (C F.9 8).
 83
 84		Raise invalid if the input is a signalling NaN (C 5.2.4.2.2 3, in spite
 85		of C 4.2.1)  or an infinity (C F.9.1.7) but not if the input is a quiet
 86		NaN (C F.9 11).
 87
 88		May not raise exceptions otherwise (C F.9 9).
 89
 90	Properties:
 91
 92		Monotonic.  Not yet proven.
 93*/
 94float Name(float x)
 95{
 96	if (isnan(x))
 97		return x;
 98
 99	#if defined __STDC__ && 199901L <= __STDC_VERSION__ && !defined __GNUC__
100		// GCC does not currently support FP_CONTRACT.  Maybe someday.
101		#pragma STDC FP_CONTRACT OFF
102	#endif
103
104	typedef struct { double m0, m1; } Double2;
105
106	/* This table was generated by tanfMakeTable.c. */
107
108	static const Double2 ReductionTable[] =
109	{
110		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -126.
111		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -125.
112		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -123.
113		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -121.
114		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -119.
115		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -117.
116		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -115.
117		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -113.
118		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -111.
119		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -109.
120		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -107.
121		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -105.
122		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -103.
123		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -101.
124		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -99.
125		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -97.
126		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -95.
127		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -93.
128		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -91.
129		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -89.
130		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -87.
131		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -85.
132		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -83.
133		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -81.
134		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -79.
135		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -77.
136		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -75.
137		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -73.
138		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -71.
139		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -69.
140		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -67.
141		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -65.
142		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -63.
143		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -61.
144		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -59.
145		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -57.
146		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -55.
147		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -53.
148		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -51.
149		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -49.
150		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -47.
151		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -45.
152		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -43.
153		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -41.
154		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -39.
155		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -37.
156		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -35.
157		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -33.
158		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -31.
159		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -29.
160		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -27.
161		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -25.
162		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -23.
163		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -21.
164		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -19.
165		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -17.
166		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -15.
167		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -13.
168		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -11.
169		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -9.
170		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -7.
171		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -5.
172		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -3.
173		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent -1.
174		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 1.
175		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 3.
176		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 5.
177		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 7.
178		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 9.
179		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 11.
180		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 13.
181		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 15.
182		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 17.
183		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 19.
184		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 21.
185		{ 0x1.45f306ep-1, -0x1.b1bbead603d8bp-32 },	// Exponent 23.
186		{ 0x1.17cc1b7p-3, 0x1.391054a7f09d6p-34 },	// Exponent 25.
187		{ 0x1.7cc1b72p-7, 0x1.c882a53f84ebp-37 },	// Exponent 27.
188		{ 0x1.7cc1b72p-7, 0x1.c882a53f84ebp-37 },	// Exponent 29.
189		{ 0x1.f306dcap-9, -0x1.bbead603d8a83p-40 },	// Exponent 31.
190		{ -0x1.9f246c7p-14, 0x1.054a7f09d5f48p-46 },	// Exponent 33.
191		{ -0x1.9f246c7p-14, 0x1.054a7f09d5f48p-46 },	// Exponent 35.
192		{ 0x1.836e4e4p-16, 0x1.054a7f09d5f48p-46 },	// Exponent 37.
193		{ -0x1.f246c6fp-18, 0x1.529fc2757d1f5p-52 },	// Exponent 39.
194		{ 0x1.b727221p-23, -0x1.5ac07b1505c16p-53 },	// Exponent 41.
195		{ 0x1.b727221p-23, -0x1.5ac07b1505c16p-53 },	// Exponent 43.
196		{ 0x1.b727221p-23, -0x1.5ac07b1505c16p-53 },	// Exponent 45.
197		{ -0x1.236377dp-25, -0x1.6b01ec5417056p-55 },	// Exponent 47.
198		{ -0x1.1b1bbebp-28, 0x1.4fe13abe8fa9ap-59 },	// Exponent 49.
199		{ 0x1.c9c882ap-29, 0x1.4fe13abe8fa9ap-59 },	// Exponent 51.
200		{ -0x1.b1bbeadp-32, -0x1.80f62a0b82b2dp-62 },	// Exponent 53.
201		{ 0x1.391054ap-34, 0x1.fc2757d1f534ep-64 },	// Exponent 55.
202		{ -0x1.8ddf56bp-35, -0x1.ec54170565912p-71 },	// Exponent 57.
203		{ 0x1.c882a54p-37, -0x1.ec54170565912p-71 },	// Exponent 59.
204		{ -0x1.bbead6p-40, -0x1.ec54170565912p-71 },	// Exponent 61.
205		{ 0x1.1054a7fp-42, 0x1.3abe8fa9a6eep-75 },	// Exponent 63.
206		{ -0x1.df56b02p-43, 0x1.3abe8fa9a6eep-75 },	// Exponent 65.
207		{ 0x1.054a7f1p-46, -0x1.8a82e0acb223fp-76 },	// Exponent 67.
208		{ -0x1.f56b01fp-47, 0x1.d5f47d4d37703p-78 },	// Exponent 69.
209		{ 0x1.529fc27p-52, 0x1.5f47d4d377037p-82 },	// Exponent 71.
210		{ 0x1.529fc27p-52, 0x1.5f47d4d377037p-82 },	// Exponent 73.
211		{ -0x1.5ac07b1p-53, -0x1.4170565911f92p-83 },	// Exponent 75.
212		{ -0x1.6b01ec5p-55, -0x1.05c1596447e49p-85 },	// Exponent 77.
213		{ -0x1.ac07b15p-57, -0x1.70565911f924fp-91 },	// Exponent 79.
214		{ 0x1.4fe13acp-59, -0x1.70565911f924fp-91 },	// Exponent 81.
215		{ 0x1.3f84ebp-61, -0x1.70565911f924fp-91 },	// Exponent 83.
216		{ 0x1.fc2757dp-64, 0x1.f534ddc0db629p-96 },	// Exponent 85.
217		{ -0x1.ec5417p-71, -0x1.596447e493ad5p-101 },	// Exponent 87.
218		{ -0x1.ec5417p-71, -0x1.596447e493ad5p-101 },	// Exponent 89.
219		{ -0x1.ec5417p-71, -0x1.596447e493ad5p-101 },	// Exponent 91.
220		{ -0x1.ec5417p-71, -0x1.596447e493ad5p-101 },	// Exponent 93.
221		{ 0x1.3abe8fbp-75, -0x1.96447e493ad4dp-105 },	// Exponent 95.
222		{ 0x1.3abe8fbp-75, -0x1.96447e493ad4dp-105 },	// Exponent 97.
223		{ 0x1.d5f47d5p-78, -0x1.6447e493ad4cep-109 },	// Exponent 99.
224		{ -0x1.505c159p-81, -0x1.911f924eb5336p-111 },	// Exponent 101.
225		{ -0x1.505c159p-81, -0x1.911f924eb5336p-111 },	// Exponent 103.
226		{ -0x1.4170566p-83, 0x1.bb81b6c52b328p-113 },	// Exponent 105.
227		{ -0x1.05c1596p-85, -0x1.11f924eb53362p-115 },	// Exponent 107.
228		{ -0x1.7056591p-91, -0x1.f924eb53361dep-123 },	// Exponent 109.
229		{ -0x1.7056591p-91, -0x1.f924eb53361dep-123 },	// Exponent 111.
230		{ -0x1.7056591p-91, -0x1.f924eb53361dep-123 },	// Exponent 113.
231		{ -0x1.c159644p-93, -0x1.f924eb53361dep-123 },	// Exponent 115.
232		{ 0x1.f534ddcp-96, 0x1.b6c52b3278872p-129 },	// Exponent 117.
233		{ -0x1.596447ep-101, -0x1.24eb53361de38p-131 },	// Exponent 119.
234		{ -0x1.596447ep-101, -0x1.24eb53361de38p-131 },	// Exponent 121.
235		{ -0x1.596447ep-101, -0x1.24eb53361de38p-131 },	// Exponent 123.
236		{ -0x1.65911f9p-103, -0x1.275a99b0ef1bfp-134 },	// Exponent 125.
237		{ -0x1.96447e5p-105, 0x1.b14acc9e21c82p-135 },	// Exponent 127.
238	};
239
240	// Define a structure for accessing the internal components of a float.
241	typedef union
242	{
243		float f;
244		struct
245		{
246			#if defined __BIG_ENDIAN__
247				unsigned int sign     : 1;
248				unsigned int exponent : ExponentBits;
249				unsigned int fraction : SignificandBits-1;
250			#else	// defined __BIG_ENDIAN__
251				unsigned int fraction : SignificandBits-1;
252				unsigned int exponent : ExponentBits;
253				unsigned int sign     : 1;
254			#endif	// defined __BIG_ENDIAN__
255		} s;
256	} Float;
257
258	Float X = { x };
259	
260	/*	Look up 2**ArcBits * (u/p % 1)/u in the reduction table.  See
261		tanfMakeTable.c for more information.  The properties we need are:
262
263			The multiplication of residue.m0 by x is exact.
264
265			Let ki be residue.m0 * x rounded to an integer.
266
267			Let xr be residue.m0 * x - ki + residue.m1 * x.
268
269			If ki is even, the tangent of xr is nearly equal to the tangent of
270			x.
271
272			If ki is odd, the negative of the tangent of xr is nearly equal to
273			the tangent of x.
274	*/
275	Double2 residue = ReductionTable[X.s.exponent >> ExponentFold];
276
277	double p = x * residue.m0;
278
279	// Round p to an integer in k.
280	int ki = (int) lrint(p);
281	double k = ki;
282		/*	Alternatives to lrint:
283
284				Use nearbyint.  This will not raise the inexact exception.
285				However, we already raise inexact in the version-two polynomial
286				even when x is zero.
287
288				Use lround.  This will take longer but will round to nearest
289				regardless of rounding mode.
290		*/
291
292	double f0 = p - k;
293	double f1 = x * residue.m1;
294
295	// xr is x reduced to a final residue.
296	double xr = f0 + f1;
297
298	// Approximate tangent with a polynomial.
299
300	double x2 = xr*xr;	// Square xr.
301
302	double t = 
303			(   ((x2 - 0.34431297545117363561) * x2 + 0.75337415977492632831)
304			* ((x2 + 0.91301179133375347365) * x2 + 0.58807104225499649417)
305			)
306		*
307			(   ((x2 - 1.53503935362160698123) * x2 + 0.92783629599735333058)
308			* ((   + 5.27860261872543679865) * x2 + 3.82127246397188494330)
309			)
310		*
311			xr;
312
313	// If low bit of ki is 1, return t or -1/t according to low bit of ki.
314	return (float) (ki & 1 ? -1/t : t);
315}