this repo has no description
1/*
2 * Copyright (c) 2002 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*******************************************************************************
23* *
24* File shchth.c, *
25* Function Sinh(x), Cosh(x) and Tanh(x); *
26* Implementation of sine, cosine and tangent hyperbolic for the PowerPC. *
27* *
28* Copyright � 1991-2001 Apple Computer, Inc. All rights reserved. *
29* *
30* Written by Ali Sazegari, started on November 1991, *
31* Modified and ported by Robert A. Murley (ram) for Mac OS X. *
32* *
33* A MathLib v4 file. *
34* *
35* January 06 1992: changed the value of SEAM to �epsilon to conserve *
36* function monotonicity: *
37* sqrt ( epsilon ) = 1.646361269956798117e-10 *
38* = 3fde0000b504f333f9de6484 *
39* *
40* December 03 1992: first rs6000 port. *
41* January 05 1993: added the environmental controls. *
42* July 11 1993: changed feholdexcept to _feprocentry to set rounding *
43* to zero. added fenv_access pragama and fp_private.h, *
44* corrected the argument of the classification switch. *
45* July 18 1994: merged th with sh and ch. *
46* August 02 1994: corrected the flags that we missed for the 601 ROM. *
47* August 08 2001: replaced __setflm with FEGETENVD/FESETENVD. *
48* replaced DblInHex typedef with hexdouble. *
49* Sept 19 2001: added macros to detect PowerPC and correct compiler. *
50* Sept 19 2001: added <CoreServices/CoreServices.h> to get to <fp.h> *
51* and <fenv.h>, removed denormal comments. *
52* Sept 24 2001: removed fenv_access pragma and some old comments. *
53* Oct 08 2001: removed <CoreServices/CoreServices.h>. *
54* changed compiler errors to warnings. *
55* Nov 06 2001: commented out warning about Intel architectures. *
56* *
57* W A R N I N G: *
58* These routines require a 64-bit double precision IEEE-754 model. *
59* They are written for PowerPC only and are expecting the compiler *
60* to generate the correct sequence of multiply-add fused instructions. *
61* *
62* These routines are not intended for 32-bit Intel architectures. *
63* *
64* A version of gcc higher than 932 is required. *
65* *
66* GCC compiler options: *
67* optimization level 3 (-O3) *
68* -fschedule-insns -finline-functions -funroll-all-loops *
69* *
70*******************************************************************************/
71
72#include "math.h"
73#include "fenv_private.h"
74#include "fp_private.h"
75
76/*******************************************************************************
77* Functions needed for the computation. *
78********************************************************************************
79* *
80* the following fp.h functions are used: *
81* __fpclassifyd, copysign, exp, expm1 and __FABS. *
82*******************************************************************************/
83
84static const hexdouble SqrtNegEps = HEXDOUBLE(0x3e400000, 0x00000000);
85static const hexdouble Huge = HEXDOUBLE(0x7ff00000, 0x00000000);
86static const double kMinNormal = 0x1.0p-1022; // 2.2250738585072014e-308;
87static const double maxExp = 7.0978271289338397000e+02; /* 0x40862e42, 0xfefa39ef */
88
89
90/*******************************************************************************
91* The function is odd. The positive interval is computed and for *
92* negative values, the sign is reflected in the computation. *
93* Some computational flags are set in the FPSCR. It is important to fold *
94* them in at the end. *
95********************************************************************************
96* S I N H *
97*******************************************************************************/
98
99static const hexdouble Log2 = HEXDOUBLE(0x3FE62E42, 0xFEFA39EF); /* = 6.9314718055994530942E-1 */
100static const double kMaxNormal = 1.7976931348623157e308;
101
102double sinh ( double x )
103{
104 register double PositiveX;
105
106 register double result, FPR_env, FPR_z, FPR_kMinNormal, FPR_half, FPR_one,
107 FPR_ln2, FPR_sqreps, FPR_kMaxNormal, FPR_inf;
108
109 PositiveX = __FABS ( x );
110 FPR_z = 0.0; FPR_half = 0.5;
111 FPR_one = 1.0; FPR_sqreps = SqrtNegEps.d;
112 FPR_inf = Huge.d; FPR_kMinNormal = kMinNormal;
113 FPR_ln2 = Log2.d; FPR_kMaxNormal = kMaxNormal;
114
115 FEGETENVD ( FPR_env);
116 __ENSURE( FPR_half, FPR_sqreps, FPR_kMinNormal ); __ENSURE( FPR_z, FPR_kMaxNormal, FPR_ln2 );
117 FESETENVD ( FPR_z );
118 __ENSURE( FPR_z, FPR_one, FPR_inf );
119
120 if ( PositiveX > (maxExp - M_LN2) )
121 {
122 result = exp ( FPR_half * PositiveX );
123 result = ( FPR_half * result ) * result;
124 }
125 else if ( PositiveX > FPR_sqreps ) /* return the arg if too small */
126 {
127 result = expm1 ( PositiveX );
128 result = FPR_half * ( result + result / ( FPR_one + result ) );
129 }
130 else
131 result = PositiveX;
132
133 FESETENVD ( FPR_env );
134
135 if (unlikely( result != result))
136 ; /* NOTHING */
137 else if (unlikely( result == FPR_z )) // iff x == 0.0
138 result = x; // Get +-0.0 right
139 else if (unlikely( result < FPR_kMinNormal ))
140 __PROG_UF_INEXACT( FPR_kMinNormal );
141 else if (likely( result < FPR_inf ))
142 __PROG_INEXACT( FPR_ln2 );
143 else if (likely( PositiveX < FPR_inf ))
144 __PROG_OF_INEXACT( FPR_kMaxNormal );
145
146 if ( x < FPR_z)
147 result = -result;
148
149 return result;
150}
151
152/*******************************************************************************
153* C O S H *
154*******************************************************************************/
155
156double cosh ( double x )
157{
158 hexdouble OldEnvironment, NewEnvironment;
159
160 register double result, FPR_env, FPR_z, FPR_one, FPR_half, FPR_t, FPR_lim;
161
162 FPR_t = __FABS ( x );
163 FPR_z = 0.0; FPR_half = 0.5;
164 FPR_one = 1.0;
165
166 FEGETENVD ( FPR_env);
167 __ENSURE( FPR_z, FPR_half, FPR_one );
168 FPR_lim = maxExp - M_LN2; // gcc-3.5 doesn't fold. Emitted code raises inexact. Care for env!
169 FESETENVD ( FPR_z );
170
171 if ( FPR_t < FPR_lim )
172 {
173 FPR_t = exp ( FPR_t );
174
175 result = FPR_half * (FPR_t + FPR_one / FPR_t); OldEnvironment.d = FPR_env;
176 }
177 else
178 {
179 FPR_t = exp ( FPR_half * FPR_t );
180
181 result = ( FPR_half * FPR_t ) * FPR_t; OldEnvironment.d = FPR_env;
182 }
183
184 FEGETENVD_GRP ( NewEnvironment.d );
185 OldEnvironment.i.lo |= ( NewEnvironment.i.lo & EXCEPT_MASK );
186 FESETENVD_GRP ( OldEnvironment.d );
187
188 return result;
189}
190
191/*******************************************************************************
192* This function is odd. The positive interval is computed and for *
193* negative values, the sign is reflected in the computation. *
194* This calculation has spurious flags that need to be cleared before final *
195* exit. Instead of clearing them, we keep the only set flag (inexact) *
196* and do not fold the sticky FPSCR excpeions. It makes for a faster tanh *
197* and less errors with the test vectors. *
198********************************************************************************
199* T A N H *
200*******************************************************************************/
201
202double tanh ( double x )
203{
204 register double PositiveX;
205
206 register double result, FPR_env, FPR_z, FPR_kMinNormal, FPR_two, FPR_negTwo,
207 FPR_ln2, FPR_sqreps, FPR_kMaxNormal, FPR_inf, FPR_t;
208
209 PositiveX = __FABS ( x );
210 FPR_z = 0.0; FPR_inf = Huge.d;
211 FPR_two = 2.0; FPR_negTwo = -2.0;
212 FPR_sqreps = SqrtNegEps.d; FPR_kMinNormal = kMinNormal;
213 FPR_ln2 = Log2.d; FPR_kMaxNormal = kMaxNormal;
214
215 if (unlikely( PositiveX == FPR_inf ))
216 return (x >= FPR_z ? 1.0 : -1.0);
217
218 FEGETENVD ( FPR_env );
219 __ENSURE( FPR_negTwo, FPR_sqreps, FPR_kMinNormal ); __ENSURE( FPR_z, FPR_kMaxNormal, FPR_ln2 );
220 FESETENVD ( FPR_z );
221 __ENSURE( FPR_z, FPR_inf, FPR_two );
222
223/*******************************************************************************
224* Reduce the number of calls to expm1 function by using the identity: *
225* th(x) = ( e^x - e^-x ) / ( e^x + e^-x ) *
226* = - ( e^-2x - 1 ) / ( 2 + ( e^-2x - 1 ) ) *
227*******************************************************************************/
228 if ( PositiveX > FPR_sqreps) /* return the arg if too small */
229 {
230 FPR_t = expm1 ( FPR_negTwo * PositiveX ); /* call exp1 once */
231 result = - FPR_t / ( FPR_two + FPR_t );
232 }
233 else
234 result = PositiveX;
235
236/*******************************************************************************
237* If the argument to expm1 above is 7fe0000000000000 or 40d0000000000000 *
238* then expm1 will either set an overflow or an underflow which is *
239* undeserved for tanh. *
240*******************************************************************************/
241
242 FESETENVD ( FPR_env );
243
244 if (unlikely( result != result ))
245 ; /* NOTHING */
246 else if (unlikely( result == FPR_z )) // iff x == 0.0
247 result = x; // Get +-0.0 right
248 else if (unlikely( result < FPR_kMinNormal ))
249 __PROG_UF_INEXACT( FPR_kMinNormal );
250 else if (likely( result < FPR_inf ))
251 __PROG_INEXACT( FPR_ln2 );
252 else if (likely( PositiveX < FPR_inf ))
253 __PROG_OF_INEXACT( FPR_kMaxNormal );
254
255 if ( x < FPR_z)
256 result = -result;
257
258 return result;
259}