1 /* -------------------------------------------------------------- */
2 /* (C)Copyright 2007,2008, */
3 /* International Business Machines Corporation */
4 /* All Rights Reserved. */
5 /* */
6 /* Redistribution and use in source and binary forms, with or */
7 /* without modification, are permitted provided that the */
8 /* following conditions are met: */
9 /* */
10 /* - Redistributions of source code must retain the above copyright*/
11 /* notice, this list of conditions and the following disclaimer. */
12 /* */
13 /* - Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* - Neither the name of IBM Corporation nor the names of its */
19 /* contributors may be used to endorse or promote products */
20 /* derived from this software without specific prior written */
21 /* permission. */
22 /* */
23 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
24 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
25 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
26 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
27 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
28 /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
29 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
30 /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
31 /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
32 /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
33 /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
34 /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
35 /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* -------------------------------------------------------------- */
37 /* PROLOG END TAG zYx */
38 #ifdef __SPU__
39 #ifndef _TANHF4_H_
40 #define _TANHF4_H_ 1
41
42 #include <spu_intrinsics.h>
43
44 #include "expf4.h"
45 #include "divf4.h"
46
47 #include "tanhd2.h"
48
49 /*
50 * FUNCTION
51 * vector float _tanhf4(vector float x)
52 *
53 * DESCRIPTION
54 * The _tanhf4 function computes the hyperbolic tangent for each
55 * element of the input vector.
56 *
57 * We use the following to approximate tanh:
58 *
59 * |x| <= .25: Taylor Series
60 * |x| > .25: tanh(x) = (exp(2x) - 1)/(exp(2x) + 1)
61 *
62 *
63 * SPECIAL CASES:
64 * - tanh(+/- 0) = +/-0
65 * - tanh(+/- infinity) = +/- 1
66 *
67 */
68
_tanhf4(vector float x)69 static __inline vector float _tanhf4(vector float x)
70 {
71 vector float signbit = spu_splats(-0.0f);
72 vector float onef = spu_splats(1.0f);
73 vector float twof = spu_splats(2.0f);
74 vector float xabs;
75 vector float x2;
76 vector unsigned int gttaylor;
77 vector float e;
78 vector float tresult;
79 vector float eresult;
80 vector float result;
81
82 xabs = spu_andc(x, signbit);
83
84 /*
85 * This is where we switch from Taylor Series
86 * to exponential formula.
87 */
88 gttaylor = spu_cmpgt(xabs, spu_splats(0.25f));
89
90
91 /*
92 * Taylor Series Approximation
93 */
94 x2 = spu_mul(x,x);
95 tresult = spu_madd(x2, spu_splats((float)TANH_TAY06), spu_splats((float)TANH_TAY05));
96 tresult = spu_madd(x2, tresult, spu_splats((float)TANH_TAY04));
97 tresult = spu_madd(x2, tresult, spu_splats((float)TANH_TAY03));
98 tresult = spu_madd(x2, tresult, spu_splats((float)TANH_TAY02));
99 tresult = spu_madd(x2, tresult, spu_splats((float)TANH_TAY01));
100 tresult = spu_mul(xabs, tresult);
101
102
103 /*
104 * Exponential Formula
105 * Our expf4 function gives a more accurate result in general
106 * with xabs instead of x for x<0. We correct for sign later.
107 */
108 e = _expf4(spu_mul(xabs, twof));
109 eresult = _divf4(spu_sub(e, onef), spu_add(e, onef));
110
111
112 /*
113 * Select Taylor or exp result.
114 */
115 result = spu_sel(tresult, eresult, gttaylor);
116
117 /*
118 * Correct for accumulated truncation error when
119 * tanh(x) should return 1.
120 * Note that this also handles the special case of
121 * x = +/- infinity.
122 */
123 result = spu_sel(result, onef, spu_cmpgt(xabs, spu_splats(9.125f)));
124
125 /*
126 * Antisymmetric function - preserve sign bit of x
127 * in the result.
128 */
129 result = spu_sel(result, x, (vec_uint4)signbit);
130
131 return result;
132 }
133
134 #endif /* _TANHF4_H_ */
135 #endif /* __SPU__ */
136