1 /* -------------------------------------------------------------- */
2 /* (C)Copyright 2007,2008, */
3 /* International Business Machines Corporation */
4 /* All Rights Reserved. */
5 /* */
6 /* Redistribution and use in source and binary forms, with or */
7 /* without modification, are permitted provided that the */
8 /* following conditions are met: */
9 /* */
10 /* - Redistributions of source code must retain the above copyright*/
11 /* notice, this list of conditions and the following disclaimer. */
12 /* */
13 /* - Redistributions in binary form must reproduce the above */
14 /* copyright notice, this list of conditions and the following */
15 /* disclaimer in the documentation and/or other materials */
16 /* provided with the distribution. */
17 /* */
18 /* - Neither the name of IBM Corporation nor the names of its */
19 /* contributors may be used to endorse or promote products */
20 /* derived from this software without specific prior written */
21 /* permission. */
22 /* */
23 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
24 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
25 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
26 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
27 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
28 /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
29 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
30 /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
31 /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
32 /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
33 /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
34 /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
35 /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
36 /* -------------------------------------------------------------- */
37 /* PROLOG END TAG zYx */
38 #ifdef __SPU__
39 #ifndef _ATANHF4_H_
40 #define _ATANHF4_H_ 1
41
42 #include <spu_intrinsics.h>
43 #include <math.h>
44 #include "logf4.h"
45
46 /*
47 * FUNCTION
48 * vector float _atanhf4(vector float x)
49 *
50 * DESCRIPTION
51 * The atanhf4 function returns a vector containing the hyperbolic
52 * arctangents of the corresponding elements of the input vector.
53 *
54 * We are using the formula:
55 * atanh x = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
56 * and the anti-symmetry of atanh.
57 *
58 * For x near 0, we use the Taylor series:
59 * atanh x = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + ...
60 *
61 * Special Cases:
62 * - atanh(1) = HUGE_VALF
63 * - atanh(-1) = -HUGE_VALF
64 * - The result is undefined for x outside of the domain [-1,1].
65 *
66 */
67
68 /*
69 * Maclaurin Series Coefficients
70 * for x near 0.
71 */
72 #define SDM_SP_ATANH_MAC01 1.000000000000000000000000000000E0
73 #define SDM_SP_ATANH_MAC03 3.333333333333333333333333333333E-1
74 #define SDM_SP_ATANH_MAC05 2.000000000000000000000000000000E-1
75 #define SDM_SP_ATANH_MAC07 1.428571428571428571428571428571E-1
76 #if 0
77 #define SDM_SP_ATANH_MAC09 1.111111111111111111111111111111E-1
78 #define SDM_SP_ATANH_MAC11 9.090909090909090909090909090909E-2
79 #define SDM_SP_ATANH_MAC13 7.692307692307692307692307692308E-2
80 #define SDM_SP_ATANH_MAC15 6.666666666666666666666666666667E-2
81 #endif
82
83
_atanhf4(vector float x)84 static __inline vector float _atanhf4(vector float x)
85 {
86 vec_uint4 one = spu_splats(1u);
87 vec_float4 sign_mask = spu_splats(-0.0f);
88 vec_float4 onef = spu_splats(1.0f);
89 vec_float4 onehalff = spu_splats(0.5f);
90 vec_float4 huge = spu_splats(HUGE_VALF);
91 vec_float4 result, fresult, mresult;;
92 vec_float4 xabs, xsqu;
93 /* Where we switch from maclaurin to formula */
94 vec_float4 switch_approx = spu_splats(0.165f);
95 vec_uint4 use_form;
96
97 xabs = spu_andc(x, sign_mask);
98 xsqu = spu_mul(x, x);
99
100 /*
101 * Formula:
102 * atanh = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
103 */
104 fresult = spu_sub(_logf4(spu_add(onef, xabs)), _logf4(spu_sub(onef, xabs)));
105 fresult = spu_mul(fresult, onehalff);
106
107
108 /*
109 * Taylor Series
110 */
111 mresult = spu_madd(xsqu, spu_splats((float)SDM_SP_ATANH_MAC07), spu_splats((float)SDM_SP_ATANH_MAC05));
112 mresult = spu_madd(xsqu, mresult, spu_splats((float)SDM_SP_ATANH_MAC03));
113 mresult = spu_madd(xsqu, mresult, spu_splats((float)SDM_SP_ATANH_MAC01));
114 mresult = spu_mul(xabs, mresult);
115
116 /*
117 * Choose between series and formula
118 */
119 use_form = spu_cmpgt(xabs, switch_approx);
120 result = spu_sel(mresult, fresult, use_form);
121
122 /*
123 * Correct for accumulated truncation error. Currently reduces rms of
124 * absolute error by about 50%
125 */
126 result = (vec_float4)spu_add((vec_uint4)result, spu_and(one, spu_cmpgt(xabs, spu_splats(0.0f))));
127 result = (vec_float4)spu_add((vec_uint4)result, spu_and(one, spu_cmpgt(xabs, spu_splats(0.25f))));
128
129 /*
130 * Check Boundary Conditions
131 */
132 result = spu_sel(result, huge, spu_cmpeq(xabs, onef));
133
134 /*
135 * Spec says |x| > 1, result is undefined, so no additional
136 * boundary checks needed.
137 */
138
139 /* Preserve sign - atanh is anti-symmetric */
140 result = spu_sel(result, x, (vec_uint4)sign_mask);
141
142 return result;
143 }
144
145 #endif /* _ATANHF4_H_ */
146 #endif /* __SPU__ */
147