1 /* --------------------------------------------------------------  */
2 /* (C)Copyright 2001,2008,                                         */
3 /* International Business Machines Corporation,                    */
4 /* Sony Computer Entertainment, Incorporated,                      */
5 /* Toshiba Corporation,                                            */
6 /*                                                                 */
7 /* All Rights Reserved.                                            */
8 /*                                                                 */
9 /* Redistribution and use in source and binary forms, with or      */
10 /* without modification, are permitted provided that the           */
11 /* following conditions are met:                                   */
12 /*                                                                 */
13 /* - Redistributions of source code must retain the above copyright*/
14 /*   notice, this list of conditions and the following disclaimer. */
15 /*                                                                 */
16 /* - Redistributions in binary form must reproduce the above       */
17 /*   copyright notice, this list of conditions and the following   */
18 /*   disclaimer in the documentation and/or other materials        */
19 /*   provided with the distribution.                               */
20 /*                                                                 */
21 /* - Neither the name of IBM Corporation nor the names of its      */
22 /*   contributors may be used to endorse or promote products       */
23 /*   derived from this software without specific prior written     */
24 /*   permission.                                                   */
25 /*                                                                 */
26 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND          */
27 /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,     */
28 /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF        */
29 /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE        */
30 /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR            */
31 /* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,    */
32 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT    */
33 /* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;    */
34 /* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)        */
35 /* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN       */
36 /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR    */
37 /* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,  */
38 /* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.              */
39 /* --------------------------------------------------------------  */
40 /* PROLOG END TAG zYx                                              */
41 
42 #ifdef __SPU__
43 #ifndef _LOG2D2_H_
44 #define _LOG2D2_H_	1
45 
46 #include <spu_intrinsics.h>
47 
48 /*
49  * FUNCTION
50  *	vector double _log2d2(vector double x)
51  *
52  * DESCRIPTION
53  *	The function _log2d2 computes log base 2 of the input x for each
54  *	of the double word elements of x. The log2 is decomposed
55  *      into two parts, log2 of the exponent and log2 of the
56  *	fraction. The log2 of the fraction is approximated
57  *	using a 21st order polynomial of the form:
58  *
59  *                        __20_
60  *                        \
61  *	log(x) = x * (1 +  \   (Ci * x^i))
62  *                         /
63  *                        /____
64  *                         i=0
65  *
66  *      for x in the range 0-1.
67  */
68 #define LOG_C00
69 #define LOG_C01
70 #define LOG_C02
71 
_log2d2(vector double vx)72 static __inline vector double _log2d2(vector double vx)
73 {
74   vec_int4 addval;
75   vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
76   vec_double2 vy, vxw;
77   vec_double2 v1 = spu_splats(1.0);
78   vec_double2 x2, x4, x8, x10, p1, p2;
79 
80   /* Extract the fraction component of input by forcing
81    * its exponent so that input is in the range [1.0, 2.0)
82    * and then subtract 1.0 to force it in the range
83    * [0.0, 1.0).
84    */
85   vxw = spu_sub(spu_sel(vx, v1, exp_mask), v1);
86 
87   /* Compute the log2 of the exponent as exp - 1023.
88    */
89   addval = spu_add(spu_rlmask((vec_int4)vx, -20), -1023);
90 
91   /* Compute the log2 of the fractional component using a 21st
92    * order polynomial. The polynomial is evaluated in two halves
93    * to improve efficiency.
94    */
95   p1 = spu_madd(spu_splats(3.61276447184348752E-05), vxw, spu_splats(-4.16662127033480827E-04));
96   p2 = spu_madd(spu_splats(-1.43988260692073185E-01), vxw, spu_splats(1.60245637034704267E-01));
97   p1 = spu_madd(vxw, p1, spu_splats(2.28193656337578229E-03));
98   p2 = spu_madd(vxw, p2, spu_splats(-1.80329036970820794E-01));
99   p1 = spu_madd(vxw, p1, spu_splats(-7.93793829370930689E-03));
100   p2 = spu_madd(vxw, p2, spu_splats(2.06098446037376922E-01));
101   p1 = spu_madd(vxw, p1, spu_splats(1.98461565426430164E-02));
102   p2 = spu_madd(vxw, p2, spu_splats(-2.40449108727688962E-01));
103   p1 = spu_madd(vxw, p1, spu_splats(-3.84093543662501949E-02));
104   p2 = spu_madd(vxw, p2, spu_splats(2.88539004851839364E-01));
105   p1 = spu_madd(vxw, p1, spu_splats(6.08335872067172597E-02));
106   p2 = spu_madd(vxw, p2, spu_splats(-3.60673760117245982E-01));
107   p1 = spu_madd(vxw, p1, spu_splats(-8.27937055456904317E-02));
108   p2 = spu_madd(vxw, p2, spu_splats(4.80898346961226595E-01));
109   p1 = spu_madd(vxw, p1, spu_splats(1.01392360727236079E-01));
110   p2 = spu_madd(vxw, p2, spu_splats(-7.21347520444469934E-01));
111   p1 = spu_madd(vxw, p1, spu_splats(-1.16530490533844182E-01));
112   p2 = spu_madd(vxw, p2, spu_splats(0.44269504088896339E+00));
113   p1 = spu_madd(vxw, p1, spu_splats(1.30009193360025350E-01));
114 
115   x2 = spu_mul(vxw, vxw);
116   x4 = spu_mul(x2, x2);
117   x8 = spu_mul(x4, x4);
118   x10 = spu_mul(x8, x2);
119 
120   vy = spu_madd(spu_madd(x10, p1, p2), vxw, vxw);
121 
122   /* Add the log2(exponent) and the log2(fraction) to
123    * compute the final result.
124    */
125   vy = spu_add(vy, spu_extend(spu_convtf(addval, 0)));
126 
127   vxw = spu_extend(spu_convtf(addval, 20));
128 
129   return(vy);
130 }
131 
132 #endif /* _LOG2D2_H_ */
133 #endif /* __SPU__ */
134