1 // -*- C++ -*-
2 /** @file */
3 #pragma once
4 
5 #include <dsppp/arch.hpp>
6 #include <type_traits>
7 #include <dsppp/number.hpp>
8 
9 #ifdef DOXYGEN
10 #define ARM_MATH_MVEI
11 #define ARM_MATH_MVEF
12 #endif
13 
14 /** \addtogroup ARCHALG
15  *  \addtogroup HELIUMALG Helium specific algorithm
16  *  \ingroup ARCHALG
17  *  @{
18  */
19 
20 #if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF)
21 /**
22  * @brief      Fill evaluator for Helium
23  *
24  * @param      v          Destination value
25  * @param[in]  val        Initialization value
26  * @param[in]  l          Vector length
27  *
28  * @tparam     T          Scalar datatype
29  * @tparam     DST        Destination datatype
30  * @tparam     <unnamed>  Check if has vector indexing
31  */
32 template<typename T,typename DST,
33 typename std::enable_if<has_vector_inst<DST>() &&
34           IsVector<DST>::value &&
35          SameElementType<DST,T>::value,bool>::type = true>
_Fill(DST & v,const T val,const vector_length_t l,const Helium * =nullptr)36 inline void _Fill(DST &v,
37                   const T val,
38                   const vector_length_t l,
39                   const Helium* = nullptr)
40 {
41       constexpr int nb_lanes = vector_traits<T>::nb_lanes;
42       index_t i=0;
43       UNROLL_LOOP
44       for(i=0;i < l; i += nb_lanes)
45       {
46         v.vector_store_tail(i,l-i,inner::vconst_tail(val,inner::vctpq<T>::mk(l-i)));
47       }
48 }
49 
50 /**
51  * @brief      Fill2D evaluator for Helium
52  *
53  * @param      v          Destination value
54  * @param[in]  val        Initialization value
55  * @param[in]  rows       Number of rows
56  * @param[in]  cols       Number of columns
57  *
58  * @tparam     T          Scalar datatype
59  * @tparam     DST        Destination datatype
60  * @tparam     <unnamed>  Check only matrix indexing supported
61  */
62 template<typename T,typename DST,
63 typename std::enable_if<has_vector_inst<DST>() &&
64          must_use_matrix_idx<DST>() &&
65          SameElementType<DST,T>::value,bool>::type = true>
_Fill2D(DST & v,const T val,const vector_length_t rows,const vector_length_t cols,const Helium * =nullptr)66 inline void _Fill2D(DST &v,
67                   const T val,
68                   const vector_length_t rows,
69                   const vector_length_t cols,
70                   const Helium* = nullptr)
71 {
72       constexpr int nb_lanes = vector_traits<T>::nb_lanes;
73 
74       // Outer unroll factor in case inner loop does not have
75       // enough arithmetic instructions.
76       // In future version this may be estimated from the
77       // complexity of the AST to evaluate
78       constexpr int U = 1;
79       index_t row=0;
80 
81       UNROLL_LOOP
82       for(; row <= rows-U;row += U)
83       {
84 
85           UNROLL_LOOP
86           for(index_t col=0; col < cols;col += nb_lanes)
87           {
88               for(int k=0;k<U;k++)
89               {
90                   v.matrix_store_tail(row+k,col,cols-col,inner::vconst_tail(val,inner::vctpq<T>::mk(cols-col)));
91               }
92           }
93       }
94 
95       for(; row < rows;row ++)
96       {
97 
98           UNROLL_LOOP
99           for(index_t col=0; col < cols;col += nb_lanes)
100           {
101               v.matrix_store_tail(row,col,cols-col,inner::vconst_tail(val,inner::vctpq<T>::mk(cols-col)));
102           }
103       }
104 }
105 
106 /**
107  * @brief      Eval function for Helium
108  *
109  * @param      v          Destination
110  * @param[in]  other      Expression to evaluate
111  * @param[in]  l          Vector length
112  *
113  * @tparam     DA         Destination datatype
114  * @tparam     DB         Expression datatype
115  * @tparam     <unnamed>  Check vector indexing and compatible vectors
116  */
117 template<typename DA,typename DB,
118 typename std::enable_if<has_vector_inst<DA>() &&
119                         vector_idx_pair<DA,DB>(),bool>::type = true>
eval(DA & v,const DB & other,const vector_length_t l,const Helium * =nullptr)120 inline void eval(DA &v,
121                  const DB& other,
122                  const vector_length_t l,
123                  const Helium* = nullptr)
124 {
125       using T = typename traits<DA>::Scalar;
126       constexpr int nb_lanes = vector_traits<T>::nb_lanes;
127 
128       index_t i=0;
129 
130       UNROLL_LOOP
131       for(i=0;i < l; i += nb_lanes)
132       {
133           v.vector_store_tail(i,l-i,other.vector_op_tail(i,l-i));
134       }
135 }
136 
137 /**
138  * @brief      Eval2D function for Helium
139  *
140  * @param      v          Destination vector
141  * @param[in]  other      Expression to evaluate
142  * @param[in]  rows       Number of rows
143  * @param[in]  cols       Number of columns
144  *
145  * @tparam     DA         Destination datatype
146  * @tparam     DB         Source datatype
147  * @tparam     <unnamed>  Check has only matrix indexing
148  */
149 template<typename DA,typename DB,
150 typename std::enable_if<has_vector_inst<DA>() &&
151                         must_use_matrix_idx_pair<DA,DB>(),bool>::type = true>
eval2D(DA & v,const DB & other,const vector_length_t rows,const vector_length_t cols,const Helium * =nullptr)152 inline void eval2D(DA &v,
153                    const DB& other,
154                    const vector_length_t rows,
155                    const vector_length_t cols,
156                    const Helium* = nullptr)
157 {
158       using T = typename traits<DA>::Scalar;
159       constexpr int nb_lanes = vector_traits<T>::nb_lanes;
160       // Attempt at computing the unrolling factor
161       // depending on the complexity of the AST
162       // (will have to rework this estimation)
163       constexpr int RU = 5 - Complexity<DB>::value;
164       constexpr int U = (RU <= 0) || (RU>=5) ? 1 : RU;
165       index_t row=0;
166 
167       UNROLL_LOOP
168       for(; row <= rows-U;row += U)
169       {
170 
171           UNROLL_LOOP
172           for(index_t col=0; col < cols;col += nb_lanes)
173           {
174               for(int k=0;k<U;k++)
175               {
176                   v.matrix_store_tail(row+k,col,cols-col,other.matrix_op_tail(row+k,col,cols-col));
177               }
178           }
179       }
180 
181       UNROLL_LOOP
182       for(; row < rows;row ++)
183       {
184 
185           UNROLL_LOOP
186           for(index_t col=0; col < cols;col += nb_lanes)
187           {
188               v.matrix_store_tail(row,col,cols-col,other.matrix_op_tail(row,col,cols-col));
189           }
190       }
191 }
192 
193 
194 /**
195     * @brief  Display the matrix content for debug purpose
196     * @param stream Output stream
197     * @param other The matrix to display
198     * @return the stream
199     *
200     */
operator <<(std::ostream & stream,const float32x4_t & other)201 static std::ostream& operator<< (std::ostream& stream, const float32x4_t& other)
202 {
203    stream << "(" << other[0] << "," <<other[1] << "," <<other[2] << "," <<other[3] << ")";
204    return(stream);
205 }
206 
/**
 * @brief      Print tuple for debug
 *
 * Writes the selected tuple elements to std::cout as
 * "(e0, e1, ...)" followed by a newline. Elements are streamed in
 * the order given by the index list; an empty separator is used when
 * the index is 0 and ", " for every other index.
 *
 * @param[in]  _tup       Tuple
 *
 * @tparam     TupType    Tuple datatype
 * @tparam     I          List of tuple indexes
 */
template<class TupType, size_t... I>
void printt(const TupType& _tup, std::index_sequence<I...>)
{
    // Streams one separator followed by one tuple element
    [[maybe_unused]] const auto put = [](const char* separator, const auto& element)
    {
        std::cout << separator << element;
    };

    std::cout << "(";
    // The comma fold evaluates its operands left to right
    (put((I == 0 ? "" : ", "), std::get<I>(_tup)), ...);
    std::cout << ")\n";
}
222 
/**
 * @brief      Print tuple
 *
 * Builds the index sequence 0 .. sizeof...(T)-1 and forwards to the
 * index-sequence overload of printt, so every element of the tuple
 * is printed.
 *
 * @param[in]  _tup  Tuple
 *
 * @tparam     T     Datatype for tuple elements
 */
template<class... T>
void printt (const std::tuple<T...>& _tup)
{
    using AllIndexes = std::index_sequence_for<T...>;
    printt(_tup, AllIndexes{});
}
235 
236 /**
237  * @brief      Dor product for Helium
238  *
239  * @param[in]  a          First expression
240  * @param[in]  b          Second expression
241  * @param[in]  l          Vector length
242  *
243  * @tparam     DA         First operand datatype
244  * @tparam     DB         Second operand datatype
245  * @tparam     <unnamed>  Check vector indexing and compatible vectors
246  *
247  * @return     Dot product of vector expressions
248  */
249 template<typename DA,typename DB,
250          typename std::enable_if<has_vector_inst<DA>() &&
251          vector_idx_pair<DA,DB>(),bool>::type = true>
_dot(const DA & a,const DB & b,const vector_length_t l,const Helium * =nullptr)252 inline DotResult<DA> _dot(const DA& a,
253                           const DB& b,
254                           const vector_length_t l,
255                           const Helium* = nullptr)
256 {
257    //using Res = DotResult<DA>;
258    // Vector scalar datatype
259 
260    using T = typename traits<DA>::Scalar;
261    using Temp = typename vector_traits<T>::temp_accumulator;
262 
263    constexpr int nb_lanes = vector_traits<T>::nb_lanes;
264 
265    Temp acc = vector_traits<T>::temp_acc_zero();
266 
267     UNROLL_LOOP
268     for(index_t i=0; i<l; i+=nb_lanes)
269     {
270         acc = inner::vmacc(acc,a.vector_op_tail(i,l-i),b.vector_op_tail(i,l-i),inner::vctpq<T>::mk(l-i));
271     }
272 
273      return(inner::vreduce(acc));
274 }
275 
276 /**
277  * @brief      Swap operator for Helium
278  *
279  * @param      a          First opetand
280  * @param      b          Second operand
281  * @param[in]  l          Vector length
282  *
283  * @tparam     DA         First operand datatype
284  * @tparam     DB         Second operand datatype
285  * @tparam     <unnamed>  Check vector indexing and compatible vectors
286  */
287 template<typename DA,typename DB,
288          typename std::enable_if<has_vector_inst<DA>() &&
289                                  vector_idx_pair<DA,DB>(),bool>::type = true>
_swap(DA && a,DB && b,const vector_length_t l,const Helium * =nullptr)290 inline void _swap(DA&& a,
291                   DB&& b,
292                   const vector_length_t l,
293                   const Helium* = nullptr)
294 {
295       using Scalar = typename ElementType<DA>::type;
296       using Vector = typename vector_traits<Scalar>::vector;
297 
298       constexpr int nb_lanes = vector_traits<typename ElementType<DA>::type>::nb_lanes;
299       index_t i=0;
300       Vector tmpa,tmpb;
301 
302       UNROLL_LOOP
303       for(i=0;i < l; i += nb_lanes)
304       {
305         tmpa = a.vector_op_tail(i,l-i);
306         tmpb = b.vector_op_tail(i,l-i);
307         b.vector_store_tail(i,l-i,tmpa);
308         a.vector_store_tail(i,l-i,tmpb);
309       }
310 }
311 #endif
312 
313 /*! @} */
314 
315