1 // -*- C++ -*-
2 /** @file */
3 #pragma once
4
5 #include <dsppp/arch.hpp>
6 #include <type_traits>
7 #include <dsppp/number.hpp>
8
9 #ifdef DOXYGEN
10 #define ARM_MATH_MVEI
11 #define ARM_MATH_MVEF
12 #endif
13
14 /** \addtogroup ARCHALG
15 * \addtogroup HELIUMALG Helium specific algorithm
16 * \ingroup ARCHALG
17 * @{
18 */
19
20 #if defined(ARM_MATH_MVEI) || defined(ARM_MATH_MVEF)
21 /**
22 * @brief Fill evaluator for Helium
23 *
24 * @param v Destination value
25 * @param[in] val Initialization value
26 * @param[in] l Vector length
27 *
28 * @tparam T Scalar datatype
29 * @tparam DST Destination datatype
30 * @tparam <unnamed> Check if has vector indexing
31 */
32 template<typename T,typename DST,
33 typename std::enable_if<has_vector_inst<DST>() &&
34 IsVector<DST>::value &&
35 SameElementType<DST,T>::value,bool>::type = true>
_Fill(DST & v,const T val,const vector_length_t l,const Helium * =nullptr)36 inline void _Fill(DST &v,
37 const T val,
38 const vector_length_t l,
39 const Helium* = nullptr)
40 {
41 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
42 index_t i=0;
43 UNROLL_LOOP
44 for(i=0;i < l; i += nb_lanes)
45 {
46 v.vector_store_tail(i,l-i,inner::vconst_tail(val,inner::vctpq<T>::mk(l-i)));
47 }
48 }
49
50 /**
51 * @brief Fill2D evaluator for Helium
52 *
53 * @param v Destination value
54 * @param[in] val Initialization value
55 * @param[in] rows Number of rows
56 * @param[in] cols Number of columns
57 *
58 * @tparam T Scalar datatype
59 * @tparam DST Destination datatype
60 * @tparam <unnamed> Check only matrix indexing supported
61 */
62 template<typename T,typename DST,
63 typename std::enable_if<has_vector_inst<DST>() &&
64 must_use_matrix_idx<DST>() &&
65 SameElementType<DST,T>::value,bool>::type = true>
_Fill2D(DST & v,const T val,const vector_length_t rows,const vector_length_t cols,const Helium * =nullptr)66 inline void _Fill2D(DST &v,
67 const T val,
68 const vector_length_t rows,
69 const vector_length_t cols,
70 const Helium* = nullptr)
71 {
72 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
73
74 // Outer unroll factor in case inner loop does not have
75 // enough arithmetic instructions.
76 // In future version this may be estimated from the
77 // complexity of the AST to evaluate
78 constexpr int U = 1;
79 index_t row=0;
80
81 UNROLL_LOOP
82 for(; row <= rows-U;row += U)
83 {
84
85 UNROLL_LOOP
86 for(index_t col=0; col < cols;col += nb_lanes)
87 {
88 for(int k=0;k<U;k++)
89 {
90 v.matrix_store_tail(row+k,col,cols-col,inner::vconst_tail(val,inner::vctpq<T>::mk(cols-col)));
91 }
92 }
93 }
94
95 for(; row < rows;row ++)
96 {
97
98 UNROLL_LOOP
99 for(index_t col=0; col < cols;col += nb_lanes)
100 {
101 v.matrix_store_tail(row,col,cols-col,inner::vconst_tail(val,inner::vctpq<T>::mk(cols-col)));
102 }
103 }
104 }
105
106 /**
107 * @brief Eval function for Helium
108 *
109 * @param v Destination
110 * @param[in] other Expression to evaluate
111 * @param[in] l Vector length
112 *
113 * @tparam DA Destination datatype
114 * @tparam DB Expression datatype
115 * @tparam <unnamed> Check vector indexing and compatible vectors
116 */
117 template<typename DA,typename DB,
118 typename std::enable_if<has_vector_inst<DA>() &&
119 vector_idx_pair<DA,DB>(),bool>::type = true>
eval(DA & v,const DB & other,const vector_length_t l,const Helium * =nullptr)120 inline void eval(DA &v,
121 const DB& other,
122 const vector_length_t l,
123 const Helium* = nullptr)
124 {
125 using T = typename traits<DA>::Scalar;
126 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
127
128 index_t i=0;
129
130 UNROLL_LOOP
131 for(i=0;i < l; i += nb_lanes)
132 {
133 v.vector_store_tail(i,l-i,other.vector_op_tail(i,l-i));
134 }
135 }
136
137 /**
138 * @brief Eval2D function for Helium
139 *
140 * @param v Destination vector
141 * @param[in] other Expression to evaluate
142 * @param[in] rows Number of rows
143 * @param[in] cols Number of columns
144 *
145 * @tparam DA Destination datatype
146 * @tparam DB Source datatype
147 * @tparam <unnamed> Check has only matrix indexing
148 */
149 template<typename DA,typename DB,
150 typename std::enable_if<has_vector_inst<DA>() &&
151 must_use_matrix_idx_pair<DA,DB>(),bool>::type = true>
eval2D(DA & v,const DB & other,const vector_length_t rows,const vector_length_t cols,const Helium * =nullptr)152 inline void eval2D(DA &v,
153 const DB& other,
154 const vector_length_t rows,
155 const vector_length_t cols,
156 const Helium* = nullptr)
157 {
158 using T = typename traits<DA>::Scalar;
159 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
160 // Attempt at computing the unrolling factor
161 // depending on the complexity of the AST
162 // (will have to rework this estimation)
163 constexpr int RU = 5 - Complexity<DB>::value;
164 constexpr int U = (RU <= 0) || (RU>=5) ? 1 : RU;
165 index_t row=0;
166
167 UNROLL_LOOP
168 for(; row <= rows-U;row += U)
169 {
170
171 UNROLL_LOOP
172 for(index_t col=0; col < cols;col += nb_lanes)
173 {
174 for(int k=0;k<U;k++)
175 {
176 v.matrix_store_tail(row+k,col,cols-col,other.matrix_op_tail(row+k,col,cols-col));
177 }
178 }
179 }
180
181 UNROLL_LOOP
182 for(; row < rows;row ++)
183 {
184
185 UNROLL_LOOP
186 for(index_t col=0; col < cols;col += nb_lanes)
187 {
188 v.matrix_store_tail(row,col,cols-col,other.matrix_op_tail(row,col,cols-col));
189 }
190 }
191 }
192
193
194 /**
195 * @brief Display the matrix content for debug purpose
196 * @param stream Output stream
197 * @param other The matrix to display
198 * @return the stream
199 *
200 */
operator <<(std::ostream & stream,const float32x4_t & other)201 static std::ostream& operator<< (std::ostream& stream, const float32x4_t& other)
202 {
203 stream << "(" << other[0] << "," <<other[1] << "," <<other[2] << "," <<other[3] << ")";
204 return(stream);
205 }
206
/**
 * @brief  Print tuple for debug
 *
 * Writes the elements selected by the index pack `I` to `std::cout`,
 * enclosed in parentheses, separated by ", " and followed by a newline.
 * The element with index 0 gets no leading separator.
 *
 * @param[in]  _tup       Tuple
 *
 * @tparam     TupType    Tuple datatype
 * @tparam     I          List of tuple indexes
 */
template<class TupType, std::size_t... I>
void printt(const TupType& _tup, std::index_sequence<I...>)
{
    std::cout << "(";
    // The comma operator sequences its operands left to right, so the
    // elements are emitted in index order.
    ((std::cout << (I != 0 ? ", " : "") << std::get<I>(_tup)), ...);
    std::cout << ")\n";
}

/**
 * @brief  Print tuple
 *
 * Builds the index sequence 0 .. sizeof...(T)-1 and forwards to the
 * index-sequence overload, so every element of the tuple is printed.
 *
 * @param[in]  _tup  Tuple
 *
 * @tparam     T     Datatype for tuple elements
 */
template<class... T>
void printt (const std::tuple<T...>& _tup)
{
    using AllIndexes = std::make_index_sequence<sizeof...(T)>;
    printt(_tup, AllIndexes());
}
235
236 /**
237 * @brief Dor product for Helium
238 *
239 * @param[in] a First expression
240 * @param[in] b Second expression
241 * @param[in] l Vector length
242 *
243 * @tparam DA First operand datatype
244 * @tparam DB Second operand datatype
245 * @tparam <unnamed> Check vector indexing and compatible vectors
246 *
247 * @return Dot product of vector expressions
248 */
249 template<typename DA,typename DB,
250 typename std::enable_if<has_vector_inst<DA>() &&
251 vector_idx_pair<DA,DB>(),bool>::type = true>
_dot(const DA & a,const DB & b,const vector_length_t l,const Helium * =nullptr)252 inline DotResult<DA> _dot(const DA& a,
253 const DB& b,
254 const vector_length_t l,
255 const Helium* = nullptr)
256 {
257 //using Res = DotResult<DA>;
258 // Vector scalar datatype
259
260 using T = typename traits<DA>::Scalar;
261 using Temp = typename vector_traits<T>::temp_accumulator;
262
263 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
264
265 Temp acc = vector_traits<T>::temp_acc_zero();
266
267 UNROLL_LOOP
268 for(index_t i=0; i<l; i+=nb_lanes)
269 {
270 acc = inner::vmacc(acc,a.vector_op_tail(i,l-i),b.vector_op_tail(i,l-i),inner::vctpq<T>::mk(l-i));
271 }
272
273 return(inner::vreduce(acc));
274 }
275
276 /**
277 * @brief Swap operator for Helium
278 *
279 * @param a First opetand
280 * @param b Second operand
281 * @param[in] l Vector length
282 *
283 * @tparam DA First operand datatype
284 * @tparam DB Second operand datatype
285 * @tparam <unnamed> Check vector indexing and compatible vectors
286 */
287 template<typename DA,typename DB,
288 typename std::enable_if<has_vector_inst<DA>() &&
289 vector_idx_pair<DA,DB>(),bool>::type = true>
_swap(DA && a,DB && b,const vector_length_t l,const Helium * =nullptr)290 inline void _swap(DA&& a,
291 DB&& b,
292 const vector_length_t l,
293 const Helium* = nullptr)
294 {
295 using Scalar = typename ElementType<DA>::type;
296 using Vector = typename vector_traits<Scalar>::vector;
297
298 constexpr int nb_lanes = vector_traits<typename ElementType<DA>::type>::nb_lanes;
299 index_t i=0;
300 Vector tmpa,tmpb;
301
302 UNROLL_LOOP
303 for(i=0;i < l; i += nb_lanes)
304 {
305 tmpa = a.vector_op_tail(i,l-i);
306 tmpb = b.vector_op_tail(i,l-i);
307 b.vector_store_tail(i,l-i,tmpa);
308 a.vector_store_tail(i,l-i,tmpb);
309 }
310 }
311 #endif
312
313 /*! @} */
314
315