1 // -*- C++ -*-
2 /** @file */
3 #pragma once
4
5 #ifdef DOXYGEN
6 #define ARM_MATH_DSP
7 #undef ARM_MATH_MVEI
8 #undef ARM_MATH_MVEF
9 #undef ARM_MATH_NEON
10 #endif
11
12 /** \addtogroup ARCHALG
13 * \addtogroup DSPALG DSP Extension specific algorithm
14 * \ingroup ARCHALG
15 * @{
16 */
17
18 #if defined(ARM_MATH_DSP)
19 #if !defined(ARM_MATH_MVEI) && !defined(ARM_MATH_MVEF) && !defined(ARM_MATH_NEON)
20
21 #define DSP_UNROLL 1
22
23 template<typename T,typename DST,
24 typename std::enable_if<has_vector_inst<DST>() &&
25 IsVector<DST>::value &&
26 SameElementType<DST,T>::value,bool>::type = true>
_Fill(DST & v,const T val,vector_length_t l,const DSP * =nullptr)27 inline void _Fill(DST &v,
28 const T val,
29 vector_length_t l,
30 const DSP* = nullptr)
31 {
32 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
33 index_t i;
34
35 for(i=0 ; i <= l-(nb_lanes<<DSP_UNROLL); i += (nb_lanes<<DSP_UNROLL))
36 {
37 for(int k=0;k < (1<<DSP_UNROLL);k++)
38 {
39 v.vector_store(i + k*nb_lanes,inner::vconst(val));
40 }
41 }
42
43 for(; i < l ; i++)
44 {
45 v[i] = val;
46 }
47 }
48
49
50 template<typename T,typename DST,
51 typename std::enable_if<has_vector_inst<DST>() &&
52 must_use_matrix_idx<DST>() &&
53 SameElementType<DST,T>::value,bool>::type = true>
_Fill2D(DST & v,const T val,const vector_length_t rows,const vector_length_t cols,const DSP * =nullptr)54 inline void _Fill2D(DST &v,
55 const T val,
56 const vector_length_t rows,
57 const vector_length_t cols,
58 const DSP* = nullptr)
59 {
60 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
61 index_t row=0;
62
63 for(; row <= rows-(1<<DSP_UNROLL);row += (1<<DSP_UNROLL))
64 {
65 index_t col;
66
67 for(col=0; col <= cols-nb_lanes;col += nb_lanes)
68 {
69 for(int k=0;k<(1<<DSP_UNROLL);k++)
70 {
71 v.matrix_store(row+k,col,inner::vconst(val));
72 }
73 }
74
75 for(; col < cols;col += nb_lanes)
76 {
77 for(int k=0;k<(1<<DSP_UNROLL);k++)
78 {
79 v(row+k,col) = val;
80 }
81 }
82
83 }
84
85 for(; row < rows;row ++)
86 {
87 index_t col;
88 for(col=0; col <= cols-nb_lanes;col += nb_lanes)
89 {
90 v.matrix_store(row,col,inner::vconst(val));
91 }
92
93 for(; col < cols;col += nb_lanes)
94 {
95 v(row,col) = val;
96 }
97 }
98 }
99
100
101 /*
102
103 Evaluation : used when result is a vector
104
105 */
106 template<typename DA,typename DB,
107 typename std::enable_if<has_vector_inst<DA>() &&
108 vector_idx_pair<DA,DB>(),bool>::type = true>
eval(DA & v,const DB & other,const vector_length_t l,const DSP * =nullptr)109 inline void eval(DA &v,
110 const DB& other,
111 const vector_length_t l,
112 const DSP* = nullptr)
113 {
114 using T = typename traits<DA>::Scalar;
115 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
116 constexpr unsigned int U = DSP_UNROLL;
117 index_t i;
118
119 for(i=0 ; i <= l-(nb_lanes<<U); i += (nb_lanes<<U))
120 {
121 for(int k=0;k < (1<<U);k++)
122 {
123 v.vector_store(i + k*nb_lanes,other.vector_op(i+k*nb_lanes));
124 }
125 }
126
127 for(; i < l ; i++)
128 {
129 v[i] = other[i];
130 }
131 }
132
133 template<typename DA,typename DB,
134 typename std::enable_if<has_vector_inst<DA>() &&
135 must_use_matrix_idx_pair<DA,DB>(),bool>::type = true>
eval2D(DA & v,const DB & other,const vector_length_t rows,const vector_length_t cols,const DSP * =nullptr)136 inline void eval2D(DA &v,
137 const DB& other,
138 const vector_length_t rows,
139 const vector_length_t cols,
140 const DSP* = nullptr)
141 {
142 using T = typename traits<DA>::Scalar;
143 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
144 index_t row=0;
145
146 for(; row <= rows-(1<<DSP_UNROLL);row += (1<<DSP_UNROLL))
147 {
148 index_t col;
149
150 for(col=0; col <= cols-nb_lanes;col += nb_lanes)
151 {
152 for(int k=0;k<(1<<DSP_UNROLL);k++)
153 {
154 v.matrix_store(row+k,col,other.matrix_op(row+k,col));
155 }
156 }
157
158 for(; col < cols;col += nb_lanes)
159 {
160 for(int k=0;k<(1<<DSP_UNROLL);k++)
161 {
162 v(row+k,col) = other(row+k,col);
163 }
164 }
165
166 }
167
168 for(; row < rows;row ++)
169 {
170 index_t col;
171 for(col=0; col <= cols-nb_lanes;col += nb_lanes)
172 {
173 v.matrix_store(row,col,other.matrix_op(row,col));
174 }
175
176 for(; col < cols;col += nb_lanes)
177 {
178 v(row,col) = other(row,col);
179 }
180 }
181 }
182
183 template<typename DA,typename DB,
184 typename std::enable_if<has_vector_inst<DA>() &&
185 vector_idx_pair<DA,DB>(),bool>::type = true>
_dot(const DA & a,const DB & b,const vector_length_t l,const DSP * =nullptr)186 inline DotResult<DA> _dot(const DA& a,
187 const DB& b,
188 const vector_length_t l,
189 const DSP* = nullptr)
190 {
191 using Acc = DotResult<DA>;
192 using T = typename traits<DA>::Scalar;
193 using Temp = typename vector_traits<T>::temp_accumulator;
194 constexpr int nb_lanes = vector_traits<T>::nb_lanes;
195 constexpr unsigned int U = DSP_UNROLL;
196 index_t i;
197
198 Acc acc = Acc{};
199 Temp vacc = vector_traits<T>::temp_acc_zero();
200
201 for(i=0 ; i <= l-(nb_lanes<<U); i += (nb_lanes<<U))
202 {
203 for(int k=0;k < (1<<U);k++)
204 {
205 vacc = inner::vmacc(vacc,a.vector_op(i+k*nb_lanes),b.vector_op(i+k*nb_lanes));
206 }
207 }
208
209 acc = inner::vreduce(vacc);
210
211 for(; i < l ; i++)
212 {
213 acc = inner::mac(acc , a[i] , b[i]);
214 }
215
216 return(acc);
217 }
218
219 template<typename DA,typename DB,
220 typename std::enable_if<has_vector_inst<DA>() &&
221 vector_idx_pair<DA,DB>(),bool>::type = true>
_swap(DA && a,DB && b,const vector_length_t l,const DSP * =nullptr)222 inline void _swap(DA&& a,
223 DB&& b,
224 const vector_length_t l,
225 const DSP* = nullptr)
226 {
227 using Scalar = typename ElementType<DA>::type;
228 using Vector = typename vector_traits<Scalar>::vector;
229
230 constexpr int nb_lanes = vector_traits<typename ElementType<DA>::type>::nb_lanes;
231 index_t i=0;
232 Vector tmpa,tmpb;
233
234 for(i=0 ; i <= l-nb_lanes; i += nb_lanes)
235 {
236 tmpa = a.vector_op(i);
237 tmpb = b.vector_op(i);
238 b.vector_store(i,tmpa);
239 a.vector_store(i,tmpb);
240 }
241
242 for(;i<l;i++)
243 {
244 const auto tmp = a[i];
245 a[i] = b[i];
246 b[i] = tmp;
247 }
248
249 }
250
251 #undef DSP_UNROLL
252
253 #endif
254 #endif
255
256 /*! @} */
257