1 /******************************************************************************
2 *
3 * Copyright 2022 Google LLC
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18
19 #if __ARM_FEATURE_SIMD32 || defined(TEST_ARM)
20
21 #ifndef TEST_ARM
22
23 #include <arm_acle.h>
24
__pkhbt(int16x2_t a,int16x2_t b)25 static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
26 {
27 int16x2_t r;
28 __asm("pkhbt %0, %1, %2" : "=r" (r) : "r" (a), "r" (b));
29 return r;
30 }
31
32 #endif /* TEST_ARM */
33
34
35 /**
36 * Import
37 */
38
39 static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);
40 static inline float dot(const int16_t *, const int16_t *, int);
41
42
43 /**
44 * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
45 */
46 #if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
47 || !defined(resample_32k_12k8)
/**
 * Polyphase resampling template towards 12.8 KHz (8 / 16 / 32 KHz inputs)
 * p               Template parameter, selects the table geometry
 *                 (`2*p` rows of `48/p` coefficient pairs)
 * h               Coefficient table, viewed as packed 16-bit pairs
 * hp50            High-pass filter state, updated through `filter_hp50()`
 * x               Input, viewed as packed 16-bit pairs; `40/p` pairs of
 *                 history located before `x` are read
 * y, n            Output buffer and number of output samples
 */
static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int history = 40 / p;
    const int phases = 2 * p;
    const int stride = 48 / p;

    /* The filter window starts `history` pairs before the current input */
    const int16x2_t *base = x - history;

    for (int out = 0, pos = 0; out < n; out++, pos += 5) {
        const int16x2_t *taps = h + (pos % phases) * stride;
        const int16x2_t *in = base + (pos / phases);

        /* First pair starts the accumulation from zero */
        int32_t acc = __smlad(in[0], taps[0], 0);
        in++;
        taps++;

        /* Remaining pairs, 5 dual multiply-accumulate per round */
        for (int done = 0; done < history; done += 5) {
            acc = __smlad(in[0], taps[0], acc);
            acc = __smlad(in[1], taps[1], acc);
            acc = __smlad(in[2], taps[2], acc);
            acc = __smlad(in[3], taps[3], acc);
            acc = __smlad(in[4], taps[4], acc);
            in += 5;
            taps += 5;
        }

        /* High-pass, then round the result down by 16 bits */
        const int32_t filtered = filter_hp50(hp50, acc);
        y[out] = (filtered + (1 << 15)) >> 16;
    }
}
73 #endif
74
75 /**
76 * Resample from 24 / 48 KHz to 12.8 KHz Template
77 */
78 #if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
/**
 * Polyphase resampling template towards 12.8 KHz (24 / 48 KHz inputs)
 * p               Template parameter, selects the table geometry
 *                 (`2*p` rows of `128/p` coefficient pairs)
 * h               Coefficient table, viewed as packed 16-bit pairs
 * hp50            High-pass filter state, updated through `filter_hp50()`
 * x               Input, viewed as packed 16-bit pairs; `120/p` pairs of
 *                 history located before `x` are read
 * y, n            Output buffer and number of output samples
 */
static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int history = 120 / p;
    const int phases = 2 * p;
    const int stride = 128 / p;

    /* The filter window starts `history` pairs before the current input */
    const int16x2_t *base = x - history;

    for (int out = 0, pos = 0; out < n; out++, pos += 15) {
        const int16x2_t *taps = h + (pos % phases) * stride;
        const int16x2_t *in = base + (pos / phases);

        /* First pair starts the accumulation from zero */
        int32_t acc = __smlad(in[0], taps[0], 0);
        in++;
        taps++;

        /* Remaining pairs, 15 dual multiply-accumulate per round */
        for (int done = 0; done < history; done += 15) {
            acc = __smlad(in[ 0], taps[ 0], acc);
            acc = __smlad(in[ 1], taps[ 1], acc);
            acc = __smlad(in[ 2], taps[ 2], acc);
            acc = __smlad(in[ 3], taps[ 3], acc);
            acc = __smlad(in[ 4], taps[ 4], acc);
            acc = __smlad(in[ 5], taps[ 5], acc);
            acc = __smlad(in[ 6], taps[ 6], acc);
            acc = __smlad(in[ 7], taps[ 7], acc);
            acc = __smlad(in[ 8], taps[ 8], acc);
            acc = __smlad(in[ 9], taps[ 9], acc);
            acc = __smlad(in[10], taps[10], acc);
            acc = __smlad(in[11], taps[11], acc);
            acc = __smlad(in[12], taps[12], acc);
            acc = __smlad(in[13], taps[13], acc);
            acc = __smlad(in[14], taps[14], acc);
            in += 15;
            taps += 15;
        }

        /* High-pass, then round the result down by 16 bits */
        const int32_t filtered = filter_hp50(hp50, acc);
        y[out] = (filtered + (1 << 15)) >> 16;
    }
}
114 #endif
115
116 /**
 * Resample from 8 KHz to 12.8 KHz
118 */
119 #ifndef resample_8k_12k8
120
arm_resample_8k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)121 static void arm_resample_8k_12k8(
122 struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
123 {
124 static const int16_t alignas(int32_t) h[2*8*12] = {
125 0, 214, 417, -1052, -4529, 26233, -4529, -1052, 417, 214, 0, 0,
126 0, 180, 0, -1522, -2427, 24506, -5289, 0, 763, 156, -28, 0,
127 0, 92, -323, -1361, 0, 19741, -3885, 1317, 861, 0, -61, 0,
128 0, 0, -457, -752, 1873, 13068, 0, 2389, 598, -213, -79, 0,
129 0, -61, -398, 0, 2686, 5997, 5997, 2686, 0, -398, -61, 0,
130 0, -79, -213, 598, 2389, 0, 13068, 1873, -752, -457, 0, 0,
131 0, -61, 0, 861, 1317, -3885, 19741, 0, -1361, -323, 92, 0,
132 0, -28, 156, 763, 0, -5289, 24506, -2427, -1522, 0, 180, 0,
133 0, 0, 214, 417, -1052, -4529, 26233, -4529, -1052, 417, 214, 0,
134 0, 0, 180, 0, -1522, -2427, 24506, -5289, 0, 763, 156, -28,
135 0, 0, 92, -323, -1361, 0, 19741, -3885, 1317, 861, 0, -61,
136 0, 0, 0, -457, -752, 1873, 13068, 0, 2389, 598, -213, -79,
137 0, 0, -61, -398, 0, 2686, 5997, 5997, 2686, 0, -398, -61,
138 0, 0, -79, -213, 598, 2389, 0, 13068, 1873, -752, -457, 0,
139 0, 0, -61, 0, 861, 1317, -3885, 19741, 0, -1361, -323, 92,
140 0, 0, -28, 156, 763, 0, -5289, 24506, -2427, -1522, 0, 180,
141 };
142
143 arm_resample_x64k_12k8(
144 8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
145 }
146
147 #ifndef TEST_ARM
148 #define resample_8k_12k8 arm_resample_8k_12k8
149 #endif
150
151 #endif /* resample_8k_12k8 */
152
153 /**
 * Resample from 16 KHz to 12.8 KHz
155 */
156 #ifndef resample_16k_12k8
157
arm_resample_16k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)158 static void arm_resample_16k_12k8(
159 struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
160 {
161 static const int16_t alignas(int32_t) h[2*4*24] = {
162
163 0, -61, 214, -398, 417, 0, -1052, 2686,
164 -4529, 5997, 26233, 5997, -4529, 2686, -1052, 0,
165 417, -398, 214, -61, 0, 0, 0, 0,
166
167
168 0, -79, 180, -213, 0, 598, -1522, 2389,
169 -2427, 0, 24506, 13068, -5289, 1873, 0, -752,
170 763, -457, 156, 0, -28, 0, 0, 0,
171
172
173 0, -61, 92, 0, -323, 861, -1361, 1317,
174 0, -3885, 19741, 19741, -3885, 0, 1317, -1361,
175 861, -323, 0, 92, -61, 0, 0, 0,
176
177 0, -28, 0, 156, -457, 763, -752, 0,
178 1873, -5289, 13068, 24506, 0, -2427, 2389, -1522,
179 598, 0, -213, 180, -79, 0, 0, 0,
180
181
182 0, 0, -61, 214, -398, 417, 0, -1052,
183 2686, -4529, 5997, 26233, 5997, -4529, 2686, -1052,
184 0, 417, -398, 214, -61, 0, 0, 0,
185
186
187 0, 0, -79, 180, -213, 0, 598, -1522,
188 2389, -2427, 0, 24506, 13068, -5289, 1873, 0,
189 -752, 763, -457, 156, 0, -28, 0, 0,
190
191
192 0, 0, -61, 92, 0, -323, 861, -1361,
193 1317, 0, -3885, 19741, 19741, -3885, 0, 1317,
194 -1361, 861, -323, 0, 92, -61, 0, 0,
195
196 0, 0, -28, 0, 156, -457, 763, -752,
197 0, 1873, -5289, 13068, 24506, 0, -2427, 2389,
198 -1522, 598, 0, -213, 180, -79, 0, 0,
199 };
200
201 arm_resample_x64k_12k8(
202 4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
203 }
204
205 #ifndef TEST_ARM
206 #define resample_16k_12k8 arm_resample_16k_12k8
207 #endif
208
209 #endif /* resample_16k_12k8 */
210
211 /**
 * Resample from 32 KHz to 12.8 KHz
213 */
214 #ifndef resample_32k_12k8
215
arm_resample_32k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)216 static void arm_resample_32k_12k8(
217 struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
218 {
219 static const int16_t alignas(int32_t) h[2*2*48] = {
220
221 0, -30, -31, 46, 107, 0, -199, -162,
222 209, 430, 0, -681, -526, 658, 1343, 0,
223 -2264, -1943, 2999, 9871, 13116, 9871, 2999, -1943,
224 -2264, 0, 1343, 658, -526, -681, 0, 430,
225 209, -162, -199, 0, 107, 46, -31, -30,
226 0, 0, 0, 0, 0, 0, 0, 0,
227
228 0, -14, -39, 0, 90, 78, -106, -229,
229 0, 382, 299, -376, -761, 0, 1194, 937,
230 -1214, -2644, 0, 6534, 12253, 12253, 6534, 0,
231 -2644, -1214, 937, 1194, 0, -761, -376, 299,
232 382, 0, -229, -106, 78, 90, 0, -39,
233 -14, 0, 0, 0, 0, 0, 0, 0,
234
235 0, 0, -30, -31, 46, 107, 0, -199,
236 -162, 209, 430, 0, -681, -526, 658, 1343,
237 0, -2264, -1943, 2999, 9871, 13116, 9871, 2999,
238 -1943, -2264, 0, 1343, 658, -526, -681, 0,
239 430, 209, -162, -199, 0, 107, 46, -31,
240 -30, 0, 0, 0, 0, 0, 0, 0,
241
242 0, 0, -14, -39, 0, 90, 78, -106,
243 -229, 0, 382, 299, -376, -761, 0, 1194,
244 937, -1214, -2644, 0, 6534, 12253, 12253, 6534,
245 0, -2644, -1214, 937, 1194, 0, -761, -376,
246 299, 382, 0, -229, -106, 78, 90, 0,
247 -39, -14, 0, 0, 0, 0, 0, 0,
248 };
249
250 arm_resample_x64k_12k8(
251 2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
252 }
253
254 #ifndef TEST_ARM
255 #define resample_32k_12k8 arm_resample_32k_12k8
256 #endif
257
258 #endif /* resample_32k_12k8 */
259
260 /**
 * Resample from 24 KHz to 12.8 KHz
262 */
263 #ifndef resample_24k_12k8
264
arm_resample_24k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)265 static void arm_resample_24k_12k8(
266 struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
267 {
268 static const int16_t alignas(int32_t) h[2*8*32] = {
269
270 0, -50, 19, 143, -93, -290, 278, 485,
271 -658, -701, 1396, 901, -3019, -1042, 10276, 17488,
272 10276, -1042, -3019, 901, 1396, -701, -658, 485,
273 278, -290, -93, 143, 19, -50, 0, 0,
274
275 0, -46, 0, 141, -45, -305, 185, 543,
276 -501, -854, 1153, 1249, -2619, -1908, 8712, 17358,
277 11772, 0, -3319, 480, 1593, -504, -796, 399,
278 367, -261, -142, 138, 40, -52, -5, 0,
279
280 0, -41, -17, 133, 0, -304, 91, 574,
281 -334, -959, 878, 1516, -2143, -2590, 7118, 16971,
282 13161, 1202, -3495, 0, 1731, -267, -908, 287,
283 445, -215, -188, 125, 62, -52, -12, 0,
284
285 0, -34, -30, 120, 41, -291, 0, 577,
286 -164, -1015, 585, 1697, -1618, -3084, 5534, 16337,
287 14406, 2544, -3526, -523, 1800, 0, -985, 152,
288 509, -156, -230, 104, 83, -48, -19, 0,
289
290 0, -26, -41, 103, 76, -265, -83, 554,
291 0, -1023, 288, 1791, -1070, -3393, 3998, 15474,
292 15474, 3998, -3393, -1070, 1791, 288, -1023, 0,
293 554, -83, -265, 76, 103, -41, -26, 0,
294
295 0, -19, -48, 83, 104, -230, -156, 509,
296 152, -985, 0, 1800, -523, -3526, 2544, 14406,
297 16337, 5534, -3084, -1618, 1697, 585, -1015, -164,
298 577, 0, -291, 41, 120, -30, -34, 0,
299
300 0, -12, -52, 62, 125, -188, -215, 445,
301 287, -908, -267, 1731, 0, -3495, 1202, 13161,
302 16971, 7118, -2590, -2143, 1516, 878, -959, -334,
303 574, 91, -304, 0, 133, -17, -41, 0,
304
305 0, -5, -52, 40, 138, -142, -261, 367,
306 399, -796, -504, 1593, 480, -3319, 0, 11772,
307 17358, 8712, -1908, -2619, 1249, 1153, -854, -501,
308 543, 185, -305, -45, 141, 0, -46, 0,
309
310 0, 0, -50, 19, 143, -93, -290, 278,
311 485, -658, -701, 1396, 901, -3019, -1042, 10276,
312 17488, 10276, -1042, -3019, 901, 1396, -701, -658,
313 485, 278, -290, -93, 143, 19, -50, 0,
314
315 0, 0, -46, 0, 141, -45, -305, 185,
316 543, -501, -854, 1153, 1249, -2619, -1908, 8712,
317 17358, 11772, 0, -3319, 480, 1593, -504, -796,
318 399, 367, -261, -142, 138, 40, -52, -5,
319
320 0, 0, -41, -17, 133, 0, -304, 91,
321 574, -334, -959, 878, 1516, -2143, -2590, 7118,
322 16971, 13161, 1202, -3495, 0, 1731, -267, -908,
323 287, 445, -215, -188, 125, 62, -52, -12,
324
325 0, 0, -34, -30, 120, 41, -291, 0,
326 577, -164, -1015, 585, 1697, -1618, -3084, 5534,
327 16337, 14406, 2544, -3526, -523, 1800, 0, -985,
328 152, 509, -156, -230, 104, 83, -48, -19,
329
330 0, 0, -26, -41, 103, 76, -265, -83,
331 554, 0, -1023, 288, 1791, -1070, -3393, 3998,
332 15474, 15474, 3998, -3393, -1070, 1791, 288, -1023,
333 0, 554, -83, -265, 76, 103, -41, -26,
334
335 0, 0, -19, -48, 83, 104, -230, -156,
336 509, 152, -985, 0, 1800, -523, -3526, 2544,
337 14406, 16337, 5534, -3084, -1618, 1697, 585, -1015,
338 -164, 577, 0, -291, 41, 120, -30, -34,
339
340 0, 0, -12, -52, 62, 125, -188, -215,
341 445, 287, -908, -267, 1731, 0, -3495, 1202,
342 13161, 16971, 7118, -2590, -2143, 1516, 878, -959,
343 -334, 574, 91, -304, 0, 133, -17, -41,
344
345 0, 0, -5, -52, 40, 138, -142, -261,
346 367, 399, -796, -504, 1593, 480, -3319, 0,
347 11772, 17358, 8712, -1908, -2619, 1249, 1153, -854,
348 -501, 543, 185, -305, -45, 141, 0, -46,
349 };
350
351 arm_resample_x192k_12k8(
352 8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
353 }
354
355 #ifndef TEST_ARM
356 #define resample_24k_12k8 arm_resample_24k_12k8
357 #endif
358
359 #endif /* resample_24k_12k8 */
360
361 /**
 * Resample from 48 KHz to 12.8 KHz
363 */
364 #ifndef resample_48k_12k8
365
arm_resample_48k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)366 static void arm_resample_48k_12k8(
367 struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
368 {
369 static const int16_t alignas(int32_t) h[2*4*64] = {
370
371 0, -13, -25, -20, 10, 51, 71, 38,
372 -47, -133, -145, -42, 139, 277, 242, 0,
373 -329, -511, -351, 144, 698, 895, 450, -535,
374 -1510, -1697, -521, 1999, 5138, 7737, 8744, 7737,
375 5138, 1999, -521, -1697, -1510, -535, 450, 895,
376 698, 144, -351, -511, -329, 0, 242, 277,
377 139, -42, -145, -133, -47, 38, 71, 51,
378 10, -20, -25, -13, 0, 0, 0, 0,
379
380 0, -9, -23, -24, 0, 41, 71, 52,
381 -23, -115, -152, -78, 92, 254, 272, 76,
382 -251, -493, -427, 0, 576, 900, 624, -262,
383 -1309, -1763, -954, 1272, 4356, 7203, 8679, 8169,
384 5886, 2767, 0, -1542, -1660, -809, 240, 848,
385 796, 292, -252, -507, -398, -82, 199, 288,
386 183, 0, -130, -145, -71, 20, 69, 60,
387 20, -15, -26, -17, -3, 0, 0, 0,
388
389 0, -6, -20, -26, -8, 31, 67, 62,
390 0, -94, -152, -108, 45, 223, 287, 143,
391 -167, -454, -480, -134, 439, 866, 758, 0,
392 -1071, -1748, -1295, 601, 3559, 6580, 8485, 8485,
393 6580, 3559, 601, -1295, -1748, -1071, 0, 758,
394 866, 439, -134, -480, -454, -167, 143, 287,
395 223, 45, -108, -152, -94, 0, 62, 67,
396 31, -8, -26, -20, -6, 0, 0, 0,
397
398 0, -3, -17, -26, -15, 20, 60, 69,
399 20, -71, -145, -130, 0, 183, 288, 199,
400 -82, -398, -507, -252, 292, 796, 848, 240,
401 -809, -1660, -1542, 0, 2767, 5886, 8169, 8679,
402 7203, 4356, 1272, -954, -1763, -1309, -262, 624,
403 900, 576, 0, -427, -493, -251, 76, 272,
404 254, 92, -78, -152, -115, -23, 52, 71,
405 41, 0, -24, -23, -9, 0, 0, 0,
406
407 0, 0, -13, -25, -20, 10, 51, 71,
408 38, -47, -133, -145, -42, 139, 277, 242,
409 0, -329, -511, -351, 144, 698, 895, 450,
410 -535, -1510, -1697, -521, 1999, 5138, 7737, 8744,
411 7737, 5138, 1999, -521, -1697, -1510, -535, 450,
412 895, 698, 144, -351, -511, -329, 0, 242,
413 277, 139, -42, -145, -133, -47, 38, 71,
414 51, 10, -20, -25, -13, 0, 0, 0,
415
416 0, 0, -9, -23, -24, 0, 41, 71,
417 52, -23, -115, -152, -78, 92, 254, 272,
418 76, -251, -493, -427, 0, 576, 900, 624,
419 -262, -1309, -1763, -954, 1272, 4356, 7203, 8679,
420 8169, 5886, 2767, 0, -1542, -1660, -809, 240,
421 848, 796, 292, -252, -507, -398, -82, 199,
422 288, 183, 0, -130, -145, -71, 20, 69,
423 60, 20, -15, -26, -17, -3, 0, 0,
424
425 0, 0, -6, -20, -26, -8, 31, 67,
426 62, 0, -94, -152, -108, 45, 223, 287,
427 143, -167, -454, -480, -134, 439, 866, 758,
428 0, -1071, -1748, -1295, 601, 3559, 6580, 8485,
429 8485, 6580, 3559, 601, -1295, -1748, -1071, 0,
430 758, 866, 439, -134, -480, -454, -167, 143,
431 287, 223, 45, -108, -152, -94, 0, 62,
432 67, 31, -8, -26, -20, -6, 0, 0,
433
434 0, 0, -3, -17, -26, -15, 20, 60,
435 69, 20, -71, -145, -130, 0, 183, 288,
436 199, -82, -398, -507, -252, 292, 796, 848,
437 240, -809, -1660, -1542, 0, 2767, 5886, 8169,
438 8679, 7203, 4356, 1272, -954, -1763, -1309, -262,
439 624, 900, 576, 0, -427, -493, -251, 76,
440 272, 254, 92, -78, -152, -115, -23, 52,
441 71, 41, 0, -24, -23, -9, 0, 0,
442 };
443
444 arm_resample_x192k_12k8(
445 4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
446 }
447
448 #ifndef TEST_ARM
449 #define resample_48k_12k8 arm_resample_48k_12k8
450 #endif
451
452 #endif /* resample_48k_12k8 */
453
454 /**
455 * Return vector of correlations
456 */
457 #ifndef correlate
458
arm_correlate(const int16_t * a,const int16_t * b,int n,float * y,int nc)459 static void arm_correlate(
460 const int16_t *a, const int16_t *b, int n, float *y, int nc)
461 {
462 /* --- Check alignment of `b` --- */
463
464 if ((uintptr_t)b & 3)
465 *(y++) = dot(a, b--, n), nc--;
466
467 /* --- Processing by pair --- */
468
469 for ( ; nc >= 2; nc -= 2) {
470 const int16x2_t *an = (const int16x2_t *)(a );
471 const int16x2_t *bn = (const int16x2_t *)(b--);
472
473 int16x2_t ax, b0, b1;
474 int64_t v0 = 0, v1 = 0;
475
476 b1 = (int16x2_t)*(b--) << 16;
477
478 for (int i = 0; i < (n >> 4); i++ )
479 for (int j = 0; j < 4; j++) {
480
481 ax = *(an++), b0 = *(bn++);
482 v0 = __smlald (ax, b0, v0);
483 v1 = __smlaldx(ax, __pkhbt(b0, b1), v1);
484
485 ax = *(an++), b1 = *(bn++);
486 v0 = __smlald (ax, b1, v0);
487 v1 = __smlaldx(ax, __pkhbt(b1, b0), v1);
488 }
489
490 *(y++) = (float)((int32_t)((v0 + (1 << 5)) >> 6));
491 *(y++) = (float)((int32_t)((v1 + (1 << 5)) >> 6));
492 }
493
494 /* --- Odd element count --- */
495
496 if (nc > 0)
497 *(y++) = dot(a, b, n);
498 }
499
500 #ifndef TEST_ARM
501 #define correlate arm_correlate
502 #endif
503
504 #endif /* correlate */
505
506 #endif /* __ARM_FEATURE_SIMD32 */
507