1 /******************************************************************************
2  *
3  *  Copyright 2022 Google LLC
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #if __ARM_FEATURE_SIMD32 || defined(TEST_ARM)
20 
21 #ifndef TEST_ARM
22 
23 #include <arm_acle.h>
24 
__pkhbt(int16x2_t a,int16x2_t b)25 static inline int16x2_t __pkhbt(int16x2_t a, int16x2_t b)
26 {
27     int16x2_t r;
28     __asm("pkhbt %0, %1, %2" : "=r" (r) : "r" (a), "r" (b));
29     return r;
30 }
31 
32 #endif /* TEST_ARM */
33 
34 
35 /**
36  * Import
37  */
38 
/* Forward declarations only: the definitions are not part of this section
 * and are expected to be provided by the source that includes it. */

/* Called by the resampling kernels below, once per output sample, with
 * the filter state and the 32-bit accumulated value. */
static inline int32_t filter_hp50(struct lc3_ltpf_hp50_state *, int32_t);

/* Called by `arm_correlate()` as `dot(a, b, n)` for the outputs that are
 * not handled by the pairwise SIMD loop. */
static inline float dot(const int16_t *, const int16_t *, int);
41 
42 
43 /**
44  * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
45  */
46 #if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
47     || !defined(resample_32k_12k8)
/**
 * Kernel shared by the 8 / 16 / 32 KHz to 12.8 KHz resamplers
 * p               Rate selector; the wrappers of this file pass 8, 4 and 2
 *                 for respectively 8, 16 and 32 KHz
 * h               Coefficients, `2*p` rows of `48/p` sample pairs each
 * hp50            State forwarded to `filter_hp50()` for every output
 * x               Input as sample pairs; the `40/p` pairs located before
 *                 `x` are read, and must be valid
 * y, n            Output buffer, and number of samples written to it
 *
 * For each output, the input position advances by 5 in units of
 * `1/(2*p)` pair; the remainder selects the coefficients row.
 */
static inline void arm_resample_x64k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int rows = 2*p;
    const int stride = 48 / p;
    const int w = 40 / p;

    const int16x2_t *past = x - w;

    for (int t = 0; t < 5*n; t += 5) {
        const int16x2_t *c = h + (t % rows) * stride;
        const int16x2_t *s = past + (t / rows);

        /* --- First pair, then groups of 5 pairs, `w` pairs in total --- */

        int32_t acc = __smlad(s[0], c[0], 0);

        for (int k = 1; k <= w; k += 5) {
            acc = __smlad(s[k    ], c[k    ], acc);
            acc = __smlad(s[k + 1], c[k + 1], acc);
            acc = __smlad(s[k + 2], c[k + 2], acc);
            acc = __smlad(s[k + 3], c[k + 3], acc);
            acc = __smlad(s[k + 4], c[k + 4], acc);
        }

        /* --- Filter, and round to the 16 upper bits --- */

        const int32_t filtered = filter_hp50(hp50, acc);
        *y++ = (filtered + (1 << 15)) >> 16;
    }
}
73 #endif
74 
75 /**
76  * Resample from 24 / 48 KHz to 12.8 KHz Template
77  */
78 #if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
/**
 * Kernel shared by the 24 / 48 KHz to 12.8 KHz resamplers
 * p               Rate selector; the wrappers of this file pass 8 and 4
 *                 for respectively 24 and 48 KHz
 * h               Coefficients, `2*p` rows of `128/p` sample pairs each
 * hp50            State forwarded to `filter_hp50()` for every output
 * x               Input as sample pairs; the `120/p` pairs located before
 *                 `x` are read, and must be valid
 * y, n            Output buffer, and number of samples written to it
 *
 * For each output, the input position advances by 15 in units of
 * `1/(2*p)` pair; the remainder selects the coefficients row.
 */
static inline void arm_resample_x192k_12k8(const int p, const int16x2_t *h,
    struct lc3_ltpf_hp50_state *hp50, const int16x2_t *x, int16_t *y, int n)
{
    const int rows = 2*p;
    const int stride = 128 / p;
    const int w = 120 / p;

    const int16x2_t *past = x - w;

    for (int t = 0; t < 15*n; t += 15) {
        const int16x2_t *c = h + (t % rows) * stride;
        const int16x2_t *s = past + (t / rows);

        /* --- First pair, then groups of 15 pairs, `w` pairs in total --- */

        int32_t acc = __smlad(s[0], c[0], 0);

        for (int k = 1; k <= w; k += 15) {
            acc = __smlad(s[k     ], c[k     ], acc);
            acc = __smlad(s[k +  1], c[k +  1], acc);
            acc = __smlad(s[k +  2], c[k +  2], acc);
            acc = __smlad(s[k +  3], c[k +  3], acc);
            acc = __smlad(s[k +  4], c[k +  4], acc);
            acc = __smlad(s[k +  5], c[k +  5], acc);
            acc = __smlad(s[k +  6], c[k +  6], acc);
            acc = __smlad(s[k +  7], c[k +  7], acc);
            acc = __smlad(s[k +  8], c[k +  8], acc);
            acc = __smlad(s[k +  9], c[k +  9], acc);
            acc = __smlad(s[k + 10], c[k + 10], acc);
            acc = __smlad(s[k + 11], c[k + 11], acc);
            acc = __smlad(s[k + 12], c[k + 12], acc);
            acc = __smlad(s[k + 13], c[k + 13], acc);
            acc = __smlad(s[k + 14], c[k + 14], acc);
        }

        /* --- Filter, and round to the 16 upper bits --- */

        const int32_t filtered = filter_hp50(hp50, acc);
        *y++ = (filtered + (1 << 15)) >> 16;
    }
}
114 #endif
115 
116 /**
 * Resample from 8 KHz to 12.8 KHz
118  */
119 #ifndef resample_8k_12k8
120 
arm_resample_8k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)121 static void arm_resample_8k_12k8(
122     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
123 {
124     static const int16_t alignas(int32_t) h[2*8*12] = {
125         0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0, 0,
126         0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28, 0,
127         0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61, 0,
128         0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79, 0,
129         0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61, 0,
130         0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0, 0,
131         0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92, 0,
132         0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180, 0,
133         0, 0, 214,  417, -1052, -4529, 26233, -4529, -1052,   417,  214,   0,
134         0, 0, 180,    0, -1522, -2427, 24506, -5289,     0,   763,  156, -28,
135         0, 0,  92, -323, -1361,     0, 19741, -3885,  1317,   861,    0, -61,
136         0, 0,   0, -457,  -752,  1873, 13068,     0,  2389,   598, -213, -79,
137         0, 0, -61, -398,     0,  2686,  5997,  5997,  2686,     0, -398, -61,
138         0, 0, -79, -213,   598,  2389,     0, 13068,  1873,  -752, -457,   0,
139         0, 0, -61,    0,   861,  1317, -3885, 19741,     0, -1361, -323,  92,
140         0, 0, -28,  156,   763,     0, -5289, 24506, -2427, -1522,    0, 180,
141     };
142 
143     arm_resample_x64k_12k8(
144         8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
145 }
146 
147 #ifndef TEST_ARM
148 #define resample_8k_12k8 arm_resample_8k_12k8
149 #endif
150 
151 #endif /* resample_8k_12k8 */
152 
153 /**
 * Resample from 16 KHz to 12.8 KHz
155  */
156 #ifndef resample_16k_12k8
157 
arm_resample_16k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)158 static void arm_resample_16k_12k8(
159     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
160 {
161     static const int16_t alignas(int32_t) h[2*4*24] = {
162 
163             0,   -61,   214,  -398,   417,     0, -1052,  2686,
164         -4529,  5997, 26233,  5997, -4529,  2686, -1052,     0,
165           417,  -398,   214,   -61,     0,     0,     0,     0,
166 
167 
168             0,   -79,   180,  -213,     0,   598, -1522,  2389,
169         -2427,     0, 24506, 13068, -5289,  1873,     0,  -752,
170           763,  -457,   156,     0,   -28,     0,     0,     0,
171 
172 
173             0,   -61,    92,     0,  -323,   861, -1361,  1317,
174             0, -3885, 19741, 19741, -3885,     0,  1317, -1361,
175           861,  -323,     0,    92,   -61,     0,     0,     0,
176 
177             0,   -28,     0,   156,  -457,   763,  -752,     0,
178          1873, -5289, 13068, 24506,     0, -2427,  2389, -1522,
179           598,     0,  -213,   180,   -79,     0,     0,     0,
180 
181 
182             0,     0,   -61,   214,  -398,   417,     0, -1052,
183          2686, -4529,  5997, 26233,  5997, -4529,  2686, -1052,
184             0,   417,  -398,   214,   -61,     0,     0,     0,
185 
186 
187             0,     0,   -79,   180,  -213,     0,   598, -1522,
188          2389, -2427,     0, 24506, 13068, -5289,  1873,     0,
189          -752,   763,  -457,   156,     0,   -28,     0,     0,
190 
191 
192             0,     0,   -61,    92,     0,  -323,   861, -1361,
193          1317,     0, -3885, 19741, 19741, -3885,     0,  1317,
194         -1361,   861,  -323,     0,    92,   -61,     0,     0,
195 
196             0,     0,   -28,     0,   156,  -457,   763,  -752,
197             0,  1873, -5289, 13068, 24506,     0, -2427,  2389,
198         -1522,   598,     0,  -213,   180,   -79,     0,     0,
199     };
200 
201     arm_resample_x64k_12k8(
202         4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
203 }
204 
205 #ifndef TEST_ARM
206 #define resample_16k_12k8 arm_resample_16k_12k8
207 #endif
208 
209 #endif /* resample_16k_12k8 */
210 
211 /**
 * Resample from 32 KHz to 12.8 KHz
213  */
214 #ifndef resample_32k_12k8
215 
arm_resample_32k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)216 static void arm_resample_32k_12k8(
217     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
218 {
219     static const int16_t alignas(int32_t) h[2*2*48] = {
220 
221             0,   -30,   -31,    46,   107,     0,  -199,  -162,
222           209,   430,     0,  -681,  -526,   658,  1343,     0,
223         -2264, -1943,  2999,  9871, 13116,  9871,  2999, -1943,
224         -2264,     0,  1343,   658,  -526,  -681,     0,   430,
225           209,  -162,  -199,     0,   107,    46,   -31,   -30,
226             0,     0,     0,     0,     0,     0,     0,     0,
227 
228             0,   -14,   -39,     0,    90,    78,  -106,  -229,
229             0,   382,   299,  -376,  -761,     0,  1194,   937,
230         -1214, -2644,     0,  6534, 12253, 12253,  6534,     0,
231         -2644, -1214,   937,  1194,     0,  -761,  -376,   299,
232           382,     0,  -229,  -106,    78,    90,     0,   -39,
233           -14,     0,     0,     0,     0,     0,     0,     0,
234 
235             0,     0,   -30,   -31,    46,   107,     0,  -199,
236          -162,   209,   430,     0,  -681,  -526,   658,  1343,
237             0, -2264, -1943,  2999,  9871, 13116,  9871,  2999,
238         -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
239           430,   209,  -162,  -199,     0,   107,    46,   -31,
240           -30,     0,     0,     0,     0,     0,     0,     0,
241 
242             0,     0,   -14,   -39,     0,    90,    78,  -106,
243          -229,     0,   382,   299,  -376,  -761,     0,  1194,
244           937, -1214, -2644,     0,  6534, 12253, 12253,  6534,
245             0, -2644, -1214,   937,  1194,     0,  -761,  -376,
246           299,   382,     0,  -229,  -106,    78,    90,     0,
247           -39,   -14,     0,     0,     0,     0,     0,     0,
248     };
249 
250     arm_resample_x64k_12k8(
251         2, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
252 }
253 
254 #ifndef TEST_ARM
255 #define resample_32k_12k8 arm_resample_32k_12k8
256 #endif
257 
258 #endif /* resample_32k_12k8 */
259 
260 /**
 * Resample from 24 KHz to 12.8 KHz
262  */
263 #ifndef resample_24k_12k8
264 
arm_resample_24k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)265 static void arm_resample_24k_12k8(
266     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
267 {
268     static const int16_t alignas(int32_t) h[2*8*32] = {
269 
270             0,   -50,    19,   143,   -93,  -290,   278,   485,
271          -658,  -701,  1396,   901, -3019, -1042, 10276, 17488,
272         10276, -1042, -3019,   901,  1396,  -701,  -658,   485,
273           278,  -290,   -93,   143,    19,   -50,     0,     0,
274 
275             0,   -46,     0,   141,   -45,  -305,   185,   543,
276          -501,  -854,  1153,  1249, -2619, -1908,  8712, 17358,
277         11772,     0, -3319,   480,  1593,  -504,  -796,   399,
278           367,  -261,  -142,   138,    40,   -52,    -5,     0,
279 
280             0,   -41,   -17,   133,     0,  -304,    91,   574,
281          -334,  -959,   878,  1516, -2143, -2590,  7118, 16971,
282         13161,  1202, -3495,     0,  1731,  -267,  -908,   287,
283           445,  -215,  -188,   125,    62,   -52,   -12,     0,
284 
285             0,   -34,   -30,   120,    41,  -291,     0,   577,
286          -164, -1015,   585,  1697, -1618, -3084,  5534, 16337,
287         14406,  2544, -3526,  -523,  1800,     0,  -985,   152,
288           509,  -156,  -230,   104,    83,   -48,   -19,     0,
289 
290             0,   -26,   -41,   103,    76,  -265,   -83,   554,
291             0, -1023,   288,  1791, -1070, -3393,  3998, 15474,
292         15474,  3998, -3393, -1070,  1791,   288, -1023,     0,
293           554,   -83,  -265,    76,   103,   -41,   -26,     0,
294 
295             0,   -19,   -48,    83,   104,  -230,  -156,   509,
296           152,  -985,     0,  1800,  -523, -3526,  2544, 14406,
297         16337,  5534, -3084, -1618,  1697,   585, -1015,  -164,
298           577,     0,  -291,    41,   120,   -30,   -34,     0,
299 
300             0,   -12,   -52,    62,   125,  -188,  -215,   445,
301           287,  -908,  -267,  1731,     0, -3495,  1202, 13161,
302         16971,  7118, -2590, -2143,  1516,   878,  -959,  -334,
303           574,    91,  -304,     0,   133,   -17,   -41,     0,
304 
305             0,    -5,   -52,    40,   138,  -142,  -261,   367,
306           399,  -796,  -504,  1593,   480, -3319,     0, 11772,
307         17358,  8712, -1908, -2619,  1249,  1153,  -854,  -501,
308           543,   185,  -305,   -45,   141,     0,   -46,     0,
309 
310             0,     0,   -50,    19,   143,   -93,  -290,   278,
311           485,  -658,  -701,  1396,   901, -3019, -1042, 10276,
312         17488, 10276, -1042, -3019,   901,  1396,  -701,  -658,
313           485,   278,  -290,   -93,   143,    19,   -50,     0,
314 
315             0,     0,   -46,     0,   141,   -45,  -305,   185,
316           543,  -501,  -854,  1153,  1249, -2619, -1908,  8712,
317         17358, 11772,     0, -3319,   480,  1593,  -504,  -796,
318           399,   367,  -261,  -142,   138,    40,   -52,    -5,
319 
320             0,     0,   -41,   -17,   133,     0,  -304,    91,
321           574,  -334,  -959,   878,  1516, -2143, -2590,  7118,
322         16971, 13161,  1202, -3495,     0,  1731,  -267,  -908,
323           287,   445,  -215,  -188,   125,    62,   -52,   -12,
324 
325             0,     0,   -34,   -30,   120,    41,  -291,     0,
326           577,  -164, -1015,   585,  1697, -1618, -3084,  5534,
327         16337, 14406,  2544, -3526,  -523,  1800,     0,  -985,
328           152,   509,  -156,  -230,   104,    83,   -48,   -19,
329 
330             0,     0,   -26,   -41,   103,    76,  -265,   -83,
331           554,     0, -1023,   288,  1791, -1070, -3393,  3998,
332         15474, 15474,  3998, -3393, -1070,  1791,   288, -1023,
333             0,   554,   -83,  -265,    76,   103,   -41,   -26,
334 
335             0,     0,   -19,   -48,    83,   104,  -230,  -156,
336           509,   152,  -985,     0,  1800,  -523, -3526,  2544,
337         14406, 16337,  5534, -3084, -1618,  1697,   585, -1015,
338          -164,   577,     0,  -291,    41,   120,   -30,   -34,
339 
340             0,     0,   -12,   -52,    62,   125,  -188,  -215,
341           445,   287,  -908,  -267,  1731,     0, -3495,  1202,
342         13161, 16971,  7118, -2590, -2143,  1516,   878,  -959,
343          -334,   574,    91,  -304,     0,   133,   -17,   -41,
344 
345             0,     0,    -5,   -52,    40,   138,  -142,  -261,
346           367,   399,  -796,  -504,  1593,   480, -3319,     0,
347         11772, 17358,  8712, -1908, -2619,  1249,  1153,  -854,
348          -501,   543,   185,  -305,   -45,   141,     0,   -46,
349     };
350 
351     arm_resample_x192k_12k8(
352         8, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
353 }
354 
355 #ifndef TEST_ARM
356 #define resample_24k_12k8 arm_resample_24k_12k8
357 #endif
358 
359 #endif /* resample_24k_12k8 */
360 
361 /**
 * Resample from 48 KHz to 12.8 KHz
363  */
364 #ifndef resample_48k_12k8
365 
arm_resample_48k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)366 static void arm_resample_48k_12k8(
367     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
368 {
369     static const int16_t alignas(int32_t) h[2*4*64] = {
370 
371             0,   -13,   -25,   -20,    10,    51,    71,    38,
372           -47,  -133,  -145,   -42,   139,   277,   242,     0,
373          -329,  -511,  -351,   144,   698,   895,   450,  -535,
374         -1510, -1697,  -521,  1999,  5138,  7737,  8744,  7737,
375          5138,  1999,  -521, -1697, -1510,  -535,   450,   895,
376           698,   144,  -351,  -511,  -329,     0,   242,   277,
377           139,   -42,  -145,  -133,   -47,    38,    71,    51,
378            10,   -20,   -25,   -13,     0,     0,     0,     0,
379 
380             0,    -9,   -23,   -24,     0,    41,    71,    52,
381           -23,  -115,  -152,   -78,    92,   254,   272,    76,
382          -251,  -493,  -427,     0,   576,   900,   624,  -262,
383         -1309, -1763,  -954,  1272,  4356,  7203,  8679,  8169,
384          5886,  2767,     0, -1542, -1660,  -809,   240,   848,
385           796,   292,  -252,  -507,  -398,   -82,   199,   288,
386           183,     0,  -130,  -145,   -71,    20,    69,    60,
387            20,   -15,   -26,   -17,    -3,     0,     0,     0,
388 
389             0,    -6,   -20,   -26,    -8,    31,    67,    62,
390             0,   -94,  -152,  -108,    45,   223,   287,   143,
391          -167,  -454,  -480,  -134,   439,   866,   758,     0,
392         -1071, -1748, -1295,   601,  3559,  6580,  8485,  8485,
393          6580,  3559,   601, -1295, -1748, -1071,     0,   758,
394           866,   439,  -134,  -480,  -454,  -167,   143,   287,
395           223,    45,  -108,  -152,   -94,     0,    62,    67,
396            31,    -8,   -26,   -20,    -6,     0,     0,     0,
397 
398             0,    -3,   -17,   -26,   -15,    20,    60,    69,
399            20,   -71,  -145,  -130,     0,   183,   288,   199,
400           -82,  -398,  -507,  -252,   292,   796,   848,   240,
401          -809, -1660, -1542,     0,  2767,  5886,  8169,  8679,
402          7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,
403           900,   576,     0,  -427,  -493,  -251,    76,   272,
404           254,    92,   -78,  -152,  -115,   -23,    52,    71,
405            41,     0,   -24,   -23,    -9,     0,     0,     0,
406 
407             0,     0,   -13,   -25,   -20,    10,    51,    71,
408            38,   -47,  -133,  -145,   -42,   139,   277,   242,
409             0,  -329,  -511,  -351,   144,   698,   895,   450,
410          -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
411          7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,
412           895,   698,   144,  -351,  -511,  -329,     0,   242,
413           277,   139,   -42,  -145,  -133,   -47,    38,    71,
414            51,    10,   -20,   -25,   -13,     0,     0,     0,
415 
416             0,     0,    -9,   -23,   -24,     0,    41,    71,
417            52,   -23,  -115,  -152,   -78,    92,   254,   272,
418            76,  -251,  -493,  -427,     0,   576,   900,   624,
419          -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
420          8169,  5886,  2767,     0, -1542, -1660,  -809,   240,
421           848,   796,   292,  -252,  -507,  -398,   -82,   199,
422           288,   183,     0,  -130,  -145,   -71,    20,    69,
423            60,    20,   -15,   -26,   -17,    -3,     0,     0,
424 
425             0,     0,    -6,   -20,   -26,    -8,    31,    67,
426            62,     0,   -94,  -152,  -108,    45,   223,   287,
427           143,  -167,  -454,  -480,  -134,   439,   866,   758,
428             0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
429          8485,  6580,  3559,   601, -1295, -1748, -1071,     0,
430           758,   866,   439,  -134,  -480,  -454,  -167,   143,
431           287,   223,    45,  -108,  -152,   -94,     0,    62,
432            67,    31,    -8,   -26,   -20,    -6,     0,     0,
433 
434             0,     0,    -3,   -17,   -26,   -15,    20,    60,
435            69,    20,   -71,  -145,  -130,     0,   183,   288,
436           199,   -82,  -398,  -507,  -252,   292,   796,   848,
437           240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
438          8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,
439           624,   900,   576,     0,  -427,  -493,  -251,    76,
440           272,   254,    92,   -78,  -152,  -115,   -23,    52,
441            71,    41,     0,   -24,   -23,    -9,     0,     0,
442     };
443 
444     arm_resample_x192k_12k8(
445         4, (const int16x2_t *)h, hp50, (int16x2_t *)x, y, n);
446 }
447 
448 #ifndef TEST_ARM
449 #define resample_48k_12k8 arm_resample_48k_12k8
450 #endif
451 
452 #endif /* resample_48k_12k8 */
453 
454 /**
455  * Return vector of correlations
456  */
457 #ifndef correlate
458 
arm_correlate(const int16_t * a,const int16_t * b,int n,float * y,int nc)459 static void arm_correlate(
460     const int16_t *a, const int16_t *b, int n, float *y, int nc)
461 {
462     /* --- Check alignment of `b` --- */
463 
464     if ((uintptr_t)b & 3)
465         *(y++) = dot(a, b--, n), nc--;
466 
467     /* --- Processing by pair --- */
468 
469     for ( ; nc >= 2; nc -= 2) {
470         const int16x2_t *an = (const int16x2_t *)(a  );
471         const int16x2_t *bn = (const int16x2_t *)(b--);
472 
473         int16x2_t ax, b0, b1;
474         int64_t v0 = 0, v1 = 0;
475 
476         b1 = (int16x2_t)*(b--) << 16;
477 
478         for (int i = 0; i < (n >> 4); i++ )
479             for (int j = 0; j < 4; j++) {
480 
481                 ax = *(an++), b0 = *(bn++);
482                 v0 = __smlald (ax, b0, v0);
483                 v1 = __smlaldx(ax, __pkhbt(b0, b1), v1);
484 
485                 ax = *(an++), b1 = *(bn++);
486                 v0 = __smlald (ax, b1, v0);
487                 v1 = __smlaldx(ax, __pkhbt(b1, b0), v1);
488             }
489 
490         *(y++) = (float)((int32_t)((v0 + (1 << 5)) >> 6));
491         *(y++) = (float)((int32_t)((v1 + (1 << 5)) >> 6));
492     }
493 
494     /* --- Odd element count --- */
495 
496     if (nc > 0)
497         *(y++) = dot(a, b, n);
498 }
499 
500 #ifndef TEST_ARM
501 #define correlate arm_correlate
502 #endif
503 
504 #endif /* correlate */
505 
506 #endif /* __ARM_FEATURE_SIMD32 */
507