1 /******************************************************************************
2  *
3  *  Copyright 2022 Google LLC
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #include "ltpf.h"
20 #include "tables.h"
21 
22 #include "ltpf_neon.h"
23 #include "ltpf_arm.h"
24 
25 
26 /* ----------------------------------------------------------------------------
27  *  Resampling
28  * -------------------------------------------------------------------------- */
29 
30 /**
31  * Resampling coefficients
32  * The coefficients, in fixed Q15, are reordered by phase for each source
33  * samplerate (coefficient matrix transposed)
34  */
35 
36 #ifndef resample_8k_12k8
37 static const int16_t h_8k_12k8_q15[8*10] = {
38       214,   417, -1052, -4529, 26233, -4529, -1052,   417,   214,     0,
39       180,     0, -1522, -2427, 24506, -5289,     0,   763,   156,   -28,
40        92,  -323, -1361,     0, 19741, -3885,  1317,   861,     0,   -61,
41         0,  -457,  -752,  1873, 13068,     0,  2389,   598,  -213,   -79,
42       -61,  -398,     0,  2686,  5997,  5997,  2686,     0,  -398,   -61,
43       -79,  -213,   598,  2389,     0, 13068,  1873,  -752,  -457,     0,
44       -61,     0,   861,  1317, -3885, 19741,     0, -1361,  -323,    92,
45       -28,   156,   763,     0, -5289, 24506, -2427, -1522,     0,   180,
46 };
47 #endif /* resample_8k_12k8 */
48 
49 #ifndef resample_16k_12k8
50 static const int16_t h_16k_12k8_q15[4*20] = {
51       -61,   214,  -398,   417,     0, -1052,  2686, -4529,  5997, 26233,
52      5997, -4529,  2686, -1052,     0,   417,  -398,   214,   -61,     0,
53 
54       -79,   180,  -213,     0,   598, -1522,  2389, -2427,     0, 24506,
55     13068, -5289,  1873,     0,  -752,   763,  -457,   156,     0,   -28,
56 
57       -61,    92,     0,  -323,   861, -1361,  1317,     0, -3885, 19741,
58     19741, -3885,     0,  1317, -1361,   861,  -323,     0,    92,   -61,
59 
60       -28,     0,   156,  -457,   763,  -752,     0,  1873, -5289, 13068,
61     24506,     0, -2427,  2389, -1522,   598,     0,  -213,   180,   -79,
62 };
63 #endif /* resample_16k_12k8 */
64 
65 #ifndef resample_32k_12k8
66 static const int16_t h_32k_12k8_q15[2*40] = {
67       -30,   -31,    46,   107,     0,  -199,  -162,   209,   430,     0,
68      -681,  -526,   658,  1343,     0, -2264, -1943,  2999,  9871, 13116,
69      9871,  2999, -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
70       430,   209,  -162,  -199,     0,   107,    46,   -31,   -30,     0,
71 
72       -14,   -39,     0,    90,    78,  -106,  -229,     0,   382,   299,
73      -376,  -761,     0,  1194,   937, -1214, -2644,     0,  6534, 12253,
74     12253,  6534,     0, -2644, -1214,   937,  1194,     0,  -761,  -376,
75       299,   382,     0,  -229,  -106,    78,    90,     0,   -39,   -14,
76 };
77 #endif /* resample_32k_12k8 */
78 
79 #ifndef resample_24k_12k8
80 static const int16_t h_24k_12k8_q15[8*30] = {
81       -50,    19,   143,   -93,  -290,   278,   485,  -658,  -701,  1396,
82       901, -3019, -1042, 10276, 17488, 10276, -1042, -3019,   901,  1396,
83      -701,  -658,   485,   278,  -290,   -93,   143,    19,   -50,     0,
84 
85       -46,     0,   141,   -45,  -305,   185,   543,  -501,  -854,  1153,
86      1249, -2619, -1908,  8712, 17358, 11772,     0, -3319,   480,  1593,
87      -504,  -796,   399,   367,  -261,  -142,   138,    40,   -52,    -5,
88 
89       -41,   -17,   133,     0,  -304,    91,   574,  -334,  -959,   878,
90      1516, -2143, -2590,  7118, 16971, 13161,  1202, -3495,     0,  1731,
91      -267,  -908,   287,   445,  -215,  -188,   125,    62,   -52,   -12,
92 
93       -34,   -30,   120,    41,  -291,     0,   577,  -164, -1015,   585,
94      1697, -1618, -3084,  5534, 16337, 14406,  2544, -3526,  -523,  1800,
95         0,  -985,   152,   509,  -156,  -230,   104,    83,   -48,   -19,
96 
97       -26,   -41,   103,    76,  -265,   -83,   554,     0, -1023,   288,
98      1791, -1070, -3393,  3998, 15474, 15474,  3998, -3393, -1070,  1791,
99       288, -1023,     0,   554,   -83,  -265,    76,   103,   -41,   -26,
100 
101       -19,   -48,    83,   104,  -230,  -156,   509,   152,  -985,     0,
102      1800,  -523, -3526,  2544, 14406, 16337,  5534, -3084, -1618,  1697,
103       585, -1015,  -164,   577,     0,  -291,    41,   120,   -30,   -34,
104 
105       -12,   -52,    62,   125,  -188,  -215,   445,   287,  -908,  -267,
106      1731,     0, -3495,  1202, 13161, 16971,  7118, -2590, -2143,  1516,
107       878,  -959,  -334,   574,    91,  -304,     0,   133,   -17,   -41,
108 
109        -5,   -52,    40,   138,  -142,  -261,   367,   399,  -796,  -504,
110      1593,   480, -3319,     0, 11772, 17358,  8712, -1908, -2619,  1249,
111      1153,  -854,  -501,   543,   185,  -305,   -45,   141,     0,   -46,
112 };
113 #endif /* resample_24k_12k8 */
114 
115 #ifndef resample_48k_12k8
116 static const int16_t h_48k_12k8_q15[4*60] = {
117       -13,   -25,   -20,    10,    51,    71,    38,   -47,  -133,  -145,
118       -42,   139,   277,   242,     0,  -329,  -511,  -351,   144,   698,
119       895,   450,  -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
120      7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,   895,   698,
121       144,  -351,  -511,  -329,     0,   242,   277,   139,   -42,  -145,
122      -133,   -47,    38,    71,    51,    10,   -20,   -25,   -13,     0,
123 
124        -9,   -23,   -24,     0,    41,    71,    52,   -23,  -115,  -152,
125       -78,    92,   254,   272,    76,  -251,  -493,  -427,     0,   576,
126       900,   624,  -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
127      8169,  5886,  2767,     0, -1542, -1660,  -809,   240,   848,   796,
128       292,  -252,  -507,  -398,   -82,   199,   288,   183,     0,  -130,
129      -145,   -71,    20,    69,    60,    20,   -15,   -26,   -17,    -3,
130 
131        -6,   -20,   -26,    -8,    31,    67,    62,     0,   -94,  -152,
132      -108,    45,   223,   287,   143,  -167,  -454,  -480,  -134,   439,
133       866,   758,     0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
134      8485,  6580,  3559,   601, -1295, -1748, -1071,     0,   758,   866,
135       439,  -134,  -480,  -454,  -167,   143,   287,   223,    45,  -108,
136      -152,   -94,     0,    62,    67,    31,    -8,   -26,   -20,    -6,
137 
138        -3,   -17,   -26,   -15,    20,    60,    69,    20,   -71,  -145,
139      -130,     0,   183,   288,   199,   -82,  -398,  -507,  -252,   292,
140       796,   848,   240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
141      8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,   900,
142       576,     0,  -427,  -493,  -251,    76,   272,   254,    92,   -78,
143      -152,  -115,   -23,    52,    71,    41,     0,   -24,   -23,    -9,
144 };
145 #endif /* resample_48k_12k8 */
146 
147 #ifndef resample_96k_12k8
148 static const int16_t h_96k_12k8_q15[2*120] = {
149        -3,    -7,   -10,   -13,   -13,   -10,    -4,     5,    15,    26,
150        33,    36,    31,    19,     0,   -23,   -47,   -66,   -76,   -73,
151       -54,   -21,    23,    70,   111,   139,   143,   121,    72,     0,
152       -84,  -165,  -227,  -256,  -240,  -175,   -67,    72,   219,   349,
153       433,   448,   379,   225,     0,  -268,  -536,  -755,  -874,  -848,
154      -648,  -260,   301,  1000,  1780,  2569,  3290,  3869,  4243,  4372,
155      4243,  3869,  3290,  2569,  1780,  1000,   301,  -260,  -648,  -848,
156      -874,  -755,  -536,  -268,     0,   225,   379,   448,   433,   349,
157       219,    72,   -67,  -175,  -240,  -256,  -227,  -165,   -84,     0,
158        72,   121,   143,   139,   111,    70,    23,   -21,   -54,   -73,
159       -76,   -66,   -47,   -23,     0,    19,    31,    36,    33,    26,
160        15,     5,    -4,   -10,   -13,   -13,   -10,    -7,    -3,     0,
161 
162        -1,    -5,    -8,   -12,   -13,   -12,    -8,     0,    10,    21,
163        30,    35,    34,    26,    10,   -11,   -35,   -58,   -73,   -76,
164       -65,   -39,     0,    46,    92,   127,   144,   136,   100,    38,
165       -41,  -125,  -199,  -246,  -254,  -214,  -126,     0,   146,   288,
166       398,   450,   424,   312,   120,  -131,  -405,  -655,  -830,  -881,
167      -771,  -477,     0,   636,  1384,  2178,  2943,  3601,  4084,  4340,
168      4340,  4084,  3601,  2943,  2178,  1384,   636,     0,  -477,  -771,
169      -881,  -830,  -655,  -405,  -131,   120,   312,   424,   450,   398,
170       288,   146,     0,  -126,  -214,  -254,  -246,  -199,  -125,   -41,
171        38,   100,   136,   144,   127,    92,    46,     0,   -39,   -65,
172       -76,   -73,   -58,   -35,   -11,    10,    26,    34,    35,    30,
173        21,    10,     0,    -8,   -12,   -13,   -12,    -8,    -5,    -1,
174 };
175 #endif /* resample_96k_12k8 */
176 
177 
178 /**
179  * High-pass 50Hz filtering, at 12.8 KHz samplerate
180  * hp50            Biquad filter state
181  * xn              Input sample, in fixed Q30
182  * return          Filtered sample, in fixed Q30
183  */
filter_hp50(struct lc3_ltpf_hp50_state * hp50,int32_t xn)184 LC3_HOT static inline int32_t filter_hp50(
185     struct lc3_ltpf_hp50_state *hp50, int32_t xn)
186 {
187     int32_t yn;
188 
189     const int32_t a1 = -2110217691, a2 = 1037111617;
190     const int32_t b1 = -2110535566, b2 = 1055267782;
191 
192     yn       = (hp50->s1 + (int64_t)xn * b2) >> 30;
193     hp50->s1 = (hp50->s2 + (int64_t)xn * b1 - (int64_t)yn * a1);
194     hp50->s2 = (           (int64_t)xn * b2 - (int64_t)yn * a2);
195 
196     return yn;
197 }
198 
199 /**
200  * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
201  * p               Resampling factor with compared to 192 KHz (8, 4 or 2)
202  * h               Arrange by phase coefficients table
203  * hp50            High-Pass biquad filter state
204  * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
205  * y, n            [0..n-1] Output `n` processed samples, Q14
206  *
207  * The `x` vector is aligned on 32 bits
208  * The number of previous samples `d` accessed on `x` is :
209  *   d: { 10, 20, 40 } - 1 for resampling factors 8, 4 and 2.
210  */
211 #if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
212     || !defined(resample_32k_12k8)
resample_x64k_12k8(const int p,const int16_t * h,struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)213 LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h,
214     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
215 {
216     const int w = 2*(40 / p);
217 
218     x -= w - 1;
219 
220     for (int i = 0; i < 5*n; i += 5) {
221         const int16_t *hn = h + (i % p) * w;
222         const int16_t *xn = x + (i / p);
223         int32_t un = 0;
224 
225         for (int k = 0; k < w; k += 10) {
226             un += *(xn++) * *(hn++);
227             un += *(xn++) * *(hn++);
228             un += *(xn++) * *(hn++);
229             un += *(xn++) * *(hn++);
230             un += *(xn++) * *(hn++);
231             un += *(xn++) * *(hn++);
232             un += *(xn++) * *(hn++);
233             un += *(xn++) * *(hn++);
234             un += *(xn++) * *(hn++);
235             un += *(xn++) * *(hn++);
236         }
237 
238         int32_t yn = filter_hp50(hp50, un);
239         *(y++) = (yn + (1 << 15)) >> 16;
240     }
241 }
242 #endif
243 
244 /**
245  * Resample from 24 / 48 KHz to 12.8 KHz Template
246  * p               Resampling factor with compared to 192 KHz (8 or 4)
247  * h               Arrange by phase coefficients table
248  * hp50            High-Pass biquad filter state
249  * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
250  * y, n            [0..n-1] Output `n` processed samples, Q14
251  *
252  * The `x` vector is aligned on 32 bits
253  * The number of previous samples `d` accessed on `x` is :
254  *   d: { 30, 60 } - 1 for resampling factors 8 and 4.
255  */
256 #if !defined(resample_24k_12k8) || !defined(resample_48k_12k8) \
257     || !defined(resample_96k_12k8)
resample_x192k_12k8(const int p,const int16_t * h,struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)258 LC3_HOT static inline void resample_x192k_12k8(const int p, const int16_t *h,
259     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
260 {
261     const int w = 2*(120 / p);
262 
263     x -= w - 1;
264 
265     for (int i = 0; i < 15*n; i += 15) {
266         const int16_t *hn = h + (i % p) * w;
267         const int16_t *xn = x + (i / p);
268         int32_t un = 0;
269 
270         for (int k = 0; k < w; k += 15) {
271             un += *(xn++) * *(hn++);
272             un += *(xn++) * *(hn++);
273             un += *(xn++) * *(hn++);
274             un += *(xn++) * *(hn++);
275             un += *(xn++) * *(hn++);
276             un += *(xn++) * *(hn++);
277             un += *(xn++) * *(hn++);
278             un += *(xn++) * *(hn++);
279             un += *(xn++) * *(hn++);
280             un += *(xn++) * *(hn++);
281             un += *(xn++) * *(hn++);
282             un += *(xn++) * *(hn++);
283             un += *(xn++) * *(hn++);
284             un += *(xn++) * *(hn++);
285             un += *(xn++) * *(hn++);
286         }
287 
288         int32_t yn = filter_hp50(hp50, un);
289         *(y++) = (yn + (1 << 15)) >> 16;
290     }
291 }
292 #endif
293 
294 /**
295  * Resample from 8 Khz to 12.8 KHz
296  * hp50            High-Pass biquad filter state
297  * x               [-10..-1] Previous, [0..ns-1] Current samples, Q15
298  * y, n            [0..n-1] Output `n` processed samples, Q14
299  *
300  * The `x` vector is aligned on 32 bits
301  */
302 #ifndef resample_8k_12k8
resample_8k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)303 LC3_HOT static void resample_8k_12k8(
304     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
305 {
306     resample_x64k_12k8(8, h_8k_12k8_q15, hp50, x, y, n);
307 }
308 #endif /* resample_8k_12k8 */
309 
310 /**
311  * Resample from 16 Khz to 12.8 KHz
312  * hp50            High-Pass biquad filter state
313  * x               [-20..-1] Previous, [0..ns-1] Current samples, in fixed Q15
314  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
315  *
316  * The `x` vector is aligned on 32 bits
317  */
318 #ifndef resample_16k_12k8
resample_16k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)319 LC3_HOT static void resample_16k_12k8(
320     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
321 {
322     resample_x64k_12k8(4, h_16k_12k8_q15, hp50, x, y, n);
323 }
324 #endif /* resample_16k_12k8 */
325 
326 /**
327  * Resample from 32 Khz to 12.8 KHz
328  * hp50            High-Pass biquad filter state
329  * x               [-30..-1] Previous, [0..ns-1] Current samples, in fixed Q15
330  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
331  *
332  * The `x` vector is aligned on 32 bits
333  */
334 #ifndef resample_32k_12k8
resample_32k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)335 LC3_HOT static void resample_32k_12k8(
336     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
337 {
338     resample_x64k_12k8(2, h_32k_12k8_q15, hp50, x, y, n);
339 }
340 #endif /* resample_32k_12k8 */
341 
342 /**
343  * Resample from 24 Khz to 12.8 KHz
344  * hp50            High-Pass biquad filter state
345  * x               [-30..-1] Previous, [0..ns-1] Current samples, in fixed Q15
346  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
347  *
348  * The `x` vector is aligned on 32 bits
349  */
350 #ifndef resample_24k_12k8
resample_24k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)351 LC3_HOT static void resample_24k_12k8(
352     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
353 {
354     resample_x192k_12k8(8, h_24k_12k8_q15, hp50, x, y, n);
355 }
356 #endif /* resample_24k_12k8 */
357 
358 /**
359  * Resample from 48 Khz to 12.8 KHz
360  * hp50            High-Pass biquad filter state
361  * x               [-60..-1] Previous, [0..ns-1] Current samples, in fixed Q15
362  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
363  *
364 * The `x` vector is aligned on 32 bits
365 */
366 #ifndef resample_48k_12k8
resample_48k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)367 LC3_HOT static void resample_48k_12k8(
368     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
369 {
370     resample_x192k_12k8(4, h_48k_12k8_q15, hp50, x, y, n);
371 }
372 #endif /* resample_48k_12k8 */
373 
374 /**
375  * Resample from 96 Khz to 12.8 KHz
376  * hp50            High-Pass biquad filter state
377  * x               [-120..-1] Previous, [0..ns-1] Current samples, in fixed Q15
378  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
379  *
380 * The `x` vector is aligned on 32 bits
381 */
382 #ifndef resample_96k_12k8
resample_96k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)383 LC3_HOT static void resample_96k_12k8(
384     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
385 {
386     resample_x192k_12k8(2, h_96k_12k8_q15, hp50, x, y, n);
387 }
388 #endif /* resample_96k_12k8 */
389 
390 /**
391 * Resample to 6.4 KHz
392 * x               [-3..-1] Previous, [0..n-1] Current samples
393 * y, n            [0..n-1] Output `n` processed samples
394 *
395 * The `x` vector is aligned on 32 bits
396  */
397 #ifndef resample_6k4
resample_6k4(const int16_t * x,int16_t * y,int n)398 LC3_HOT static void resample_6k4(const int16_t *x, int16_t *y, int n)
399 {
400     static const int16_t h[] = { 18477, 15424, 8105 };
401     const int16_t *ye = y + n;
402 
403     for (x--; y < ye; x += 2)
404         *(y++) = (x[0] * h[0] + (x[-1] + x[1]) * h[1]
405                               + (x[-2] + x[2]) * h[2]) >> 16;
406 }
407 #endif /* resample_6k4 */
408 
409 /**
410  * LTPF Resample to 12.8 KHz implementations for each samplerates
411  */
412 
413 static void (* const resample_12k8[])
414     (struct lc3_ltpf_hp50_state *, const int16_t *, int16_t *, int ) =
415 {
416     [LC3_SRATE_8K    ] = resample_8k_12k8,
417     [LC3_SRATE_16K   ] = resample_16k_12k8,
418     [LC3_SRATE_24K   ] = resample_24k_12k8,
419     [LC3_SRATE_32K   ] = resample_32k_12k8,
420     [LC3_SRATE_48K   ] = resample_48k_12k8,
421     [LC3_SRATE_48K_HR] = resample_48k_12k8,
422     [LC3_SRATE_96K_HR] = resample_96k_12k8,
423 };
424 
425 
426 /* ----------------------------------------------------------------------------
427  *  Analysis
428  * -------------------------------------------------------------------------- */
429 
430 /**
431  * Return dot product of 2 vectors
432  * a, b, n         The 2 vectors of size `n` (> 0 and <= 128)
433  * return          sum( a[i] * b[i] ), i = [0..n-1]
434  *
435  * The size `n` of vectors must be multiple of 16, and less or equal to 128
436 */
437 #ifndef dot
dot(const int16_t * a,const int16_t * b,int n)438 LC3_HOT static inline float dot(const int16_t *a, const int16_t *b, int n)
439 {
440     int64_t v = 0;
441 
442     for (int i = 0; i < (n >> 4); i++)
443         for (int j = 0; j < 16; j++)
444             v += *(a++) * *(b++);
445 
446     int32_t v32 = (v + (1 << 5)) >> 6;
447     return (float)v32;
448 }
449 #endif /* dot */
450 
451 /**
452  * Return vector of correlations
453  * a, b, n         The 2 vector of size `n` (> 0 and <= 128)
454  * y, nc           Output the correlation vector of size `nc`
455  *
456  * The first vector `a` is aligned of 32 bits
457  * The size `n` of vectors is multiple of 16, and less or equal to 128
458  */
459 #ifndef correlate
correlate(const int16_t * a,const int16_t * b,int n,float * y,int nc)460 LC3_HOT static void correlate(
461     const int16_t *a, const int16_t *b, int n, float *y, int nc)
462 {
463     for (const float *ye = y + nc; y < ye; )
464         *(y++) = dot(a, b--, n);
465 }
466 #endif /* correlate */
467 
468 /**
469  * Search the maximum value and returns its argument
470  * x, n            The input vector of size `n`
471  * x_max           Return the maximum value
472  * return          Return the argument of the maximum
473  */
argmax(const float * x,int n,float * x_max)474 LC3_HOT static int argmax(const float *x, int n, float *x_max)
475 {
476     int arg = 0;
477 
478     *x_max = x[arg = 0];
479     for (int i = 1; i < n; i++)
480         if (*x_max < x[i])
481             *x_max = x[arg = i];
482 
483     return arg;
484 }
485 
486 /**
487  * Search the maximum weithed value and returns its argument
488  * x, n            The input vector of size `n`
489  * w_incr          Increment of the weight
490  * x_max, xw_max   Return the maximum not weighted value
491  * return          Return the argument of the weigthed maximum
492  */
argmax_weighted(const float * x,int n,float w_incr,float * x_max)493 LC3_HOT static int argmax_weighted(
494     const float *x, int n, float w_incr, float *x_max)
495 {
496     int arg;
497 
498     float xw_max = (*x_max = x[arg = 0]);
499     float w = 1 + w_incr;
500 
501     for (int i = 1; i < n; i++, w += w_incr)
502         if (xw_max < x[i] * w)
503             xw_max = (*x_max = x[arg = i]) * w;
504 
505     return arg;
506 }
507 
508 /**
509  * Interpolate from pitch detected value
510  * x, n            [-2..-1] Previous, [0..n] Current input
511  * d               The phase of interpolation (0 to 3)
512  * return          The interpolated vector
513  *
514  * The size `n` of vectors must be multiple of 4
515  */
interpolate(const int16_t * x,int n,int d,int16_t * y)516 LC3_HOT static void interpolate(const int16_t *x, int n, int d, int16_t *y)
517 {
518     static const int16_t h4_q15[][4] = {
519         { 6877, 19121,  6877,     0 }, { 3506, 18025, 11000,   220 },
520         { 1300, 15048, 15048,  1300 }, {  220, 11000, 18025,  3506 } };
521 
522     const int16_t *h = h4_q15[d];
523     int16_t x3 = x[-2], x2 = x[-1], x1, x0;
524 
525     x1 = (*x++);
526     for (const int16_t *ye = y + n; y < ye; ) {
527         int32_t yn;
528 
529         yn = (x0 = *(x++)) * h[0] + x1 * h[1] + x2 * h[2] + x3 * h[3];
530         *(y++) = yn >> 15;
531 
532         yn = (x3 = *(x++)) * h[0] + x0 * h[1] + x1 * h[2] + x2 * h[3];
533         *(y++) = yn >> 15;
534 
535         yn = (x2 = *(x++)) * h[0] + x3 * h[1] + x0 * h[2] + x1 * h[3];
536         *(y++) = yn >> 15;
537 
538         yn = (x1 = *(x++)) * h[0] + x2 * h[1] + x3 * h[2] + x0 * h[3];
539         *(y++) = yn >> 15;
540     }
541 }
542 
543 /**
544  * Interpolate autocorrelation
545  * x               [-4..-1] Previous, [0..4] Current input
546  * d               The phase of interpolation (-3 to 3)
547  * return          The interpolated value
548  */
interpolate_corr(const float * x,int d)549 LC3_HOT static float interpolate_corr(const float *x, int d)
550 {
551     static const float h4[][8] = {
552         {  1.53572770e-02, -4.72963246e-02,  8.35788573e-02,  8.98638285e-01,
553            8.35788573e-02, -4.72963246e-02,  1.53572770e-02,                 },
554         {  2.74547165e-03,  4.59833449e-03, -7.54404636e-02,  8.17488686e-01,
555            3.30182571e-01, -1.05835916e-01,  2.86823405e-02, -2.87456116e-03 },
556         { -3.00125103e-03,  2.95038503e-02, -1.30305021e-01,  6.03297008e-01,
557            6.03297008e-01, -1.30305021e-01,  2.95038503e-02, -3.00125103e-03 },
558         { -2.87456116e-03,  2.86823405e-02, -1.05835916e-01,  3.30182571e-01,
559            8.17488686e-01, -7.54404636e-02,  4.59833449e-03,  2.74547165e-03 },
560     };
561 
562     const float *h = h4[(4+d) % 4];
563 
564     float y = d < 0 ? x[-4] * *(h++) :
565               d > 0 ? x[ 4] * *(h+7) : 0;
566 
567     y += x[-3] * h[0] + x[-2] * h[1] + x[-1] * h[2] + x[0] * h[3] +
568          x[ 1] * h[4] + x[ 2] * h[5] + x[ 3] * h[6];
569 
570     return y;
571 }
572 
573 /**
574  * Pitch detection algorithm
575  * ltpf            Context of analysis
576  * x, n            [-114..-17] Previous, [0..n-1] Current 6.4KHz samples
577  * tc              Return the pitch-lag estimation
578  * return          True when pitch present
579  *
580  * The `x` vector is aligned on 32 bits
581  */
detect_pitch(struct lc3_ltpf_analysis * ltpf,const int16_t * x,int n,int * tc)582 static bool detect_pitch(struct lc3_ltpf_analysis *ltpf,
583     const int16_t *x, int n, int *tc)
584 {
585     float rm1, rm2;
586     float r[98];
587 
588     const int r0 = 17, nr = 98;
589     int k0 = LC3_MAX(   0, ltpf->tc-4);
590     int nk = LC3_MIN(nr-1, ltpf->tc+4) - k0 + 1;
591 
592     correlate(x, x - r0, n, r, nr);
593 
594     int t1 = argmax_weighted(r, nr, -.5f/(nr-1), &rm1);
595     int t2 = k0 + argmax(r + k0, nk, &rm2);
596 
597     const int16_t *x1 = x - (r0 + t1);
598     const int16_t *x2 = x - (r0 + t2);
599 
600     float nc1 = rm1 <= 0 ? 0 :
601         rm1 / sqrtf(dot(x, x, n) * dot(x1, x1, n));
602 
603     float nc2 = rm2 <= 0 ? 0 :
604         rm2 / sqrtf(dot(x, x, n) * dot(x2, x2, n));
605 
606     int t1sel = nc2 <= 0.85f * nc1;
607     ltpf->tc = (t1sel ? t1 : t2);
608 
609     *tc = r0 + ltpf->tc;
610     return (t1sel ? nc1 : nc2) > 0.6f;
611 }
612 
613 /**
614  * Pitch-lag parameter
615  * x, n            [-232..-28] Previous, [0..n-1] Current 12.8KHz samples, Q14
616  * tc              Pitch-lag estimation
617  * pitch           The pitch value, in fixed .4
618  * return          The bitstream pitch index value
619  *
620  * The `x` vector is aligned on 32 bits
621  */
refine_pitch(const int16_t * x,int n,int tc,int * pitch)622 static int refine_pitch(const int16_t *x, int n, int tc, int *pitch)
623 {
624     float r[17], rm;
625     int e, f;
626 
627     int r0 = LC3_MAX( 32, 2*tc - 4);
628     int nr = LC3_MIN(228, 2*tc + 4) - r0 + 1;
629 
630     correlate(x, x - (r0 - 4), n, r, nr + 8);
631 
632     e = r0 + argmax(r + 4, nr, &rm);
633     const float *re = r + (e - (r0 - 4));
634 
635     float dm = interpolate_corr(re, f = 0);
636     for (int i = 1; i <= 3; i++) {
637         float d;
638 
639         if (e >= 127 && ((i & 1) | (e >= 157)))
640             continue;
641 
642         if ((d = interpolate_corr(re, i)) > dm)
643             dm = d, f = i;
644 
645         if (e > 32 && (d = interpolate_corr(re, -i)) > dm)
646             dm = d, f = -i;
647     }
648 
649     e -=   (f < 0);
650     f += 4*(f < 0);
651 
652     *pitch = 4*e + f;
653     return e < 127 ? 4*e +  f       - 128 :
654            e < 157 ? 2*e + (f >> 1) + 126 : e + 283;
655 }
656 
657 /**
658  * LTPF Analysis
659  */
lc3_ltpf_analyse(enum lc3_dt dt,enum lc3_srate sr,struct lc3_ltpf_analysis * ltpf,const int16_t * x,struct lc3_ltpf_data * data)660 bool lc3_ltpf_analyse(
661     enum lc3_dt dt, enum lc3_srate sr, struct lc3_ltpf_analysis *ltpf,
662     const int16_t *x, struct lc3_ltpf_data *data)
663 {
664     /* --- Resampling to 12.8 KHz --- */
665 
666     int z_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
667     int n_12k8 = (1 + dt) * 32;
668 
669     memmove(ltpf->x_12k8, ltpf->x_12k8 + n_12k8,
670         (z_12k8 - n_12k8) * sizeof(*ltpf->x_12k8));
671 
672     int16_t *x_12k8 = ltpf->x_12k8 + (z_12k8 - n_12k8);
673 
674     resample_12k8[sr](&ltpf->hp50, x, x_12k8, n_12k8);
675 
676     x_12k8 -= (dt == LC3_DT_7M5 ? 44 : 24);
677 
678     /* --- Resampling to 6.4 KHz --- */
679 
680     int z_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);
681     int n_6k4 = n_12k8 >> 1;
682 
683     memmove(ltpf->x_6k4, ltpf->x_6k4 + n_6k4,
684         (z_6k4 - n_6k4) * sizeof(*ltpf->x_6k4));
685 
686     int16_t *x_6k4 = ltpf->x_6k4 + (z_6k4 - n_6k4);
687 
688     resample_6k4(x_12k8, x_6k4, n_6k4);
689 
690     /* --- Enlarge for small frame size --- */
691 
692     if (dt == LC3_DT_2M5) {
693         x_12k8 -= n_12k8, x_6k4 -= n_6k4;
694         n_12k8 += n_12k8; n_6k4 += n_6k4;
695     }
696 
697     /* --- Pitch detection --- */
698 
699     int tc, pitch = 0;
700     float nc = 0;
701 
702     bool pitch_present = detect_pitch(ltpf, x_6k4, n_6k4, &tc);
703 
704     if (pitch_present) {
705         int16_t u[128], v[128];
706 
707         data->pitch_index = refine_pitch(x_12k8, n_12k8, tc, &pitch);
708 
709         interpolate(x_12k8, n_12k8, 0, u);
710         interpolate(x_12k8 - (pitch >> 2), n_12k8, pitch & 3, v);
711 
712         nc = dot(u, v, n_12k8) / sqrtf(dot(u, u, n_12k8) * dot(v, v, n_12k8));
713     }
714 
715     /* --- Activation --- */
716 
717      if (ltpf->active) {
718         int pitch_diff =
719             LC3_MAX(pitch, ltpf->pitch) - LC3_MIN(pitch, ltpf->pitch);
720         float nc_diff = nc - ltpf->nc[0];
721 
722         data->active = !lc3_hr(sr) && pitch_present &&
723             ((nc > 0.9f) || (nc > 0.84f && pitch_diff < 8 && nc_diff > -0.1f));
724 
725      } else {
726          data->active = !lc3_hr(sr) && pitch_present &&
727             ( (dt == LC3_DT_10M || ltpf->nc[1] > 0.94f) &&
728               (ltpf->nc[0] > 0.94f && nc > 0.94f) );
729      }
730 
731      ltpf->active = data->active;
732      ltpf->pitch = pitch;
733      ltpf->nc[1] = ltpf->nc[0];
734      ltpf->nc[0] = nc;
735 
736      return pitch_present;
737 }
738 
739 
740 /* ----------------------------------------------------------------------------
741  *  Synthesis
742  * -------------------------------------------------------------------------- */
743 
744 /**
745  * Width of synthesis filter
746  */
747 
748 #define MAX_FILTER_WIDTH \
749     (LC3_MAX_SRATE_HZ / 4000)
750 
751 
752 /**
753  * Synthesis filter template
754  * xh, nh          History ring buffer of filtered samples
755  * lag             Lag parameter in the ring buffer
756  * x0              w-1 previous input samples
757  * x, n            Current samples as input, filtered as output
758  * c, w            Coefficients `den` then `num`, and width of filter
759  * fade            Fading mode of filter  -1: Out  1: In  0: None
760  */
synthesize_template(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,const int w,int fade)761 LC3_HOT static inline void synthesize_template(
762     const float *xh, int nh, int lag,
763     const float *x0, float *x, int n,
764     const float *c, const int w, int fade)
765 {
766     float g = (float)(fade <= 0);
767     float g_incr = (float)((fade > 0) - (fade < 0)) / n;
768     float u[MAX_FILTER_WIDTH];
769 
770     /* --- Load previous samples --- */
771 
772     lag += (w >> 1);
773 
774     const float *y = x - xh < lag ? x + (nh - lag) : x - lag;
775     const float *y_end = xh + nh - 1;
776 
777     for (int j = 0; j < w-1; j++) {
778 
779         u[j] = 0;
780 
781         float yi = *y, xi = *(x0++);
782         y = y < y_end ? y + 1 : xh;
783 
784         for (int k = 0; k <= j; k++)
785             u[j-k] -= yi * c[k];
786 
787         for (int k = 0; k <= j; k++)
788             u[j-k] += xi * c[w+k];
789     }
790 
791     u[w-1] = 0;
792 
793     /* --- Process by filter length --- */
794 
795     for (int i = 0; i < n; i += w)
796         for (int j = 0; j < w; j++, g += g_incr) {
797 
798             float yi = *y, xi = *x;
799             y = y < y_end ? y + 1 : xh;
800 
801             for (int k = 0; k < w; k++)
802                 u[(j+(w-1)-k)%w] -= yi * c[k];
803 
804             for (int k = 0; k < w; k++)
805                 u[(j+(w-1)-k)%w] += xi * c[w+k];
806 
807             *(x++) = xi - g * u[j];
808             u[j] = 0;
809         }
810 }
811 
812 /**
813  * Synthesis filter for each samplerates (width of filter)
814  */
815 
816 
synthesize_4(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)817 LC3_HOT static void synthesize_4(const float *xh, int nh, int lag,
818     const float *x0, float *x, int n, const float *c, int fade)
819 {
820     synthesize_template(xh, nh, lag, x0, x, n, c, 4, fade);
821 }
822 
synthesize_6(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)823 LC3_HOT static void synthesize_6(const float *xh, int nh, int lag,
824     const float *x0, float *x, int n, const float *c, int fade)
825 {
826     synthesize_template(xh, nh, lag, x0, x, n, c, 6, fade);
827 }
828 
synthesize_8(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)829 LC3_HOT static void synthesize_8(const float *xh, int nh, int lag,
830     const float *x0, float *x, int n, const float *c, int fade)
831 {
832     synthesize_template(xh, nh, lag, x0, x, n, c, 8, fade);
833 }
834 
synthesize_12(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)835 LC3_HOT static void synthesize_12(const float *xh, int nh, int lag,
836     const float *x0, float *x, int n, const float *c, int fade)
837 {
838     synthesize_template(xh, nh, lag, x0, x, n, c, 12, fade);
839 }
840 
841 static void (* const synthesize[])(const float *, int, int,
842     const float *, float *, int, const float *, int) =
843 {
844     [LC3_SRATE_8K ] = synthesize_4,
845     [LC3_SRATE_16K] = synthesize_4,
846     [LC3_SRATE_24K] = synthesize_6,
847     [LC3_SRATE_32K] = synthesize_8,
848     [LC3_SRATE_48K] = synthesize_12,
849 };
850 
851 
852 /**
853  * LTPF Synthesis
854  */
lc3_ltpf_synthesize(enum lc3_dt dt,enum lc3_srate sr,int nbytes,lc3_ltpf_synthesis_t * ltpf,const lc3_ltpf_data_t * data,const float * xh,float * x)855 void lc3_ltpf_synthesize(enum lc3_dt dt, enum lc3_srate sr, int nbytes,
856     lc3_ltpf_synthesis_t *ltpf, const lc3_ltpf_data_t *data,
857     const float *xh, float *x)
858 {
859     int nh = lc3_ns(dt, sr) + lc3_nh(dt, sr);
860 
861     /* --- Filter parameters --- */
862 
863     int p_idx = data ? data->pitch_index : 0;
864     int pitch =
865         p_idx >= 440 ? (((p_idx     ) - 283) << 2)  :
866         p_idx >= 380 ? (((p_idx >> 1) -  63) << 2) + (((p_idx & 1)) << 1) :
867                        (((p_idx >> 2) +  32) << 2) + (((p_idx & 3)) << 0)  ;
868 
869     pitch = (pitch * lc3_ns(LC3_DT_10M, sr) + 64) / 128;
870 
871     int nbits = (nbytes*8 * (1 + LC3_DT_10M)) / (1 + dt);
872     if (dt == LC3_DT_2M5)
873       nbits = (6 * nbits + 5) / 10;
874     if (dt == LC3_DT_5M)
875       nbits -= 160;
876 
877     int g_idx = LC3_MAX(nbits / 80, (int)(3 + sr)) - (3 + sr);
878     bool active = data && data->active && g_idx < 4;
879 
880     int w = LC3_MAX(4, lc3_ns_4m[sr] >> 4);
881     float c[2 * MAX_FILTER_WIDTH];
882 
883     for (int i = 0; i < w; i++) {
884         float g = active ? 0.4f - 0.05f * g_idx : 0;
885         c[  i] = g * lc3_ltpf_cden[sr][pitch & 3][(w-1)-i];
886         c[w+i] = 0.85f * g * lc3_ltpf_cnum[sr][LC3_MIN(g_idx, 3)][(w-1)-i];
887     }
888 
889     /* --- Transition handling --- */
890 
891     int ns = lc3_ns(dt, sr);
892     int nt = ns / (1 + dt);
893     float x0[2][MAX_FILTER_WIDTH];
894 
895     memcpy(x0[0], ltpf->x, (w-1) * sizeof(float));
896     memcpy(ltpf->x, x + ns - (w-1), (w-1) * sizeof(float));
897 
898     if (active)
899         memcpy(x0[1], x + nt-(w-1), (w-1) * sizeof(float));
900 
901     if (!ltpf->active && active)
902         synthesize[sr](xh, nh, pitch/4, x0[0], x, nt, c, 1);
903     else if (ltpf->active && !active)
904         synthesize[sr](xh, nh, ltpf->pitch/4, x0[0], x, nt, ltpf->c, -1);
905     else if (ltpf->active && active && ltpf->pitch == pitch)
906         synthesize[sr](xh, nh, pitch/4, x0[0], x, nt, c, 0);
907     else if (ltpf->active && active) {
908         synthesize[sr](xh, nh, ltpf->pitch/4, x0[0], x, nt, ltpf->c, -1);
909         synthesize[sr](xh, nh, pitch/4,
910             (x <= xh ? x + nh : x) - (w-1), x, nt, c, 1);
911     }
912 
913     /* --- Remainder --- */
914 
915     if (active && ns > nt)
916         synthesize[sr](xh, nh, pitch/4, x0[1], x + nt, ns-nt, c, 0);
917 
918     /* --- Update state --- */
919 
920     ltpf->active = active;
921     ltpf->pitch = pitch;
922     memcpy(ltpf->c, c, 2*w * sizeof(*ltpf->c));
923 }
924 
925 
926 /* ----------------------------------------------------------------------------
927  *  Bitstream data
928  * -------------------------------------------------------------------------- */
929 
930 /**
931  * LTPF disable
932  */
lc3_ltpf_disable(struct lc3_ltpf_data * data)933 void lc3_ltpf_disable(struct lc3_ltpf_data *data)
934 {
935     data->active = false;
936 }
937 
938 /**
939  * Return number of bits coding the bitstream data
940  */
lc3_ltpf_get_nbits(bool pitch)941 int lc3_ltpf_get_nbits(bool pitch)
942 {
943     return 1 + 10 * pitch;
944 }
945 
946 /**
947  * Put bitstream data
948  */
lc3_ltpf_put_data(lc3_bits_t * bits,const struct lc3_ltpf_data * data)949 void lc3_ltpf_put_data(lc3_bits_t *bits,
950     const struct lc3_ltpf_data *data)
951 {
952     lc3_put_bit(bits, data->active);
953     lc3_put_bits(bits, data->pitch_index, 9);
954 }
955 
956 /**
957  * Get bitstream data
958  */
lc3_ltpf_get_data(lc3_bits_t * bits,struct lc3_ltpf_data * data)959 void lc3_ltpf_get_data(lc3_bits_t *bits,
960     struct lc3_ltpf_data *data)
961 {
962     data->active = lc3_get_bit(bits);
963     data->pitch_index = lc3_get_bits(bits, 9);
964 }
965