1 /******************************************************************************
2  *
3  *  Copyright 2022 Google LLC
4  *
5  *  Licensed under the Apache License, Version 2.0 (the "License");
6  *  you may not use this file except in compliance with the License.
7  *  You may obtain a copy of the License at:
8  *
9  *  http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  *
17  ******************************************************************************/
18 
19 #include "ltpf.h"
20 #include "tables.h"
21 
22 #include "ltpf_neon.h"
23 #include "ltpf_arm.h"
24 
25 
26 /* ----------------------------------------------------------------------------
27  *  Resampling
28  * -------------------------------------------------------------------------- */
29 
30 /**
31  * Resampling coefficients
32  * The coefficients, in fixed Q15, are reordered by phase for each source
33  * samplerate (coefficient matrix transposed)
34  */
35 
36 #ifndef resample_8k_12k8
37 static const int16_t h_8k_12k8_q15[8*10] = {
38       214,   417, -1052, -4529, 26233, -4529, -1052,   417,   214,     0,
39       180,     0, -1522, -2427, 24506, -5289,     0,   763,   156,   -28,
40        92,  -323, -1361,     0, 19741, -3885,  1317,   861,     0,   -61,
41         0,  -457,  -752,  1873, 13068,     0,  2389,   598,  -213,   -79,
42       -61,  -398,     0,  2686,  5997,  5997,  2686,     0,  -398,   -61,
43       -79,  -213,   598,  2389,     0, 13068,  1873,  -752,  -457,     0,
44       -61,     0,   861,  1317, -3885, 19741,     0, -1361,  -323,    92,
45       -28,   156,   763,     0, -5289, 24506, -2427, -1522,     0,   180,
46 };
47 #endif /* resample_8k_12k8 */
48 
49 #ifndef resample_16k_12k8
50 static const int16_t h_16k_12k8_q15[4*20] = {
51       -61,   214,  -398,   417,     0, -1052,  2686, -4529,  5997, 26233,
52      5997, -4529,  2686, -1052,     0,   417,  -398,   214,   -61,     0,
53 
54       -79,   180,  -213,     0,   598, -1522,  2389, -2427,     0, 24506,
55     13068, -5289,  1873,     0,  -752,   763,  -457,   156,     0,   -28,
56 
57       -61,    92,     0,  -323,   861, -1361,  1317,     0, -3885, 19741,
58     19741, -3885,     0,  1317, -1361,   861,  -323,     0,    92,   -61,
59 
60       -28,     0,   156,  -457,   763,  -752,     0,  1873, -5289, 13068,
61     24506,     0, -2427,  2389, -1522,   598,     0,  -213,   180,   -79,
62 };
63 #endif /* resample_16k_12k8 */
64 
65 #ifndef resample_32k_12k8
66 static const int16_t h_32k_12k8_q15[2*40] = {
67       -30,   -31,    46,   107,     0,  -199,  -162,   209,   430,     0,
68      -681,  -526,   658,  1343,     0, -2264, -1943,  2999,  9871, 13116,
69      9871,  2999, -1943, -2264,     0,  1343,   658,  -526,  -681,     0,
70       430,   209,  -162,  -199,     0,   107,    46,   -31,   -30,     0,
71 
72       -14,   -39,     0,    90,    78,  -106,  -229,     0,   382,   299,
73      -376,  -761,     0,  1194,   937, -1214, -2644,     0,  6534, 12253,
74     12253,  6534,     0, -2644, -1214,   937,  1194,     0,  -761,  -376,
75       299,   382,     0,  -229,  -106,    78,    90,     0,   -39,   -14,
76 };
77 #endif /* resample_32k_12k8 */
78 
79 #ifndef resample_24k_12k8
80 static const int16_t h_24k_12k8_q15[8*30] = {
81       -50,    19,   143,   -93,  -290,   278,   485,  -658,  -701,  1396,
82       901, -3019, -1042, 10276, 17488, 10276, -1042, -3019,   901,  1396,
83      -701,  -658,   485,   278,  -290,   -93,   143,    19,   -50,     0,
84 
85       -46,     0,   141,   -45,  -305,   185,   543,  -501,  -854,  1153,
86      1249, -2619, -1908,  8712, 17358, 11772,     0, -3319,   480,  1593,
87      -504,  -796,   399,   367,  -261,  -142,   138,    40,   -52,    -5,
88 
89       -41,   -17,   133,     0,  -304,    91,   574,  -334,  -959,   878,
90      1516, -2143, -2590,  7118, 16971, 13161,  1202, -3495,     0,  1731,
91      -267,  -908,   287,   445,  -215,  -188,   125,    62,   -52,   -12,
92 
93       -34,   -30,   120,    41,  -291,     0,   577,  -164, -1015,   585,
94      1697, -1618, -3084,  5534, 16337, 14406,  2544, -3526,  -523,  1800,
95         0,  -985,   152,   509,  -156,  -230,   104,    83,   -48,   -19,
96 
97       -26,   -41,   103,    76,  -265,   -83,   554,     0, -1023,   288,
98      1791, -1070, -3393,  3998, 15474, 15474,  3998, -3393, -1070,  1791,
99       288, -1023,     0,   554,   -83,  -265,    76,   103,   -41,   -26,
100 
101       -19,   -48,    83,   104,  -230,  -156,   509,   152,  -985,     0,
102      1800,  -523, -3526,  2544, 14406, 16337,  5534, -3084, -1618,  1697,
103       585, -1015,  -164,   577,     0,  -291,    41,   120,   -30,   -34,
104 
105       -12,   -52,    62,   125,  -188,  -215,   445,   287,  -908,  -267,
106      1731,     0, -3495,  1202, 13161, 16971,  7118, -2590, -2143,  1516,
107       878,  -959,  -334,   574,    91,  -304,     0,   133,   -17,   -41,
108 
109        -5,   -52,    40,   138,  -142,  -261,   367,   399,  -796,  -504,
110      1593,   480, -3319,     0, 11772, 17358,  8712, -1908, -2619,  1249,
111      1153,  -854,  -501,   543,   185,  -305,   -45,   141,     0,   -46,
112 };
113 #endif /* resample_24k_12k8 */
114 
115 #ifndef resample_48k_12k8
116 static const int16_t h_48k_12k8_q15[4*60] = {
117       -13,   -25,   -20,    10,    51,    71,    38,   -47,  -133,  -145,
118       -42,   139,   277,   242,     0,  -329,  -511,  -351,   144,   698,
119       895,   450,  -535, -1510, -1697,  -521,  1999,  5138,  7737,  8744,
120      7737,  5138,  1999,  -521, -1697, -1510,  -535,   450,   895,   698,
121       144,  -351,  -511,  -329,     0,   242,   277,   139,   -42,  -145,
122      -133,   -47,    38,    71,    51,    10,   -20,   -25,   -13,     0,
123 
124        -9,   -23,   -24,     0,    41,    71,    52,   -23,  -115,  -152,
125       -78,    92,   254,   272,    76,  -251,  -493,  -427,     0,   576,
126       900,   624,  -262, -1309, -1763,  -954,  1272,  4356,  7203,  8679,
127      8169,  5886,  2767,     0, -1542, -1660,  -809,   240,   848,   796,
128       292,  -252,  -507,  -398,   -82,   199,   288,   183,     0,  -130,
129      -145,   -71,    20,    69,    60,    20,   -15,   -26,   -17,    -3,
130 
131        -6,   -20,   -26,    -8,    31,    67,    62,     0,   -94,  -152,
132      -108,    45,   223,   287,   143,  -167,  -454,  -480,  -134,   439,
133       866,   758,     0, -1071, -1748, -1295,   601,  3559,  6580,  8485,
134      8485,  6580,  3559,   601, -1295, -1748, -1071,     0,   758,   866,
135       439,  -134,  -480,  -454,  -167,   143,   287,   223,    45,  -108,
136      -152,   -94,     0,    62,    67,    31,    -8,   -26,   -20,    -6,
137 
138        -3,   -17,   -26,   -15,    20,    60,    69,    20,   -71,  -145,
139      -130,     0,   183,   288,   199,   -82,  -398,  -507,  -252,   292,
140       796,   848,   240,  -809, -1660, -1542,     0,  2767,  5886,  8169,
141      8679,  7203,  4356,  1272,  -954, -1763, -1309,  -262,   624,   900,
142       576,     0,  -427,  -493,  -251,    76,   272,   254,    92,   -78,
143      -152,  -115,   -23,    52,    71,    41,     0,   -24,   -23,    -9,
144 };
145 #endif /* resample_48k_12k8 */
146 
147 
148 /**
149  * High-pass 50Hz filtering, at 12.8 KHz samplerate
150  * hp50            Biquad filter state
151  * xn              Input sample, in fixed Q30
152  * return          Filtered sample, in fixed Q30
153  */
filter_hp50(struct lc3_ltpf_hp50_state * hp50,int32_t xn)154 LC3_HOT static inline int32_t filter_hp50(
155     struct lc3_ltpf_hp50_state *hp50, int32_t xn)
156 {
157     int32_t yn;
158 
159     const int32_t a1 = -2110217691, a2 = 1037111617;
160     const int32_t b1 = -2110535566, b2 = 1055267782;
161 
162     yn       = (hp50->s1 + (int64_t)xn * b2) >> 30;
163     hp50->s1 = (hp50->s2 + (int64_t)xn * b1 - (int64_t)yn * a1);
164     hp50->s2 = (           (int64_t)xn * b2 - (int64_t)yn * a2);
165 
166     return yn;
167 }
168 
169 /**
170  * Resample from 8 / 16 / 32 KHz to 12.8 KHz Template
171  * p               Resampling factor with compared to 192 KHz (8, 4 or 2)
172  * h               Arrange by phase coefficients table
173  * hp50            High-Pass biquad filter state
174  * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
175  * y, n            [0..n-1] Output `n` processed samples, Q14
176  *
177  * The `x` vector is aligned on 32 bits
178  * The number of previous samples `d` accessed on `x` is :
179  *   d: { 10, 20, 40 } - 1 for resampling factors 8, 4 and 2.
180  */
181 #if !defined(resample_8k_12k8) || !defined(resample_16k_12k8) \
182     || !defined(resample_32k_12k8)
resample_x64k_12k8(const int p,const int16_t * h,struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)183 LC3_HOT static inline void resample_x64k_12k8(const int p, const int16_t *h,
184     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
185 {
186     const int w = 2*(40 / p);
187 
188     x -= w - 1;
189 
190     for (int i = 0; i < 5*n; i += 5) {
191         const int16_t *hn = h + (i % p) * w;
192         const int16_t *xn = x + (i / p);
193         int32_t un = 0;
194 
195         for (int k = 0; k < w; k += 10) {
196             un += *(xn++) * *(hn++);
197             un += *(xn++) * *(hn++);
198             un += *(xn++) * *(hn++);
199             un += *(xn++) * *(hn++);
200             un += *(xn++) * *(hn++);
201             un += *(xn++) * *(hn++);
202             un += *(xn++) * *(hn++);
203             un += *(xn++) * *(hn++);
204             un += *(xn++) * *(hn++);
205             un += *(xn++) * *(hn++);
206         }
207 
208         int32_t yn = filter_hp50(hp50, un);
209         *(y++) = (yn + (1 << 15)) >> 16;
210     }
211 }
212 #endif
213 
214 /**
215  * Resample from 24 / 48 KHz to 12.8 KHz Template
216  * p               Resampling factor with compared to 192 KHz (8 or 4)
217  * h               Arrange by phase coefficients table
218  * hp50            High-Pass biquad filter state
219  * x               [-d..-1] Previous, [0..ns-1] Current samples, Q15
220  * y, n            [0..n-1] Output `n` processed samples, Q14
221  *
222  * The `x` vector is aligned on 32 bits
223  * The number of previous samples `d` accessed on `x` is :
224  *   d: { 30, 60 } - 1 for resampling factors 8 and 4.
225  */
226 #if !defined(resample_24k_12k8) || !defined(resample_48k_12k8)
resample_x192k_12k8(const int p,const int16_t * h,struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)227 LC3_HOT static inline void resample_x192k_12k8(const int p, const int16_t *h,
228     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
229 {
230     const int w = 2*(120 / p);
231 
232     x -= w - 1;
233 
234     for (int i = 0; i < 15*n; i += 15) {
235         const int16_t *hn = h + (i % p) * w;
236         const int16_t *xn = x + (i / p);
237         int32_t un = 0;
238 
239         for (int k = 0; k < w; k += 15) {
240             un += *(xn++) * *(hn++);
241             un += *(xn++) * *(hn++);
242             un += *(xn++) * *(hn++);
243             un += *(xn++) * *(hn++);
244             un += *(xn++) * *(hn++);
245             un += *(xn++) * *(hn++);
246             un += *(xn++) * *(hn++);
247             un += *(xn++) * *(hn++);
248             un += *(xn++) * *(hn++);
249             un += *(xn++) * *(hn++);
250             un += *(xn++) * *(hn++);
251             un += *(xn++) * *(hn++);
252             un += *(xn++) * *(hn++);
253             un += *(xn++) * *(hn++);
254             un += *(xn++) * *(hn++);
255         }
256 
257         int32_t yn = filter_hp50(hp50, un);
258         *(y++) = (yn + (1 << 15)) >> 16;
259     }
260 }
261 #endif
262 
263 /**
264  * Resample from 8 Khz to 12.8 KHz
265  * hp50            High-Pass biquad filter state
266  * x               [-10..-1] Previous, [0..ns-1] Current samples, Q15
267  * y, n            [0..n-1] Output `n` processed samples, Q14
268  *
269  * The `x` vector is aligned on 32 bits
270  */
271 #ifndef resample_8k_12k8
resample_8k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)272 LC3_HOT static void resample_8k_12k8(
273     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
274 {
275     resample_x64k_12k8(8, h_8k_12k8_q15, hp50, x, y, n);
276 }
277 #endif /* resample_8k_12k8 */
278 
279 /**
280  * Resample from 16 Khz to 12.8 KHz
281  * hp50            High-Pass biquad filter state
282  * x               [-20..-1] Previous, [0..ns-1] Current samples, in fixed Q15
283  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
284  *
285  * The `x` vector is aligned on 32 bits
286  */
287 #ifndef resample_16k_12k8
resample_16k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)288 LC3_HOT static void resample_16k_12k8(
289     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
290 {
291     resample_x64k_12k8(4, h_16k_12k8_q15, hp50, x, y, n);
292 }
293 #endif /* resample_16k_12k8 */
294 
295 /**
296  * Resample from 32 Khz to 12.8 KHz
297  * hp50            High-Pass biquad filter state
298  * x               [-30..-1] Previous, [0..ns-1] Current samples, in fixed Q15
299  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
300  *
301  * The `x` vector is aligned on 32 bits
302  */
303 #ifndef resample_32k_12k8
resample_32k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)304 LC3_HOT static void resample_32k_12k8(
305     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
306 {
307     resample_x64k_12k8(2, h_32k_12k8_q15, hp50, x, y, n);
308 }
309 #endif /* resample_32k_12k8 */
310 
311 /**
312  * Resample from 24 Khz to 12.8 KHz
313  * hp50            High-Pass biquad filter state
314  * x               [-30..-1] Previous, [0..ns-1] Current samples, in fixed Q15
315  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
316  *
317  * The `x` vector is aligned on 32 bits
318  */
319 #ifndef resample_24k_12k8
resample_24k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)320 LC3_HOT static void resample_24k_12k8(
321     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
322 {
323     resample_x192k_12k8(8, h_24k_12k8_q15, hp50, x, y, n);
324 }
325 #endif /* resample_24k_12k8 */
326 
327 /**
328  * Resample from 48 Khz to 12.8 KHz
329  * hp50            High-Pass biquad filter state
330  * x               [-60..-1] Previous, [0..ns-1] Current samples, in fixed Q15
331  * y, n            [0..n-1] Output `n` processed samples, in fixed Q14
332  *
333 * The `x` vector is aligned on 32 bits
334 */
335 #ifndef resample_48k_12k8
resample_48k_12k8(struct lc3_ltpf_hp50_state * hp50,const int16_t * x,int16_t * y,int n)336 LC3_HOT static void resample_48k_12k8(
337     struct lc3_ltpf_hp50_state *hp50, const int16_t *x, int16_t *y, int n)
338 {
339     resample_x192k_12k8(4, h_48k_12k8_q15, hp50, x, y, n);
340 }
341 #endif /* resample_48k_12k8 */
342 
343 /**
344 * Resample to 6.4 KHz
345 * x               [-3..-1] Previous, [0..n-1] Current samples
346 * y, n            [0..n-1] Output `n` processed samples
347 *
348 * The `x` vector is aligned on 32 bits
349  */
350 #ifndef resample_6k4
resample_6k4(const int16_t * x,int16_t * y,int n)351 LC3_HOT static void resample_6k4(const int16_t *x, int16_t *y, int n)
352 {
353     static const int16_t h[] = { 18477, 15424, 8105 };
354     const int16_t *ye = y + n;
355 
356     for (x--; y < ye; x += 2)
357         *(y++) = (x[0] * h[0] + (x[-1] + x[1]) * h[1]
358                               + (x[-2] + x[2]) * h[2]) >> 16;
359 }
360 #endif /* resample_6k4 */
361 
362 /**
363  * LTPF Resample to 12.8 KHz implementations for each samplerates
364  */
365 
366 static void (* const resample_12k8[])
367     (struct lc3_ltpf_hp50_state *, const int16_t *, int16_t *, int ) =
368 {
369     [LC3_SRATE_8K ] = resample_8k_12k8,
370     [LC3_SRATE_16K] = resample_16k_12k8,
371     [LC3_SRATE_24K] = resample_24k_12k8,
372     [LC3_SRATE_32K] = resample_32k_12k8,
373     [LC3_SRATE_48K] = resample_48k_12k8,
374 };
375 
376 
377 /* ----------------------------------------------------------------------------
378  *  Analysis
379  * -------------------------------------------------------------------------- */
380 
381 /**
382  * Return dot product of 2 vectors
383  * a, b, n         The 2 vectors of size `n` (> 0 and <= 128)
384  * return          sum( a[i] * b[i] ), i = [0..n-1]
385  *
386  * The size `n` of vectors must be multiple of 16, and less or equal to 128
387 */
388 #ifndef dot
dot(const int16_t * a,const int16_t * b,int n)389 LC3_HOT static inline float dot(const int16_t *a, const int16_t *b, int n)
390 {
391     int64_t v = 0;
392 
393     for (int i = 0; i < (n >> 4); i++)
394         for (int j = 0; j < 16; j++)
395             v += *(a++) * *(b++);
396 
397     int32_t v32 = (v + (1 << 5)) >> 6;
398     return (float)v32;
399 }
400 #endif /* dot */
401 
402 /**
403  * Return vector of correlations
404  * a, b, n         The 2 vector of size `n` (> 0 and <= 128)
405  * y, nc           Output the correlation vector of size `nc`
406  *
407  * The first vector `a` is aligned of 32 bits
408  * The size `n` of vectors is multiple of 16, and less or equal to 128
409  */
410 #ifndef correlate
correlate(const int16_t * a,const int16_t * b,int n,float * y,int nc)411 LC3_HOT static void correlate(
412     const int16_t *a, const int16_t *b, int n, float *y, int nc)
413 {
414     for (const float *ye = y + nc; y < ye; )
415         *(y++) = dot(a, b--, n);
416 }
417 #endif /* correlate */
418 
419 /**
420  * Search the maximum value and returns its argument
421  * x, n            The input vector of size `n`
422  * x_max           Return the maximum value
423  * return          Return the argument of the maximum
424  */
argmax(const float * x,int n,float * x_max)425 LC3_HOT static int argmax(const float *x, int n, float *x_max)
426 {
427     int arg = 0;
428 
429     *x_max = x[arg = 0];
430     for (int i = 1; i < n; i++)
431         if (*x_max < x[i])
432             *x_max = x[arg = i];
433 
434     return arg;
435 }
436 
437 /**
438  * Search the maximum weithed value and returns its argument
439  * x, n            The input vector of size `n`
440  * w_incr          Increment of the weight
441  * x_max, xw_max   Return the maximum not weighted value
442  * return          Return the argument of the weigthed maximum
443  */
argmax_weighted(const float * x,int n,float w_incr,float * x_max)444 LC3_HOT static int argmax_weighted(
445     const float *x, int n, float w_incr, float *x_max)
446 {
447     int arg;
448 
449     float xw_max = (*x_max = x[arg = 0]);
450     float w = 1 + w_incr;
451 
452     for (int i = 1; i < n; i++, w += w_incr)
453         if (xw_max < x[i] * w)
454             xw_max = (*x_max = x[arg = i]) * w;
455 
456     return arg;
457 }
458 
459 /**
460  * Interpolate from pitch detected value (3.3.9.8)
461  * x, n            [-2..-1] Previous, [0..n] Current input
462  * d               The phase of interpolation (0 to 3)
463  * return          The interpolated vector
464  *
465  * The size `n` of vectors must be multiple of 4
466  */
interpolate(const int16_t * x,int n,int d,int16_t * y)467 LC3_HOT static void interpolate(const int16_t *x, int n, int d, int16_t *y)
468 {
469     static const int16_t h4_q15[][4] = {
470         { 6877, 19121,  6877,     0 }, { 3506, 18025, 11000,   220 },
471         { 1300, 15048, 15048,  1300 }, {  220, 11000, 18025,  3506 } };
472 
473     const int16_t *h = h4_q15[d];
474     int16_t x3 = x[-2], x2 = x[-1], x1, x0;
475 
476     x1 = (*x++);
477     for (const int16_t *ye = y + n; y < ye; ) {
478         int32_t yn;
479 
480         yn = (x0 = *(x++)) * h[0] + x1 * h[1] + x2 * h[2] + x3 * h[3];
481         *(y++) = yn >> 15;
482 
483         yn = (x3 = *(x++)) * h[0] + x0 * h[1] + x1 * h[2] + x2 * h[3];
484         *(y++) = yn >> 15;
485 
486         yn = (x2 = *(x++)) * h[0] + x3 * h[1] + x0 * h[2] + x1 * h[3];
487         *(y++) = yn >> 15;
488 
489         yn = (x1 = *(x++)) * h[0] + x2 * h[1] + x3 * h[2] + x0 * h[3];
490         *(y++) = yn >> 15;
491     }
492 }
493 
494 /**
495  * Interpolate autocorrelation (3.3.9.7)
496  * x               [-4..-1] Previous, [0..4] Current input
497  * d               The phase of interpolation (-3 to 3)
498  * return          The interpolated value
499  */
interpolate_corr(const float * x,int d)500 LC3_HOT static float interpolate_corr(const float *x, int d)
501 {
502     static const float h4[][8] = {
503         {  1.53572770e-02, -4.72963246e-02,  8.35788573e-02,  8.98638285e-01,
504            8.35788573e-02, -4.72963246e-02,  1.53572770e-02,                 },
505         {  2.74547165e-03,  4.59833449e-03, -7.54404636e-02,  8.17488686e-01,
506            3.30182571e-01, -1.05835916e-01,  2.86823405e-02, -2.87456116e-03 },
507         { -3.00125103e-03,  2.95038503e-02, -1.30305021e-01,  6.03297008e-01,
508            6.03297008e-01, -1.30305021e-01,  2.95038503e-02, -3.00125103e-03 },
509         { -2.87456116e-03,  2.86823405e-02, -1.05835916e-01,  3.30182571e-01,
510            8.17488686e-01, -7.54404636e-02,  4.59833449e-03,  2.74547165e-03 },
511     };
512 
513     const float *h = h4[(4+d) % 4];
514 
515     float y = d < 0 ? x[-4] * *(h++) :
516               d > 0 ? x[ 4] * *(h+7) : 0;
517 
518     y += x[-3] * h[0] + x[-2] * h[1] + x[-1] * h[2] + x[0] * h[3] +
519          x[ 1] * h[4] + x[ 2] * h[5] + x[ 3] * h[6];
520 
521     return y;
522 }
523 
524 /**
525  * Pitch detection algorithm (3.3.9.5-6)
526  * ltpf            Context of analysis
527  * x, n            [-114..-17] Previous, [0..n-1] Current 6.4KHz samples
528  * tc              Return the pitch-lag estimation
529  * return          True when pitch present
530  *
531  * The `x` vector is aligned on 32 bits
532  */
detect_pitch(struct lc3_ltpf_analysis * ltpf,const int16_t * x,int n,int * tc)533 static bool detect_pitch(
534     struct lc3_ltpf_analysis *ltpf, const int16_t *x, int n, int *tc)
535 {
536     float rm1, rm2;
537     float r[98];
538 
539     const int r0 = 17, nr = 98;
540     int k0 = LC3_MAX(   0, ltpf->tc-4);
541     int nk = LC3_MIN(nr-1, ltpf->tc+4) - k0 + 1;
542 
543     correlate(x, x - r0, n, r, nr);
544 
545     int t1 = argmax_weighted(r, nr, -.5f/(nr-1), &rm1);
546     int t2 = k0 + argmax(r + k0, nk, &rm2);
547 
548     const int16_t *x1 = x - (r0 + t1);
549     const int16_t *x2 = x - (r0 + t2);
550 
551     float nc1 = rm1 <= 0 ? 0 :
552         rm1 / sqrtf(dot(x, x, n) * dot(x1, x1, n));
553 
554     float nc2 = rm2 <= 0 ? 0 :
555         rm2 / sqrtf(dot(x, x, n) * dot(x2, x2, n));
556 
557     int t1sel = nc2 <= 0.85f * nc1;
558     ltpf->tc = (t1sel ? t1 : t2);
559 
560     *tc = r0 + ltpf->tc;
561     return (t1sel ? nc1 : nc2) > 0.6f;
562 }
563 
564 /**
565  * Pitch-lag parameter (3.3.9.7)
566  * x, n            [-232..-28] Previous, [0..n-1] Current 12.8KHz samples, Q14
567  * tc              Pitch-lag estimation
568  * pitch           The pitch value, in fixed .4
569  * return          The bitstream pitch index value
570  *
571  * The `x` vector is aligned on 32 bits
572  */
refine_pitch(const int16_t * x,int n,int tc,int * pitch)573 static int refine_pitch(const int16_t *x, int n, int tc, int *pitch)
574 {
575     float r[17], rm;
576     int e, f;
577 
578     int r0 = LC3_MAX( 32, 2*tc - 4);
579     int nr = LC3_MIN(228, 2*tc + 4) - r0 + 1;
580 
581     correlate(x, x - (r0 - 4), n, r, nr + 8);
582 
583     e = r0 + argmax(r + 4, nr, &rm);
584     const float *re = r + (e - (r0 - 4));
585 
586     float dm = interpolate_corr(re, f = 0);
587     for (int i = 1; i <= 3; i++) {
588         float d;
589 
590         if (e >= 127 && ((i & 1) | (e >= 157)))
591             continue;
592 
593         if ((d = interpolate_corr(re, i)) > dm)
594             dm = d, f = i;
595 
596         if (e > 32 && (d = interpolate_corr(re, -i)) > dm)
597             dm = d, f = -i;
598     }
599 
600     e -=   (f < 0);
601     f += 4*(f < 0);
602 
603     *pitch = 4*e + f;
604     return e < 127 ? 4*e +  f       - 128 :
605            e < 157 ? 2*e + (f >> 1) + 126 : e + 283;
606 }
607 
608 /**
609  * LTPF Analysis
610  */
lc3_ltpf_analyse(enum lc3_dt dt,enum lc3_srate sr,struct lc3_ltpf_analysis * ltpf,const int16_t * x,struct lc3_ltpf_data * data)611 bool lc3_ltpf_analyse(
612     enum lc3_dt dt, enum lc3_srate sr, struct lc3_ltpf_analysis *ltpf,
613     const int16_t *x, struct lc3_ltpf_data *data)
614 {
615     /* --- Resampling to 12.8 KHz --- */
616 
617     int z_12k8 = sizeof(ltpf->x_12k8) / sizeof(*ltpf->x_12k8);
618     int n_12k8 = dt == LC3_DT_7M5 ? 96 : 128;
619 
620     memmove(ltpf->x_12k8, ltpf->x_12k8 + n_12k8,
621         (z_12k8 - n_12k8) * sizeof(*ltpf->x_12k8));
622 
623     int16_t *x_12k8 = ltpf->x_12k8 + (z_12k8 - n_12k8);
624 
625     resample_12k8[sr](&ltpf->hp50, x, x_12k8, n_12k8);
626 
627     x_12k8 -= (dt == LC3_DT_7M5 ? 44 :  24);
628 
629     /* --- Resampling to 6.4 KHz --- */
630 
631     int z_6k4 = sizeof(ltpf->x_6k4) / sizeof(*ltpf->x_6k4);
632     int n_6k4 = n_12k8 >> 1;
633 
634     memmove(ltpf->x_6k4, ltpf->x_6k4 + n_6k4,
635         (z_6k4 - n_6k4) * sizeof(*ltpf->x_6k4));
636 
637     int16_t *x_6k4 = ltpf->x_6k4 + (z_6k4 - n_6k4);
638 
639     resample_6k4(x_12k8, x_6k4, n_6k4);
640 
641     /* --- Pitch detection --- */
642 
643     int tc, pitch = 0;
644     float nc = 0;
645 
646     bool pitch_present = detect_pitch(ltpf, x_6k4, n_6k4, &tc);
647 
648     if (pitch_present) {
649         int16_t u[128], v[128];
650 
651         data->pitch_index = refine_pitch(x_12k8, n_12k8, tc, &pitch);
652 
653         interpolate(x_12k8, n_12k8, 0, u);
654         interpolate(x_12k8 - (pitch >> 2), n_12k8, pitch & 3, v);
655 
656         nc = dot(u, v, n_12k8) / sqrtf(dot(u, u, n_12k8) * dot(v, v, n_12k8));
657     }
658 
659     /* --- Activation --- */
660 
661      if (ltpf->active) {
662         int pitch_diff =
663             LC3_MAX(pitch, ltpf->pitch) - LC3_MIN(pitch, ltpf->pitch);
664         float nc_diff = nc - ltpf->nc[0];
665 
666         data->active = pitch_present &&
667             ((nc > 0.9f) || (nc > 0.84f && pitch_diff < 8 && nc_diff > -0.1f));
668 
669      } else {
670          data->active = pitch_present &&
671             ( (dt == LC3_DT_10M || ltpf->nc[1] > 0.94f) &&
672               (ltpf->nc[0] > 0.94f && nc > 0.94f) );
673      }
674 
675      ltpf->active = data->active;
676      ltpf->pitch = pitch;
677      ltpf->nc[1] = ltpf->nc[0];
678      ltpf->nc[0] = nc;
679 
680      return pitch_present;
681 }
682 
683 
684 /* ----------------------------------------------------------------------------
685  *  Synthesis
686  * -------------------------------------------------------------------------- */
687 
688 /**
689  * Width of synthesis filter
690  */
691 
692 #define FILTER_WIDTH(sr) \
693   LC3_MAX(4, LC3_SRATE_KHZ(sr) / 4)
694 
695 #define MAX_FILTER_WIDTH \
696   FILTER_WIDTH(LC3_NUM_SRATE)
697 
698 
699 /**
700  * Synthesis filter template
701  * xh, nh          History ring buffer of filtered samples
702  * lag             Lag parameter in the ring buffer
703  * x0              w-1 previous input samples
704  * x, n            Current samples as input, filtered as output
705  * c, w            Coefficients `den` then `num`, and width of filter
706  * fade            Fading mode of filter  -1: Out  1: In  0: None
707  */
synthesize_template(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,const int w,int fade)708 LC3_HOT static inline void synthesize_template(
709     const float *xh, int nh, int lag,
710     const float *x0, float *x, int n,
711     const float *c, const int w, int fade)
712 {
713     float g = (float)(fade <= 0);
714     float g_incr = (float)((fade > 0) - (fade < 0)) / n;
715     float u[MAX_FILTER_WIDTH];
716 
717     /* --- Load previous samples --- */
718 
719     lag += (w >> 1);
720 
721     const float *y = x - xh < lag ? x + (nh - lag) : x - lag;
722     const float *y_end = xh + nh - 1;
723 
724     for (int j = 0; j < w-1; j++) {
725 
726         u[j] = 0;
727 
728         float yi = *y, xi = *(x0++);
729         y = y < y_end ? y + 1 : xh;
730 
731         for (int k = 0; k <= j; k++)
732             u[j-k] -= yi * c[k];
733 
734         for (int k = 0; k <= j; k++)
735             u[j-k] += xi * c[w+k];
736     }
737 
738     u[w-1] = 0;
739 
740     /* --- Process by filter length --- */
741 
742     for (int i = 0; i < n; i += w)
743         for (int j = 0; j < w; j++, g += g_incr) {
744 
745             float yi = *y, xi = *x;
746             y = y < y_end ? y + 1 : xh;
747 
748             for (int k = 0; k < w; k++)
749                 u[(j+(w-1)-k)%w] -= yi * c[k];
750 
751             for (int k = 0; k < w; k++)
752                 u[(j+(w-1)-k)%w] += xi * c[w+k];
753 
754             *(x++) = xi - g * u[j];
755             u[j] = 0;
756         }
757 }
758 
759 /**
760  * Synthesis filter for each samplerates (width of filter)
761  */
762 
763 
synthesize_4(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)764 LC3_HOT static void synthesize_4(const float *xh, int nh, int lag,
765     const float *x0, float *x, int n, const float *c, int fade)
766 {
767     synthesize_template(xh, nh, lag, x0, x, n, c, 4, fade);
768 }
769 
synthesize_6(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)770 LC3_HOT static void synthesize_6(const float *xh, int nh, int lag,
771     const float *x0, float *x, int n, const float *c, int fade)
772 {
773     synthesize_template(xh, nh, lag, x0, x, n, c, 6, fade);
774 }
775 
synthesize_8(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)776 LC3_HOT static void synthesize_8(const float *xh, int nh, int lag,
777     const float *x0, float *x, int n, const float *c, int fade)
778 {
779     synthesize_template(xh, nh, lag, x0, x, n, c, 8, fade);
780 }
781 
synthesize_12(const float * xh,int nh,int lag,const float * x0,float * x,int n,const float * c,int fade)782 LC3_HOT static void synthesize_12(const float *xh, int nh, int lag,
783     const float *x0, float *x, int n, const float *c, int fade)
784 {
785     synthesize_template(xh, nh, lag, x0, x, n, c, 12, fade);
786 }
787 
788 static void (* const synthesize[])(const float *, int, int,
789     const float *, float *, int, const float *, int) =
790 {
791     [LC3_SRATE_8K ] = synthesize_4,
792     [LC3_SRATE_16K] = synthesize_4,
793     [LC3_SRATE_24K] = synthesize_6,
794     [LC3_SRATE_32K] = synthesize_8,
795     [LC3_SRATE_48K] = synthesize_12,
796 };
797 
798 
799 /**
800  * LTPF Synthesis
801  */
lc3_ltpf_synthesize(enum lc3_dt dt,enum lc3_srate sr,int nbytes,lc3_ltpf_synthesis_t * ltpf,const lc3_ltpf_data_t * data,const float * xh,float * x)802 void lc3_ltpf_synthesize(enum lc3_dt dt, enum lc3_srate sr, int nbytes,
803     lc3_ltpf_synthesis_t *ltpf, const lc3_ltpf_data_t *data,
804     const float *xh, float *x)
805 {
806     int nh = LC3_NH(dt, sr);
807     int dt_us = LC3_DT_US(dt);
808 
809     /* --- Filter parameters --- */
810 
811     int p_idx = data ? data->pitch_index : 0;
812     int pitch =
813         p_idx >= 440 ? (((p_idx     ) - 283) << 2)  :
814         p_idx >= 380 ? (((p_idx >> 1) -  63) << 2) + (((p_idx & 1)) << 1) :
815                        (((p_idx >> 2) +  32) << 2) + (((p_idx & 3)) << 0)  ;
816 
817     pitch = (pitch * LC3_SRATE_KHZ(sr) * 10 + 64) / 128;
818 
819     int nbits = (nbytes*8 * 10000 + (dt_us/2)) / dt_us;
820     int g_idx = LC3_MAX(nbits / 80, 3 + (int)sr) - (3 + sr);
821     bool active = data && data->active && g_idx < 4;
822 
823     int w = FILTER_WIDTH(sr);
824     float c[2 * MAX_FILTER_WIDTH];
825 
826     for (int i = 0; i < w; i++) {
827         float g = active ? 0.4f - 0.05f * g_idx : 0;
828         c[  i] = g * lc3_ltpf_cden[sr][pitch & 3][(w-1)-i];
829         c[w+i] = 0.85f * g * lc3_ltpf_cnum[sr][LC3_MIN(g_idx, 3)][(w-1)-i];
830     }
831 
832     /* --- Transition handling --- */
833 
834     int ns = LC3_NS(dt, sr);
835     int nt = ns / (3 + dt);
836     float x0[MAX_FILTER_WIDTH];
837 
838     if (active)
839         memcpy(x0, x + nt-(w-1), (w-1) * sizeof(float));
840 
841     if (!ltpf->active && active)
842         synthesize[sr](xh, nh, pitch/4, ltpf->x, x, nt, c, 1);
843     else if (ltpf->active && !active)
844         synthesize[sr](xh, nh, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
845     else if (ltpf->active && active && ltpf->pitch == pitch)
846         synthesize[sr](xh, nh, pitch/4, ltpf->x, x, nt, c, 0);
847     else if (ltpf->active && active) {
848         synthesize[sr](xh, nh, ltpf->pitch/4, ltpf->x, x, nt, ltpf->c, -1);
849         synthesize[sr](xh, nh, pitch/4,
850             (x <= xh ? x + nh : x) - (w-1), x, nt, c, 1);
851     }
852 
853     /* --- Remainder --- */
854 
855     memcpy(ltpf->x, x + ns - (w-1), (w-1) * sizeof(float));
856 
857     if (active)
858         synthesize[sr](xh, nh, pitch/4, x0, x + nt, ns-nt, c, 0);
859 
860     /* --- Update state --- */
861 
862     ltpf->active = active;
863     ltpf->pitch = pitch;
864     memcpy(ltpf->c, c, 2*w * sizeof(*ltpf->c));
865 }
866 
867 
868 /* ----------------------------------------------------------------------------
869  *  Bitstream data
870  * -------------------------------------------------------------------------- */
871 
872 /**
873  * LTPF disable
874  */
lc3_ltpf_disable(struct lc3_ltpf_data * data)875 void lc3_ltpf_disable(struct lc3_ltpf_data *data)
876 {
877     data->active = false;
878 }
879 
880 /**
881  * Return number of bits coding the bitstream data
882  */
lc3_ltpf_get_nbits(bool pitch)883 int lc3_ltpf_get_nbits(bool pitch)
884 {
885     return 1 + 10 * pitch;
886 }
887 
888 /**
889  * Put bitstream data
890  */
lc3_ltpf_put_data(lc3_bits_t * bits,const struct lc3_ltpf_data * data)891 void lc3_ltpf_put_data(lc3_bits_t *bits,
892     const struct lc3_ltpf_data *data)
893 {
894     lc3_put_bit(bits, data->active);
895     lc3_put_bits(bits, data->pitch_index, 9);
896 }
897 
898 /**
899  * Get bitstream data
900  */
lc3_ltpf_get_data(lc3_bits_t * bits,struct lc3_ltpf_data * data)901 void lc3_ltpf_get_data(lc3_bits_t *bits, struct lc3_ltpf_data *data)
902 {
903     data->active = lc3_get_bit(bits);
904     data->pitch_index = lc3_get_bits(bits, 9);
905 }
906