1#
2# Copyright 2022 Google LLC
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16
17import numpy as np
18import scipy.signal as signal
19
20import lc3
21import tables as T, appendix_c as C
22
23### ------------------------------------------------------------------------ ###
24
class Resampler_12k8:
    """Reference model of the first LTPF resampling stage.

    Each call to `resample()` takes one frame of input, computes `n` new
    samples as dot products between a sliding input window and strided
    entries of the `T.LTPF_H12K8` table, then runs a second-order recursive
    filter (labelled high-pass in the code) over them.  The class name
    suggests a 12.8 kHz output rate.

    Layout of the output buffer `y`:
      y[:d]     last `d` samples of the previous call
      y[d:d+n]  samples produced by the current call
      y[d+n:]   `history` older samples, stored so that negative indices
                continue backwards in time from y[0]
    """

    def __init__(self, dt, sr, history = 0):
        """Allocate the sliding buffers.

        dt, sr  : frame duration / sample rate indices into the `T` tables
        history : number of extra past output samples kept at the tail of `y`
        """

        self.sr = sr

        # Stride through the coefficient table, and input window span
        self.p = 192 // T.SRATE_KHZ[sr]
        self.w = 240 // self.p

        # Output samples per frame, and samples carried over between frames
        self.n = ((T.DT_MS[dt] * 128) / 10).astype(int)
        self.d = 44 if dt == T.DT_7M5 else 24

        self.x = np.zeros(self.w + T.NS[dt][sr])
        self.u = np.zeros(self.n + 2)
        self.y = np.zeros(self.n + self.d + history)

    def resample(self, x):
        """Process the input frame `x` and return the updated buffer `y`.

        The returned array is the internal buffer itself (not a copy); it
        is overwritten by the next call.
        """

        stride, span = self.p, self.w
        delay, count = self.d, self.n

        win = self.x
        out = self.y

        ### Sliding window

        win[:span] = win[-span:]
        win[span:] = x
        self.u[:2] = self.u[-2:]

        size = len(out)
        if size > 2*count + delay:
            out[count+delay:-count] = out[delay+2*count:]
        if size > count + delay:
            out[-count:] = out[:count]
        out[:delay] = out[count:delay+count]

        u = self.u

        ### Resampling

        # Rotate the table so that entry 119 sits at index 0 ; the negative
        # indices given to `np.take` then wrap to the leading entries.
        taps = np.zeros(240 + stride)
        taps[-119:] = T.LTPF_H12K8[:119]
        taps[ :120] = T.LTPF_H12K8[119:]

        base = np.arange(-120, 120 + stride, stride)

        for i in range(count):
            start, phase = divmod(15 * i, stride)
            u[2+i] = stride * np.dot(
                win[start:start+span+1], np.take(taps, base - phase) )

        # Scaled copy only : `self.u` keeps the unscaled values, so the two
        # history samples are scaled again, consistently, on the next call.
        if self.sr == T.SRATE_8K:
            u = 0.5 * u

        ### High-pass filtering

        b0, b1, b2 = 0.9827947082978771, -1.9655894165957540, 0.9827947082978771
        a1, a2     = -1.9652933726226904, 0.9658854605688177

        out[delay:delay+count] = b0 * u[2:] + b1 * u[1:-1] + b2 * u[:-2]
        for j in range(delay, delay + count):
            out[j] -= a1 * out[j-1] + a2 * out[j-2]

        return out
87
88
class Resampler_6k4:
    """Decimate-by-two stage applied to the output of `Resampler_12k8`.

    Every output sample is the dot product of 5 consecutive input samples
    with a fixed symmetric kernel, advancing by 2 input samples each time.
    The newest `n` outputs are stored in y[:n] ; the `history` older ones
    live at the tail of `y`, so that negative indices continue backwards in
    time from y[0].
    """

    def __init__(self, n, history = 0):
        """Allocate the buffers.

        n       : number of input samples per frame
        history : number of extra past output samples kept at the tail of `y`
        """

        self.n = n // 2

        self.x = np.zeros(n + 5)
        self.y = np.zeros(self.n + history)

    def resample(self, x):
        """Consume the first 2*n + 2 samples of `x`, return the buffer `y`.

        The returned array is the internal buffer itself (not a copy).
        """

        count = self.n
        win = self.x
        out = self.y

        ### Sliding window

        win[:3] = win[-5:-2]
        win[3:] = x[:2*count+2]

        size = len(out)
        if size > 2*count:
            out[count:-count] = out[2*count:]
        if size > count:
            out[-count:] = out[:count]

        ### Downsampling to 6.4 KHz

        kernel = [ 0.1236796411180537, 0.2353512128364889, 0.2819382920909148,
                   0.2353512128364889, 0.1236796411180537 ]

        for i in range(count):
            out[i] = np.dot(win[2*i:2*i+5], kernel)

        return out
120
121
def initial_hp50_state():
    """Return a new state dictionary with entries 's1' and 's2' set to 0.

    It is handed as the filter state argument to `lc3.ltpf_resample()`.
    """
    return dict.fromkeys(('s1', 's2'), 0)
124
125### ------------------------------------------------------------------------ ###
126
class Ltpf:
    """Base class shared by `LtpfAnalysis` and `LtpfSynthesis`.

    Stores the frame duration and sample rate indices ; the pitch
    parameters are left unset (None) until a subclass provides them.
    """

    def __init__(self, dt, sr):

        self.dt, self.sr = dt, sr

        self.pitch_present = None
        self.pitch_index = None
135
136
class LtpfAnalysis(Ltpf):
    """Reference model of the encoder side LTPF analysis.

    For each frame, `run()` decides whether a pitch is present, derives the
    pitch index to transmit, and updates the activation flag.
    """

    def __init__(self, dt, sr):
        """Create the two resampling stages and reset the analysis state."""

        super().__init__(dt, sr)

        # More history is kept when dt is T.DT_2M5, where the analysis
        # windows start before the current frame (see `run()`).
        short_frame = (dt == T.DT_2M5)

        self.resampler_12k8 = Resampler_12k8(
            dt, sr, history = 232 + (32 if short_frame else 0))

        self.resampler_6k4 = Resampler_6k4(
            self.resampler_12k8.n, history = 114 + (16 if short_frame else 0))

        self.active = False
        self.tc = 0
        self.pitch = 0
        self.nc = np.zeros(2)

    def get_data(self):
        """Return the side information produced by the last `run()`."""

        data = { 'active' : self.active }
        data['pitch_index'] = self.pitch_index
        return data

    def get_nbits(self):
        """Return the bit count of the side information (1 or 11)."""

        pitch_bits = 10 * int(self.pitch_present)
        return 1 + pitch_bits

    def correlate(self, x, i0, n, k0, k1):
        """Correlate x[i0:n] with its copies delayed by k0 to k1 (inclusive).

        Negative indices wrap to the tail of `x`, where the resamplers keep
        their history.  Returns an array of k1 - k0 + 1 values.
        """

        pos = np.arange(i0, n)
        ref = np.take(x, pos)

        return np.array([
            np.dot(ref, np.take(x, pos - lag)) for lag in range(k0, 1+k1) ])

    def norm_corr(self, x, i0, n, k):
        """Normalized correlation of x[i0:n] with its copy delayed by `k`.

        Returns 0 when the cross-correlation is not strictly positive.
        """

        pos = np.arange(i0, n)
        cur = np.take(x, pos)
        lag = np.take(x, pos - k)

        cross = np.dot(cur, lag)
        if cross > 0:
            return cross / np.sqrt(np.dot(cur, cur) * np.dot(lag, lag))
        return 0

    def run(self, x):
        """Analyse the frame `x` and update the pitch / activation state.

        Returns the `pitch_present` flag ; the remaining results are
        available through `get_data()`.
        """

        ### Resampling

        x_12k8 = self.resampler_12k8.resample(x)
        x_6k4  = self.resampler_6k4.resample(x_12k8)

        # The analysis windows start at 0 when dt is above T.DT_2M5,
        # otherwise they reach back before the current frame.
        long_frame = self.dt > T.DT_2M5

        start_6k4  = 0 if long_frame else -16
        start_12k8 = 0 if long_frame else -32

        n_6k4  = self.resampler_6k4.n
        n_12k8 = self.resampler_12k8.n

        ### Pitch detection algorithm

        r  = self.correlate(x_6k4, start_6k4, n_6k4, 17, 114)
        rw = r * (1 - 0.5 * np.arange(len(r)) / (len(r) - 1))

        # First candidate from the weighted correlation, second one
        # searched around the lag retained on the previous frame.
        lo = max(0, self.tc - 4)
        hi = min(len(r) - 1, self.tc + 4)
        lags = [ 17 + np.argmax(rw), 17 + lo + np.argmax(r[lo:1+hi]) ]

        lag_nc = [ self.norm_corr(x_6k4, start_6k4, n_6k4, lag)
                       for lag in lags ]
        pick = int(lag_nc[1] > 0.85 * lag_nc[0])

        self.tc = lags[pick] - 17
        self.pitch_present = bool(lag_nc[pick] > 0.6)

        ### Pitch-lag parameter

        if not self.pitch_present:
            e = f = 0
            self.pitch_index = 0

        else:
            tc = self.tc + 17

            k0 = max( 32, 2*tc-4)
            k1 = min(228, 2*tc+4)
            r  = self.correlate(x_12k8, start_12k8, n_12k8, k0-4, k1+4)
            e  = k0 + np.argmax(r[4:-4])

            # Rotated copy of the table : negative indices wrap
            h = np.zeros(42)
            h[-15:] = T.LTPF_H4[:15]
            h[ :16] = T.LTPF_H4[15:]

            m = np.arange(-4, 5)
            center = np.take(r, e-k0+4 + m)
            s = [ np.dot( center, np.take(h, 4*m-d) ) for d in range(-3, 4) ]

            # The fractional search range depends on the integer lag
            if e <= 32:
                f = np.argmax(s[3:])
            elif e < 127:
                f = -3 + np.argmax(s)
            elif e < 157:
                f = -2 + 2*np.argmax(s[1:-1:2])
            else:
                f = 0

            # Fold a negative fraction onto the previous integer lag
            e -=   (f < 0)
            f += 4*(f < 0)

            if e < 127:
                self.pitch_index = 4*e + f - 128
            elif e < 157:
                self.pitch_index = 2*e + f//2 + 126
            else:
                self.pitch_index = e + 283

        ### Activation bit

        h = np.zeros(24)
        h[-7:] = T.LTPF_HI[:7]
        h[ :8] = T.LTPF_HI[7:]

        k = np.arange(-2, 3)
        h_cur = np.take(h, 4*k)
        h_lag = np.take(h, 4*k-f)

        u = [ np.dot( np.take(x_12k8, i-k), h_cur ) \
                  for i in range(start_12k8, n_12k8) ]
        v = [ np.dot( np.take(x_12k8, i-k), h_lag ) \
                  for i in range(start_12k8-e, n_12k8-e) ]

        if self.pitch_present:
            nc = max(0, np.dot(u, v)) / np.sqrt(np.dot(u, u) * np.dot(v, v))
        else:
            nc = 0

        pitch = e + f/4

        if self.active:
            dp = abs(pitch - self.pitch)
            dc = nc - self.nc[0]
            active = nc > 0.9 or (dp < 2 and dc > -0.1 and nc > 0.84)

        else:
            active = (self.dt == T.DT_10M or self.nc[1] > 0.94) \
                     and self.nc[0] > 0.94 and nc > 0.94

        if not self.pitch_present:
            active = False
            pitch = 0
            nc = 0

        self.active = active
        self.pitch = pitch
        self.nc[1] = self.nc[0]
        self.nc[0] = nc

        return self.pitch_present

    def disable(self):
        """Clear the activation flag."""

        self.active = False

    def store(self, b):
        """Write the activation flag (1 bit) and pitch index (9 bits)."""

        b.write_uint(self.active, 1)
        b.write_uint(self.pitch_index, 9)
288
289
class LtpfSynthesis(Ltpf):
    """Reference model of the decoder side LTPF synthesis filter.

    Filter parameters are kept in two-entry lists : entry 0 holds the value
    of the current frame, entry 1 the value of the previous frame.
    """

    # Coefficient tables, indexed by sample rate
    C_N = [ T.LTPF_N_8K , T.LTPF_N_16K,
            T.LTPF_N_24K, T.LTPF_N_32K, T.LTPF_N_48K ]

    C_D = [ T.LTPF_D_8K , T.LTPF_D_16K,
            T.LTPF_D_24K, T.LTPF_D_32K, T.LTPF_D_48K ]

    def __init__(self, dt, sr):
        """Select the tables for `sr` and reset the filter memories."""

        super().__init__(dt, sr)

        # Instance attributes shadowing the class tables with the entry
        # of the selected sample rate.
        self.C_N = LtpfSynthesis.C_N[sr]
        self.C_D = LtpfSynthesis.C_D[sr]

        self.active = [ False, False ]
        self.pitch_index = 0

        # Largest lag of 228 samples at 12.8 KHz, scaled to the sample rate
        max_pitch = np.ceil(228 * T.SRATE_KHZ[self.sr] / 12.8).astype(int)

        self.x = np.zeros(T.NS[dt][sr])
        self.y = np.zeros(max_pitch + len(self.C_D[0]))

        self.p_e = [ 0, 0 ]
        self.p_f = [ 0, 0 ]
        self.c_n = [ None, None ]
        self.c_d = [ None, None ]

    def load(self, b):
        """Read the activation flag (1 bit) and pitch index (9 bits)."""

        self.active[0] = bool(b.read_uint(1))
        self.pitch_index = b.read_uint(9)

    def disable(self):
        """Turn the filter off for the current frame, clear the pitch index."""

        self.active[0] = False
        self.pitch_index = 0

    def run(self, x, nbytes):
        """Filter the frame `x`, coded on `nbytes` bytes, return the output.

        The output has the same number of samples as the frame ; the filter
        memories and the "previous frame" parameters are updated.
        """

        sr = self.sr
        dt = self.dt

        ### Filter parameters

        pitch_index = self.pitch_index

        if pitch_index < 380:
            p_e = pitch_index // 4 + 32
            p_f = pitch_index - 4*(p_e - 32)
        elif pitch_index < 440:
            p_e = pitch_index // 2 - 63
            p_f = 2*(pitch_index - 2*(p_e + 63))
        else:
            p_e = pitch_index - 283
            p_f = 0

        # Lag scaled to the sample rate, rounded to a quarter of sample
        p = (p_e + p_f / 4) * T.SRATE_KHZ[self.sr] / 12.8

        self.p_e[0], self.p_f[0] = divmod(int(p * 4 + 0.5), 4)

        nbits = round(nbytes*8 * 10 / T.DT_MS[dt])
        if dt == T.DT_2M5:
            nbits = int(nbits * (1 - 0.4))
        elif dt == T.DT_5M:
            nbits = nbits - 160

        g_idx = max(nbits // 80, 3+sr) - (3+sr)

        gains = [ 0.4, 0.35, 0.3, 0.25 ]
        g = gains[g_idx] if g_idx < 4 else 0
        g_idx = min(g_idx, 3)

        self.c_n[0] = 0.85 * g * LtpfSynthesis.C_N[sr][g_idx]
        self.c_d[0] = g * LtpfSynthesis.C_D[sr][self.p_f[0]]

        ### Transition handling

        n0 = (T.SRATE_KHZ[sr] * 1000) // 400
        ns = T.NS[dt][sr]

        # The memories are appended after the frame, so that negative
        # indices reach the samples of the previous frames.
        x  = np.append(x, self.x)
        y  = np.append(np.zeros(ns), self.y)
        yc = y.copy()

        c_n = self.c_n
        c_d = self.c_d

        i_n = np.arange(len(c_n[0]))
        i_d = np.arange(len(c_d[0]))

        half = (len(c_d[0]) - 1) // 2
        d = [ p_e - half for p_e in self.p_e ]

        def correction(i, src, k):
            """Filter term at sample `k`, with the parameters set `i`
               (0: current, 1: previous) and `src` as the direct input"""
            return np.dot(c_n[i], np.take(src, k - i_n)) - \
                   np.dot(c_d[i], np.take(y, k - d[i] - i_d))

        cur, old = self.active
        same_pitch = self.p_e[0] == self.p_e[1] and \
                     self.p_f[0] == self.p_f[1]

        for k in range(n0):

            if not cur and not old:
                y[k] = x[k]

            elif cur and not old:
                # Fade-in of the current filter
                y[k] = x[k] - (k/n0) * correction(0, x, k)

            elif old and not cur:
                # Fade-out of the previous filter
                y[k] = x[k] - (1 - k/n0) * correction(1, x, k)

            elif same_pitch:
                y[k] = x[k] - correction(0, x, k)

            else:
                # Fade-out of the previous filter into `yc`, followed by
                # the fade-in of the current filter applied on `yc`
                yc[k] = x[k] - (1 - k/n0) * correction(1, x, k)
                y[k] = yc[k] - (k/n0) * correction(0, yc, k)

        ### Remainder of the frame

        for k in range(n0, ns):

            if cur:
                y[k] = x[k] - correction(0, x, k)
            else:
                y[k] = x[k]

        ### Sliding window

        self.active[1] = self.active[0]
        self.p_e[1] = self.p_e[0]
        self.p_f[1] = self.p_f[0]
        self.c_n[1] = self.c_n[0]
        self.c_d[1] = self.c_d[0]

        out = y[:ns]

        self.x = x[:ns]
        self.y = np.append(self.y[ns:], out)

        return out
441
def initial_state():
    """Return a new, zeroed analysis state for `lc3.ltpf_analyse()`."""

    state = { 'active' : False, 'pitch': 0, 'nc':  np.zeros(2) }

    state['hp50'] = initial_hp50_state()
    state['x_12k8'] = np.zeros(384)
    state['x_6k4'] = np.zeros(178)
    state['tc'] = 0

    return state
446
def initial_sstate():
    """Return a new, zeroed synthesis state for `lc3.ltpf_synthesize()`."""

    state = { 'active': False, 'pitch': 0 }

    state['c'] = np.zeros(2*12)
    state['x'] = np.zeros(12)

    return state
450
451### ------------------------------------------------------------------------ ###
452
def check_resampler(rng, dt, sr):
    """Compare `Resampler_12k8` with `lc3.ltpf_resample()` on random frames.

    Ten frames drawn from `rng` are processed by both implementations ;
    the `lc3` output is compared with half of the reference output, with
    an absolute tolerance of 4.  Returns a true value on success.
    """

    ns = T.NS[dt][sr]
    nt = (5 * T.SRATE_KHZ[sr]) // 4

    resampler = Resampler_12k8(dt, sr)
    span = resampler.d + resampler.n

    hp50_c = initial_hp50_state()
    x_c = np.zeros(nt)
    y_c = np.zeros(384)

    ok = True

    for _ in range(10):

        x = ((2 * rng.random(ns)) - 1) * (2 ** 15 - 1)
        y = resampler.resample(x)

        # The tested implementation takes 16 bits samples, preceded by
        # the last `nt` samples of the previous frame.
        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        y_c[:-resampler.n] = y_c[resampler.n:]
        y_c = lc3.ltpf_resample(dt, sr, hp50_c, x_c, y_c)

        ok = ok and np.amax(np.abs(y_c[-span:] - y[:span]/2)) < 4

    return ok
477
def check_resampler_appendix_c(dt):
    """Check `lc3.ltpf_resample()` against the `appendix_c` vectors.

    Two consecutive frames of `C.X_PCM` are resampled, at T.SRATE_16K ;
    each output is compared with half of `C.X_TILDE_12K8D`, with an
    absolute tolerance of 2.  Returns a true value on success.
    """

    i0 = dt - T.DT_7M5
    sr = T.SRATE_16K

    nt = (5 * T.SRATE_KHZ[sr]) // 4
    n  = [ 96, 128 ][i0]
    k  = [ 44,  24 ][i0] + n

    state = initial_hp50_state()

    x = np.zeros(nt)
    y = np.zeros(384)

    ok = True

    for frame in range(2):

        # Slide both buffers (no effect on the zeroed buffers of the
        # first frame), then resample the new frame.
        x = np.append(x[-nt:], C.X_PCM[i0][frame])
        y[:-n] = y[n:]
        y = lc3.ltpf_resample(dt, sr, state, x, y)

        expected = C.X_TILDE_12K8D[i0][frame]
        u = y[-k:len(expected)-k]

        ok = ok and np.amax(np.abs(u - expected/2)) < 2

    return ok
506
def check_analysis(rng, dt, sr):
    """Compare `LtpfAnalysis` with `lc3.ltpf_analyse()` on a chirp signal.

    The first 20 frames of a logarithmic chirp (10 Hz to 2500 Hz over 100
    frames) are analysed by both implementations, and the results are
    compared.  `rng` is not used.  Returns a true value on success.
    """

    ns = T.NS[dt][sr]
    nt = (5 * T.SRATE_KHZ[sr]) // 4

    state_c = initial_state()
    x_c = np.zeros(ns+nt)

    ltpf = LtpfAnalysis(dt, sr)

    t = np.arange(100 * ns) / (T.SRATE_KHZ[sr] * 1000)
    s = signal.chirp(t, f0=10, f1=2500, t1=t[-1], method='logarithmic')

    ok = True

    for frame in range(20):

        x = s[frame*ns:(frame+1)*ns] * (2 ** 15 - 1)

        pitch_present = ltpf.run(x)
        data = ltpf.get_data()

        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        (pitch_present_c, data_c) = lc3.ltpf_analyse(dt, sr, state_c, x_c)

        ok = ok \
            and (not pitch_present or state_c['tc'] == ltpf.tc) \
            and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-1 \
            and pitch_present_c == pitch_present \
            and data_c['active'] == data['active'] \
            and data_c['pitch_index'] == data['pitch_index'] \
            and lc3.ltpf_get_nbits(pitch_present) == ltpf.get_nbits()

    return ok
539
def check_synthesis(rng, dt, sr):
    """Compare `LtpfSynthesis` with `lc3.ltpf_synthesize()` on random data.

    Fifty frames are filtered by both implementations, with random filter
    parameters and frame sizes drawn from `rng` ; the outputs shall match
    with an absolute tolerance of 1e-2.  Returns a true value on success.
    """

    ns = T.NS[dt][sr]
    nd = 18 * T.SRATE_KHZ[sr]

    synthesis = LtpfSynthesis(dt, sr)

    state_c = initial_sstate()
    x_c = np.zeros(nd+ns)

    ok = True

    for _ in range(50):

        # Random side information (the draws are kept in this order)
        pitch_present = bool(rng.integers(0, 10) >= 1)

        if pitch_present:
            synthesis.active[0] = bool(rng.integers(0, 5) >= 1)
            synthesis.pitch_index = rng.integers(0, 512)
            data_c = { 'active' : synthesis.active[0],
                       'pitch_index' : synthesis.pitch_index }
        else:
            synthesis.disable()
            data_c = None

        x = rng.random(ns) * 1e4
        nbytes = rng.integers(10*(2+sr), 10*(6+sr))

        # The tested implementation works in place, with the `nd` previous
        # samples stored ahead of the frame.
        x_c[:nd] = x_c[ns:]
        x_c[nd:] = x

        y = synthesis.run(x, nbytes)
        x_c = lc3.ltpf_synthesize(dt, sr, nbytes, state_c, data_c, x_c)

        ok = ok and np.amax(np.abs(x_c[nd:] - y)) < 1e-2

    return ok
577
def check_analysis_appendix_c(dt):
    """Check `lc3.ltpf_analyse()` against the `appendix_c` vectors.

    Two consecutive frames of `C.X_PCM` are analysed, at T.SRATE_16K ; the
    state and results are compared with the expected values of each frame.
    Returns a true value on success.
    """

    i0 = dt - T.DT_7M5
    sr = T.SRATE_16K

    nt = (5 * T.SRATE_KHZ[sr]) // 4

    state = initial_state()
    x = np.zeros(nt)

    ok = True

    for frame in range(2):

        x = np.append(x[-nt:], C.X_PCM[i0][frame])
        (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)

        # The state stores the lag with an offset of 17
        ok = ok \
            and C.T_CURR[i0][frame] - state['tc'] == 17 \
            and np.amax(np.abs(
                    state['nc'][0] - C.NC_LTPF[i0][frame])) < 1e-5 \
            and pitch_present == C.PITCH_PRESENT[i0][frame] \
            and data['pitch_index'] == C.PITCH_INDEX[i0][frame] \
            and data['active'] == C.LTPF_ACTIVE[i0][frame]

    return ok
608
def check_synthesis_appendix_c(dt):
    """Check `lc3.ltpf_synthesize()` against the `appendix_c` vectors.

    Only T.DT_10M is covered, other durations succeed immediately.  Each
    of the four cases (C2 to C5) filters a first frame, then a second one
    whose output shall match the expected transition, with an absolute
    tolerance of 1e-3.  Returns a true value on success.
    """

    sr = T.SRATE_16K

    if dt != T.DT_10M:
        return True

    ns = T.NS[dt][sr]
    nd = 18 * T.SRATE_KHZ[sr]

    # One entry per case : ( nbytes, active, pitch_index, x, prev, trans )
    cases = [
        ( C.LTPF_C2_NBITS // 8, C.LTPF_C2_ACTIVE, C.LTPF_C2_PITCH_INDEX,
          C.LTPF_C2_X, C.LTPF_C2_PREV, C.LTPF_C2_TRANS ),
        ( C.LTPF_C3_NBITS // 8, C.LTPF_C3_ACTIVE, C.LTPF_C3_PITCH_INDEX,
          C.LTPF_C3_X, C.LTPF_C3_PREV, C.LTPF_C3_TRANS ),
        ( C.LTPF_C4_NBITS // 8, C.LTPF_C4_ACTIVE, C.LTPF_C4_PITCH_INDEX,
          C.LTPF_C4_X, C.LTPF_C4_PREV, C.LTPF_C4_TRANS ),
        ( C.LTPF_C5_NBITS // 8, C.LTPF_C5_ACTIVE, C.LTPF_C5_PITCH_INDEX,
          C.LTPF_C5_X, C.LTPF_C5_PREV, C.LTPF_C5_TRANS ),
    ]

    ok = True

    for (nbytes, active, pitch_index, frames, prev, trans) in cases:

        state = initial_sstate()

        # First frame, only there to set up the state
        data = { 'active' : active[0], 'pitch_index' : pitch_index[0] }
        x = np.append(np.zeros(nd), frames[0])

        lc3.ltpf_synthesize(dt, sr, nbytes, state, data, x)

        # Second frame, preceded by the given previous samples
        data = { 'active' : active[1], 'pitch_index' : pitch_index[1] }
        x[  :nd-ns] = prev[0][-nd+ns:]
        x[nd-ns:nd] = prev[1]
        x[nd:nd+ns] = frames[1]

        y = lc3.ltpf_synthesize(dt, sr, nbytes, state, data, x)[nd:]

        ok = ok and np.amax(np.abs(y - trans)) < 1e-3

    return ok
658
def check():
    """Run every LTPF check, return a true value when all of them pass.

    The random checks use a generator seeded with 1234.  Once a check has
    failed, the remaining ones are skipped.
    """

    rng = np.random.default_rng(1234)
    ok = True

    # Random and chirp signals, on every frame duration and sample rate
    for dt in range(T.NUM_DT):
        for sr in range(T.SRATE_8K, T.SRATE_48K + 1):
            ok = ok \
                and check_resampler(rng, dt, sr) \
                and check_analysis(rng, dt, sr) \
                and check_synthesis(rng, dt, sr)

    # Vectors of the `appendix_c` module
    for dt in ( T.DT_7M5, T.DT_10M ):
        ok = ok \
            and check_resampler_appendix_c(dt) \
            and check_analysis_appendix_c(dt) \
            and check_synthesis_appendix_c(dt)

    return ok
676
677### ------------------------------------------------------------------------ ###
678