#
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Python model of the LTPF stages, with checks against the `lc3` binding.

The module provides the 12.8 kHz and 6.4 kHz resamplers, the LTPF analysis
and synthesis models, and `check*` routines comparing these models with the
`lc3` module and with the vectors stored in `appendix_c`.

The `### 3.x.y.z` comments are the section numbers the code follows
(presumably sections of the LC3 specification -- confirm against the
project documentation).
"""

import numpy as np
import scipy.signal as signal

import lc3
import tables as T, appendix_c as C

### ------------------------------------------------------------------------ ###

class Resampler_12k8:
    """Resample input frames to 12.8 kHz, then high-pass filter them.

    State kept between calls:
        x : the last `w` input samples followed by the current input frame.
        u : resampled (not yet high-pass filtered) samples, with 2 samples
            of history in front.
        y : output buffer of `n + d + history` samples (layout described
            in `resample()`).
    """

    def __init__(self, dt, sr, history = 0):
        """Set up the resampler.

        Args:
            dt: Frame duration index; indexes `T.DT_MS`, `T.NS` and the
                local `[ 44, 24 ]` table.
            sr: Sample rate index; indexes `T.SRATE_KHZ` and `T.NS[dt]`.
            history: Number of extra past output samples kept in `y`.
        """

        self.sr = sr
        self.p = 192 // T.SRATE_KHZ[sr]
        self.w = 240 // self.p

        # Output samples per frame: frame duration times 12.8
        # (assuming `T.DT_MS` holds durations in milliseconds, as a
        # NumPy array since `.astype()` is called on the result).
        self.n = ((T.DT_MS[dt] * 128) / 10).astype(int)
        self.d = [ 44, 24 ][dt]

        self.x = np.zeros(self.w + T.NS[dt][sr])
        self.u = np.zeros(self.n + 2)
        self.y = np.zeros(self.n + self.d + history)

    def resample(self, x):
        """Process one input frame and return the internal output buffer.

        The `n` new samples are written at `y[d:d+n]`, while `y[:d]`
        receives the last `d` samples produced by the previous call, so
        `y[:d+n]` is contiguous in time. Older samples are kept, in time
        order, at the end of the buffer (`y[n+d:]`), which makes them
        reachable through negative indices.

        Args:
            x: Input frame; assigned to `self.x[w:]`, so it must hold
               `T.NS[dt][sr]` samples.

        Returns:
            `self.y` itself (not a copy); it is overwritten by the next
            call.
        """

        p = self.p
        w = self.w
        d = self.d
        n = self.n

        ### Sliding window

        self.x[:w] = self.x[-w:]
        self.x[w:] = x
        self.u[:2] = self.u[-2:]

        # Age the output history by one frame. Order matters: each
        # statement reads a region that a following one overwrites.
        if len(self.y) > 2*n + d:
            self.y[n+d:-n] = self.y[d+2*n:]
        if len(self.y) > n + d:
            self.y[-n:] = self.y[:n]
        self.y[:d] = self.y[n:d+n]

        x = self.x
        u = self.u

        ### 3.3.9.3 Resampling

        # Rearrange the filter table so that the negative indices passed
        # to `np.take()` below wrap to the first 119 table entries.
        h = np.zeros(240 + p)
        h[-119:] = T.LTPF_H12K8[:119]
        h[ :120] = T.LTPF_H12K8[119:]

        for i in range(n):
            e = (15 * i) // p
            f = (15 * i) % p
            k = np.arange(-120, 120 + p, p) - f
            u[2+i] = p * np.dot( x[e:e+w+1], np.take(h, k) )

        # This rebinds the local name only: `self.u` keeps the unscaled
        # values, so its 2-sample history gets scaled again, consistently,
        # on the next call.
        if self.sr == T.SRATE_8K:
            u = 0.5 * u

        ### 3.3.9.4 High-pass filtering

        b = [ 0.9827947082978771, -1.9655894165957540, 0.9827947082978771 ]
        a = [ 1                 , -1.9652933726226904, 0.9658854605688177 ]

        # Feed-forward part in one vector operation, then the recursive
        # part sample by sample; `y[d-1]` and `y[d-2]` carry the filter
        # state from the previous frame.
        self.y[d:d+n] = b[0] * u[2:] + b[1] * u[1:-1] + b[2] * u[:-2]
        for i in range(n):
            self.y[d+i] -= a[1] * self.y[d+i-1] + a[2] * self.y[d+i-2]

        return self.y


class Resampler_6k4:
    """Downsample by 2 with a 5-tap FIR filter (12.8 kHz to 6.4 kHz)."""

    def __init__(self, n, history = 0):
        """Set up the downsampler.

        Args:
            n: Number of input samples per frame.
            history: Number of extra past output samples kept in `y`.
        """

        self.x = np.zeros(n + 5)
        self.n = n // 2

        self.y = np.zeros(self.n + history)

    def resample(self, x):
        """Process one frame and return the internal output buffer.

        The new `self.n` samples are written at `y[:n]`; older samples are
        kept, in time order, at the end of the buffer.

        Args:
            x: Input samples; only the first `2*self.n + 2` are read.

        Returns:
            `self.y` itself (not a copy); it is overwritten by the next
            call.
        """

        n = self.n

        ### Sliding window

        # Keep 3 samples of the previous input in front of the new ones.
        self.x[:3] = self.x[-5:-2]
        self.x[3:] = x[:2*n+2]
        x = self.x

        if len(self.y) > 2*n:
            self.y[n:-n] = self.y[2*n:]
        if len(self.y) > n:
            self.y[-n:] = self.y[:n]

        ### 3.3.9.5 Downsampling to 6.4 KHz

        h = [ 0.1236796411180537, 0.2353512128364889, 0.2819382920909148,
              0.2353512128364889, 0.1236796411180537 ]

        self.y[:n] = [ np.dot(x[2*i:2*i+5], h) for i in range(self.n) ]
        return self.y


def initial_hp50_state():
    """Return a zeroed state dict, as passed to `lc3.ltpf_resample()`."""
    return { 's1': 0, 's2': 0 }

### ------------------------------------------------------------------------ ###

class Ltpf:
    """Attributes shared by the LTPF analysis and synthesis models."""

    def __init__(self, dt, sr):
        """Store the frame duration index `dt` and sample rate index `sr`."""

        self.dt = dt
        self.sr = sr

        (self.pitch_present, self.pitch_index) = (None, None)


class LtpfAnalysis(Ltpf):
    """LTPF analysis: pitch detection, pitch-lag index and activation bit.

    State kept between frames:
        active : activation decision of the last frame.
        tc     : last pitch candidate at 6.4 kHz, stored minus 17.
        pitch  : last pitch lag at 12.8 kHz (integer + quarter fraction).
        nc     : normalized correlations of the last two frames,
                 most recent first.
    """

    def __init__(self, dt, sr):
        """Create both resamplers and reset the analysis state."""

        super().__init__(dt, sr)

        self.resampler_12k8 = Resampler_12k8(
            dt, sr, history = 232)

        self.resampler_6k4 = Resampler_6k4(
            self.resampler_12k8.n, history = 114)

        self.active = False
        self.tc = 0
        self.pitch = 0
        self.nc = np.zeros(2)

    def get_data(self):
        """Return the `active` flag and `pitch_index` as a dict."""

        return { 'active' : self.active,
                 'pitch_index' : self.pitch_index }

    def get_nbits(self):
        """Return 1, plus 10 when a pitch is present.

        `pitch_present` is `None` until `run()` has been called; calling
        this earlier raises `TypeError` from `int(None)`.
        """

        return 1 + 10 * int(self.pitch_present)

    def correlate(self, x, n, k0, k1):
        """Correlate `x[:n]` with `x` delayed by each lag in `k0..k1`.

        Negative indices given to `np.take()` wrap to the end of `x`,
        where the resamplers keep the past samples.

        Returns:
            A list with one correlation value per lag, `k1` included.
        """

        return [ np.dot(x[:n], np.take(x, np.arange(n) - k)) \
                     for k in range(k0, 1+k1) ]

    def norm_corr(self, x, n, k):
        """Return the normalized correlation of `x[:n]` at lag `k`.

        The result is 0 when the raw correlation is not positive, which
        also prevents a division by zero on an all-zero signal.
        """

        u = x[:n]
        v = np.take(x, np.arange(n) - k)
        uv = np.dot(u, v)
        return uv / np.sqrt(np.dot(u, u) * np.dot(v, v)) if uv > 0 else 0

    def run(self, x):
        """Analyse one input frame.

        Updates `pitch_present`, `pitch_index`, `active`, and the
        inter-frame state (`tc`, `pitch`, `nc`).

        Args:
            x: Input frame, forwarded to `Resampler_12k8.resample()`.

        Returns:
            `self.pitch_present` (bool).
        """

        ### 3.3.9.3-4 Resampling

        x_12k8 = self.resampler_12k8.resample(x)

        ### 3.3.9.5-6 Pitch detection algorithm

        x = self.resampler_6k4.resample(x_12k8)
        n = self.resampler_6k4.n

        # `rw` weights the correlation down linearly, from 1 at the
        # shortest lag to 0.5 at the longest one.
        r = self.correlate(x, n, 17, 114)
        rw = r * (1 - 0.5 * np.arange(len(r)) / (len(r) - 1))

        # Two candidates: the best weighted lag overall, and the best
        # unweighted lag within +/-4 of the previous frame's candidate.
        tc = self.tc
        k0 = max(0, tc-4)
        k1 = min(len(r)-1, tc+4)
        t = [ 17 + np.argmax(rw), 17 + k0 + np.argmax(r[k0:1+k1]) ]

        nc = [ self.norm_corr(x, n, t[i]) for i in range(2) ]
        ti = int(nc[1] > 0.85 * nc[0])
        self.tc = t[ti] - 17

        self.pitch_present = bool(nc[ti] > 0.6)

        ### 3.3.9.7 Pitch-lag parameter

        if self.pitch_present:
            tc = self.tc + 17

            # From here on, `x` and `n` refer to the 12.8 kHz signal.
            x = x_12k8
            n = self.resampler_12k8.n

            # Integer lag `e`: search around twice the 6.4 kHz candidate.
            # The correlation is computed 4 lags wider on each side, for
            # the fractional interpolation below.
            k0 = max( 32, 2*tc-4)
            k1 = min(228, 2*tc+4)
            r = self.correlate(x, n, k0-4, k1+4)
            e = k0 + np.argmax(r[4:-4])

            h = np.zeros(42)
            h[-15:] = T.LTPF_H4[:15]
            h[ :16] = T.LTPF_H4[15:]

            # `s[j]` is the correlation interpolated at `e + (j-3)/4`.
            m = np.arange(-4, 5)
            s = [ np.dot( np.take(r, e-k0+4 + m), np.take(h, 4*m-d) ) \
                      for d in range(-3, 4) ]

            # Fractional part `f`, in quarter samples; the allowed values
            # depend on the range of `e`.
            f = np.argmax(s[3:]) if e <= 32 else \
                -3 + np.argmax(s) if e < 127 else \
                -2 + 2*np.argmax(s[1:-1:2]) if e < 157 else 0

            # Normalize a negative fraction to `0..3` by borrowing one
            # sample from the integer part.
            e -= (f < 0)
            f += 4*(f < 0)

            self.pitch_index = 4*e + f - 128 if e < 127 else \
                               2*e + f//2 + 126 if e < 157 else e + 283

        else:
            e = f = 0
            self.pitch_index = 0

        ### 3.3.9.8 Activation bit

        h = np.zeros(24)
        h[-7:] = T.LTPF_HI[:7]
        h[ :8] = T.LTPF_HI[7:]

        # `u` is the filtered current signal, `v` the filtered signal
        # delayed by the pitch lag. Without a pitch, `x` and `n` still
        # refer to the 6.4 kHz signal, and the result is discarded.
        k = np.arange(-2, 3)
        u = [ np.dot( np.take(x, i-k), np.take(h, 4*k) ) \
                  for i in range(n) ]
        v = [ np.dot( np.take(x, i-k), np.take(h, 4*k-f) ) \
                  for i in range(-e, n-e) ]

        nc = max(0, np.dot(u, v)) / np.sqrt(np.dot(u, u) * np.dot(v, v)) \
                 if self.pitch_present else 0

        pitch = e + f/4

        if not self.active:
            active = (self.dt == T.DT_10M or self.nc[1] > 0.94) \
                and self.nc[0] > 0.94 and nc > 0.94

        else:
            dp = abs(pitch - self.pitch)
            dc = nc - self.nc[0]
            active = nc > 0.9 or (dp < 2 and dc > -0.1 and nc > 0.84)

        if not self.pitch_present:
            active = False
            pitch = 0
            nc = 0

        self.active = active
        self.pitch = pitch
        self.nc[1] = self.nc[0]
        self.nc[0] = nc

        return self.pitch_present

    def disable(self):
        """Clear the activation flag."""

        self.active = False

    def store(self, b):
        """Write `active` on 1 bit and `pitch_index` on 9 bits to `b`.

        `b` must provide `write_uint(value, nbits)`.
        """

        b.write_uint(self.active, 1)
        b.write_uint(self.pitch_index, 9)


class LtpfSynthesis(Ltpf):
    """LTPF synthesis: pitch-based filtering of the decoded frame.

    In the two-element state lists, index 0 holds the values of the
    current frame and index 1 those of the previous frame.
    """

    # Filter coefficient tables, indexed by sample rate.
    C_N = [ T.LTPF_N_8K , T.LTPF_N_16K,
            T.LTPF_N_24K, T.LTPF_N_32K, T.LTPF_N_48K ]

    C_D = [ T.LTPF_D_8K , T.LTPF_D_16K,
            T.LTPF_D_24K, T.LTPF_D_32K, T.LTPF_D_48K ]

    def __init__(self, dt, sr):
        """Reset the filter state and allocate the history buffers."""

        super().__init__(dt, sr)

        # Instance attributes hiding the class tables: the rows for `sr`.
        self.C_N = LtpfSynthesis.C_N[sr]
        self.C_D = LtpfSynthesis.C_D[sr]

        ns = T.NS[dt][sr]

        self.active = [ False, False ]
        self.pitch_index = 0

        # Largest pitch lag, converted from 12.8 kHz to the output rate.
        max_pitch_12k8 = 228
        max_pitch = max_pitch_12k8 * T.SRATE_KHZ[self.sr] / 12.8
        max_pitch = np.ceil(max_pitch).astype(int)

        self.x = np.zeros(ns)
        self.y = np.zeros(max_pitch + len(self.C_D[0]))

        self.p_e = [ 0, 0 ]
        self.p_f = [ 0, 0 ]
        self.c_n = [ None, None ]
        self.c_d = [ None, None ]

    def load(self, b):
        """Read `active` (1 bit) and `pitch_index` (9 bits) from `b`.

        `b` must provide `read_uint(nbits)`.
        """

        self.active[0] = bool(b.read_uint(1))
        self.pitch_index = b.read_uint(9)

    def disable(self):
        """Deactivate the filter for the current frame."""

        self.active[0] = False
        self.pitch_index = 0

    def run(self, x, nbytes):
        """Filter one frame.

        Args:
            x: Input frame of `T.NS[dt][sr]` samples.
            nbytes: Frame size in bytes; selects the filter gain.

        Returns:
            The filtered frame, `T.NS[dt][sr]` samples.
        """

        sr = self.sr
        dt = self.dt

        ### 3.4.9.4 Filter parameters

        pitch_index = self.pitch_index

        # Inverse of the index mapping done in `LtpfAnalysis.run()`:
        # integer lag `p_e` and fraction `p_f` in quarter samples.
        if pitch_index >= 440:
            p_e = pitch_index - 283
            p_f = 0
        elif pitch_index >= 380:
            p_e = pitch_index // 2 - 63
            p_f = 2*(pitch_index - 2*(p_e + 63))
        else:
            p_e = pitch_index // 4 + 32
            p_f = pitch_index - 4*(p_e - 32)

        # Convert the lag from 12.8 kHz to the output rate, rounded to
        # the nearest quarter sample.
        p = (p_e + p_f / 4) * T.SRATE_KHZ[self.sr] / 12.8

        self.p_e[0] = int(p * 4 + 0.5) // 4
        self.p_f[0] = int(p * 4 + 0.5) - 4*self.p_e[0]

        # The gain decreases as the bitrate increases, and is 0 past the
        # end of the gain table.
        nbits = round(nbytes*80 / T.DT_MS[dt])
        g_idx = max(nbits // 80, 3+sr) - (3+sr)

        g = [ 0.4, 0.35, 0.3, 0.25 ][g_idx] if g_idx < 4 else 0
        g_idx = min(g_idx, 3)

        self.c_n[0] = 0.85 * g * LtpfSynthesis.C_N[sr][g_idx]
        self.c_d[0] = g * LtpfSynthesis.C_D[sr][self.p_f[0]]

        ### 3.4.9.2 Transition handling

        n0 = (T.SRATE_KHZ[sr] * 1000) // 400
        ns = T.NS[dt][sr]

        # The past samples are appended after the current frame, so that
        # the negative indices given to `np.take()` wrap onto them.
        x = np.append(x, self.x)
        y = np.append(np.zeros(ns), self.y)
        yc = y.copy()

        c_n = self.c_n
        c_d = self.c_d

        l_n = len(c_n[0])
        l_d = len(c_d[0])

        d = [ self.p_e[0] - (l_d - 1) // 2,
              self.p_e[1] - (l_d - 1) // 2 ]

        for k in range(n0):

            if not self.active[0] and not self.active[1]:
                y[k] = x[k]

            # Filter switched on: fade in the current filter.
            elif self.active[0] and not self.active[1]:
                u = np.dot(c_n[0], np.take(x, k - np.arange(l_n))) - \
                    np.dot(c_d[0], np.take(y, k - d[0] - np.arange(l_d)))
                y[k] = x[k] - (k/n0) * u

            # Filter switched off: fade out the previous filter.
            elif not self.active[0] and self.active[1]:
                u = np.dot(c_n[1], np.take(x, k - np.arange(l_n))) - \
                    np.dot(c_d[1], np.take(y, k - d[1] - np.arange(l_d)))
                y[k] = x[k] - (1 - k/n0) * u

            # Active on both frames with the same lag: no transition.
            elif self.p_e[0] == self.p_e[1] and self.p_f[0] == self.p_f[1]:
                u = np.dot(c_n[0], np.take(x, k - np.arange(l_n))) - \
                    np.dot(c_d[0], np.take(y, k - d[0] - np.arange(l_d)))
                y[k] = x[k] - u

            # Active on both frames with different lags: fade out the
            # previous filter into `yc`, then fade in the current one.
            else:
                u = np.dot(c_n[1], np.take(x, k - np.arange(l_n))) - \
                    np.dot(c_d[1], np.take(y, k - d[1] - np.arange(l_d)))
                yc[k] = x[k] - (1 - k/n0) * u

                u = np.dot(c_n[0], np.take(yc, k - np.arange(l_n))) - \
                    np.dot(c_d[0], np.take(y , k - d[0] - np.arange(l_d)))
                y[k] = yc[k] - (k/n0) * u


        ### 3.4.9.3 Remainder of the frame

        for k in range(n0, ns):

            if not self.active[0]:
                y[k] = x[k]

            else:
                u = np.dot(c_n[0], np.take(x, k - np.arange(l_n))) - \
                    np.dot(c_d[0], np.take(y, k - d[0] - np.arange(l_d)))
                y[k] = x[k] - u

        ### Sliding window

        self.active[1] = self.active[0]
        self.p_e[1] = self.p_e[0]
        self.p_f[1] = self.p_f[0]
        self.c_n[1] = self.c_n[0]
        self.c_d[1] = self.c_d[0]

        self.x = x[:ns]
        self.y = np.append(self.y[ns:], y[:ns])

        return y[:ns]

def initial_state():
    """Return a zeroed state dict, as passed to `lc3.ltpf_analyse()`."""
    return { 'active' : False, 'pitch': 0, 'nc': np.zeros(2),
             'hp50' : initial_hp50_state(),
             'x_12k8' : np.zeros(384), 'x_6k4' : np.zeros(178), 'tc' : 0 }

def initial_sstate():
    """Return a zeroed state dict, as passed to `lc3.ltpf_synthesize()`."""
    return { 'active': False, 'pitch': 0,
             'c': np.zeros(2*12), 'x': np.zeros(12) }

### ------------------------------------------------------------------------ ###

def check_resampler(rng, dt, sr):
    """Compare `Resampler_12k8` with `lc3.ltpf_resample()` on random input.

    Runs 10 frames of uniform noise. The `lc3` output is compared with
    half of the model output, with an absolute tolerance of 4.

    Returns:
        True when every frame matched.
    """

    ns = T.NS[dt][sr]
    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    r = Resampler_12k8(dt, sr)

    hp50_c = initial_hp50_state()
    x_c = np.zeros(nt)
    y_c = np.zeros(384)

    for _ in range(10):

        x = ((2 * rng.random(ns)) - 1) * (2 ** 15 - 1)
        y = r.resample(x)

        # The `lc3` side gets the last `nt` samples of its previous
        # input, followed by the new frame converted to 16-bit integers.
        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        y_c[:-r.n] = y_c[r.n:]
        y_c = lc3.ltpf_resample(dt, sr, hp50_c, x_c, y_c)

        ok = ok and np.amax(np.abs(y_c[-r.d-r.n:] - y[:r.d+r.n]/2)) < 4

    return ok

def check_resampler_appendix_c(dt):
    """Check `lc3.ltpf_resample()` against `C.X_TILDE_12K8D`.

    Two consecutive frames of `C.X_PCM[dt]` are processed at the 16 kHz
    sample rate index. The output is compared with half of the reference,
    with an absolute tolerance of 2.
    """

    sr = T.SRATE_16K
    ok = True

    nt = (5 * T.SRATE_KHZ[sr]) // 4
    n = [ 96, 128 ][dt]
    k = [ 44, 24 ][dt] + n

    state = initial_hp50_state()

    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
    y = np.zeros(384)
    y = lc3.ltpf_resample(dt, sr, state, x, y)
    u = y[-k:len(C.X_TILDE_12K8D[dt][0])-k]

    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][0]/2)) < 2

    x = np.append(x[-nt:], C.X_PCM[dt][1])
    y[:-n] = y[n:]
    y = lc3.ltpf_resample(dt, sr, state, x, y)
    u = y[-k:len(C.X_TILDE_12K8D[dt][1])-k]

    ok = ok and np.amax(np.abs(u - C.X_TILDE_12K8D[dt][1]/2)) < 2

    return ok

def check_analysis(rng, dt, sr):
    """Compare `LtpfAnalysis` with `lc3.ltpf_analyse()` on a chirp.

    The signal is a logarithmic chirp from 10 Hz to 3 kHz, of which the
    first 20 frames are analysed. `rng` is unused; it is kept so that all
    the checks called from `check()` share one signature.

    Returns:
        True when every frame matched.
    """

    ns = T.NS[dt][sr]
    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    state_c = initial_state()
    x_c = np.zeros(ns+nt)

    ltpf = LtpfAnalysis(dt, sr)

    t = np.arange(100 * ns) / (T.SRATE_KHZ[sr] * 1000)
    s = signal.chirp(t, f0=10, f1=3e3, t1=t[-1], method='logarithmic')

    for i in range(20):

        x = s[i*ns:(i+1)*ns] * (2 ** 15 - 1)

        pitch_present = ltpf.run(x)
        data = ltpf.get_data()

        x_c = np.append(x_c[-nt:], x.astype(np.int16))
        (pitch_present_c, data_c) = lc3.ltpf_analyse(dt, sr, state_c, x_c)

        # The pitch candidate is only compared when a pitch was found.
        ok = ok and (not pitch_present or state_c['tc'] == ltpf.tc)
        ok = ok and np.amax(np.abs(state_c['nc'][0] - ltpf.nc[0])) < 1e-2
        ok = ok and pitch_present_c == pitch_present
        ok = ok and data_c['active'] == data['active']
        ok = ok and data_c['pitch_index'] == data['pitch_index']
        ok = ok and lc3.ltpf_get_nbits(pitch_present) == ltpf.get_nbits()

    return ok

def check_synthesis(rng, dt, sr):
    """Compare `LtpfSynthesis` with `lc3.ltpf_synthesize()`.

    Runs 50 frames with random samples, frame sizes, activation flags and
    pitch indices; outputs must agree within 1e-2.

    Returns:
        True when every frame matched.
    """

    ok = True

    ns = T.NS[dt][sr]
    nd = 18 * T.SRATE_KHZ[sr]

    synthesis = LtpfSynthesis(dt, sr)

    state_c = initial_sstate()
    x_c = np.zeros(nd+ns)

    for i in range(50):
        pitch_present = bool(rng.integers(0, 10) >= 1)
        if not pitch_present:
            synthesis.disable()
        else:
            synthesis.active[0] = bool(rng.integers(0, 5) >= 1)
            synthesis.pitch_index = rng.integers(0, 512)

        data_c = None if not pitch_present else \
            { 'active' : synthesis.active[0],
              'pitch_index' : synthesis.pitch_index }

        x = rng.random(ns) * 1e4
        nbytes = rng.integers(10*(2+sr), 10*(6+sr))

        # The `lc3` buffer holds `nd` past samples, then the new frame.
        x_c[:nd] = x_c[ns:]
        x_c[nd:] = x

        y = synthesis.run(x, nbytes)
        x_c = lc3.ltpf_synthesize(dt, sr, nbytes, state_c, data_c, x_c)

        ok = ok and np.amax(np.abs(x_c[nd:] - y)) < 1e-2

    return ok

def check_analysis_appendix_c(dt):
    """Check `lc3.ltpf_analyse()` against the `appendix_c` vectors.

    Two consecutive frames of `C.X_PCM[dt]` are analysed at the 16 kHz
    sample rate index. The results are compared with `C.T_CURR`,
    `C.NC_LTPF`, `C.PITCH_PRESENT`, `C.PITCH_INDEX` and `C.LTPF_ACTIVE`.
    """

    sr = T.SRATE_16K
    nt = (5 * T.SRATE_KHZ[sr]) // 4
    ok = True

    state = initial_state()

    x = np.append(np.zeros(nt), C.X_PCM[dt][0])
    (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)

    # `state['tc']` must be 17 below the reference value.
    ok = ok and C.T_CURR[dt][0] - state['tc'] == 17
    ok = ok and np.amax(np.abs(state['nc'][0] - C.NC_LTPF[dt][0])) < 1e-5
    ok = ok and pitch_present == C.PITCH_PRESENT[dt][0]
    ok = ok and data['pitch_index'] == C.PITCH_INDEX[dt][0]
    ok = ok and data['active'] == C.LTPF_ACTIVE[dt][0]

    x = np.append(x[-nt:], C.X_PCM[dt][1])
    (pitch_present, data) = lc3.ltpf_analyse(dt, sr, state, x)

    ok = ok and C.T_CURR[dt][1] - state['tc'] == 17
    ok = ok and np.amax(np.abs(state['nc'][0] - C.NC_LTPF[dt][1])) < 1e-5
    ok = ok and pitch_present == C.PITCH_PRESENT[dt][1]
    ok = ok and data['pitch_index'] == C.PITCH_INDEX[dt][1]
    ok = ok and data['active'] == C.LTPF_ACTIVE[dt][1]

    return ok

def check_synthesis_appendix_c(dt):
    """Check `lc3.ltpf_synthesize()` against the `C.LTPF_C2..C5` vectors.

    Only `T.DT_10M` is checked; any other `dt` returns True immediately.
    Each of the 4 cases runs two frames and compares the output of the
    second one with the `TRANS` reference, within 1e-3.
    """

    sr = T.SRATE_16K
    ok = True

    if dt != T.DT_10M:
        return ok

    ns = T.NS[dt][sr]
    nd = 18 * T.SRATE_KHZ[sr]

    NBYTES = [ C.LTPF_C2_NBITS // 8, C.LTPF_C3_NBITS // 8,
               C.LTPF_C4_NBITS // 8, C.LTPF_C5_NBITS // 8 ]

    ACTIVE = [ C.LTPF_C2_ACTIVE, C.LTPF_C3_ACTIVE,
               C.LTPF_C4_ACTIVE, C.LTPF_C5_ACTIVE ]

    PITCH_INDEX = [ C.LTPF_C2_PITCH_INDEX, C.LTPF_C3_PITCH_INDEX,
                    C.LTPF_C4_PITCH_INDEX, C.LTPF_C5_PITCH_INDEX ]

    X = [ C.LTPF_C2_X, C.LTPF_C3_X,
          C.LTPF_C4_X, C.LTPF_C5_X ]

    PREV = [ C.LTPF_C2_PREV, C.LTPF_C3_PREV,
             C.LTPF_C4_PREV, C.LTPF_C5_PREV ]

    TRANS = [ C.LTPF_C2_TRANS, C.LTPF_C3_TRANS,
              C.LTPF_C4_TRANS, C.LTPF_C5_TRANS ]

    for i in range(4):

        state = initial_sstate()
        nbytes = NBYTES[i]

        # The first frame only brings `state` to the previous-frame
        # conditions; its output is not checked.
        data = { 'active' : ACTIVE[i][0], 'pitch_index' : PITCH_INDEX[i][0] }
        x = np.append(np.zeros(nd), X[i][0])

        lc3.ltpf_synthesize(dt, sr, nbytes, state, data, x)

        # For the second frame, the history is taken from the reference
        # rather than from the output of the first call.
        data = { 'active' : ACTIVE[i][1], 'pitch_index' : PITCH_INDEX[i][1] }
        x[   :nd-ns] = PREV[i][0][-nd+ns:]
        x[nd-ns:nd] = PREV[i][1]
        x[nd:nd+ns] = X[i][1]

        y = lc3.ltpf_synthesize(dt, sr, nbytes, state, data, x)[nd:]

        ok = ok and np.amax(np.abs(y - TRANS[i])) < 1e-3

    return ok

def check():
    """Run every check of this module.

    The randomized checks run for each frame duration and sample rate,
    sharing a generator seeded with 1234; the appendix checks run for each
    frame duration. Once a check has failed, the `ok and ...` chaining
    skips all the following ones.

    Returns:
        True when all checks passed.
    """

    rng = np.random.default_rng(1234)
    ok = True

    for dt in range(T.NUM_DT):
        for sr in range(T.NUM_SRATE):
            ok = ok and check_resampler(rng, dt, sr)
            ok = ok and check_analysis(rng, dt, sr)
            ok = ok and check_synthesis(rng, dt, sr)

    for dt in range(T.NUM_DT):
        ok = ok and check_resampler_appendix_c(dt)
        ok = ok and check_analysis_appendix_c(dt)
        ok = ok and check_synthesis_appendix_c(dt)

    return ok

### ------------------------------------------------------------------------ ###