###########################################
# Project: CMSIS DSP Library
# Title: MFCC.py
# Description: Test pattern generation for MFCC
#
# $Date: 02 September 2021
# $Revision: V1.10.0
#
# Target Processor: Cortex-M and Cortex-A cores
# -------------------------------------------------------------------- */
#
# Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
############################################
import os.path
import numpy as np
import itertools
import Tools
import scipy
import scipy.signal as sig
import scipy.fftpack

################################
#
# Gives the same results as the TensorFlow Lite
# MFCC if a Hamming window is used
# (TF stft uses a Hann window by default)
#

# When True, writeTests() only processes the 256-sample case, replaces the
# random noise input with the fixed `debug` vector below, and the
# intermediate results of the MFCC computation are printed.
DEBUG = False


def frequencyToMelSpace(freq):
    """Convert a frequency (scalar or NumPy array) to the mel scale.

    Uses mel = 1127 * ln(1 + freq / 700).
    """
    return 1127.0 * np.log(1.0 + freq / 700.0)


def melSpaceToFrequency(mels):
    """Convert mel values back to frequencies.

    Inverse of frequencyToMelSpace().
    """
    return 700.0 * (np.exp(mels / 1127.0) - 1.0)


def melFilterMatrix(fmin, fmax, numOfMelFilters, fs, FFTSize):
    """Build the bank of triangular mel filters.

    Parameters:
        fmin, fmax: lowest and highest frequency covered by the bank.
        numOfMelFilters: number of triangular filters (rows of the result).
        fs: sampling frequency; FFT bins are spread over [0, fs / 2].
        FFTSize: FFT length; only the first FFTSize // 2 + 1 bins are used.

    Returns:
        Array of shape (numOfMelFilters, FFTSize // 2 + 1). Row n is a
        triangle (in the mel domain) rising from mels[n] to mels[n + 1]
        and falling back to zero at mels[n + 2].
    """
    nbBins = FFTSize // 2 + 1
    filters = np.zeros((numOfMelFilters, nbBins))
    zeros = np.zeros(nbBins - 1)

    # numOfMelFilters + 2 edges, equally spaced on the mel scale.
    fmin_mel = frequencyToMelSpace(fmin)
    fmax_mel = frequencyToMelSpace(fmax)
    mels = np.linspace(fmin_mel, fmax_mel, num=numOfMelFilters + 2)

    # Mel value of every FFT bin except the DC bin, which is handled
    # separately below.
    linearfreqs = np.linspace(0, fs / 2.0, nbBins)
    spectrogrammels = frequencyToMelSpace(linearfreqs)[1:]

    for n in range(numOfMelFilters):
        # Rising and falling edges of the triangle.
        upper = (spectrogrammels - mels[n]) / (mels[n + 1] - mels[n])
        lower = (mels[n + 2] - spectrogrammels) / (mels[n + 2] - mels[n + 1])

        # Clamp negative values to zero; the weight of the DC bin is
        # always zero.
        filters[n, :] = np.hstack(
            [0, np.maximum(zeros, np.minimum(upper, lower))])

    return filters


def dctMatrix(numOfDctOutputs, numOfMelFilters):
    """Build the DCT-II matrix applied to the log mel energies.

    Returns an array of shape (numOfDctOutputs, numOfMelFilters) with

        result[i, k] = cos(i * pi * (k + 0.5) / N) * sqrt(2 / N)

    where N = numOfMelFilters. Every row, including row 0, uses the same
    sqrt(2 / N) scaling.
    """
    result = np.zeros((numOfDctOutputs, numOfMelFilters))
    s = (np.arange(numOfMelFilters) + 0.5) / numOfMelFilters
    scale = np.sqrt(2.0 / numOfMelFilters)

    for i in range(numOfDctOutputs):
        result[i, :] = np.cos(i * np.pi * s) * scale

    return result


class MFCCConfig:
    """Precomputed window, mel filter bank and DCT matrix for one MFCC setup."""

    def __init__(self, freq_min, freq_high, numOfMelFilters, numOfDctOutputs,
                 FFTSize, sample_rate):
        """Store the parameters and precompute the constant tables.

        Parameters:
            freq_min, freq_high: frequency range covered by the mel filters.
            numOfMelFilters: number of mel filters.
            numOfDctOutputs: number of cepstral coefficients returned by
                mfcc().
            FFTSize: FFT length; mfcc() multiplies its input by a window of
                this length.
            sample_rate: sampling frequency of the audio.
        """
        self._freq_min = freq_min
        self._freq_high = freq_high
        self._numOfMelFilters = numOfMelFilters
        self._FFTSize = FFTSize
        self._sample_rate = sample_rate
        # Periodic (sym=False) Hamming window. The window functions were
        # removed from the top-level scipy.signal namespace in SciPy 1.13,
        # so it is taken from scipy.signal.windows.
        self._window = sig.windows.hamming(FFTSize, sym=False)
        self._numOfDctOutputs = numOfDctOutputs

        self._filters = melFilterMatrix(
            freq_min, freq_high, numOfMelFilters, sample_rate, FFTSize)
        self._dctMatrixFilters = dctMatrix(numOfDctOutputs, numOfMelFilters)

    def mfcc(self, audio):
        """Compute the MFCC of one frame.

        Parameters:
            audio: NumPy array multiplied element-wise by the FFTSize-long
                window.

        Returns:
            Array of numOfDctOutputs cepstral coefficients.
        """
        audioWin = audio * self._window
        if DEBUG:
            print(audioWin)

        audioFFT = scipy.fftpack.fft(audioWin)
        if DEBUG:
            print(audioFFT)

        # The mel filters are applied to the magnitude spectrum
        # (not to the squared magnitude).
        audioMagnitude = np.abs(audioFFT)
        if DEBUG:
            print(audioMagnitude)

        # Keep only the bins covered by the filter bank.
        filterLimit = 1 + self._FFTSize // 2
        audioMagnitude = audioMagnitude[:filterLimit]

        audioFiltered = np.dot(self._filters, audioMagnitude)
        if DEBUG:
            print(audioFiltered)

        # Small offset so that the logarithm of an empty band stays finite.
        audioLog = np.log(audioFiltered + 1e-6)

        return np.dot(self._dctMatrixFilters, audioLog)


# Fixed 256-sample input used instead of random noise when DEBUG is True.
debug = np.array([
    0.65507051, -0.94647589, 0.00627239, 0.14151286, -0.10863318, -0.36370327,
    0.05777126, -0.11915792, 0.50183546, -0.31461335, 0.66440771, 0.05389963,
    0.39690544, 0.25424852, -0.17045277, 0.09649268, 0.87357385, -0.44666372,
    -0.02637822, -0.10055151, -0.14610252, -0.05981251, -0.02999124, 0.60923213,
    0.10530095, 0.35684248, 0.21845946, 0.47845017, -0.60206979, 0.25186908,
    -0.27410056, -0.07080467, -0.05109539, -0.2666572, 0.25483105, -0.86459185,
    0.07733397, -0.58535444, 0.06230904, -0.04161475, -0.17467296, 0.77721125,
    -0.01728161, -0.32141218, 0.36674466, -0.17932843, 0.78486115, 0.12469579,
    -0.94796877, 0.05536031, 0.32627676, 0.46628512, -0.02585836, -0.51439834,
    0.21387904, 0.16319442, -0.01020818, -0.77161183, 0.07754634, -0.24970455,
    0.2368003, 0.35167963, 0.14620137, -0.02415204, 0.91086167, -0.02434647,
    -0.3968239, -0.04703925, -0.43905103, -0.34834965, 0.33728158, 0.15138992,
    -0.43218885, 0.26619718, 0.07177906, 0.33393581, -0.50306915, -0.63101084,
    -0.08128395, -0.06569788, 0.84232797, -0.32436751, 0.02528537, -0.3498329,
    0.41859931, 0.07794887, 0.4571989, 0.24290963, 0.08437417, -0.01371585,
    -0.00103008, 0.83978697, -0.29001237, 0.14438743, 0.11943318, -0.25576402,
    0.25151083, 0.07886626, 0.11565781, -0.01582203, 0.1310246, -0.5553611,
    -0.37950665, 0.44179691, 0.08460877, 0.30646419, 0.48927934, -0.21240309,
    0.36844264, 0.49686615, -0.81617664, 0.52221472, -0.05188992, -0.03929655,
    -0.47674501, -0.54506781, 0.30711148, 0.10049337, -0.47549213, 0.59106713,
    -0.62276051, -0.35182917, 0.14612027, 0.56142168, -0.01053732, 0.35782179,
    -0.27220781, -0.03672346, -0.11282222, 0.3364912, -0.22352515, -0.04245287,
    0.56968605, -0.14023724, -0.82982905, 0.00860008, 0.37920345, -0.53749318,
    -0.12761215, 0.08567603, 0.47020765, -0.28794812, -0.33888971, 0.01850441,
    0.66848233, -0.26532759, -0.20777571, -0.68342729, -0.41498696, 0.00593224,
    0.02229368, 0.75596329, 0.29447568, -0.1106449, 0.24181939, 0.05807497,
    -0.14343857, 0.304988, 0.00689148, -0.06264758, 0.25864714, -0.22252155,
    0.28621689, 0.17031599, -0.34694027, -0.01625718, 0.39834181, 0.01259659,
    -0.28022716, -0.02506168, -0.10276881, 0.31733924, 0.02787068, -0.09824124,
    0.45147797, 0.14451518, 0.17996395, -0.70594978, -0.92943177, 0.13649282,
    -0.5938426, 0.50289928, 0.19635269, 0.16811504, 0.05803999, 0.0037204,
    0.13847419, 0.30568038, 0.3700732, 0.21257548, -0.31151753, -0.28836886,
    0.68743932, -0.11084429, -0.4673766, 0.16637754, -0.38992572, 0.16505578,
    -0.07499844, 0.04226538, -0.11042177, 0.0704542, -0.632819, -0.54898472,
    0.26498649, -0.59380386, 0.93387213, 0.06526726, -0.23223558, 0.07941394,
    0.14325166, 0.26914661, 0.00925575, -0.34282161, -0.51418231, -0.12011075,
    -0.26676314, -0.09999028, 0.03027513, 0.22846503, -0.08930338, -0.1867156,
    0.66297846, 0.32220769, -0.06015469, 0.04034043, 0.09595597, -1.0,
    -0.42933352, 0.25069376, -0.26030918, -0.28511861, -0.19931228, 0.24408572,
    -0.3231952, 0.45688981, -0.07354078, 0.25669449, -0.44202722, 0.11928406,
    -0.32826109, 0.52660984, 0.03067858, 0.11095242, 0.19933679, 0.03042371,
    -0.34768682, 0.09108447, 0.61234556, 0.1854931, 0.19680502, 0.27617564,
    0.33381827, -0.47358967, 0.28714328, -0.27495982,
])


def noiseSignal(nb):
    """Return nb uniform random samples in [-1, 1)."""
    return 2.0 * np.random.rand(nb) - 1.0


def sineSignal(freqRatio, nb):
    """Return nb samples of a sine wave.

    freqRatio is the frequency expressed as a fraction of nb / 2 cycles per
    nb samples, i.e. the result is sin(pi * freqRatio * t) for
    t = 0 .. nb - 1.
    """
    fc = nb / 2.0
    f = freqRatio * fc
    time = np.arange(0, nb)
    return np.sin(2 * np.pi * f * time / nb)


def noisySineSignal(noiseAmp, r, nb):
    """Return a sine wave plus uniform noise.

    r is used both as the amplitude of the sine and as its frequency ratio
    (see sineSignal()); noiseAmp scales the noise.
    """
    return noiseAmp * noiseSignal(nb) + r * sineSignal(r, nb)


def _scaleReference(reference, format):
    """Divide the reference by 2**8 for the Q15 and Q31 formats.

    NOTE(review): presumably this matches the scaling of the fixed-point
    output of the implementation under test - confirm against it.
    """
    if format in (Tools.Q15, Tools.Q31):
        return reference / (1 << 8)
    return reference


def writeTests(config, format):
    """Write the MFCC input and reference patterns for one data format.

    For each frame length (256, 512 and 1024 samples; only 256 when DEBUG
    is True) two patterns are written through `config`: one from random
    noise and one from a noisy sine wave.

    Parameters:
        config: Tools.Config used to write the pattern files.
        format: format identifier compared with Tools.Q15 / Tools.Q31 to
            decide whether the reference is rescaled. The name shadows the
            `format` builtin but is kept so existing callers are unaffected.
    """
    NBSAMPLES = [256] if DEBUG else [256, 512, 1024]

    sample_rate = 16000
    numOfDctOutputs = 13

    freq_min = 64
    freq_high = sample_rate / 2
    numOfMelFilters = 20

    for nb in NBSAMPLES:
        # The FFT length is the frame length.
        mfccConfig = MFCCConfig(freq_min, freq_high, numOfMelFilters,
                                numOfDctOutputs, nb, sample_rate)

        # Noise
        audio = np.random.randn(nb)
        audio = Tools.normalize(audio)
        if DEBUG:
            audio = debug
        refNoise = _scaleReference(mfccConfig.mfcc(audio), format)
        if DEBUG:
            print(refNoise)

        config.writeInput(1, list(audio), "MFCCNoiseInput_%d_" % nb)
        config.writeReference(1, list(refNoise), "MFCCNoiseRef_%d_" % nb)

        # Sine
        audio = noisySineSignal(0.1, 0.8, nb)
        audio = Tools.normalize(audio)
        refSine = _scaleReference(mfccConfig.mfcc(audio), format)

        config.writeInput(1, list(audio), "MFCCSineInput_%d_" % nb)
        config.writeReference(1, list(refSine), "MFCCSineRef_%d_" % nb)


def generatePatterns():
    """Generate the MFCC patterns for the f32, f16, q31 and q15 formats.

    No q7 configuration is generated.
    """
    PATTERNDIR = os.path.join("Patterns", "DSP", "Transform", "MFCC")
    PARAMDIR = os.path.join("Parameters", "DSP", "Transform", "MFCC")

    configf32 = Tools.Config(PATTERNDIR, PARAMDIR, "f32")
    configf16 = Tools.Config(PATTERNDIR, PARAMDIR, "f16")
    configq31 = Tools.Config(PATTERNDIR, PARAMDIR, "q31")
    configq15 = Tools.Config(PATTERNDIR, PARAMDIR, "q15")

    configf32.setOverwrite(True)
    configf16.setOverwrite(True)
    configq31.setOverwrite(True)
    configq15.setOverwrite(True)

    # The f32 patterns are written with format 0, which never matches the
    # fixed-point rescaling in writeTests().
    writeTests(configf32, 0)
    writeTests(configf16, Tools.F16)

    writeTests(configq31, Tools.Q31)
    writeTests(configq15, Tools.Q15)


if __name__ == '__main__':
    generatePatterns()