1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        AppNodes.h
4  * Description:  Application nodes for the C compute graph
5  *
6  * $Date:        16 March 2022
7  *
8  * Target Processor: Cortex-M and Cortex-A cores
9  * -------------------------------------------------------------------- */
10 /*
11  * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
12  *
13  * SPDX-License-Identifier: Apache-2.0
14  *
15  * Licensed under the Apache License, Version 2.0 (the License); you may
16  * not use this file except in compliance with the License.
17  * You may obtain a copy of the License at
18  *
19  * www.apache.org/licenses/LICENSE-2.0
20  *
21  * Unless required by applicable law or agreed to in writing, software
22  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
23  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24  * See the License for the specific language governing permissions and
25  * limitations under the License.
26  */
27 
28 
29 #ifndef _APPNODES_H_
30 #define _APPNODES_H_
31 
32 #include <hal/nrf_pdm.h>
33 #include "coef.h"
34 
35 #include <Arduino.h>
36 #include <HardwareSerial.h>
37 
38 // When enabled, lots of additional trace is generated
39 //#define DEBUG
40 
41 // Buffer to read samples into, each sample is 16-bits
42 // This is written by the PDM driver
43 extern short sampleBuffer[AUDIOBUFFER_LENGTH];
44 
45 // Number of audio samples available in the audio buffer
46 extern volatile int samplesRead;
47 
48 // Sink node. It is just printing the recognized word
49 template<typename IN, int inputSize> class Sink;
50 
51 template<int inputSize>
52 class Sink<q15_t, inputSize>: public GenericSink<q15_t, inputSize>
53 {
54 public:
Sink(FIFOBase<q15_t> & src)55     Sink(FIFOBase<q15_t> &src):GenericSink<q15_t,inputSize>(src){};
56 
run()57     int run()
58     {
59         #if defined(DEBUG)
60         Serial.println("==== Sink");
61         #endif
62 
63         q15_t *b=this->getReadBuffer();
64 
65         if (b[0]==-1)
66         {
67             Serial.println("Unknown");
68         };
69 
70         if (b[0]==0)
71         {
72             Serial.println("Yes");
73         };
74 
75         return(0);
76     };
77 
78 };
79 
80 
81 // Source node. It is getting audio data from the PDM driver
82 template<typename OUT, int outputSize> class Source;
83 
84 template<int outputSize>
85 class Source<q15_t,outputSize>: public GenericSource<q15_t,outputSize>
86 {
87 public:
Source(FIFOBase<q15_t> & dst)88     Source(FIFOBase<q15_t> &dst):GenericSource<q15_t,outputSize>(dst)
89     {
90 
91     };
92 
run()93     int run(){
94 
95         #if defined(DEBUG)
96         Serial.println("==== Source");
97         #endif
98         q15_t *b=this->getWriteBuffer();
99 
100         // We wait until enough samples are available.
101         // In a future version we may experiment with sleeping the board
102         while(samplesRead<outputSize)
103         {
104             #if defined(DEBUG)
105               Serial.print("Sample reads ");
106               Serial.println(samplesRead);
107             #endif
108         };
109 
110         #if defined(DEBUG)
111         Serial.println("Received");
112         #endif
113 
114         // We get the samples and update the
115         // sampleBuffer.
116         // Since this buffer is also accessed by the IRQ, we need to disable it
117         NVIC_DisableIRQ(PDM_IRQn);
118         memcpy(b,sampleBuffer,sizeof(q15_t)*outputSize);
119         if ((samplesRead-outputSize) > 0)
120         {
121             memmove(sampleBuffer,sampleBuffer+outputSize,sizeof(q15_t)*(samplesRead-outputSize));
122         }
123         samplesRead = samplesRead - outputSize;
124         NVIC_EnableIRQ(PDM_IRQn);
125 
126         #if defined(DEBUG)
127         Serial.print("After read : Sample reads ");
128         Serial.println(samplesRead);
129         #endif
130 
131 
132         return(0);
133     };
134 
135 
136 };
137 
138 template<typename IN, int inputSize,typename OUT,int outputSize> class FIR;
139 
140 
141 // FIR node
142 template<int inputSize>
143 class FIR<q15_t,inputSize,q15_t,inputSize>: public GenericNode<q15_t,inputSize,q15_t,inputSize>
144 {
145 public:
FIR(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst)146     FIR(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst):GenericNode<q15_t,inputSize,q15_t,inputSize>(src,dst){
147         int blockSize=inputSize;
148         int numTaps=10;
149         int stateLength = numTaps + blockSize - 1;
150 
151         state=(q15_t*)malloc(stateLength * sizeof(q15_t*));
152     };
153 
run()154     int run(){
155         #if defined(DEBUG)
156         Serial.println("==== FIR");
157         #endif
158         q15_t *a=this->getReadBuffer();
159         q15_t *b=this->getWriteBuffer();
160         int blockSize=inputSize;
161         int stateLength = NUMTAPS + blockSize - 1;
162 
163         arm_status status=arm_fir_init_q15(&(this->firq15),NUMTAPS,fir_coefs,state,blockSize);
164 
165         arm_fir_q15(&(this->firq15),a,b,blockSize);
166         return(0);
167     };
168 
169 arm_fir_instance_q15 firq15;
170 q15_t *state;
171 
172 };
173 
174 /* Not available in the older CMSIS-DSP version provided with Arduino.
175 So we copy the definition here */
176 
arm_divide_q15(q15_t numerator,q15_t denominator,q15_t * quotient,int16_t * shift)177 arm_status arm_divide_q15(q15_t numerator,
178   q15_t denominator,
179   q15_t *quotient,
180   int16_t *shift)
181 {
182   int16_t sign=0;
183   q31_t temp;
184   int16_t shiftForNormalizing;
185 
186   *shift = 0;
187 
188   sign = (numerator>>15) ^ (denominator>>15);
189 
190   if (denominator == 0)
191   {
192      if (sign)
193      {
194         *quotient = 0x8000;
195      }
196      else
197      {
198         *quotient = 0x7FFF;
199      }
200      return(ARM_MATH_NANINF);
201   }
202 
203   numerator = abs(numerator);
204   denominator = abs(denominator);
205 
206   temp = ((q31_t)numerator << 15) / ((q31_t)denominator);
207 
208   shiftForNormalizing= 17 - __CLZ(temp);
209   if (shiftForNormalizing > 0)
210   {
211      *shift = shiftForNormalizing;
212      temp = temp >> shiftForNormalizing;
213   }
214 
215   if (sign)
216   {
217     temp = -temp;
218   }
219 
220   *quotient=temp;
221 
222   return(ARM_MATH_SUCCESS);
223 }
224 
225 
226 // We similar to the Python implementation
dsp_zcr_q15(q15_t * w,int blockSize)227 q15_t dsp_zcr_q15(q15_t *w,int blockSize)
228 {
229     q15_t m;
230     arm_mean_q15(w,blockSize,&m);
231 
232     // Negate can saturate so we use CMSIS-DSP function which is working on array (and we have a scalar)
233     arm_negate_q15(&m,&m,1);
234     arm_offset_q15(w,m,w,blockSize);
235 
236     int k=0;
237     for(int i=0;i<blockSize-1;i++)
238     {
239          int f = w[i];
240          int g = w[i+1];
241          if ((((f>0) && (g<0)) || ((f<0) && (g>0))) && g>f)
242          {
243             k++;
244          }
245     }
246 
247 
248     // k < len(f) so shift should be 0 except when k == len(f)
249     // When k==len(f) normally quotient is 0x4000 and shift 1 and we convert
250     // this to 0x7FFF
251 
252     q15_t quotient;
253     int16_t shift;
254 
255     arm_status status=arm_divide_q15(k,blockSize-1,&quotient,&shift);
256 
257     if (shift==1)
258     {
259         arm_shift_q15(&quotient,shift,&quotient,1);
260         return(quotient);
261     }
262     else
263     {
264         return(quotient);
265     }
266 };
267 
268 template<typename IN, int inputSize,typename OUT,int outputSize> class Feature;
269 
270 template<int inputSize>
271 class Feature<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
272 {
273 public:
Feature(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst,const q15_t * window)274     Feature(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,const q15_t *window):
275        GenericNode<q15_t,inputSize,q15_t,1>(src,dst),mWindow(window){
276     };
277 
run()278     int run(){
279         #if defined(DEBUG)
280         Serial.println("==== Feature");
281         #endif
282         q15_t *a=this->getReadBuffer();
283         q15_t *b=this->getWriteBuffer();
284 
285         arm_mult_q15(a,this->mWindow,a,inputSize);
286 
287         b[0] = dsp_zcr_q15(a,inputSize);
288 
289         return(0);
290     };
291 
292 const q15_t* mWindow;
293 
294 };
295 
296 template<typename IN, int inputSize,typename OUT,int outputSize> class KWS;
297 
298 template<int inputSize>
299 class KWS<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
300 {
301 public:
KWS(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst,const q15_t * coef_q15,const int coef_shift,const q15_t intercept_q15,const int intercept_shift)302     KWS(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,
303     const q15_t* coef_q15,
304     const int coef_shift,
305     const q15_t intercept_q15,
306     const int intercept_shift):GenericNode<q15_t,inputSize,q15_t,1>(src,dst),
307     mCoef_q15(coef_q15),
308     mCoef_shift(coef_shift),
309     mIntercept_q15(intercept_q15),
310     mIntercept_shift(intercept_shift)
311     {
312 
313     };
314 
run()315     int run(){
316         #if defined(DEBUG)
317         Serial.println("==== KWS");
318         #endif
319         q15_t *a=this->getReadBuffer();
320         q15_t *b=this->getWriteBuffer();
321 
322         q63_t res;
323         arm_dot_prod_q15(this->mCoef_q15,a,inputSize,&res);
324 
325         q15_t scaled;
326         arm_shift_q15(&(this->mIntercept_q15),this->mIntercept_shift-this->mCoef_shift,&scaled,1);
327         // Because dot prod output is in Q34.30
328         // and ret is on 64 bits
329         q63_t scaled_Q30 = (q63_t)(scaled) << 15;
330 
331         res = res + scaled_Q30;
332 
333         if (res<0)
334         {
335             b[0]=-1;
336         }
337         else
338         {
339             b[0]=0;
340         }
341 
342 
343         return(0);
344     };
345 
346 const q15_t* mCoef_q15;
347 const int mCoef_shift;
348 const q15_t mIntercept_q15;
349 const int mIntercept_shift;
350 
351 };
352 #endif
353