1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: AppNodes.h
4 * Description: Application nodes for the C compute graph
5 *
6 * $Date: 16 March 2022
7 *
8 * Target Processor: Cortex-M and Cortex-A cores
9 * -------------------------------------------------------------------- */
10 /*
11 * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
12 *
13 * SPDX-License-Identifier: Apache-2.0
14 *
15 * Licensed under the Apache License, Version 2.0 (the License); you may
16 * not use this file except in compliance with the License.
17 * You may obtain a copy of the License at
18 *
19 * www.apache.org/licenses/LICENSE-2.0
20 *
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
23 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 */
27
28
29 #ifndef _APPNODES_H_
30 #define _APPNODES_H_
31
32 #include <hal/nrf_pdm.h>
33 #include "coef.h"
34
35 #include <Arduino.h>
36 #include <HardwareSerial.h>
37
38 // When enabled, lots of additional trace is generated
39 //#define DEBUG
40
41 // Buffer to read samples into, each sample is 16-bits
42 // This is written by the PDM driver
43 extern short sampleBuffer[AUDIOBUFFER_LENGTH];
44
45 // Number of audio samples available in the audio buffer
46 extern volatile int samplesRead;
47
48 // Sink node. It is just printing the recognized word
49 template<typename IN, int inputSize> class Sink;
50
51 template<int inputSize>
52 class Sink<q15_t, inputSize>: public GenericSink<q15_t, inputSize>
53 {
54 public:
Sink(FIFOBase<q15_t> & src)55 Sink(FIFOBase<q15_t> &src):GenericSink<q15_t,inputSize>(src){};
56
run()57 int run()
58 {
59 #if defined(DEBUG)
60 Serial.println("==== Sink");
61 #endif
62
63 q15_t *b=this->getReadBuffer();
64
65 if (b[0]==-1)
66 {
67 Serial.println("Unknown");
68 };
69
70 if (b[0]==0)
71 {
72 Serial.println("Yes");
73 };
74
75 return(0);
76 };
77
78 };
79
80
81 // Source node. It is getting audio data from the PDM driver
82 template<typename OUT, int outputSize> class Source;
83
84 template<int outputSize>
85 class Source<q15_t,outputSize>: public GenericSource<q15_t,outputSize>
86 {
87 public:
Source(FIFOBase<q15_t> & dst)88 Source(FIFOBase<q15_t> &dst):GenericSource<q15_t,outputSize>(dst)
89 {
90
91 };
92
run()93 int run(){
94
95 #if defined(DEBUG)
96 Serial.println("==== Source");
97 #endif
98 q15_t *b=this->getWriteBuffer();
99
100 // We wait until enough samples are available.
101 // In a future version we may experiment with sleeping the board
102 while(samplesRead<outputSize)
103 {
104 #if defined(DEBUG)
105 Serial.print("Sample reads ");
106 Serial.println(samplesRead);
107 #endif
108 };
109
110 #if defined(DEBUG)
111 Serial.println("Received");
112 #endif
113
114 // We get the samples and update the
115 // sampleBuffer.
116 // Since this buffer is also accessed by the IRQ, we need to disable it
117 NVIC_DisableIRQ(PDM_IRQn);
118 memcpy(b,sampleBuffer,sizeof(q15_t)*outputSize);
119 if ((samplesRead-outputSize) > 0)
120 {
121 memmove(sampleBuffer,sampleBuffer+outputSize,sizeof(q15_t)*(samplesRead-outputSize));
122 }
123 samplesRead = samplesRead - outputSize;
124 NVIC_EnableIRQ(PDM_IRQn);
125
126 #if defined(DEBUG)
127 Serial.print("After read : Sample reads ");
128 Serial.println(samplesRead);
129 #endif
130
131
132 return(0);
133 };
134
135
136 };
137
138 template<typename IN, int inputSize,typename OUT,int outputSize> class FIR;
139
140
141 // FIR node
142 template<int inputSize>
143 class FIR<q15_t,inputSize,q15_t,inputSize>: public GenericNode<q15_t,inputSize,q15_t,inputSize>
144 {
145 public:
FIR(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst)146 FIR(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst):GenericNode<q15_t,inputSize,q15_t,inputSize>(src,dst){
147 int blockSize=inputSize;
148 int numTaps=10;
149 int stateLength = numTaps + blockSize - 1;
150
151 state=(q15_t*)malloc(stateLength * sizeof(q15_t*));
152 };
153
run()154 int run(){
155 #if defined(DEBUG)
156 Serial.println("==== FIR");
157 #endif
158 q15_t *a=this->getReadBuffer();
159 q15_t *b=this->getWriteBuffer();
160 int blockSize=inputSize;
161 int stateLength = NUMTAPS + blockSize - 1;
162
163 arm_status status=arm_fir_init_q15(&(this->firq15),NUMTAPS,fir_coefs,state,blockSize);
164
165 arm_fir_q15(&(this->firq15),a,b,blockSize);
166 return(0);
167 };
168
169 arm_fir_instance_q15 firq15;
170 q15_t *state;
171
172 };
173
174 /* Not available in the older CMSIS-DSP version provided with Arduino.
175 So we copy the definition here */
176
arm_divide_q15(q15_t numerator,q15_t denominator,q15_t * quotient,int16_t * shift)177 arm_status arm_divide_q15(q15_t numerator,
178 q15_t denominator,
179 q15_t *quotient,
180 int16_t *shift)
181 {
182 int16_t sign=0;
183 q31_t temp;
184 int16_t shiftForNormalizing;
185
186 *shift = 0;
187
188 sign = (numerator>>15) ^ (denominator>>15);
189
190 if (denominator == 0)
191 {
192 if (sign)
193 {
194 *quotient = 0x8000;
195 }
196 else
197 {
198 *quotient = 0x7FFF;
199 }
200 return(ARM_MATH_NANINF);
201 }
202
203 numerator = abs(numerator);
204 denominator = abs(denominator);
205
206 temp = ((q31_t)numerator << 15) / ((q31_t)denominator);
207
208 shiftForNormalizing= 17 - __CLZ(temp);
209 if (shiftForNormalizing > 0)
210 {
211 *shift = shiftForNormalizing;
212 temp = temp >> shiftForNormalizing;
213 }
214
215 if (sign)
216 {
217 temp = -temp;
218 }
219
220 *quotient=temp;
221
222 return(ARM_MATH_SUCCESS);
223 }
224
225
226 // We similar to the Python implementation
dsp_zcr_q15(q15_t * w,int blockSize)227 q15_t dsp_zcr_q15(q15_t *w,int blockSize)
228 {
229 q15_t m;
230 arm_mean_q15(w,blockSize,&m);
231
232 // Negate can saturate so we use CMSIS-DSP function which is working on array (and we have a scalar)
233 arm_negate_q15(&m,&m,1);
234 arm_offset_q15(w,m,w,blockSize);
235
236 int k=0;
237 for(int i=0;i<blockSize-1;i++)
238 {
239 int f = w[i];
240 int g = w[i+1];
241 if ((((f>0) && (g<0)) || ((f<0) && (g>0))) && g>f)
242 {
243 k++;
244 }
245 }
246
247
248 // k < len(f) so shift should be 0 except when k == len(f)
249 // When k==len(f) normally quotient is 0x4000 and shift 1 and we convert
250 // this to 0x7FFF
251
252 q15_t quotient;
253 int16_t shift;
254
255 arm_status status=arm_divide_q15(k,blockSize-1,"ient,&shift);
256
257 if (shift==1)
258 {
259 arm_shift_q15("ient,shift,"ient,1);
260 return(quotient);
261 }
262 else
263 {
264 return(quotient);
265 }
266 };
267
268 template<typename IN, int inputSize,typename OUT,int outputSize> class Feature;
269
270 template<int inputSize>
271 class Feature<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
272 {
273 public:
Feature(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst,const q15_t * window)274 Feature(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,const q15_t *window):
275 GenericNode<q15_t,inputSize,q15_t,1>(src,dst),mWindow(window){
276 };
277
run()278 int run(){
279 #if defined(DEBUG)
280 Serial.println("==== Feature");
281 #endif
282 q15_t *a=this->getReadBuffer();
283 q15_t *b=this->getWriteBuffer();
284
285 arm_mult_q15(a,this->mWindow,a,inputSize);
286
287 b[0] = dsp_zcr_q15(a,inputSize);
288
289 return(0);
290 };
291
292 const q15_t* mWindow;
293
294 };
295
296 template<typename IN, int inputSize,typename OUT,int outputSize> class KWS;
297
298 template<int inputSize>
299 class KWS<q15_t,inputSize,q15_t,1>: public GenericNode<q15_t,inputSize,q15_t,1>
300 {
301 public:
KWS(FIFOBase<q15_t> & src,FIFOBase<q15_t> & dst,const q15_t * coef_q15,const int coef_shift,const q15_t intercept_q15,const int intercept_shift)302 KWS(FIFOBase<q15_t> &src,FIFOBase<q15_t> &dst,
303 const q15_t* coef_q15,
304 const int coef_shift,
305 const q15_t intercept_q15,
306 const int intercept_shift):GenericNode<q15_t,inputSize,q15_t,1>(src,dst),
307 mCoef_q15(coef_q15),
308 mCoef_shift(coef_shift),
309 mIntercept_q15(intercept_q15),
310 mIntercept_shift(intercept_shift)
311 {
312
313 };
314
run()315 int run(){
316 #if defined(DEBUG)
317 Serial.println("==== KWS");
318 #endif
319 q15_t *a=this->getReadBuffer();
320 q15_t *b=this->getWriteBuffer();
321
322 q63_t res;
323 arm_dot_prod_q15(this->mCoef_q15,a,inputSize,&res);
324
325 q15_t scaled;
326 arm_shift_q15(&(this->mIntercept_q15),this->mIntercept_shift-this->mCoef_shift,&scaled,1);
327 // Because dot prod output is in Q34.30
328 // and ret is on 64 bits
329 q63_t scaled_Q30 = (q63_t)(scaled) << 15;
330
331 res = res + scaled_Q30;
332
333 if (res<0)
334 {
335 b[0]=-1;
336 }
337 else
338 {
339 b[0]=0;
340 }
341
342
343 return(0);
344 };
345
346 const q15_t* mCoef_q15;
347 const int mCoef_shift;
348 const q15_t mIntercept_q15;
349 const int mIntercept_shift;
350
351 };
352 #endif
353