1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
3 *
4 * $Date:        12. March 2014
5 * $Revision: 	V1.4.4
6 *
7 * Project: 	    CMSIS DSP Library
8 * Title:	    arm_math.h
9 *
10 * Description:	Public header file for CMSIS DSP Library
11 *
12 * Target Processor: Cortex-M7/Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *   - Redistributions of source code must retain the above copyright
18 *     notice, this list of conditions and the following disclaimer.
19 *   - Redistributions in binary form must reproduce the above copyright
20 *     notice, this list of conditions and the following disclaimer in
21 *     the documentation and/or other materials provided with the
22 *     distribution.
23 *   - Neither the name of ARM LIMITED nor the names of its contributors
24 *     may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39  * -------------------------------------------------------------------- */
40 
41 /**
42    \mainpage CMSIS DSP Software Library
43    *
44    * Introduction
45    * ------------
46    *
47    * This user manual describes the CMSIS DSP software library,
48    * a suite of common signal processing functions for use on Cortex-M processor based devices.
49    *
50    * The library is divided into a number of functions each covering a specific category:
51    * - Basic math functions
52    * - Fast math functions
53    * - Complex math functions
54    * - Filters
55    * - Matrix functions
56    * - Transforms
57    * - Motor control functions
58    * - Statistical functions
59    * - Support functions
60    * - Interpolation functions
61    *
   * The library has separate functions for operating on 8-bit integers, 16-bit integers,
   * 32-bit integers and 32-bit floating-point values.
64    *
65    * Using the Library
66    * ------------
67    *
68    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
69    * - arm_cortexM7lfdp_math.lib (Little endian and Double Precision Floating Point Unit on Cortex-M7)
70    * - arm_cortexM7bfdp_math.lib (Big endian and Double Precision Floating Point Unit on Cortex-M7)
71    * - arm_cortexM7lfsp_math.lib (Little endian and Single Precision Floating Point Unit on Cortex-M7)
72    * - arm_cortexM7bfsp_math.lib (Big endian and Single Precision Floating Point Unit on Cortex-M7)
73    * - arm_cortexM7l_math.lib (Little endian on Cortex-M7)
74    * - arm_cortexM7b_math.lib (Big endian on Cortex-M7)
75    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
76    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
77    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
78    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
79    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
80    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
81    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0 / CortexM0+)
82    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0 / CortexM0+)
83    *
   * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
   * Simply include this file, link the appropriate library in the application and begin calling the library functions. The library provides a single
   * public header file <code>arm_math.h</code> for Cortex-M7/M4/M3/M0/M0+ in both little endian and big endian configurations. The same header file is used for the floating point unit (FPU) variants.
   * Define the appropriate preprocessor macro ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3,
   * ARM_MATH_CM0 or ARM_MATH_CM0PLUS in the application, depending on the target processor.
89    *
90    * Examples
91    * --------
92    *
93    * The library ships with a number of examples which demonstrate how to use the library functions.
94    *
95    * Toolchain Support
96    * ------------
97    *
98    * The library has been developed and tested with MDK-ARM version 4.60.
99    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
100    *
101    * Building the Library
102    * ------------
103    *
   * The library installer contains a project file to rebuild the libraries with the MDK-ARM toolchain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
105    * - arm_cortexM_math.uvproj
106    *
107    *
108    * The libraries can be built by opening the arm_cortexM_math.uvproj project in MDK-ARM, selecting a specific target, and defining the optional pre processor MACROs detailed above.
109    *
110    * Pre-processor Macros
111    * ------------
112    *
   * Each library project has different pre-processor macros.
114    *
115    * - UNALIGNED_SUPPORT_DISABLE:
116    *
   * Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
118    *
119    * - ARM_MATH_BIG_ENDIAN:
120    *
121    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
122    *
123    * - ARM_MATH_MATRIX_CHECK:
124    *
125    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
126    *
127    * - ARM_MATH_ROUNDING:
128    *
129    * Define macro ARM_MATH_ROUNDING for rounding on support functions
130    *
131    * - ARM_MATH_CMx:
132    *
   * Define macro ARM_MATH_CM7 for building the library on a Cortex-M7 target, ARM_MATH_CM4 for a Cortex-M4 target, ARM_MATH_CM3 for a Cortex-M3 target,
   * ARM_MATH_CM0 for a Cortex-M0 target and ARM_MATH_CM0PLUS for a Cortex-M0+ target.
135    *
136    * - __FPU_PRESENT:
137    *
   * Set macro __FPU_PRESENT = 1 when building for targets with an FPU. Enable this macro for the M4bf and M4lf libraries.
139    *
140    * <hr>
141    * CMSIS-DSP in ARM::CMSIS Pack
142    * -----------------------------
143    *
144    * The following files relevant to CMSIS-DSP are present in the <b>ARM::CMSIS</b> Pack directories:
145    * |File/Folder                   |Content                                                                 |
146    * |------------------------------|------------------------------------------------------------------------|
147    * |\b CMSIS\\Documentation\\DSP  | This documentation                                                     |
148    * |\b CMSIS\\DSP_Lib             | Software license agreement (license.txt)                               |
149    * |\b CMSIS\\DSP_Lib\\Examples   | Example projects demonstrating the usage of the library functions      |
150    * |\b CMSIS\\DSP_Lib\\Source     | Source files for rebuilding the library                                |
151    *
152    * <hr>
153    * Revision History of CMSIS-DSP
154    * ------------
155    * Please refer to \ref ChangeLog_pg.
156    *
157    * Copyright Notice
158    * ------------
159    *
160    * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
161    */
162 
163 
164 /**
165  * @defgroup groupMath Basic Math Functions
166  */
167 
168 /**
169  * @defgroup groupFastMath Fast Math Functions
170  * This set of functions provides a fast approximation to sine, cosine, and square root.
171  * As compared to most of the other functions in the CMSIS math library, the fast math functions
172  * operate on individual values and not arrays.
173  * There are separate functions for Q15, Q31, and floating-point data.
174  *
175  */
176 
177 /**
178  * @defgroup groupCmplxMath Complex Math Functions
179  * This set of functions operates on complex data vectors.
180  * The data in the complex arrays is stored in an interleaved fashion
181  * (real, imag, real, imag, ...).
182  * In the API functions, the number of samples in a complex array refers
183  * to the number of complex values; the array contains twice this number of
184  * real values.
185  */
186 
187 /**
188  * @defgroup groupFilters Filtering Functions
189  */
190 
191 /**
192  * @defgroup groupMatrix Matrix Functions
193  *
194  * This set of functions provides basic matrix math operations.
195  * The functions operate on matrix data structures.  For example,
196  * the type
197  * definition for the floating-point matrix structure is shown
198  * below:
199  * <pre>
200  *     typedef struct
201  *     {
202  *       uint16_t numRows;     // number of rows of the matrix.
203  *       uint16_t numCols;     // number of columns of the matrix.
204  *       float32_t *pData;     // points to the data of the matrix.
205  *     } arm_matrix_instance_f32;
206  * </pre>
207  * There are similar definitions for Q15 and Q31 data types.
208  *
209  * The structure specifies the size of the matrix and then points to
210  * an array of data.  The array is of size <code>numRows X numCols</code>
211  * and the values are arranged in row order.  That is, the
212  * matrix element (i, j) is stored at:
213  * <pre>
214  *     pData[i*numCols + j]
215  * </pre>
216  *
217  * \par Init Functions
218  * There is an associated initialization function for each type of matrix
219  * data structure.
220  * The initialization function sets the values of the internal structure fields.
221  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
222  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
223  *
224  * \par
225  * Use of the initialization function is optional. However, if initialization function is used
226  * then the instance structure cannot be placed into a const data section.
227  * To place the instance structure in a const data
228  * section, manually initialize the data structure.  For example:
229  * <pre>
230  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
231  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
232  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
233  * </pre>
234  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
235  * specifies the number of columns, and <code>pData</code> points to the
236  * data array.
237  *
238  * \par Size Checking
239  * By default all of the matrix functions perform size checking on the input and
240  * output matrices.  For example, the matrix addition function verifies that the
241  * two input matrices and the output matrix all have the same number of rows and
242  * columns.  If the size check fails the functions return:
243  * <pre>
244  *     ARM_MATH_SIZE_MISMATCH
245  * </pre>
246  * Otherwise the functions return
247  * <pre>
248  *     ARM_MATH_SUCCESS
249  * </pre>
250  * There is some overhead associated with this matrix size checking.
251  * The matrix size checking is enabled via the \#define
252  * <pre>
253  *     ARM_MATH_MATRIX_CHECK
254  * </pre>
255  * within the library project settings.  By default this macro is defined
256  * and size checking is enabled.  By changing the project settings and
257  * undefining this macro size checking is eliminated and the functions
258  * run a bit faster.  With size checking disabled the functions always
259  * return <code>ARM_MATH_SUCCESS</code>.
260  */
261 
262 /**
263  * @defgroup groupTransforms Transform Functions
264  */
265 
266 /**
267  * @defgroup groupController Controller Functions
268  */
269 
270 /**
271  * @defgroup groupStats Statistics Functions
272  */
273 /**
274  * @defgroup groupSupport Support Functions
275  */
276 
277 /**
278  * @defgroup groupInterpolation Interpolation Functions
279  * These functions perform 1- and 2-dimensional interpolation of data.
280  * Linear interpolation is used for 1-dimensional data and
281  * bilinear interpolation is used for 2-dimensional data.
282  */
283 
284 /**
285  * @defgroup groupExamples Examples
286  */
287 #ifndef _ARM_MATH_H
288 #define _ARM_MATH_H
289 
290 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
291 
292 #if defined(ARM_MATH_CM7)
293   #include "core_cm7.h"
294 #elif defined (ARM_MATH_CM4)
295   #include "core_cm4.h"
296 #elif defined (ARM_MATH_CM3)
297   #include "core_cm3.h"
298 #elif defined (ARM_MATH_CM0)
299   #include "core_cm0.h"
300 #define ARM_MATH_CM0_FAMILY
301   #elif defined (ARM_MATH_CM0PLUS)
302 #include "core_cm0plus.h"
303   #define ARM_MATH_CM0_FAMILY
304 #else
305   #error "Define according the used Cortex core ARM_MATH_CM7, ARM_MATH_CM4, ARM_MATH_CM3, ARM_MATH_CM0PLUS or ARM_MATH_CM0"
306 #endif
307 
308 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
309 #include "string.h"
310 #include "math.h"
311 #ifdef	__cplusplus
312 extern "C"
313 {
314 #endif
315 
316 
317   /**
318    * @brief Macros required for reciprocal calculation in Normalized LMS
319    */
320 
321 #define DELTA_Q31 			(0x100)
322 #define DELTA_Q15 			0x5
323 #define INDEX_MASK 			0x0000003F
324 #ifndef PI
325 #define PI					3.14159265358979f
326 #endif
327 
328   /**
329    * @brief Macros required for SINE and COSINE Fast math approximations
330    */
331 
332 #define FAST_MATH_TABLE_SIZE  512
333 #define FAST_MATH_Q31_SHIFT   (32 - 10)
334 #define FAST_MATH_Q15_SHIFT   (16 - 10)
335 #define CONTROLLER_Q31_SHIFT  (32 - 9)
336 #define TABLE_SIZE  256
337 #define TABLE_SPACING_Q31	   0x400000
338 #define TABLE_SPACING_Q15	   0x80
339 
340   /**
341    * @brief Macros required for SINE and COSINE Controller functions
342    */
343   /* 1.31(q31) Fixed value of 2/360 */
344   /* -1 to +1 is divided into 360 values so total spacing is (2/360) */
345 #define INPUT_SPACING			0xB60B61
346 
347   /**
348    * @brief Macro for Unaligned Support
349    */
350 #ifndef UNALIGNED_SUPPORT_DISABLE
351     #define ALIGN4
352 #else
353   #if defined  (__GNUC__)
354     #define ALIGN4 __attribute__((aligned(4)))
355   #else
356     #define ALIGN4 __align(4)
357   #endif
358 #endif	/*	#ifndef UNALIGNED_SUPPORT_DISABLE	*/
359 
360   /**
361    * @brief Error status returned by some functions in the library.
362    */
363 
364   typedef enum
365   {
366     ARM_MATH_SUCCESS = 0,                /**< No error */
367     ARM_MATH_ARGUMENT_ERROR = -1,        /**< One or more arguments are incorrect */
368     ARM_MATH_LENGTH_ERROR = -2,          /**< Length of data buffer is incorrect */
369     ARM_MATH_SIZE_MISMATCH = -3,         /**< Size of matrices is not compatible with the operation. */
370     ARM_MATH_NANINF = -4,                /**< Not-a-number (NaN) or infinity is generated */
371     ARM_MATH_SINGULAR = -5,              /**< Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
372     ARM_MATH_TEST_FAILURE = -6           /**< Test Failed  */
373   } arm_status;
374 
375   /**
376    * @brief 8-bit fractional data type in 1.7 format.
377    */
378   typedef int8_t q7_t;
379 
380   /**
381    * @brief 16-bit fractional data type in 1.15 format.
382    */
383   typedef int16_t q15_t;
384 
385   /**
386    * @brief 32-bit fractional data type in 1.31 format.
387    */
388   typedef int32_t q31_t;
389 
390   /**
391    * @brief 64-bit fractional data type in 1.63 format.
392    */
393   typedef int64_t q63_t;
394 
395   /**
396    * @brief 32-bit floating-point type definition.
397    */
398   typedef float float32_t;
399 
400   /**
401    * @brief 64-bit floating-point type definition.
402    */
403   typedef double float64_t;
404 
405   /**
406    * @brief definition to read/write two 16 bit values.
407    */
408 #if defined __CC_ARM
409 #define __SIMD32_TYPE int32_t __packed
410 #define CMSIS_UNUSED __attribute__((unused))
411 #elif defined __ICCARM__
412 #define CMSIS_UNUSED
413 #define __SIMD32_TYPE int32_t __packed
414 #elif defined __GNUC__
415 #define __SIMD32_TYPE int32_t
416 #define CMSIS_UNUSED __attribute__((unused))
417 #elif defined __CSMC__			/* Cosmic */
418 #define CMSIS_UNUSED
419 #define __SIMD32_TYPE int32_t
420 #else
421 #error Unknown compiler
422 #endif
423 
424 #define __SIMD32(addr)  (*(__SIMD32_TYPE **) & (addr))
425 #define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))
426 
427 #define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))
428 
429 #define __SIMD64(addr)  (*(int64_t **) & (addr))
430 
431 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
432   /**
433    * @brief definition to pack two 16 bit values.
434    */
435 #define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0x0000FFFF) | \
436                                          (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000)  )
437 #define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<  0) & (int32_t)0xFFFF0000) | \
438                                          (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF)  )
439 
440 #endif
441 
442 
443    /**
444    * @brief definition to pack four 8 bit values.
445    */
446 #ifndef ARM_MATH_BIG_ENDIAN
447 
448 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) |	\
449                                 (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) |	\
450 							    (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) |	\
451 							    (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )
452 #else
453 
454 #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) |	\
455                                 (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) |	\
456 							    (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) |	\
457 							    (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )
458 
459 #endif
460 
461 
462   /**
463    * @brief Clips Q63 to Q31 values.
464    */
clip_q63_to_q31(q63_t x)465   static __INLINE q31_t clip_q63_to_q31(
466   q63_t x)
467   {
468     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
469       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
470   }
471 
472   /**
473    * @brief Clips Q63 to Q15 values.
474    */
clip_q63_to_q15(q63_t x)475   static __INLINE q15_t clip_q63_to_q15(
476   q63_t x)
477   {
478     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
479       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
480   }
481 
482   /**
483    * @brief Clips Q31 to Q7 values.
484    */
clip_q31_to_q7(q31_t x)485   static __INLINE q7_t clip_q31_to_q7(
486   q31_t x)
487   {
488     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
489       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
490   }
491 
492   /**
493    * @brief Clips Q31 to Q15 values.
494    */
clip_q31_to_q15(q31_t x)495   static __INLINE q15_t clip_q31_to_q15(
496   q31_t x)
497   {
498     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
499       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
500   }
501 
502   /**
503    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
504    */
505 
mult32x64(q63_t x,q31_t y)506   static __INLINE q63_t mult32x64(
507   q63_t x,
508   q31_t y)
509   {
510     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
511             (((q63_t) (x >> 32) * y)));
512   }
513 
514 
/* For the M0 family built with the ARM compiler, __CLZ maps to __clz. */
#if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
#define __CLZ __clz
#endif

#if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) || defined (__TASKING__) )

  /**
   * @brief C implementation of count-leading-zeros for the M0 family.
   * @param[in] data  value whose leading zero bits are counted
   * @return number of zero bits above the most significant set bit;
   *         32 when data is zero
   */
  static __INLINE uint32_t __CLZ(
  q31_t data)
  {
    uint32_t count = 0u;
    uint32_t mask = 0x80000000u;

    /* Without this check the scan below would never find a set bit */
    if (data == 0)
    {
      return (32u);
    }

    while ((data & mask) == 0)
    {
      count += 1u;
      mask = mask >> 1u;
    }

    return (count);
  }

#endif
542 
543   /**
544    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
545    */
546 
arm_recip_q31(q31_t in,q31_t * dst,q31_t * pRecipTable)547   static __INLINE uint32_t arm_recip_q31(
548   q31_t in,
549   q31_t * dst,
550   q31_t * pRecipTable)
551   {
552 
553     uint32_t out, tempVal;
554     uint32_t index, i;
555     uint32_t signBits;
556 
557     if(in > 0)
558     {
559       signBits = __CLZ(in) - 1;
560     }
561     else
562     {
563       signBits = __CLZ(-in) - 1;
564     }
565 
566     /* Convert input sample to 1.31 format */
567     in = in << signBits;
568 
569     /* calculation of index for initial approximated Val */
570     index = (uint32_t) (in >> 24u);
571     index = (index & INDEX_MASK);
572 
573     /* 1.31 with exp 1 */
574     out = pRecipTable[index];
575 
576     /* calculation of reciprocal value */
577     /* running approximation for two iterations */
578     for (i = 0u; i < 2u; i++)
579     {
580       tempVal = (q31_t) (((q63_t) in * out) >> 31u);
581       tempVal = 0x7FFFFFFF - tempVal;
582       /*      1.31 with exp 1 */
583       //out = (q31_t) (((q63_t) out * tempVal) >> 30u);
584       out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
585     }
586 
587     /* write output */
588     *dst = out;
589 
590     /* return num of signbits of out = 1/in value */
591     return (signBits + 1u);
592 
593   }
594 
595   /**
596    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
597    */
arm_recip_q15(q15_t in,q15_t * dst,q15_t * pRecipTable)598   static __INLINE uint32_t arm_recip_q15(
599   q15_t in,
600   q15_t * dst,
601   q15_t * pRecipTable)
602   {
603 
604     uint32_t out = 0, tempVal = 0;
605     uint32_t index = 0, i = 0;
606     uint32_t signBits = 0;
607 
608     if(in > 0)
609     {
610       signBits = __CLZ(in) - 17;
611     }
612     else
613     {
614       signBits = __CLZ(-in) - 17;
615     }
616 
617     /* Convert input sample to 1.15 format */
618     in = in << signBits;
619 
620     /* calculation of index for initial approximated Val */
621     index = in >> 8;
622     index = (index & INDEX_MASK);
623 
624     /*      1.15 with exp 1  */
625     out = pRecipTable[index];
626 
627     /* calculation of reciprocal value */
628     /* running approximation for two iterations */
629     for (i = 0; i < 2; i++)
630     {
631       tempVal = (q15_t) (((q31_t) in * out) >> 15);
632       tempVal = 0x7FFF - tempVal;
633       /*      1.15 with exp 1 */
634       out = (q15_t) (((q31_t) out * tempVal) >> 14);
635     }
636 
637     /* write output */
638     *dst = out;
639 
640     /* return num of signbits of out = 1/in value */
641     return (signBits + 1);
642 
643   }
644 
645 
646   /*
647    * @brief C custom defined intrinisic function for only M0 processors
648    */
649 #if defined(ARM_MATH_CM0_FAMILY)
650 
__SSAT(q31_t x,uint32_t y)651   static __INLINE q31_t __SSAT(
652   q31_t x,
653   uint32_t y)
654   {
655     int32_t posMax, negMin;
656     uint32_t i;
657 
658     posMax = 1;
659     for (i = 0; i < (y - 1); i++)
660     {
661       posMax = posMax * 2;
662     }
663 
664     if(x > 0)
665     {
666       posMax = (posMax - 1);
667 
668       if(x > posMax)
669       {
670         x = posMax;
671       }
672     }
673     else
674     {
675       negMin = -posMax;
676 
677       if(x < negMin)
678       {
679         x = negMin;
680       }
681     }
682     return (x);
683 
684 
685   }
686 
687 #endif /* end of ARM_MATH_CM0_FAMILY */
688 
689 
690 
691   /*
692    * @brief C custom defined intrinsic function for M3 and M0 processors
693    */
694 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
695 
696   /*
697    * @brief C custom defined QADD8 for M3 and M0 processors
698    */
__QADD8(q31_t x,q31_t y)699   static __INLINE q31_t __QADD8(
700   q31_t x,
701   q31_t y)
702   {
703 
704     q31_t sum;
705     q7_t r, s, t, u;
706 
707     r = (q7_t) x;
708     s = (q7_t) y;
709 
710     r = __SSAT((q31_t) (r + s), 8);
711     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
712     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
713     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
714 
715     sum =
716       (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
717       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
718 
719     return sum;
720 
721   }
722 
723   /*
724    * @brief C custom defined QSUB8 for M3 and M0 processors
725    */
__QSUB8(q31_t x,q31_t y)726   static __INLINE q31_t __QSUB8(
727   q31_t x,
728   q31_t y)
729   {
730 
731     q31_t sum;
732     q31_t r, s, t, u;
733 
734     r = (q7_t) x;
735     s = (q7_t) y;
736 
737     r = __SSAT((r - s), 8);
738     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
739     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
740     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
741 
742     sum =
743       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r &
744                                                                 0x000000FF);
745 
746     return sum;
747   }
748 
749   /*
750    * @brief C custom defined QADD16 for M3 and M0 processors
751    */
752 
753   /*
754    * @brief C custom defined QADD16 for M3 and M0 processors
755    */
__QADD16(q31_t x,q31_t y)756   static __INLINE q31_t __QADD16(
757   q31_t x,
758   q31_t y)
759   {
760 
761     q31_t sum;
762     q31_t r, s;
763 
764     r = (q15_t) x;
765     s = (q15_t) y;
766 
767     r = __SSAT(r + s, 16);
768     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
769 
770     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
771 
772     return sum;
773 
774   }
775 
776   /*
777    * @brief C custom defined SHADD16 for M3 and M0 processors
778    */
__SHADD16(q31_t x,q31_t y)779   static __INLINE q31_t __SHADD16(
780   q31_t x,
781   q31_t y)
782   {
783 
784     q31_t sum;
785     q31_t r, s;
786 
787     r = (q15_t) x;
788     s = (q15_t) y;
789 
790     r = ((r >> 1) + (s >> 1));
791     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
792 
793     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
794 
795     return sum;
796 
797   }
798 
799   /*
800    * @brief C custom defined QSUB16 for M3 and M0 processors
801    */
__QSUB16(q31_t x,q31_t y)802   static __INLINE q31_t __QSUB16(
803   q31_t x,
804   q31_t y)
805   {
806 
807     q31_t sum;
808     q31_t r, s;
809 
810     r = (q15_t) x;
811     s = (q15_t) y;
812 
813     r = __SSAT(r - s, 16);
814     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
815 
816     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
817 
818     return sum;
819   }
820 
821   /*
822    * @brief C custom defined SHSUB16 for M3 and M0 processors
823    */
__SHSUB16(q31_t x,q31_t y)824   static __INLINE q31_t __SHSUB16(
825   q31_t x,
826   q31_t y)
827   {
828 
829     q31_t diff;
830     q31_t r, s;
831 
832     r = (q15_t) x;
833     s = (q15_t) y;
834 
835     r = ((r >> 1) - (s >> 1));
836     s = (((x >> 17) - (y >> 17)) << 16);
837 
838     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
839 
840     return diff;
841   }
842 
843   /*
844    * @brief C custom defined QASX for M3 and M0 processors
845    */
__QASX(q31_t x,q31_t y)846   static __INLINE q31_t __QASX(
847   q31_t x,
848   q31_t y)
849   {
850 
851     q31_t sum = 0;
852 
853     sum =
854       ((sum +
855         clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) + (q15_t) y))) << 16) +
856       clip_q31_to_q15((q31_t) ((q15_t) x - (q15_t) (y >> 16)));
857 
858     return sum;
859   }
860 
861   /*
862    * @brief C custom defined SHASX for M3 and M0 processors
863    */
__SHASX(q31_t x,q31_t y)864   static __INLINE q31_t __SHASX(
865   q31_t x,
866   q31_t y)
867   {
868 
869     q31_t sum;
870     q31_t r, s;
871 
872     r = (q15_t) x;
873     s = (q15_t) y;
874 
875     r = ((r >> 1) - (y >> 17));
876     s = (((x >> 17) + (s >> 1)) << 16);
877 
878     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
879 
880     return sum;
881   }
882 
883 
884   /*
885    * @brief C custom defined QSAX for M3 and M0 processors
886    */
__QSAX(q31_t x,q31_t y)887   static __INLINE q31_t __QSAX(
888   q31_t x,
889   q31_t y)
890   {
891 
892     q31_t sum = 0;
893 
894     sum =
895       ((sum +
896         clip_q31_to_q15((q31_t) ((q15_t) (x >> 16) - (q15_t) y))) << 16) +
897       clip_q31_to_q15((q31_t) ((q15_t) x + (q15_t) (y >> 16)));
898 
899     return sum;
900   }
901 
902   /*
903    * @brief C custom defined SHSAX for M3 and M0 processors
904    */
__SHSAX(q31_t x,q31_t y)905   static __INLINE q31_t __SHSAX(
906   q31_t x,
907   q31_t y)
908   {
909 
910     q31_t sum;
911     q31_t r, s;
912 
913     r = (q15_t) x;
914     s = (q15_t) y;
915 
916     r = ((r >> 1) + (y >> 17));
917     s = (((x >> 17) - (s >> 1)) << 16);
918 
919     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
920 
921     return sum;
922   }
923 
924   /*
925    * @brief C custom defined SMUSDX for M3 and M0 processors
926    */
__SMUSDX(q31_t x,q31_t y)927   static __INLINE q31_t __SMUSDX(
928   q31_t x,
929   q31_t y)
930   {
931 
932     return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) -
933                      ((q15_t) (x >> 16) * (q15_t) y)));
934   }
935 
936   /*
937    * @brief C custom defined SMUADX for M3 and M0 processors
938    */
__SMUADX(q31_t x,q31_t y)939   static __INLINE q31_t __SMUADX(
940   q31_t x,
941   q31_t y)
942   {
943 
944     return ((q31_t) (((q15_t) x * (q15_t) (y >> 16)) +
945                      ((q15_t) (x >> 16) * (q15_t) y)));
946   }
947 
948   /*
949    * @brief C custom defined QADD for M3 and M0 processors
950    */
__QADD(q31_t x,q31_t y)951   static __INLINE q31_t __QADD(
952   q31_t x,
953   q31_t y)
954   {
955     return clip_q63_to_q31((q63_t) x + y);
956   }
957 
958   /*
959    * @brief C custom defined QSUB for M3 and M0 processors
960    */
__QSUB(q31_t x,q31_t y)961   static __INLINE q31_t __QSUB(
962   q31_t x,
963   q31_t y)
964   {
965     return clip_q63_to_q31((q63_t) x - y);
966   }
967 
968   /*
969    * @brief C custom defined SMLAD for M3 and M0 processors
970    */
__SMLAD(q31_t x,q31_t y,q31_t sum)971   static __INLINE q31_t __SMLAD(
972   q31_t x,
973   q31_t y,
974   q31_t sum)
975   {
976 
977     return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
978             ((q15_t) x * (q15_t) y));
979   }
980 
981   /*
982    * @brief C custom defined SMLADX for M3 and M0 processors
983    */
__SMLADX(q31_t x,q31_t y,q31_t sum)984   static __INLINE q31_t __SMLADX(
985   q31_t x,
986   q31_t y,
987   q31_t sum)
988   {
989 
990     return (sum + ((q15_t) (x >> 16) * (q15_t) (y)) +
991             ((q15_t) x * (q15_t) (y >> 16)));
992   }
993 
994   /*
995    * @brief C custom defined SMLSDX for M3 and M0 processors
996    */
__SMLSDX(q31_t x,q31_t y,q31_t sum)997   static __INLINE q31_t __SMLSDX(
998   q31_t x,
999   q31_t y,
1000   q31_t sum)
1001   {
1002 
1003     return (sum - ((q15_t) (x >> 16) * (q15_t) (y)) +
1004             ((q15_t) x * (q15_t) (y >> 16)));
1005   }
1006 
1007   /*
1008    * @brief C custom defined SMLALD for M3 and M0 processors
1009    */
__SMLALD(q31_t x,q31_t y,q63_t sum)1010   static __INLINE q63_t __SMLALD(
1011   q31_t x,
1012   q31_t y,
1013   q63_t sum)
1014   {
1015 
1016     return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) +
1017             ((q15_t) x * (q15_t) y));
1018   }
1019 
1020   /*
1021    * @brief C custom defined SMLALDX for M3 and M0 processors
1022    */
__SMLALDX(q31_t x,q31_t y,q63_t sum)1023   static __INLINE q63_t __SMLALDX(
1024   q31_t x,
1025   q31_t y,
1026   q63_t sum)
1027   {
1028 
1029     return (sum + ((q15_t) (x >> 16) * (q15_t) y)) +
1030       ((q15_t) x * (q15_t) (y >> 16));
1031   }
1032 
1033   /*
1034    * @brief C custom defined SMUAD for M3 and M0 processors
1035    */
__SMUAD(q31_t x,q31_t y)1036   static __INLINE q31_t __SMUAD(
1037   q31_t x,
1038   q31_t y)
1039   {
1040 
1041     return (((x >> 16) * (y >> 16)) +
1042             (((x << 16) >> 16) * ((y << 16) >> 16)));
1043   }
1044 
1045   /*
1046    * @brief C custom defined SMUSD for M3 and M0 processors
1047    */
__SMUSD(q31_t x,q31_t y)1048   static __INLINE q31_t __SMUSD(
1049   q31_t x,
1050   q31_t y)
1051   {
1052 
1053     return (-((x >> 16) * (y >> 16)) +
1054             (((x << 16) >> 16) * ((y << 16) >> 16)));
1055   }
1056 
1057 
1058   /*
1059    * @brief C custom defined SXTB16 for M3 and M0 processors
1060    */
__SXTB16(q31_t x)1061   static __INLINE q31_t __SXTB16(
1062   q31_t x)
1063   {
1064 
1065     return ((((x << 24) >> 24) & 0x0000FFFF) |
1066             (((x << 8) >> 8) & 0xFFFF0000));
1067   }
1068 
1069 
1070 #endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
1071 
1072 
1073   /**
1074    * @brief Instance structure for the Q7 FIR filter.
1075    */
1076   typedef struct
1077   {
1078     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
1079     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1080     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1081   } arm_fir_instance_q7;
1082 
1083   /**
1084    * @brief Instance structure for the Q15 FIR filter.
1085    */
1086   typedef struct
1087   {
1088     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1089     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1090     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1091   } arm_fir_instance_q15;
1092 
1093   /**
1094    * @brief Instance structure for the Q31 FIR filter.
1095    */
1096   typedef struct
1097   {
1098     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1099     q31_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1100     q31_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
1101   } arm_fir_instance_q31;
1102 
1103   /**
1104    * @brief Instance structure for the floating-point FIR filter.
1105    */
1106   typedef struct
1107   {
1108     uint16_t numTaps;     /**< number of filter coefficients in the filter. */
1109     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1110     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
1111   } arm_fir_instance_f32;
1112 
1113 
1114   /**
1115    * @brief Processing function for the Q7 FIR filter.
1116    * @param[in] *S points to an instance of the Q7 FIR filter structure.
1117    * @param[in] *pSrc points to the block of input data.
1118    * @param[out] *pDst points to the block of output data.
1119    * @param[in] blockSize number of samples to process.
1120    * @return none.
1121    */
1122   void arm_fir_q7(
1123   const arm_fir_instance_q7 * S,
1124   q7_t * pSrc,
1125   q7_t * pDst,
1126   uint32_t blockSize);
1127 
1128 
1129   /**
1130    * @brief  Initialization function for the Q7 FIR filter.
1131    * @param[in,out] *S points to an instance of the Q7 FIR structure.
1132    * @param[in] numTaps  Number of filter coefficients in the filter.
1133    * @param[in] *pCoeffs points to the filter coefficients.
1134    * @param[in] *pState points to the state buffer.
1135    * @param[in] blockSize number of samples that are processed.
1136    * @return none
1137    */
1138   void arm_fir_init_q7(
1139   arm_fir_instance_q7 * S,
1140   uint16_t numTaps,
1141   q7_t * pCoeffs,
1142   q7_t * pState,
1143   uint32_t blockSize);
1144 
1145 
1146   /**
1147    * @brief Processing function for the Q15 FIR filter.
1148    * @param[in] *S points to an instance of the Q15 FIR structure.
1149    * @param[in] *pSrc points to the block of input data.
1150    * @param[out] *pDst points to the block of output data.
1151    * @param[in] blockSize number of samples to process.
1152    * @return none.
1153    */
1154   void arm_fir_q15(
1155   const arm_fir_instance_q15 * S,
1156   q15_t * pSrc,
1157   q15_t * pDst,
1158   uint32_t blockSize);
1159 
1160   /**
1161    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1162    * @param[in] *S points to an instance of the Q15 FIR filter structure.
1163    * @param[in] *pSrc points to the block of input data.
1164    * @param[out] *pDst points to the block of output data.
1165    * @param[in] blockSize number of samples to process.
1166    * @return none.
1167    */
1168   void arm_fir_fast_q15(
1169   const arm_fir_instance_q15 * S,
1170   q15_t * pSrc,
1171   q15_t * pDst,
1172   uint32_t blockSize);
1173 
1174   /**
1175    * @brief  Initialization function for the Q15 FIR filter.
1176    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
1177    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1178    * @param[in] *pCoeffs points to the filter coefficients.
1179    * @param[in] *pState points to the state buffer.
1180    * @param[in] blockSize number of samples that are processed at a time.
1181    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1182    * <code>numTaps</code> is not a supported value.
1183    */
1184 
1185   arm_status arm_fir_init_q15(
1186   arm_fir_instance_q15 * S,
1187   uint16_t numTaps,
1188   q15_t * pCoeffs,
1189   q15_t * pState,
1190   uint32_t blockSize);
1191 
1192   /**
1193    * @brief Processing function for the Q31 FIR filter.
1194    * @param[in] *S points to an instance of the Q31 FIR filter structure.
1195    * @param[in] *pSrc points to the block of input data.
1196    * @param[out] *pDst points to the block of output data.
1197    * @param[in] blockSize number of samples to process.
1198    * @return none.
1199    */
1200   void arm_fir_q31(
1201   const arm_fir_instance_q31 * S,
1202   q31_t * pSrc,
1203   q31_t * pDst,
1204   uint32_t blockSize);
1205 
1206   /**
1207    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1208    * @param[in] *S points to an instance of the Q31 FIR structure.
1209    * @param[in] *pSrc points to the block of input data.
1210    * @param[out] *pDst points to the block of output data.
1211    * @param[in] blockSize number of samples to process.
1212    * @return none.
1213    */
1214   void arm_fir_fast_q31(
1215   const arm_fir_instance_q31 * S,
1216   q31_t * pSrc,
1217   q31_t * pDst,
1218   uint32_t blockSize);
1219 
1220   /**
1221    * @brief  Initialization function for the Q31 FIR filter.
1222    * @param[in,out] *S points to an instance of the Q31 FIR structure.
1223    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1224    * @param[in] 	*pCoeffs points to the filter coefficients.
1225    * @param[in] 	*pState points to the state buffer.
1226    * @param[in] 	blockSize number of samples that are processed at a time.
1227    * @return 		none.
1228    */
1229   void arm_fir_init_q31(
1230   arm_fir_instance_q31 * S,
1231   uint16_t numTaps,
1232   q31_t * pCoeffs,
1233   q31_t * pState,
1234   uint32_t blockSize);
1235 
1236   /**
1237    * @brief Processing function for the floating-point FIR filter.
1238    * @param[in] *S points to an instance of the floating-point FIR structure.
1239    * @param[in] *pSrc points to the block of input data.
1240    * @param[out] *pDst points to the block of output data.
1241    * @param[in] blockSize number of samples to process.
1242    * @return none.
1243    */
1244   void arm_fir_f32(
1245   const arm_fir_instance_f32 * S,
1246   float32_t * pSrc,
1247   float32_t * pDst,
1248   uint32_t blockSize);
1249 
1250   /**
1251    * @brief  Initialization function for the floating-point FIR filter.
1252    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
1253    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1254    * @param[in] 	*pCoeffs points to the filter coefficients.
1255    * @param[in] 	*pState points to the state buffer.
1256    * @param[in] 	blockSize number of samples that are processed at a time.
1257    * @return    	none.
1258    */
1259   void arm_fir_init_f32(
1260   arm_fir_instance_f32 * S,
1261   uint16_t numTaps,
1262   float32_t * pCoeffs,
1263   float32_t * pState,
1264   uint32_t blockSize);
1265 
1266 
1267   /**
1268    * @brief Instance structure for the Q15 Biquad cascade filter.
1269    */
1270   typedef struct
1271   {
1272     int8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1273     q15_t *pState;            /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1274     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1275     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1276 
1277   } arm_biquad_casd_df1_inst_q15;
1278 
1279 
1280   /**
1281    * @brief Instance structure for the Q31 Biquad cascade filter.
1282    */
1283   typedef struct
1284   {
1285     uint32_t numStages;      /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1286     q31_t *pState;           /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1287     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1288     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. */
1289 
1290   } arm_biquad_casd_df1_inst_q31;
1291 
1292   /**
1293    * @brief Instance structure for the floating-point Biquad cascade filter.
1294    */
1295   typedef struct
1296   {
1297     uint32_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1298     float32_t *pState;          /**< Points to the array of state coefficients.  The array is of length 4*numStages. */
1299     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1300 
1301 
1302   } arm_biquad_casd_df1_inst_f32;
1303 
1304 
1305 
1306   /**
1307    * @brief Processing function for the Q15 Biquad cascade filter.
1308    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1309    * @param[in]  *pSrc points to the block of input data.
1310    * @param[out] *pDst points to the block of output data.
1311    * @param[in]  blockSize number of samples to process.
1312    * @return     none.
1313    */
1314 
1315   void arm_biquad_cascade_df1_q15(
1316   const arm_biquad_casd_df1_inst_q15 * S,
1317   q15_t * pSrc,
1318   q15_t * pDst,
1319   uint32_t blockSize);
1320 
1321   /**
1322    * @brief  Initialization function for the Q15 Biquad cascade filter.
1323    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1324    * @param[in]     numStages    number of 2nd order stages in the filter.
1325    * @param[in]     *pCoeffs     points to the filter coefficients.
1326    * @param[in]     *pState      points to the state buffer.
1327    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1328    * @return        none
1329    */
1330 
1331   void arm_biquad_cascade_df1_init_q15(
1332   arm_biquad_casd_df1_inst_q15 * S,
1333   uint8_t numStages,
1334   q15_t * pCoeffs,
1335   q15_t * pState,
1336   int8_t postShift);
1337 
1338 
1339   /**
1340    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1341    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1342    * @param[in]  *pSrc points to the block of input data.
1343    * @param[out] *pDst points to the block of output data.
1344    * @param[in]  blockSize number of samples to process.
1345    * @return     none.
1346    */
1347 
1348   void arm_biquad_cascade_df1_fast_q15(
1349   const arm_biquad_casd_df1_inst_q15 * S,
1350   q15_t * pSrc,
1351   q15_t * pDst,
1352   uint32_t blockSize);
1353 
1354 
1355   /**
1356    * @brief Processing function for the Q31 Biquad cascade filter
1357    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1358    * @param[in]  *pSrc      points to the block of input data.
1359    * @param[out] *pDst      points to the block of output data.
1360    * @param[in]  blockSize  number of samples to process.
1361    * @return     none.
1362    */
1363 
1364   void arm_biquad_cascade_df1_q31(
1365   const arm_biquad_casd_df1_inst_q31 * S,
1366   q31_t * pSrc,
1367   q31_t * pDst,
1368   uint32_t blockSize);
1369 
1370   /**
1371    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1372    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1373    * @param[in]  *pSrc      points to the block of input data.
1374    * @param[out] *pDst      points to the block of output data.
1375    * @param[in]  blockSize  number of samples to process.
1376    * @return     none.
1377    */
1378 
1379   void arm_biquad_cascade_df1_fast_q31(
1380   const arm_biquad_casd_df1_inst_q31 * S,
1381   q31_t * pSrc,
1382   q31_t * pDst,
1383   uint32_t blockSize);
1384 
1385   /**
1386    * @brief  Initialization function for the Q31 Biquad cascade filter.
1387    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1388    * @param[in]     numStages      number of 2nd order stages in the filter.
1389    * @param[in]     *pCoeffs     points to the filter coefficients.
1390    * @param[in]     *pState      points to the state buffer.
1391    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1392    * @return        none
1393    */
1394 
1395   void arm_biquad_cascade_df1_init_q31(
1396   arm_biquad_casd_df1_inst_q31 * S,
1397   uint8_t numStages,
1398   q31_t * pCoeffs,
1399   q31_t * pState,
1400   int8_t postShift);
1401 
1402   /**
1403    * @brief Processing function for the floating-point Biquad cascade filter.
1404    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1405    * @param[in]  *pSrc      points to the block of input data.
1406    * @param[out] *pDst      points to the block of output data.
1407    * @param[in]  blockSize  number of samples to process.
1408    * @return     none.
1409    */
1410 
1411   void arm_biquad_cascade_df1_f32(
1412   const arm_biquad_casd_df1_inst_f32 * S,
1413   float32_t * pSrc,
1414   float32_t * pDst,
1415   uint32_t blockSize);
1416 
1417   /**
1418    * @brief  Initialization function for the floating-point Biquad cascade filter.
1419    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1420    * @param[in]     numStages    number of 2nd order stages in the filter.
1421    * @param[in]     *pCoeffs     points to the filter coefficients.
1422    * @param[in]     *pState      points to the state buffer.
1423    * @return        none
1424    */
1425 
1426   void arm_biquad_cascade_df1_init_f32(
1427   arm_biquad_casd_df1_inst_f32 * S,
1428   uint8_t numStages,
1429   float32_t * pCoeffs,
1430   float32_t * pState);
1431 
1432 
1433   /**
1434    * @brief Instance structure for the floating-point matrix structure.
1435    */
1436 
1437   typedef struct
1438   {
1439     uint16_t numRows;     /**< number of rows of the matrix.     */
1440     uint16_t numCols;     /**< number of columns of the matrix.  */
1441     float32_t *pData;     /**< points to the data of the matrix. */
1442   } arm_matrix_instance_f32;
1443 
1444 
1445   /**
1446    * @brief Instance structure for the floating-point matrix structure.
1447    */
1448 
1449   typedef struct
1450   {
1451     uint16_t numRows;     /**< number of rows of the matrix.     */
1452     uint16_t numCols;     /**< number of columns of the matrix.  */
1453     float64_t *pData;     /**< points to the data of the matrix. */
1454   } arm_matrix_instance_f64;
1455 
1456   /**
1457    * @brief Instance structure for the Q15 matrix structure.
1458    */
1459 
1460   typedef struct
1461   {
1462     uint16_t numRows;     /**< number of rows of the matrix.     */
1463     uint16_t numCols;     /**< number of columns of the matrix.  */
1464     q15_t *pData;         /**< points to the data of the matrix. */
1465 
1466   } arm_matrix_instance_q15;
1467 
1468   /**
1469    * @brief Instance structure for the Q31 matrix structure.
1470    */
1471 
1472   typedef struct
1473   {
1474     uint16_t numRows;     /**< number of rows of the matrix.     */
1475     uint16_t numCols;     /**< number of columns of the matrix.  */
1476     q31_t *pData;         /**< points to the data of the matrix. */
1477 
1478   } arm_matrix_instance_q31;
1479 
1480 
1481 
1482   /**
1483    * @brief Floating-point matrix addition.
1484    * @param[in]       *pSrcA points to the first input matrix structure
1485    * @param[in]       *pSrcB points to the second input matrix structure
1486    * @param[out]      *pDst points to output matrix structure
1487    * @return     The function returns either
1488    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1489    */
1490 
1491   arm_status arm_mat_add_f32(
1492   const arm_matrix_instance_f32 * pSrcA,
1493   const arm_matrix_instance_f32 * pSrcB,
1494   arm_matrix_instance_f32 * pDst);
1495 
1496   /**
1497    * @brief Q15 matrix addition.
1498    * @param[in]       *pSrcA points to the first input matrix structure
1499    * @param[in]       *pSrcB points to the second input matrix structure
1500    * @param[out]      *pDst points to output matrix structure
1501    * @return     The function returns either
1502    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1503    */
1504 
1505   arm_status arm_mat_add_q15(
1506   const arm_matrix_instance_q15 * pSrcA,
1507   const arm_matrix_instance_q15 * pSrcB,
1508   arm_matrix_instance_q15 * pDst);
1509 
1510   /**
1511    * @brief Q31 matrix addition.
1512    * @param[in]       *pSrcA points to the first input matrix structure
1513    * @param[in]       *pSrcB points to the second input matrix structure
1514    * @param[out]      *pDst points to output matrix structure
1515    * @return     The function returns either
1516    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1517    */
1518 
1519   arm_status arm_mat_add_q31(
1520   const arm_matrix_instance_q31 * pSrcA,
1521   const arm_matrix_instance_q31 * pSrcB,
1522   arm_matrix_instance_q31 * pDst);
1523 
1524   /**
1525    * @brief Floating-point, complex, matrix multiplication.
1526    * @param[in]       *pSrcA points to the first input matrix structure
1527    * @param[in]       *pSrcB points to the second input matrix structure
1528    * @param[out]      *pDst points to output matrix structure
1529    * @return     The function returns either
1530    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1531    */
1532 
1533   arm_status arm_mat_cmplx_mult_f32(
1534   const arm_matrix_instance_f32 * pSrcA,
1535   const arm_matrix_instance_f32 * pSrcB,
1536   arm_matrix_instance_f32 * pDst);
1537 
1538   /**
1539    * @brief Q15, complex,  matrix multiplication.
1540    * @param[in]       *pSrcA points to the first input matrix structure
1541    * @param[in]       *pSrcB points to the second input matrix structure
1542    * @param[out]      *pDst points to output matrix structure
1543    * @return     The function returns either
1544    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1545    */
1546 
1547   arm_status arm_mat_cmplx_mult_q15(
1548   const arm_matrix_instance_q15 * pSrcA,
1549   const arm_matrix_instance_q15 * pSrcB,
1550   arm_matrix_instance_q15 * pDst,
1551   q15_t * pScratch);
1552 
1553   /**
1554    * @brief Q31, complex, matrix multiplication.
1555    * @param[in]       *pSrcA points to the first input matrix structure
1556    * @param[in]       *pSrcB points to the second input matrix structure
1557    * @param[out]      *pDst points to output matrix structure
1558    * @return     The function returns either
1559    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1560    */
1561 
1562   arm_status arm_mat_cmplx_mult_q31(
1563   const arm_matrix_instance_q31 * pSrcA,
1564   const arm_matrix_instance_q31 * pSrcB,
1565   arm_matrix_instance_q31 * pDst);
1566 
1567 
1568   /**
1569    * @brief Floating-point matrix transpose.
1570    * @param[in]  *pSrc points to the input matrix
1571    * @param[out] *pDst points to the output matrix
1572    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1573    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1574    */
1575 
1576   arm_status arm_mat_trans_f32(
1577   const arm_matrix_instance_f32 * pSrc,
1578   arm_matrix_instance_f32 * pDst);
1579 
1580 
1581   /**
1582    * @brief Q15 matrix transpose.
1583    * @param[in]  *pSrc points to the input matrix
1584    * @param[out] *pDst points to the output matrix
1585    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1586    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1587    */
1588 
1589   arm_status arm_mat_trans_q15(
1590   const arm_matrix_instance_q15 * pSrc,
1591   arm_matrix_instance_q15 * pDst);
1592 
1593   /**
1594    * @brief Q31 matrix transpose.
1595    * @param[in]  *pSrc points to the input matrix
1596    * @param[out] *pDst points to the output matrix
1597    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1598    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1599    */
1600 
1601   arm_status arm_mat_trans_q31(
1602   const arm_matrix_instance_q31 * pSrc,
1603   arm_matrix_instance_q31 * pDst);
1604 
1605 
1606   /**
1607    * @brief Floating-point matrix multiplication
1608    * @param[in]       *pSrcA points to the first input matrix structure
1609    * @param[in]       *pSrcB points to the second input matrix structure
1610    * @param[out]      *pDst points to output matrix structure
1611    * @return     The function returns either
1612    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1613    */
1614 
1615   arm_status arm_mat_mult_f32(
1616   const arm_matrix_instance_f32 * pSrcA,
1617   const arm_matrix_instance_f32 * pSrcB,
1618   arm_matrix_instance_f32 * pDst);
1619 
1620   /**
1621    * @brief Q15 matrix multiplication
1622    * @param[in]       *pSrcA points to the first input matrix structure
1623    * @param[in]       *pSrcB points to the second input matrix structure
1624    * @param[out]      *pDst points to output matrix structure
1625    * @param[in]		 *pState points to the array for storing intermediate results
1626    * @return     The function returns either
1627    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1628    */
1629 
1630   arm_status arm_mat_mult_q15(
1631   const arm_matrix_instance_q15 * pSrcA,
1632   const arm_matrix_instance_q15 * pSrcB,
1633   arm_matrix_instance_q15 * pDst,
1634   q15_t * pState);
1635 
1636   /**
1637    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1638    * @param[in]       *pSrcA  points to the first input matrix structure
1639    * @param[in]       *pSrcB  points to the second input matrix structure
1640    * @param[out]      *pDst   points to output matrix structure
1641    * @param[in]		  *pState points to the array for storing intermediate results
1642    * @return     The function returns either
1643    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1644    */
1645 
1646   arm_status arm_mat_mult_fast_q15(
1647   const arm_matrix_instance_q15 * pSrcA,
1648   const arm_matrix_instance_q15 * pSrcB,
1649   arm_matrix_instance_q15 * pDst,
1650   q15_t * pState);
1651 
1652   /**
1653    * @brief Q31 matrix multiplication
1654    * @param[in]       *pSrcA points to the first input matrix structure
1655    * @param[in]       *pSrcB points to the second input matrix structure
1656    * @param[out]      *pDst points to output matrix structure
1657    * @return     The function returns either
1658    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1659    */
1660 
1661   arm_status arm_mat_mult_q31(
1662   const arm_matrix_instance_q31 * pSrcA,
1663   const arm_matrix_instance_q31 * pSrcB,
1664   arm_matrix_instance_q31 * pDst);
1665 
1666   /**
1667    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1668    * @param[in]       *pSrcA points to the first input matrix structure
1669    * @param[in]       *pSrcB points to the second input matrix structure
1670    * @param[out]      *pDst points to output matrix structure
1671    * @return     The function returns either
1672    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1673    */
1674 
1675   arm_status arm_mat_mult_fast_q31(
1676   const arm_matrix_instance_q31 * pSrcA,
1677   const arm_matrix_instance_q31 * pSrcB,
1678   arm_matrix_instance_q31 * pDst);
1679 
1680 
1681   /**
1682    * @brief Floating-point matrix subtraction
1683    * @param[in]       *pSrcA points to the first input matrix structure
1684    * @param[in]       *pSrcB points to the second input matrix structure
1685    * @param[out]      *pDst points to output matrix structure
1686    * @return     The function returns either
1687    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1688    */
1689 
1690   arm_status arm_mat_sub_f32(
1691   const arm_matrix_instance_f32 * pSrcA,
1692   const arm_matrix_instance_f32 * pSrcB,
1693   arm_matrix_instance_f32 * pDst);
1694 
1695   /**
1696    * @brief Q15 matrix subtraction
1697    * @param[in]       *pSrcA points to the first input matrix structure
1698    * @param[in]       *pSrcB points to the second input matrix structure
1699    * @param[out]      *pDst points to output matrix structure
1700    * @return     The function returns either
1701    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1702    */
1703 
1704   arm_status arm_mat_sub_q15(
1705   const arm_matrix_instance_q15 * pSrcA,
1706   const arm_matrix_instance_q15 * pSrcB,
1707   arm_matrix_instance_q15 * pDst);
1708 
1709   /**
1710    * @brief Q31 matrix subtraction
1711    * @param[in]       *pSrcA points to the first input matrix structure
1712    * @param[in]       *pSrcB points to the second input matrix structure
1713    * @param[out]      *pDst points to output matrix structure
1714    * @return     The function returns either
1715    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1716    */
1717 
1718   arm_status arm_mat_sub_q31(
1719   const arm_matrix_instance_q31 * pSrcA,
1720   const arm_matrix_instance_q31 * pSrcB,
1721   arm_matrix_instance_q31 * pDst);
1722 
1723   /**
1724    * @brief Floating-point matrix scaling.
1725    * @param[in]  *pSrc points to the input matrix
1726    * @param[in]  scale scale factor
1727    * @param[out] *pDst points to the output matrix
1728    * @return     The function returns either
1729    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1730    */
1731 
1732   arm_status arm_mat_scale_f32(
1733   const arm_matrix_instance_f32 * pSrc,
1734   float32_t scale,
1735   arm_matrix_instance_f32 * pDst);
1736 
1737   /**
1738    * @brief Q15 matrix scaling.
1739    * @param[in]       *pSrc points to input matrix
1740    * @param[in]       scaleFract fractional portion of the scale factor
1741    * @param[in]       shift number of bits to shift the result by
1742    * @param[out]      *pDst points to output matrix
1743    * @return     The function returns either
1744    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1745    */
1746 
1747   arm_status arm_mat_scale_q15(
1748   const arm_matrix_instance_q15 * pSrc,
1749   q15_t scaleFract,
1750   int32_t shift,
1751   arm_matrix_instance_q15 * pDst);
1752 
1753   /**
1754    * @brief Q31 matrix scaling.
1755    * @param[in]       *pSrc points to input matrix
1756    * @param[in]       scaleFract fractional portion of the scale factor
1757    * @param[in]       shift number of bits to shift the result by
1758    * @param[out]      *pDst points to output matrix structure
1759    * @return     The function returns either
1760    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1761    */
1762 
1763   arm_status arm_mat_scale_q31(
1764   const arm_matrix_instance_q31 * pSrc,
1765   q31_t scaleFract,
1766   int32_t shift,
1767   arm_matrix_instance_q31 * pDst);
1768 
1769 
1770   /**
1771    * @brief  Q31 matrix initialization.
1772    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1773    * @param[in]     nRows          number of rows in the matrix.
1774    * @param[in]     nColumns       number of columns in the matrix.
1775    * @param[in]     *pData	       points to the matrix data array.
1776    * @return        none
1777    */
1778 
1779   void arm_mat_init_q31(
1780   arm_matrix_instance_q31 * S,
1781   uint16_t nRows,
1782   uint16_t nColumns,
1783   q31_t * pData);
1784 
1785   /**
1786    * @brief  Q15 matrix initialization.
1787    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1788    * @param[in]     nRows          number of rows in the matrix.
1789    * @param[in]     nColumns       number of columns in the matrix.
1790    * @param[in]     *pData	       points to the matrix data array.
1791    * @return        none
1792    */
1793 
1794   void arm_mat_init_q15(
1795   arm_matrix_instance_q15 * S,
1796   uint16_t nRows,
1797   uint16_t nColumns,
1798   q15_t * pData);
1799 
1800   /**
1801    * @brief  Floating-point matrix initialization.
1802    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1803    * @param[in]     nRows          number of rows in the matrix.
1804    * @param[in]     nColumns       number of columns in the matrix.
1805    * @param[in]     *pData         points to the matrix data array (float32_t elements).
1806    * @return        none
1807    */
1808 
1809   void arm_mat_init_f32(
1810   arm_matrix_instance_f32 * S,
1811   uint16_t nRows,
1812   uint16_t nColumns,
1813   float32_t * pData);
1814 
1815 
1816 
1817   /**
1818    * @brief Instance structure for the Q15 PID Control. The layout of the derived gains depends on ARM_MATH_CM0_FAMILY.
1819    */
1820   typedef struct
1821   {
1822     q15_t A0;    /**< The derived gain, A0 = Kp + Ki + Kd. */
1823 #ifdef ARM_MATH_CM0_FAMILY
1824     q15_t A1;    /**< Separate derived gain in ARM_MATH_CM0_FAMILY builds; presumably A1 = -Kp - 2Kd - TODO confirm. */
1825     q15_t A2;    /**< Separate derived gain in ARM_MATH_CM0_FAMILY builds; presumably A2 = Kd - TODO confirm. */
1826 #else
1827     q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd: both values share this single q31_t field. NOTE(review): packing order is not visible here - confirm. */
1828 #endif
1829     q15_t state[3];       /**< The state array of length 3. */
1830     q15_t Kp;           /**< The proportional gain. */
1831     q15_t Ki;           /**< The integral gain. */
1832     q15_t Kd;           /**< The derivative gain. */
1833   } arm_pid_instance_q15;
1834 
1835   /**
1836    * @brief Instance structure for the Q31 PID Control (three derived gains, a 3-element state array and the three user gains).
1837    */
1838   typedef struct
1839   {
1840     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
1841     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1842     q31_t A2;            /**< The derived gain, A2 = Kd. */
1843     q31_t state[3];      /**< The state array of length 3. */
1844     q31_t Kp;            /**< The proportional gain. */
1845     q31_t Ki;            /**< The integral gain. */
1846     q31_t Kd;            /**< The derivative gain. */
1847 
1848   } arm_pid_instance_q31;
1849 
1850   /**
1851    * @brief Instance structure for the floating-point PID Control (three derived gains, a 3-element state array and the three user gains).
1852    */
1853   typedef struct
1854   {
1855     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
1856     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1857     float32_t A2;          /**< The derived gain, A2 = Kd. */
1858     float32_t state[3];    /**< The state array of length 3. */
1859     float32_t Kp;               /**< The proportional gain. */
1860     float32_t Ki;               /**< The integral gain. */
1861     float32_t Kd;               /**< The derivative gain. */
1862   } arm_pid_instance_f32;
1863 
1864 
1865 
1866   /**
1867    * @brief  Initialization function for the floating-point PID Control.
1868    * @param[in,out] *S      points to an instance of the floating-point PID structure.
1869    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1870    * @return none.
1871    */
1872   void arm_pid_init_f32(
1873   arm_pid_instance_f32 * S,
1874   int32_t resetStateFlag);
1875 
1876   /**
1877    * @brief  Reset function for the floating-point PID Control.
1878    * @param[in,out] *S points to an instance of the floating-point PID Control structure.
1879    * @return none.
1880    */
1881   void arm_pid_reset_f32(
1882   arm_pid_instance_f32 * S);
1883 
1884 
1885   /**
1886    * @brief  Initialization function for the Q31 PID Control.
1887    * @param[in,out] *S points to an instance of the Q31 PID structure.
1888    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1889    * @return none.
1890    */
1891   void arm_pid_init_q31(
1892   arm_pid_instance_q31 * S,
1893   int32_t resetStateFlag);
1894 
1895 
1896   /**
1897    * @brief  Reset function for the Q31 PID Control.
1898    * @param[in,out] *S points to an instance of the Q31 PID Control structure.
1899    * @return none.
1900    */
1901 
1902   void arm_pid_reset_q31(
1903   arm_pid_instance_q31 * S);
1904 
1905   /**
1906    * @brief  Initialization function for the Q15 PID Control.
1907    * @param[in,out] *S points to an instance of the Q15 PID structure.
1908    * @param[in]     resetStateFlag  flag to reset the state: 0 = no change in state, 1 = reset the state.
1909    * @return none.
1910    */
1911   void arm_pid_init_q15(
1912   arm_pid_instance_q15 * S,
1913   int32_t resetStateFlag);
1914 
1915   /**
1916    * @brief  Reset function for the Q15 PID Control.
1917    * @param[in,out] *S points to an instance of the Q15 PID Control structure.
1918    * @return none.
1919    */
1920   void arm_pid_reset_q15(
1921   arm_pid_instance_q15 * S);
1922 
1923 
1924   /**
1925    * @brief Instance structure for the floating-point Linear Interpolate function.
1926    */
1927   typedef struct
1928   {
1929     uint32_t nValues;           /**< nValues: presumably the number of entries in the Y table - TODO confirm. */
1930     float32_t x1;               /**< x1: presumably the x value of the first table entry - TODO confirm. */
1931     float32_t xSpacing;         /**< xSpacing: presumably the spacing between consecutive x values - TODO confirm. */
1932     float32_t *pYData;          /**< pointer to the table of Y values. */
1933   } arm_linear_interp_instance_f32;
1934 
1935   /**
1936    * @brief Instance structure for the floating-point bilinear interpolation function (table dimensions plus a pointer to float32_t data).
1937    */
1938 
1939   typedef struct
1940   {
1941     uint16_t numRows;   /**< number of rows in the data table. */
1942     uint16_t numCols;   /**< number of columns in the data table. */
1943     float32_t *pData;   /**< points to the data table. */
1944   } arm_bilinear_interp_instance_f32;
1945 
1946    /**
1947    * @brief Instance structure for the Q31 bilinear interpolation function (table dimensions plus a pointer to q31_t data).
1948    */
1949 
1950   typedef struct
1951   {
1952     uint16_t numRows;   /**< number of rows in the data table. */
1953     uint16_t numCols;   /**< number of columns in the data table. */
1954     q31_t *pData;       /**< points to the data table. */
1955   } arm_bilinear_interp_instance_q31;
1956 
1957    /**
1958    * @brief Instance structure for the Q15 bilinear interpolation function (table dimensions plus a pointer to q15_t data).
1959    */
1960 
1961   typedef struct
1962   {
1963     uint16_t numRows;   /**< number of rows in the data table. */
1964     uint16_t numCols;   /**< number of columns in the data table. */
1965     q15_t *pData;       /**< points to the data table. */
1966   } arm_bilinear_interp_instance_q15;
1967 
1968    /**
1969    * @brief Instance structure for the Q7 bilinear interpolation function (table dimensions plus a pointer to q7_t data).
1970    */
1971 
1972   typedef struct
1973   {
1974     uint16_t numRows;   /**< number of rows in the data table. */
1975     uint16_t numCols;   /**< number of columns in the data table. */
1976     q7_t *pData;                /**< points to the data table. */
1977   } arm_bilinear_interp_instance_q7;
1978 
1979 
1980   /**
1981    * @brief Q7 vector multiplication.
1982    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
1983    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
1984    * @param[out]      *pDst  points to the output vector
1985    * @param[in]       blockSize number of samples in each vector
1986    * @return none.
1987    */
1988 
1989   void arm_mult_q7(
1990   q7_t * pSrcA,
1991   q7_t * pSrcB,
1992   q7_t * pDst,
1993   uint32_t blockSize);
1994 
1995   /**
1996    * @brief Q15 vector multiplication.
1997    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
1998    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
1999    * @param[out]      *pDst  points to the output vector
2000    * @param[in]       blockSize number of samples in each vector
2001    * @return none.
2002    */
2003 
2004   void arm_mult_q15(
2005   q15_t * pSrcA,
2006   q15_t * pSrcB,
2007   q15_t * pDst,
2008   uint32_t blockSize);
2009 
2010   /**
2011    * @brief Q31 vector multiplication.
2012    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2013    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2014    * @param[out]      *pDst  points to the output vector
2015    * @param[in]       blockSize number of samples in each vector
2016    * @return none.
2017    */
2018 
2019   void arm_mult_q31(
2020   q31_t * pSrcA,
2021   q31_t * pSrcB,
2022   q31_t * pDst,
2023   uint32_t blockSize);
2024 
2025   /**
2026    * @brief Floating-point vector multiplication.
2027    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2028    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2029    * @param[out]      *pDst  points to the output vector
2030    * @param[in]       blockSize number of samples in each vector
2031    * @return none.
2032    */
2033 
2034   void arm_mult_f32(
2035   float32_t * pSrcA,
2036   float32_t * pSrcB,
2037   float32_t * pDst,
2038   uint32_t blockSize);
2039 
2040 
2041 
2042 
2043 
2044 
2045   /**
2046    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function (used by the arm_cfft_radix2_*_q15 prototypes, which are marked deprecated).
2047    */
2048 
2049   typedef struct
2050   {
2051     uint16_t fftLen;                 /**< length of the FFT. */
2052     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2053     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2054     q15_t *pTwiddle;                     /**< points to the twiddle factor table. */
2055     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2056     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2057     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2058   } arm_cfft_radix2_instance_q15;
2059 
2060 /* Deprecated. Judging by its name and signature, sets up a Radix-2 Q15 CFFT/CIFFT instance and reports the outcome as an arm_status. */
2061   arm_status arm_cfft_radix2_init_q15(
2062   arm_cfft_radix2_instance_q15 * S,     /* instance structure to set up */
2063   uint16_t fftLen,                      /* length of the FFT */
2064   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2065   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2066 
2067 /* Deprecated. Judging by its name, the Radix-2 Q15 CFFT/CIFFT processing function. */
2068   void arm_cfft_radix2_q15(
2069   const arm_cfft_radix2_instance_q15 * S,   /* instance structure, passed as pointer-to-const */
2070   q15_t * pSrc);                            /* data buffer; NOTE(review): presumably transformed in place - confirm */
2071 
2072 
2073 
2074   /**
2075    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function (used by the arm_cfft_radix4_*_q15 prototypes, which are marked deprecated).
2076    */
2077 
2078   typedef struct
2079   {
2080     uint16_t fftLen;                 /**< length of the FFT. */
2081     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2082     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2083     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
2084     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2085     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2086     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2087   } arm_cfft_radix4_instance_q15;
2088 
2089 /* Deprecated. Judging by its name and signature, sets up a Radix-4 Q15 CFFT/CIFFT instance and reports the outcome as an arm_status. */
2090   arm_status arm_cfft_radix4_init_q15(
2091   arm_cfft_radix4_instance_q15 * S,     /* instance structure to set up */
2092   uint16_t fftLen,                      /* length of the FFT */
2093   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2094   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2095 
2096 /* Deprecated. Judging by its name, the Radix-4 Q15 CFFT/CIFFT processing function. */
2097   void arm_cfft_radix4_q15(
2098   const arm_cfft_radix4_instance_q15 * S,   /* instance structure, passed as pointer-to-const */
2099   q15_t * pSrc);                            /* data buffer; NOTE(review): presumably transformed in place - confirm */
2100 
2101   /**
2102    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function (used by the arm_cfft_radix2_*_q31 prototypes, which are marked deprecated).
2103    */
2104 
2105   typedef struct
2106   {
2107     uint16_t fftLen;                 /**< length of the FFT. */
2108     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2109     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2110     q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
2111     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2112     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2113     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2114   } arm_cfft_radix2_instance_q31;
2115 
2116 /* Deprecated. Judging by its name and signature, sets up a Radix-2 Q31 CFFT/CIFFT instance and reports the outcome as an arm_status. */
2117   arm_status arm_cfft_radix2_init_q31(
2118   arm_cfft_radix2_instance_q31 * S,     /* instance structure to set up */
2119   uint16_t fftLen,                      /* length of the FFT */
2120   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2121   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2122 
2123 /* Deprecated. Judging by its name, the Radix-2 Q31 CFFT/CIFFT processing function. */
2124   void arm_cfft_radix2_q31(
2125   const arm_cfft_radix2_instance_q31 * S,   /* instance structure, passed as pointer-to-const */
2126   q31_t * pSrc);                            /* data buffer; NOTE(review): presumably transformed in place - confirm */
2127 
2128   /**
2129    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function (used by the arm_cfft_radix4_*_q31 prototypes, which are marked deprecated).
2130    */
2131 
2132   typedef struct
2133   {
2134     uint16_t fftLen;                 /**< length of the FFT. */
2135     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2136     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2137     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2138     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2139     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2140     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2141   } arm_cfft_radix4_instance_q31;
2142 
2143 /* Deprecated. Judging by its name, the Radix-4 Q31 CFFT/CIFFT processing function. */
2144   void arm_cfft_radix4_q31(
2145   const arm_cfft_radix4_instance_q31 * S,   /* instance structure, passed as pointer-to-const */
2146   q31_t * pSrc);                            /* data buffer; NOTE(review): presumably transformed in place - confirm */
2147 
2148 /* Deprecated. Judging by its name and signature, sets up a Radix-4 Q31 CFFT/CIFFT instance and reports the outcome as an arm_status. */
2149   arm_status arm_cfft_radix4_init_q31(
2150   arm_cfft_radix4_instance_q31 * S,     /* instance structure to set up */
2151   uint16_t fftLen,                      /* length of the FFT */
2152   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2153   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2154 
2155   /**
2156    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function (used by the arm_cfft_radix2_*_f32 prototypes, which are marked deprecated).
2157    */
2158 
2159   typedef struct
2160   {
2161     uint16_t fftLen;                   /**< length of the FFT. */
2162     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2163     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2164     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2165     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2166     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2167     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2168     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2169   } arm_cfft_radix2_instance_f32;
2170 
2171 /* Deprecated. Judging by its name and signature, sets up a Radix-2 floating-point CFFT/CIFFT instance and reports the outcome as an arm_status. */
2172   arm_status arm_cfft_radix2_init_f32(
2173   arm_cfft_radix2_instance_f32 * S,     /* instance structure to set up */
2174   uint16_t fftLen,                      /* length of the FFT */
2175   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2176   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2177 
2178 /* Deprecated. Judging by its name, the Radix-2 floating-point CFFT/CIFFT processing function. */
2179   void arm_cfft_radix2_f32(
2180   const arm_cfft_radix2_instance_f32 * S,   /* instance structure, passed as pointer-to-const */
2181   float32_t * pSrc);                        /* data buffer; NOTE(review): presumably transformed in place - confirm */
2182 
2183   /**
2184    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function (used by the arm_cfft_radix4_*_f32 prototypes, which are marked deprecated).
2185    */
2186 
2187   typedef struct
2188   {
2189     uint16_t fftLen;                   /**< length of the FFT. */
2190     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2191     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2192     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2193     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2194     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2195     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2196     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2197   } arm_cfft_radix4_instance_f32;
2198 
2199 /* Deprecated. Judging by its name and signature, sets up a Radix-4 floating-point CFFT/CIFFT instance and reports the outcome as an arm_status. */
2200   arm_status arm_cfft_radix4_init_f32(
2201   arm_cfft_radix4_instance_f32 * S,     /* instance structure to set up */
2202   uint16_t fftLen,                      /* length of the FFT */
2203   uint8_t ifftFlag,                     /* 0 = forward transform, 1 = inverse transform */
2204   uint8_t bitReverseFlag);              /* 1 = enable, 0 = disable bit reversal of output */
2205 
2206 /* Deprecated. Judging by its name, the Radix-4 floating-point CFFT/CIFFT processing function. */
2207   void arm_cfft_radix4_f32(
2208   const arm_cfft_radix4_instance_f32 * S,   /* instance structure, passed as pointer-to-const */
2209   float32_t * pSrc);                        /* data buffer; NOTE(review): presumably transformed in place - confirm */
2210 
2211   /**
2212    * @brief Instance structure for the Q15 CFFT/CIFFT function (arm_cfft_q15). All table pointers are pointer-to-const.
2213    */
2214 
2215   typedef struct
2216   {
2217     uint16_t fftLen;                   /**< length of the FFT. */
2218     const q15_t *pTwiddle;             /**< points to the twiddle factor table. */
2219     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2220     uint16_t bitRevLength;             /**< bit reversal table length. */
2221   } arm_cfft_instance_q15;
2222 
2223 void arm_cfft_q15(                        /* Q15 CFFT/CIFFT processing function (per its name and instance type). */
2224     const arm_cfft_instance_q15 * S,      /* instance structure, passed as pointer-to-const */
2225     q15_t * p1,                           /* data buffer; NOTE(review): presumably transformed in place - confirm */
2226     uint8_t ifftFlag,                     /* presumably 0 = forward, 1 = inverse transform - confirm */
2227     uint8_t bitReverseFlag);              /* presumably 1 = enable, 0 = disable bit reversal of output - confirm */
2228 
2229   /**
2230    * @brief Instance structure for the Q31 CFFT/CIFFT function (arm_cfft_q31). All table pointers are pointer-to-const.
2231    */
2232 
2233   typedef struct
2234   {
2235     uint16_t fftLen;                   /**< length of the FFT. */
2236     const q31_t *pTwiddle;             /**< points to the twiddle factor table. */
2237     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2238     uint16_t bitRevLength;             /**< bit reversal table length. */
2239   } arm_cfft_instance_q31;
2240 
2241 void arm_cfft_q31(                        /* Q31 CFFT/CIFFT processing function (per its name and instance type). */
2242     const arm_cfft_instance_q31 * S,      /* instance structure, passed as pointer-to-const */
2243     q31_t * p1,                           /* data buffer; NOTE(review): presumably transformed in place - confirm */
2244     uint8_t ifftFlag,                     /* presumably 0 = forward, 1 = inverse transform - confirm */
2245     uint8_t bitReverseFlag);              /* presumably 1 = enable, 0 = disable bit reversal of output - confirm */
2246 
2247   /**
2248    * @brief Instance structure for the floating-point CFFT/CIFFT function (arm_cfft_f32). All table pointers are pointer-to-const.
2249    */
2250 
2251   typedef struct
2252   {
2253     uint16_t fftLen;                   /**< length of the FFT. */
2254     const float32_t *pTwiddle;         /**< points to the twiddle factor table. */
2255     const uint16_t *pBitRevTable;      /**< points to the bit reversal table. */
2256     uint16_t bitRevLength;             /**< bit reversal table length. */
2257   } arm_cfft_instance_f32;
2258 
2259   void arm_cfft_f32(                    /* Floating-point CFFT/CIFFT processing function (per its name and instance type). */
2260   const arm_cfft_instance_f32 * S,      /* instance structure, passed as pointer-to-const */
2261   float32_t * p1,                       /* data buffer; NOTE(review): presumably transformed in place - confirm */
2262   uint8_t ifftFlag,                     /* presumably 0 = forward, 1 = inverse transform - confirm */
2263   uint8_t bitReverseFlag);              /* presumably 1 = enable, 0 = disable bit reversal of output - confirm */
2264 
2265   /**
2266    * @brief Instance structure for the Q15 RFFT/RIFFT function; refers to an arm_cfft_instance_q15 for the complex stage.
2267    */
2268 
2269   typedef struct
2270   {
2271     uint32_t fftLenReal;                      /**< length of the real FFT. */
2272     uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2273     uint8_t bitReverseFlagR;                  /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2274     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2275     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2276     q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
2277     const arm_cfft_instance_q15 *pCfft;       /**< points to the complex FFT instance (pointer-to-const). */
2278   } arm_rfft_instance_q15;
2279 
2280   arm_status arm_rfft_init_q15(         /* Judging by its name and signature, sets up a Q15 RFFT/RIFFT instance; returns an arm_status. */
2281   arm_rfft_instance_q15 * S,            /* instance structure to set up */
2282   uint32_t fftLenReal,                  /* length of the real FFT */
2283   uint32_t ifftFlagR,                   /* 0 = forward, 1 = inverse transform (uint32_t here, uint8_t in the instance) */
2284   uint32_t bitReverseFlag);             /* 1 = enable, 0 = disable bit reversal of output (uint32_t here, uint8_t in the instance) */
2285 
2286   void arm_rfft_q15(                    /* Judging by its name, the Q15 RFFT/RIFFT processing function. */
2287   const arm_rfft_instance_q15 * S,      /* instance structure, passed as pointer-to-const */
2288   q15_t * pSrc,                         /* source buffer; NOTE(review): not const-qualified, may be modified - confirm */
2289   q15_t * pDst);                        /* destination buffer */
2290 
2291   /**
2292    * @brief Instance structure for the Q31 RFFT/RIFFT function; refers to an arm_cfft_instance_q31 for the complex stage.
2293    */
2294 
2295   typedef struct
2296   {
2297     uint32_t fftLenReal;                        /**< length of the real FFT. */
2298     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2299     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2300     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2301     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2302     q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
2303     const arm_cfft_instance_q31 *pCfft;         /**< points to the complex FFT instance (pointer-to-const). */
2304   } arm_rfft_instance_q31;
2305 
2306   arm_status arm_rfft_init_q31(         /* Judging by its name and signature, sets up a Q31 RFFT/RIFFT instance; returns an arm_status. */
2307   arm_rfft_instance_q31 * S,            /* instance structure to set up */
2308   uint32_t fftLenReal,                  /* length of the real FFT */
2309   uint32_t ifftFlagR,                   /* 0 = forward, 1 = inverse transform (uint32_t here, uint8_t in the instance) */
2310   uint32_t bitReverseFlag);             /* 1 = enable, 0 = disable bit reversal of output (uint32_t here, uint8_t in the instance) */
2311 
2312   void arm_rfft_q31(                    /* Judging by its name, the Q31 RFFT/RIFFT processing function. */
2313   const arm_rfft_instance_q31 * S,      /* instance structure, passed as pointer-to-const */
2314   q31_t * pSrc,                         /* source buffer; NOTE(review): not const-qualified, may be modified - confirm */
2315   q31_t * pDst);                        /* destination buffer */
2316 
2317   /**
2318    * @brief Instance structure for the floating-point RFFT/RIFFT function; refers to a Radix-4 floating-point CFFT instance for the complex stage.
2319    */
2320 
2321   typedef struct
2322   {
2323     uint32_t fftLenReal;                        /**< length of the real FFT. */
2324     uint16_t fftLenBy2;                         /**< length of the complex FFT. */
2325     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2326     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2327     uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2328     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2329     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2330     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance (Radix-4 floating-point). */
2331   } arm_rfft_instance_f32;
2332 
2333   arm_status arm_rfft_init_f32(         /* Judging by its name and signature, sets up a floating-point RFFT/RIFFT instance; returns an arm_status. */
2334   arm_rfft_instance_f32 * S,            /* RFFT instance structure to set up */
2335   arm_cfft_radix4_instance_f32 * S_CFFT, /* Radix-4 CFFT instance; presumably stored in S->pCfft - confirm */
2336   uint32_t fftLenReal,                  /* length of the real FFT */
2337   uint32_t ifftFlagR,                   /* 0 = forward, 1 = inverse transform (uint32_t here, uint8_t in the instance) */
2338   uint32_t bitReverseFlag);             /* 1 = enable, 0 = disable bit reversal of output (uint32_t here, uint8_t in the instance) */
2339 
2340   void arm_rfft_f32(                    /* Judging by its name, the floating-point RFFT/RIFFT processing function. */
2341   const arm_rfft_instance_f32 * S,      /* instance structure, passed as pointer-to-const */
2342   float32_t * pSrc,                     /* source buffer; NOTE(review): not const-qualified, may be modified - confirm */
2343   float32_t * pDst);                    /* destination buffer */
2344 
2345   /**
2346    * @brief Instance structure for the fast floating-point RFFT/RIFFT function (arm_rfft_fast_f32); embeds an arm_cfft_instance_f32 by value.
2347    */
2348 
2349 typedef struct
2350   {
2351     arm_cfft_instance_f32 Sint;      /**< Internal CFFT structure. */
2352     uint16_t fftLenRFFT;                        /**< length of the real sequence. */
2353 	float32_t * pTwiddleRFFT;					/**< Twiddle factors for the real stage. */
2354   } arm_rfft_fast_instance_f32 ;
2355 
2356 arm_status arm_rfft_fast_init_f32 (    /* Judging by its name and signature, sets up a fast floating-point RFFT instance; returns an arm_status. */
2357 	arm_rfft_fast_instance_f32 * S,    /* instance structure to set up */
2358 	uint16_t fftLen);                  /* FFT length; NOTE(review): presumably the real sequence length (fftLenRFFT) - confirm */
2359 
2360 void arm_rfft_fast_f32(                 /* Judging by its name, the fast floating-point RFFT/RIFFT processing function. */
2361   arm_rfft_fast_instance_f32 * S,       /* instance structure (not const-qualified in this prototype) */
2362   float32_t * p, float32_t * pOut,      /* p: input buffer (not const, may be modified - confirm); pOut: output buffer */
2363   uint8_t ifftFlag);                    /* presumably 0 = forward, 1 = inverse transform - confirm */
2364 
2365   /**
2366    * @brief Instance structure for the floating-point DCT4/IDCT4 function; refers to one RFFT and one Radix-4 CFFT instance.
2367    */
2368 
2369   typedef struct
2370   {
2371     uint16_t N;                         /**< length of the DCT4. */
2372     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2373     float32_t normalize;                /**< normalizing factor. */
2374     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2375     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2376     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2377     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance. */
2378   } arm_dct4_instance_f32;
2379 
2380   /**
2381    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2382    * @param[in,out] *S         points to an instance of floating-point DCT4/IDCT4 structure.
2383    * @param[in]     *S_RFFT    points to an instance of floating-point RFFT/RIFFT structure.
2384    * @param[in]     *S_CFFT    points to an instance of floating-point CFFT/CIFFT structure.
2385    * @param[in]     N          length of the DCT4.
2386    * @param[in]     Nby2       half of the length of the DCT4.
2387    * @param[in]     normalize  normalizing factor.
2388    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2389    */
2390 
2391   arm_status arm_dct4_init_f32(
2392   arm_dct4_instance_f32 * S,
2393   arm_rfft_instance_f32 * S_RFFT,
2394   arm_cfft_radix4_instance_f32 * S_CFFT,
2395   uint16_t N,
2396   uint16_t Nby2,
2397   float32_t normalize);
2398 
2399   /**
2400    * @brief Processing function for the floating-point DCT4/IDCT4.
2401    * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
2402    * @param[in]       *pState        points to state buffer (documented as input, but not const-qualified in this prototype).
2403    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2404    * @return none.
2405    */
2406 
2407   void arm_dct4_f32(
2408   const arm_dct4_instance_f32 * S,
2409   float32_t * pState,
2410   float32_t * pInlineBuffer);
2411 
2412   /**
2413    * @brief Instance structure for the Q31 DCT4/IDCT4 function; refers to one RFFT and one Radix-4 CFFT instance.
2414    */
2415 
2416   typedef struct
2417   {
2418     uint16_t N;                         /**< length of the DCT4. */
2419     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2420     q31_t normalize;                    /**< normalizing factor. */
2421     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2422     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2423     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2424     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance. */
2425   } arm_dct4_instance_q31;
2426 
2427   /**
2428    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2429    * @param[in,out] *S         points to an instance of Q31 DCT4/IDCT4 structure.
2430    * @param[in]     *S_RFFT    points to an instance of Q31 RFFT/RIFFT structure.
2431    * @param[in]     *S_CFFT    points to an instance of Q31 CFFT/CIFFT structure.
2432    * @param[in]     N          length of the DCT4.
2433    * @param[in]     Nby2       half of the length of the DCT4.
2434    * @param[in]     normalize  normalizing factor.
2435    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2436    */
2437 
2438   arm_status arm_dct4_init_q31(
2439   arm_dct4_instance_q31 * S,
2440   arm_rfft_instance_q31 * S_RFFT,
2441   arm_cfft_radix4_instance_q31 * S_CFFT,
2442   uint16_t N,
2443   uint16_t Nby2,
2444   q31_t normalize);
2445 
2446   /**
2447    * @brief Processing function for the Q31 DCT4/IDCT4.
2448    * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
2449    * @param[in]       *pState        points to state buffer (documented as input, but not const-qualified in this prototype).
2450    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2451    * @return none.
2452    */
2453 
2454   void arm_dct4_q31(
2455   const arm_dct4_instance_q31 * S,
2456   q31_t * pState,
2457   q31_t * pInlineBuffer);
2458 
2459   /**
2460    * @brief Instance structure for the Q15 DCT4/IDCT4 function; refers to one RFFT and one Radix-4 CFFT instance.
2461    */
2462 
2463   typedef struct
2464   {
2465     uint16_t N;                         /**< length of the DCT4. */
2466     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2467     q15_t normalize;                    /**< normalizing factor. */
2468     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2469     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2470     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2471     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance. */
2472   } arm_dct4_instance_q15;
2473 
2474   /**
2475    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2476    * @param[in,out] *S         points to an instance of Q15 DCT4/IDCT4 structure.
2477    * @param[in]     *S_RFFT    points to an instance of Q15 RFFT/RIFFT structure.
2478    * @param[in]     *S_CFFT    points to an instance of Q15 CFFT/CIFFT structure.
2479    * @param[in]     N          length of the DCT4.
2480    * @param[in]     Nby2       half of the length of the DCT4.
2481    * @param[in]     normalize  normalizing factor.
2482    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2483    */
2484 
2485   arm_status arm_dct4_init_q15(
2486   arm_dct4_instance_q15 * S,
2487   arm_rfft_instance_q15 * S_RFFT,
2488   arm_cfft_radix4_instance_q15 * S_CFFT,
2489   uint16_t N,
2490   uint16_t Nby2,
2491   q15_t normalize);
2492 
2493   /**
2494    * @brief Processing function for the Q15 DCT4/IDCT4.
2495    * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
2496    * @param[in]       *pState        points to state buffer (documented as input, but not const-qualified in this prototype).
2497    * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
2498    * @return none.
2499    */
2500 
2501   void arm_dct4_q15(
2502   const arm_dct4_instance_q15 * S,
2503   q15_t * pState,
2504   q15_t * pInlineBuffer);
2505 
2506   /**
2507    * @brief Floating-point vector addition.
2508    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2509    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2510    * @param[out]      *pDst points to the output vector
2511    * @param[in]       blockSize number of samples in each vector
2512    * @return none.
2513    */
2514 
2515   void arm_add_f32(
2516   float32_t * pSrcA,
2517   float32_t * pSrcB,
2518   float32_t * pDst,
2519   uint32_t blockSize);
2520 
2521   /**
2522    * @brief Q7 vector addition.
2523    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2524    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2525    * @param[out]      *pDst points to the output vector
2526    * @param[in]       blockSize number of samples in each vector
2527    * @return none.
2528    */
2529 
2530   void arm_add_q7(
2531   q7_t * pSrcA,
2532   q7_t * pSrcB,
2533   q7_t * pDst,
2534   uint32_t blockSize);
2535 
2536   /**
2537    * @brief Q15 vector addition.
2538    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2539    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2540    * @param[out]      *pDst points to the output vector
2541    * @param[in]       blockSize number of samples in each vector
2542    * @return none.
2543    */
2544 
2545   void arm_add_q15(
2546   q15_t * pSrcA,
2547   q15_t * pSrcB,
2548   q15_t * pDst,
2549   uint32_t blockSize);
2550 
2551   /**
2552    * @brief Q31 vector addition.
2553    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2554    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2555    * @param[out]      *pDst points to the output vector
2556    * @param[in]       blockSize number of samples in each vector
2557    * @return none.
2558    */
2559 
2560   void arm_add_q31(
2561   q31_t * pSrcA,
2562   q31_t * pSrcB,
2563   q31_t * pDst,
2564   uint32_t blockSize);
2565 
2566   /**
2567    * @brief Floating-point vector subtraction.
2568    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2569    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2570    * @param[out]      *pDst points to the output vector
2571    * @param[in]       blockSize number of samples in each vector
2572    * @return none.
2573    */
2574 
2575   void arm_sub_f32(
2576   float32_t * pSrcA,
2577   float32_t * pSrcB,
2578   float32_t * pDst,
2579   uint32_t blockSize);
2580 
2581   /**
2582    * @brief Q7 vector subtraction.
2583    * @param[in]       *pSrcA points to the first input vector (documented as input, but not const-qualified in this prototype)
2584    * @param[in]       *pSrcB points to the second input vector (documented as input, but not const-qualified in this prototype)
2585    * @param[out]      *pDst points to the output vector
2586    * @param[in]       blockSize number of samples in each vector
2587    * @return none.
2588    */
2589 
2590   void arm_sub_q7(
2591   q7_t * pSrcA,
2592   q7_t * pSrcB,
2593   q7_t * pDst,
2594   uint32_t blockSize);
2595 
2596   /**
2597    * @brief Element-wise subtraction of two Q15 vectors.
2598    * @param[in]       *pSrcA points to the first input vector
2599    * @param[in]       *pSrcB points to the second input vector
2600    * @param[out]      *pDst points to the output vector that receives the result
2601    * @param[in]       blockSize number of samples in each vector
2602    * @return none.
2603    */
2604 
2605   void arm_sub_q15(
2606   q15_t * pSrcA,
2607   q15_t * pSrcB,
2608   q15_t * pDst,
2609   uint32_t blockSize);
2610 
2611   /**
2612    * @brief Element-wise subtraction of two Q31 vectors.
2613    * @param[in]       *pSrcA points to the first input vector
2614    * @param[in]       *pSrcB points to the second input vector
2615    * @param[out]      *pDst points to the output vector that receives the result
2616    * @param[in]       blockSize number of samples in each vector
2617    * @return none.
2618    */
2619 
2620   void arm_sub_q31(
2621   q31_t * pSrcA,
2622   q31_t * pSrcB,
2623   q31_t * pDst,
2624   uint32_t blockSize);
2625 
2626   /**
2627    * @brief Multiplies each element of a floating-point vector by a scalar.
2628    * @param[in]       *pSrc points to the input vector
2629    * @param[in]       scale scale factor applied to every element
2630    * @param[out]      *pDst points to the output vector
2631    * @param[in]       blockSize number of samples in the vector
2632    * @return none.
2633    */
2634 
2635   void arm_scale_f32(
2636   float32_t * pSrc,
2637   float32_t scale,
2638   float32_t * pDst,
2639   uint32_t blockSize);
2640 
2641   /**
2642    * @brief Multiplies each element of a Q7 vector by a scalar given as a fraction plus a bit shift.
2643    * @param[in]       *pSrc points to the input vector
2644    * @param[in]       scaleFract fractional portion of the scale value
2645    * @param[in]       shift number of bits to shift the result by
2646    * @param[out]      *pDst points to the output vector
2647    * @param[in]       blockSize number of samples in the vector
2648    * @return none.
2649    */
2650 
2651   void arm_scale_q7(
2652   q7_t * pSrc,
2653   q7_t scaleFract,
2654   int8_t shift,
2655   q7_t * pDst,
2656   uint32_t blockSize);
2657 
2658   /**
2659    * @brief Multiplies each element of a Q15 vector by a scalar given as a fraction plus a bit shift.
2660    * @param[in]       *pSrc points to the input vector
2661    * @param[in]       scaleFract fractional portion of the scale value
2662    * @param[in]       shift number of bits to shift the result by
2663    * @param[out]      *pDst points to the output vector
2664    * @param[in]       blockSize number of samples in the vector
2665    * @return none.
2666    */
2667 
2668   void arm_scale_q15(
2669   q15_t * pSrc,
2670   q15_t scaleFract,
2671   int8_t shift,
2672   q15_t * pDst,
2673   uint32_t blockSize);
2674 
2675   /**
2676    * @brief Multiplies each element of a Q31 vector by a scalar given as a fraction plus a bit shift.
2677    * @param[in]       *pSrc points to the input vector
2678    * @param[in]       scaleFract fractional portion of the scale value
2679    * @param[in]       shift number of bits to shift the result by
2680    * @param[out]      *pDst points to the output vector
2681    * @param[in]       blockSize number of samples in the vector
2682    * @return none.
2683    */
2684 
2685   void arm_scale_q31(
2686   q31_t * pSrc,
2687   q31_t scaleFract,
2688   int8_t shift,
2689   q31_t * pDst,
2690   uint32_t blockSize);
2691 
2692   /**
2693    * @brief Element-wise absolute value of a Q7 vector.
2694    * @param[in]       *pSrc points to the input vector
2695    * @param[out]      *pDst points to the output vector
2696    * @param[in]       blockSize number of samples in each vector
2697    * @return none.
2698    */
2699 
2700   void arm_abs_q7(
2701   q7_t * pSrc,
2702   q7_t * pDst,
2703   uint32_t blockSize);
2704 
2705   /**
2706    * @brief Element-wise absolute value of a floating-point vector.
2707    * @param[in]       *pSrc points to the input vector
2708    * @param[out]      *pDst points to the output vector
2709    * @param[in]       blockSize number of samples in each vector
2710    * @return none.
2711    */
2712 
2713   void arm_abs_f32(
2714   float32_t * pSrc,
2715   float32_t * pDst,
2716   uint32_t blockSize);
2717 
2718   /**
2719    * @brief Element-wise absolute value of a Q15 vector.
2720    * @param[in]       *pSrc points to the input vector
2721    * @param[out]      *pDst points to the output vector
2722    * @param[in]       blockSize number of samples in each vector
2723    * @return none.
2724    */
2725 
2726   void arm_abs_q15(
2727   q15_t * pSrc,
2728   q15_t * pDst,
2729   uint32_t blockSize);
2730 
2731   /**
2732    * @brief Element-wise absolute value of a Q31 vector.
2733    * @param[in]       *pSrc points to the input vector
2734    * @param[out]      *pDst points to the output vector
2735    * @param[in]       blockSize number of samples in each vector
2736    * @return none.
2737    */
2738 
2739   void arm_abs_q31(
2740   q31_t * pSrc,
2741   q31_t * pDst,
2742   uint32_t blockSize);
2743 
2744   /**
2745    * @brief Dot product of floating-point vectors.
2746    * @param[in]       *pSrcA points to the first input vector
2747    * @param[in]       *pSrcB points to the second input vector
2748    * @param[in]       blockSize number of samples in each vector
2749    * @param[out]      *result points to the location where the dot product (float32_t) is written
2750    * @return none.
2751    */
2752 
2753   void arm_dot_prod_f32(
2754   float32_t * pSrcA,
2755   float32_t * pSrcB,
2756   uint32_t blockSize,
2757   float32_t * result);
2758 
2759   /**
2760    * @brief Dot product of Q7 vectors.
2761    * @param[in]       *pSrcA points to the first input vector
2762    * @param[in]       *pSrcB points to the second input vector
2763    * @param[in]       blockSize number of samples in each vector
2764    * @param[out]      *result points to the location where the dot product (q31_t) is written
2765    * @return none.
2766    */
2767 
2768   void arm_dot_prod_q7(
2769   q7_t * pSrcA,
2770   q7_t * pSrcB,
2771   uint32_t blockSize,
2772   q31_t * result);
2773 
2774   /**
2775    * @brief Dot product of Q15 vectors.
2776    * @param[in]       *pSrcA points to the first input vector
2777    * @param[in]       *pSrcB points to the second input vector
2778    * @param[in]       blockSize number of samples in each vector
2779    * @param[out]      *result points to the location where the dot product (q63_t) is written
2780    * @return none.
2781    */
2782 
2783   void arm_dot_prod_q15(
2784   q15_t * pSrcA,
2785   q15_t * pSrcB,
2786   uint32_t blockSize,
2787   q63_t * result);
2788 
2789   /**
2790    * @brief Dot product of Q31 vectors.
2791    * @param[in]       *pSrcA points to the first input vector
2792    * @param[in]       *pSrcB points to the second input vector
2793    * @param[in]       blockSize number of samples in each vector
2794    * @param[out]      *result points to the location where the dot product (q63_t) is written
2795    * @return none.
2796    */
2797 
2798   void arm_dot_prod_q31(
2799   q31_t * pSrcA,
2800   q31_t * pSrcB,
2801   uint32_t blockSize,
2802   q63_t * result);
2803 
2804   /**
2805    * @brief Shifts the elements of a Q7 vector a specified number of bits.
2806    * @param[in]  *pSrc points to the input vector
2807    * @param[in]  shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
2808    * @param[out] *pDst points to the output vector
2809    * @param[in]  blockSize number of samples in the vector
2810    * @return none.
2811    */
2812 
2813   void arm_shift_q7(
2814   q7_t * pSrc,
2815   int8_t shiftBits,
2816   q7_t * pDst,
2817   uint32_t blockSize);
2818 
2819   /**
2820    * @brief Shifts the elements of a Q15 vector a specified number of bits.
2821    * @param[in]  *pSrc points to the input vector
2822    * @param[in]  shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
2823    * @param[out] *pDst points to the output vector
2824    * @param[in]  blockSize number of samples in the vector
2825    * @return none.
2826    */
2827 
2828   void arm_shift_q15(
2829   q15_t * pSrc,
2830   int8_t shiftBits,
2831   q15_t * pDst,
2832   uint32_t blockSize);
2833 
2834   /**
2835    * @brief Shifts the elements of a Q31 vector a specified number of bits.
2836    * @param[in]  *pSrc points to the input vector
2837    * @param[in]  shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
2838    * @param[out] *pDst points to the output vector
2839    * @param[in]  blockSize number of samples in the vector
2840    * @return none.
2841    */
2842 
2843   void arm_shift_q31(
2844   q31_t * pSrc,
2845   int8_t shiftBits,
2846   q31_t * pDst,
2847   uint32_t blockSize);
2848 
2849   /**
2850    * @brief Adds a constant offset to every element of a floating-point vector.
2851    * @param[in]  *pSrc points to the input vector
2852    * @param[in]  offset constant value to be added
2853    * @param[out] *pDst points to the output vector
2854    * @param[in]  blockSize number of samples in the vector
2855    * @return none.
2856    */
2857 
2858   void arm_offset_f32(
2859   float32_t * pSrc,
2860   float32_t offset,
2861   float32_t * pDst,
2862   uint32_t blockSize);
2863 
2864   /**
2865    * @brief Adds a constant offset to every element of a Q7 vector.
2866    * @param[in]  *pSrc points to the input vector
2867    * @param[in]  offset constant value to be added
2868    * @param[out] *pDst points to the output vector
2869    * @param[in]  blockSize number of samples in the vector
2870    * @return none.
2871    */
2872 
2873   void arm_offset_q7(
2874   q7_t * pSrc,
2875   q7_t offset,
2876   q7_t * pDst,
2877   uint32_t blockSize);
2878 
2879   /**
2880    * @brief Adds a constant offset to every element of a Q15 vector.
2881    * @param[in]  *pSrc points to the input vector
2882    * @param[in]  offset constant value to be added
2883    * @param[out] *pDst points to the output vector
2884    * @param[in]  blockSize number of samples in the vector
2885    * @return none.
2886    */
2887 
2888   void arm_offset_q15(
2889   q15_t * pSrc,
2890   q15_t offset,
2891   q15_t * pDst,
2892   uint32_t blockSize);
2893 
2894   /**
2895    * @brief Adds a constant offset to every element of a Q31 vector.
2896    * @param[in]  *pSrc points to the input vector
2897    * @param[in]  offset constant value to be added
2898    * @param[out] *pDst points to the output vector
2899    * @param[in]  blockSize number of samples in the vector
2900    * @return none.
2901    */
2902 
2903   void arm_offset_q31(
2904   q31_t * pSrc,
2905   q31_t offset,
2906   q31_t * pDst,
2907   uint32_t blockSize);
2908 
2909   /**
2910    * @brief Negates the elements of a floating-point vector.
2911    * @param[in]  *pSrc points to the input vector
2912    * @param[out] *pDst points to the output vector that receives the negated samples
2913    * @param[in]  blockSize number of samples in the vector
2914    * @return none.
2915    */
2916 
2917   void arm_negate_f32(
2918   float32_t * pSrc,
2919   float32_t * pDst,
2920   uint32_t blockSize);
2921 
2922   /**
2923    * @brief Negates the elements of a Q7 vector.
2924    * @param[in]  *pSrc points to the input vector
2925    * @param[out] *pDst points to the output vector that receives the negated samples
2926    * @param[in]  blockSize number of samples in the vector
2927    * @return none.
2928    */
2929 
2930   void arm_negate_q7(
2931   q7_t * pSrc,
2932   q7_t * pDst,
2933   uint32_t blockSize);
2934 
2935   /**
2936    * @brief Negates the elements of a Q15 vector.
2937    * @param[in]  *pSrc points to the input vector
2938    * @param[out] *pDst points to the output vector that receives the negated samples
2939    * @param[in]  blockSize number of samples in the vector
2940    * @return none.
2941    */
2942 
2943   void arm_negate_q15(
2944   q15_t * pSrc,
2945   q15_t * pDst,
2946   uint32_t blockSize);
2947 
2948   /**
2949    * @brief Negates the elements of a Q31 vector.
2950    * @param[in]  *pSrc points to the input vector
2951    * @param[out] *pDst points to the output vector that receives the negated samples
2952    * @param[in]  blockSize number of samples in the vector
2953    * @return none.
2954    */
2955 
2956   void arm_negate_q31(
2957   q31_t * pSrc,
2958   q31_t * pDst,
2959   uint32_t blockSize);
2960   /**
2961    * @brief Copies the elements of a floating-point vector.
2962    * @param[in]  *pSrc points to the input vector
2963    * @param[out] *pDst points to the output vector
2964    * @param[in]  blockSize number of samples to copy
2965    * @return none.
2966    */
2967   void arm_copy_f32(
2968   float32_t * pSrc,
2969   float32_t * pDst,
2970   uint32_t blockSize);
2971 
2972   /**
2973    * @brief Copies the elements of a Q7 vector.
2974    * @param[in]  *pSrc points to the input vector
2975    * @param[out] *pDst points to the output vector
2976    * @param[in]  blockSize number of samples to copy
2977    * @return none.
2978    */
2979   void arm_copy_q7(
2980   q7_t * pSrc,
2981   q7_t * pDst,
2982   uint32_t blockSize);
2983 
2984   /**
2985    * @brief Copies the elements of a Q15 vector.
2986    * @param[in]  *pSrc points to the input vector
2987    * @param[out] *pDst points to the output vector
2988    * @param[in]  blockSize number of samples to copy
2989    * @return none.
2990    */
2991   void arm_copy_q15(
2992   q15_t * pSrc,
2993   q15_t * pDst,
2994   uint32_t blockSize);
2995 
2996   /**
2997    * @brief Copies the elements of a Q31 vector.
2998    * @param[in]  *pSrc points to the input vector
2999    * @param[out] *pDst points to the output vector
3000    * @param[in]  blockSize number of samples to copy
3001    * @return none.
3002    */
3003   void arm_copy_q31(
3004   q31_t * pSrc,
3005   q31_t * pDst,
3006   uint32_t blockSize);
3007   /**
3008    * @brief Fills a floating-point vector with a constant value.
3009    * @param[in]  value constant value to be filled
3010    * @param[out] *pDst points to the output vector
3011    * @param[in]  blockSize number of samples to fill
3012    * @return none.
3013    */
3014   void arm_fill_f32(
3015   float32_t value,
3016   float32_t * pDst,
3017   uint32_t blockSize);
3018 
3019   /**
3020    * @brief Fills a Q7 vector with a constant value.
3021    * @param[in]  value constant value to be filled
3022    * @param[out] *pDst points to the output vector
3023    * @param[in]  blockSize number of samples to fill
3024    * @return none.
3025    */
3026   void arm_fill_q7(
3027   q7_t value,
3028   q7_t * pDst,
3029   uint32_t blockSize);
3030 
3031   /**
3032    * @brief Fills a Q15 vector with a constant value.
3033    * @param[in]  value constant value to be filled
3034    * @param[out] *pDst points to the output vector
3035    * @param[in]  blockSize number of samples to fill
3036    * @return none.
3037    */
3038   void arm_fill_q15(
3039   q15_t value,
3040   q15_t * pDst,
3041   uint32_t blockSize);
3042 
3043   /**
3044    * @brief Fills a Q31 vector with a constant value.
3045    * @param[in]  value constant value to be filled
3046    * @param[out] *pDst points to the output vector
3047    * @param[in]  blockSize number of samples to fill
3048    * @return none.
3049    */
3050   void arm_fill_q31(
3051   q31_t value,
3052   q31_t * pDst,
3053   uint32_t blockSize);
3054 
3055 /**
3056  * @brief Convolution of floating-point sequences.
3057  * @param[in] *pSrcA points to the first input sequence.
3058  * @param[in] srcALen length of the first input sequence.
3059  * @param[in] *pSrcB points to the second input sequence.
3060  * @param[in] srcBLen length of the second input sequence.
3061  * @param[out] *pDst points to the location where the output result is written.  The output has length srcALen+srcBLen-1.
3062  * @return none.
3063  */
3064 
3065   void arm_conv_f32(
3066   float32_t * pSrcA,
3067   uint32_t srcALen,
3068   float32_t * pSrcB,
3069   uint32_t srcBLen,
3070   float32_t * pDst);
3071 
3072 
3073   /**
3074    * @brief Convolution of Q15 sequences (variant that takes caller-supplied scratch buffers).
3075    * @param[in] *pSrcA points to the first input sequence.
3076    * @param[in] srcALen length of the first input sequence.
3077    * @param[in] *pSrcB points to the second input sequence.
3078    * @param[in] srcBLen length of the second input sequence.
3079    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3080    * @param[in]  *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3081    * @param[in]  *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
3082    * @return none.
3083    */
3084 
3085 
3086   void arm_conv_opt_q15(
3087   q15_t * pSrcA,
3088   uint32_t srcALen,
3089   q15_t * pSrcB,
3090   uint32_t srcBLen,
3091   q15_t * pDst,
3092   q15_t * pScratch1,
3093   q15_t * pScratch2);
3094 
3095 
3096 /**
3097  * @brief Convolution of Q15 sequences.
3098  * @param[in] *pSrcA points to the first input sequence.
3099  * @param[in] srcALen length of the first input sequence.
3100  * @param[in] *pSrcB points to the second input sequence.
3101  * @param[in] srcBLen length of the second input sequence.
3102  * @param[out] *pDst points to the location where the output result is written.  The output has length srcALen+srcBLen-1.
3103  * @return none.
3104  */
3105 
3106   void arm_conv_q15(
3107   q15_t * pSrcA,
3108   uint32_t srcALen,
3109   q15_t * pSrcB,
3110   uint32_t srcBLen,
3111   q15_t * pDst);
3112 
3113   /**
3114    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
3115    * @param[in] *pSrcA points to the first input sequence.
3116    * @param[in] srcALen length of the first input sequence.
3117    * @param[in] *pSrcB points to the second input sequence.
3118    * @param[in] srcBLen length of the second input sequence.
3119    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3120    * @return none.
3121    */
3122 
3123   void arm_conv_fast_q15(
3124 			  q15_t * pSrcA,
3125 			 uint32_t srcALen,
3126 			  q15_t * pSrcB,
3127 			 uint32_t srcBLen,
3128 			 q15_t * pDst);
3129 
3130   /**
3131    * @brief Convolution of Q15 sequences (fast version, with caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3132    * @param[in] *pSrcA points to the first input sequence.
3133    * @param[in] srcALen length of the first input sequence.
3134    * @param[in] *pSrcB points to the second input sequence.
3135    * @param[in] srcBLen length of the second input sequence.
3136    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3137    * @param[in]  *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3138    * @param[in]  *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
3139    * @return none.
3140    */
3141 
3142   void arm_conv_fast_opt_q15(
3143   q15_t * pSrcA,
3144   uint32_t srcALen,
3145   q15_t * pSrcB,
3146   uint32_t srcBLen,
3147   q15_t * pDst,
3148   q15_t * pScratch1,
3149   q15_t * pScratch2);
3150 
3151 
3152 
3153   /**
3154    * @brief Convolution of Q31 sequences.
3155    * @param[in] *pSrcA points to the first input sequence.
3156    * @param[in] srcALen length of the first input sequence.
3157    * @param[in] *pSrcB points to the second input sequence.
3158    * @param[in] srcBLen length of the second input sequence.
3159    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3160    * @return none.
3161    */
3162 
3163   void arm_conv_q31(
3164   q31_t * pSrcA,
3165   uint32_t srcALen,
3166   q31_t * pSrcB,
3167   uint32_t srcBLen,
3168   q31_t * pDst);
3169 
3170   /**
3171    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3172    * @param[in] *pSrcA points to the first input sequence.
3173    * @param[in] srcALen length of the first input sequence.
3174    * @param[in] *pSrcB points to the second input sequence.
3175    * @param[in] srcBLen length of the second input sequence.
3176    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3177    * @return none.
3178    */
3179 
3180   void arm_conv_fast_q31(
3181   q31_t * pSrcA,
3182   uint32_t srcALen,
3183   q31_t * pSrcB,
3184   uint32_t srcBLen,
3185   q31_t * pDst);
3186 
3187 
3188     /**
3189    * @brief Convolution of Q7 sequences (variant that takes caller-supplied scratch buffers).
3190    * @param[in] *pSrcA points to the first input sequence.
3191    * @param[in] srcALen length of the first input sequence.
3192    * @param[in] *pSrcB points to the second input sequence.
3193    * @param[in] srcBLen length of the second input sequence.
3194    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3195    * @param[in]  *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3196    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3197    * @return none.
3198    */
3199 
3200   void arm_conv_opt_q7(
3201   q7_t * pSrcA,
3202   uint32_t srcALen,
3203   q7_t * pSrcB,
3204   uint32_t srcBLen,
3205   q7_t * pDst,
3206   q15_t * pScratch1,
3207   q15_t * pScratch2);
3208 
3209 
3210 
3211   /**
3212    * @brief Convolution of Q7 sequences.
3213    * @param[in] *pSrcA points to the first input sequence.
3214    * @param[in] srcALen length of the first input sequence.
3215    * @param[in] *pSrcB points to the second input sequence.
3216    * @param[in] srcBLen length of the second input sequence.
3217    * @param[out] *pDst points to the block of output data.  Length srcALen+srcBLen-1.
3218    * @return none.
3219    */
3220 
3221   void arm_conv_q7(
3222   q7_t * pSrcA,
3223   uint32_t srcALen,
3224   q7_t * pSrcB,
3225   uint32_t srcBLen,
3226   q7_t * pDst);
3227 
3228 
3229   /**
3230    * @brief Partial convolution of floating-point sequences.
3231    * @param[in]       *pSrcA points to the first input sequence.
3232    * @param[in]       srcALen length of the first input sequence.
3233    * @param[in]       *pSrcB points to the second input sequence.
3234    * @param[in]       srcBLen length of the second input sequence.
3235    * @param[out]      *pDst points to the block of output data.
3236    * @param[in]       firstIndex index of the first output sample to compute.
3237    * @param[in]       numPoints number of output points to be computed.
3238    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3239    */
3240 
3241   arm_status arm_conv_partial_f32(
3242   float32_t * pSrcA,
3243   uint32_t srcALen,
3244   float32_t * pSrcB,
3245   uint32_t srcBLen,
3246   float32_t * pDst,
3247   uint32_t firstIndex,
3248   uint32_t numPoints);
3249 
3250     /**
3251    * @brief Partial convolution of Q15 sequences (variant that takes caller-supplied scratch buffers).
3252    * @param[in]       *pSrcA points to the first input sequence.
3253    * @param[in]       srcALen length of the first input sequence.
3254    * @param[in]       *pSrcB points to the second input sequence.
3255    * @param[in]       srcBLen length of the second input sequence.
3256    * @param[out]      *pDst points to the block of output data.
3257    * @param[in]       firstIndex index of the first output sample to compute.
3258    * @param[in]       numPoints number of output points to be computed.
3259    * @param[in]       *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3260    * @param[in]       *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
3261    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3262    */
3263 
3264   arm_status arm_conv_partial_opt_q15(
3265   q15_t * pSrcA,
3266   uint32_t srcALen,
3267   q15_t * pSrcB,
3268   uint32_t srcBLen,
3269   q15_t * pDst,
3270   uint32_t firstIndex,
3271   uint32_t numPoints,
3272   q15_t * pScratch1,
3273   q15_t * pScratch2);
3274 
3275 
3276 /**
3277    * @brief Partial convolution of Q15 sequences.
3278    * @param[in]       *pSrcA points to the first input sequence.
3279    * @param[in]       srcALen length of the first input sequence.
3280    * @param[in]       *pSrcB points to the second input sequence.
3281    * @param[in]       srcBLen length of the second input sequence.
3282    * @param[out]      *pDst points to the block of output data.
3283    * @param[in]       firstIndex index of the first output sample to compute.
3284    * @param[in]       numPoints number of output points to be computed.
3285    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3286    */
3287 
3288   arm_status arm_conv_partial_q15(
3289   q15_t * pSrcA,
3290   uint32_t srcALen,
3291   q15_t * pSrcB,
3292   uint32_t srcBLen,
3293   q15_t * pDst,
3294   uint32_t firstIndex,
3295   uint32_t numPoints);
3296 
3297   /**
3298    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
3299    * @param[in]       *pSrcA points to the first input sequence.
3300    * @param[in]       srcALen length of the first input sequence.
3301    * @param[in]       *pSrcB points to the second input sequence.
3302    * @param[in]       srcBLen length of the second input sequence.
3303    * @param[out]      *pDst points to the block of output data.
3304    * @param[in]       firstIndex index of the first output sample to compute.
3305    * @param[in]       numPoints number of output points to be computed.
3306    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3307    */
3308 
3309   arm_status arm_conv_partial_fast_q15(
3310 				        q15_t * pSrcA,
3311 				       uint32_t srcALen,
3312 				        q15_t * pSrcB,
3313 				       uint32_t srcBLen,
3314 				       q15_t * pDst,
3315 				       uint32_t firstIndex,
3316 				       uint32_t numPoints);
3317 
3318 
3319   /**
3320    * @brief Partial convolution of Q15 sequences (fast version, with caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3321    * @param[in]       *pSrcA points to the first input sequence.
3322    * @param[in]       srcALen length of the first input sequence.
3323    * @param[in]       *pSrcB points to the second input sequence.
3324    * @param[in]       srcBLen length of the second input sequence.
3325    * @param[out]      *pDst points to the block of output data.
3326    * @param[in]       firstIndex index of the first output sample to compute.
3327    * @param[in]       numPoints number of output points to be computed.
3328    * @param[in]       *pScratch1 points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3329    * @param[in]       *pScratch2 points to scratch buffer of size min(srcALen, srcBLen).
3330    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3331    */
3332 
3333   arm_status arm_conv_partial_fast_opt_q15(
3334   q15_t * pSrcA,
3335   uint32_t srcALen,
3336   q15_t * pSrcB,
3337   uint32_t srcBLen,
3338   q15_t * pDst,
3339   uint32_t firstIndex,
3340   uint32_t numPoints,
3341   q15_t * pScratch1,
3342   q15_t * pScratch2);
3343 
3344 
3345   /**
3346    * @brief Partial convolution of Q31 sequences.
3347    * @param[in]       *pSrcA points to the first input sequence.
3348    * @param[in]       srcALen length of the first input sequence.
3349    * @param[in]       *pSrcB points to the second input sequence.
3350    * @param[in]       srcBLen length of the second input sequence.
3351    * @param[out]      *pDst points to the block of output data.
3352    * @param[in]       firstIndex index of the first output sample to compute.
3353    * @param[in]       numPoints number of output points to be computed.
3354    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3355    */
3356 
3357   arm_status arm_conv_partial_q31(
3358   q31_t * pSrcA,
3359   uint32_t srcALen,
3360   q31_t * pSrcB,
3361   uint32_t srcBLen,
3362   q31_t * pDst,
3363   uint32_t firstIndex,
3364   uint32_t numPoints);
3365 
3366 
3367   /**
3368    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3369    * @param[in]       *pSrcA points to the first input sequence.
3370    * @param[in]       srcALen length of the first input sequence.
3371    * @param[in]       *pSrcB points to the second input sequence.
3372    * @param[in]       srcBLen length of the second input sequence.
3373    * @param[out]      *pDst points to the block of output data.
3374    * @param[in]       firstIndex index of the first output sample to compute.
3375    * @param[in]       numPoints number of output points to be computed.
3376    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3377    */
3378 
3379   arm_status arm_conv_partial_fast_q31(
3380   q31_t * pSrcA,
3381   uint32_t srcALen,
3382   q31_t * pSrcB,
3383   uint32_t srcBLen,
3384   q31_t * pDst,
3385   uint32_t firstIndex,
3386   uint32_t numPoints);
3387 
3388 
3389   /**
3390    * @brief Partial convolution of Q7 sequences (variant that takes caller-supplied scratch buffers).
3391    * @param[in]       *pSrcA points to the first input sequence.
3392    * @param[in]       srcALen length of the first input sequence.
3393    * @param[in]       *pSrcB points to the second input sequence.
3394    * @param[in]       srcBLen length of the second input sequence.
3395    * @param[out]      *pDst points to the block of output data.
3396    * @param[in]       firstIndex index of the first output sample to compute.
3397    * @param[in]       numPoints number of output points to be computed.
3398    * @param[in]       *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3399    * @param[in]       *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3400    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3401    */
3402 
3403   arm_status arm_conv_partial_opt_q7(
3404   q7_t * pSrcA,
3405   uint32_t srcALen,
3406   q7_t * pSrcB,
3407   uint32_t srcBLen,
3408   q7_t * pDst,
3409   uint32_t firstIndex,
3410   uint32_t numPoints,
3411   q15_t * pScratch1,
3412   q15_t * pScratch2);
3413 
3414 
3415 /**
3416    * @brief Partial convolution of Q7 sequences.
3417    * @param[in]       *pSrcA points to the first input sequence.
3418    * @param[in]       srcALen length of the first input sequence.
3419    * @param[in]       *pSrcB points to the second input sequence.
3420    * @param[in]       srcBLen length of the second input sequence.
3421    * @param[out]      *pDst points to the block of output data.
3422    * @param[in]       firstIndex index of the first output sample to compute.
3423    * @param[in]       numPoints number of output points to be computed.
3424    * @return  ARM_MATH_SUCCESS if the function completed correctly, or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3425    */
3426 
3427   arm_status arm_conv_partial_q7(
3428   q7_t * pSrcA,
3429   uint32_t srcALen,
3430   q7_t * pSrcB,
3431   uint32_t srcBLen,
3432   q7_t * pDst,
3433   uint32_t firstIndex,
3434   uint32_t numPoints);
3435 
3436 
3437 
3438   /**
3439    * @brief Instance structure for the Q15 FIR decimator.
3440    */
3441 
3442   typedef struct
3443   {
3444     uint8_t M;                      /**< decimation factor. */
3445     uint16_t numTaps;               /**< number of coefficients in the filter. */
3446     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3447     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3448   } arm_fir_decimate_instance_q15;
3449 
3450   /**
3451    * @brief Instance structure for the Q31 FIR decimator.
3452    */
3453 
3454   typedef struct
3455   {
3456     uint8_t M;                  /**< decimation factor. */
3457     uint16_t numTaps;           /**< number of coefficients in the filter. */
3458     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
3459     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
3460 
3461   } arm_fir_decimate_instance_q31;
3462 
3463   /**
3464    * @brief Instance structure for the floating-point FIR decimator.
3465    */
3466 
3467   typedef struct
3468   {
3469     uint8_t M;                          /**< decimation factor. */
3470     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3471     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3472     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1, where blockSize is the number of input samples processed per call. */
3473 
3474   } arm_fir_decimate_instance_f32;
3475 
3476 
3477 
3478   /**
3479    * @brief Processing function for the floating-point FIR decimator.
3480    * @param[in] *S points to an instance of the floating-point FIR decimator structure.
3481    * @param[in] *pSrc points to the block of input data.
3482    * @param[out] *pDst points to the block of output data.
3483    * @param[in] blockSize number of input samples to process per call.
3484    * @return none.
3485    */
3486 
3487   void arm_fir_decimate_f32(
3488   const arm_fir_decimate_instance_f32 * S,
3489   float32_t * pSrc,
3490   float32_t * pDst,
3491   uint32_t blockSize);
3492 
3493 
3494   /**
3495    * @brief  Initialization function for the floating-point FIR decimator.
3496    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3497    * @param[in] numTaps  number of coefficients in the filter.
3498    * @param[in] M  decimation factor.
3499    * @param[in] *pCoeffs points to the filter coefficients.
3500    * @param[in] *pState points to the state buffer.
3501    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3502    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3503    * <code>blockSize</code> is not a multiple of <code>M</code>.
3504    */
3505 
3506   arm_status arm_fir_decimate_init_f32(
3507   arm_fir_decimate_instance_f32 * S,
3508   uint16_t numTaps,
3509   uint8_t M,
3510   float32_t * pCoeffs,
3511   float32_t * pState,
3512   uint32_t blockSize);
3513 
3514   /**
3515    * @brief Processing function for the Q15 FIR decimator.
3516    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3517    * @param[in] *pSrc points to the block of input data.
3518    * @param[out] *pDst points to the block of output data.
3519    * @param[in] blockSize number of input samples to process per call.
3520    * @return none.
3521    */
3522 
3523   void arm_fir_decimate_q15(
3524   const arm_fir_decimate_instance_q15 * S,
3525   q15_t * pSrc,
3526   q15_t * pDst,
3527   uint32_t blockSize);
3528 
3529   /**
3530    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3531    * @param[in] *S points to an instance of the Q15 FIR decimator structure.
3532    * @param[in] *pSrc points to the block of input data.
3533    * @param[out] *pDst points to the block of output data.
3534    * @param[in] blockSize number of input samples to process per call.
3535    * @return none.
3536    */
3537 
3538   void arm_fir_decimate_fast_q15(
3539   const arm_fir_decimate_instance_q15 * S,
3540   q15_t * pSrc,
3541   q15_t * pDst,
3542   uint32_t blockSize);
3543 
3544 
3545 
3546   /**
3547    * @brief  Initialization function for the Q15 FIR decimator.
3548    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3549    * @param[in] numTaps  number of coefficients in the filter.
3550    * @param[in] M  decimation factor.
3551    * @param[in] *pCoeffs points to the filter coefficients.
3552    * @param[in] *pState points to the state buffer.
3553    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3554    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3555    * <code>blockSize</code> is not a multiple of <code>M</code>.
3556    */
3557 
3558   arm_status arm_fir_decimate_init_q15(
3559   arm_fir_decimate_instance_q15 * S,
3560   uint16_t numTaps,
3561   uint8_t M,
3562   q15_t * pCoeffs,
3563   q15_t * pState,
3564   uint32_t blockSize);
3565 
3566   /**
3567    * @brief Processing function for the Q31 FIR decimator.
3568    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3569    * @param[in] *pSrc points to the block of input data.
3570    * @param[out] *pDst points to the block of output data.
3571    * @param[in] blockSize number of input samples to process per call.
3572    * @return none.
3573    */
3574 
3575   void arm_fir_decimate_q31(
3576   const arm_fir_decimate_instance_q31 * S,
3577   q31_t * pSrc,
3578   q31_t * pDst,
3579   uint32_t blockSize);
3580 
3581   /**
3582    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3583    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3584    * @param[in] *pSrc points to the block of input data.
3585    * @param[out] *pDst points to the block of output data.
3586    * @param[in] blockSize number of input samples to process per call.
3587    * @return none.
3588    */
3589 
3590   void arm_fir_decimate_fast_q31(
3591   arm_fir_decimate_instance_q31 * S,   /* NOTE(review): non-const here, unlike arm_fir_decimate_fast_q15 - confirm whether this is intentional */
3592   q31_t * pSrc,
3593   q31_t * pDst,
3594   uint32_t blockSize);
3595 
3596 
3597   /**
3598    * @brief  Initialization function for the Q31 FIR decimator.
3599    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3600    * @param[in] numTaps  number of coefficients in the filter.
3601    * @param[in] M  decimation factor.
3602    * @param[in] *pCoeffs points to the filter coefficients.
3603    * @param[in] *pState points to the state buffer.
3604    * @param[in] blockSize number of input samples to process per call; must be a multiple of <code>M</code>.
3605    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3606    * <code>blockSize</code> is not a multiple of <code>M</code>.
3607    */
3608 
3609   arm_status arm_fir_decimate_init_q31(
3610   arm_fir_decimate_instance_q31 * S,
3611   uint16_t numTaps,
3612   uint8_t M,
3613   q31_t * pCoeffs,
3614   q31_t * pState,
3615   uint32_t blockSize);
3616 
3617 
3618 
3619   /**
3620    * @brief Instance structure for the Q15 FIR interpolator.
3621    */
3622 
3623   typedef struct
3624   {
3625     uint8_t L;                      /**< upsample factor. */
3626     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3627     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3628     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
3629   } arm_fir_interpolate_instance_q15;
3630 
3631   /**
3632    * @brief Instance structure for the Q31 FIR interpolator.
3633    */
3634 
3635   typedef struct
3636   {
3637     uint8_t L;                      /**< upsample factor. */
3638     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3639     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
3640     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1, where blockSize is the number of input samples processed per call. */
3641   } arm_fir_interpolate_instance_q31;
3642 
3643   /**
3644    * @brief Instance structure for the floating-point FIR interpolator.
3645    */
3646 
3647   typedef struct
3648   {
3649     uint8_t L;                     /**< upsample factor. */
3650     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3651     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3652     float32_t *pState;              /**< points to the state variable array. The array is of length blockSize+phaseLength-1 (same layout as the Q15 and Q31 interpolator instances). */
3653   } arm_fir_interpolate_instance_f32;
3654 
3655 
3656   /**
3657    * @brief Processing function for the Q15 FIR interpolator.
3658    * @param[in]  *S        points to an instance of the Q15 FIR interpolator structure.
3659    * @param[in]  *pSrc     points to the block of input data.
3660    * @param[out] *pDst     points to the block of output data.
3661    * @param[in]  blockSize number of input samples to process per call.
3662    * @return none.
3663    */
3664 
3665   void arm_fir_interpolate_q15(
3666   const arm_fir_interpolate_instance_q15 * S,
3667   q15_t * pSrc,
3668   q15_t * pDst,
3669   uint32_t blockSize);
3670 
3671 
3672   /**
3673    * @brief  Initialization function for the Q15 FIR interpolator.
3674    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3675    * @param[in]     L         upsample factor.
3676    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3677    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3678    * @param[in]     *pState   points to the state buffer.
3679    * @param[in]     blockSize number of input samples to process per call.
3680    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3681    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3682    */
3683 
3684   arm_status arm_fir_interpolate_init_q15(
3685   arm_fir_interpolate_instance_q15 * S,
3686   uint8_t L,
3687   uint16_t numTaps,
3688   q15_t * pCoeffs,
3689   q15_t * pState,
3690   uint32_t blockSize);
3691 
3692   /**
3693    * @brief Processing function for the Q31 FIR interpolator.
3694    * @param[in]  *S        points to an instance of the Q31 FIR interpolator structure.
3695    * @param[in]  *pSrc     points to the block of input data.
3696    * @param[out] *pDst     points to the block of output data.
3697    * @param[in]  blockSize number of input samples to process per call.
3698    * @return none.
3699    */
3700 
3701   void arm_fir_interpolate_q31(
3702   const arm_fir_interpolate_instance_q31 * S,
3703   q31_t * pSrc,
3704   q31_t * pDst,
3705   uint32_t blockSize);
3706 
3707   /**
3708    * @brief  Initialization function for the Q31 FIR interpolator.
3709    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3710    * @param[in]     L         upsample factor.
3711    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3712    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3713    * @param[in]     *pState   points to the state buffer.
3714    * @param[in]     blockSize number of input samples to process per call.
3715    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3716    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3717    */
3718 
3719   arm_status arm_fir_interpolate_init_q31(
3720   arm_fir_interpolate_instance_q31 * S,
3721   uint8_t L,
3722   uint16_t numTaps,
3723   q31_t * pCoeffs,
3724   q31_t * pState,
3725   uint32_t blockSize);
3726 
3727 
3728   /**
3729    * @brief Processing function for the floating-point FIR interpolator.
3730    * @param[in]  *S        points to an instance of the floating-point FIR interpolator structure.
3731    * @param[in]  *pSrc     points to the block of input data.
3732    * @param[out] *pDst     points to the block of output data.
3733    * @param[in]  blockSize number of input samples to process per call.
3734    * @return none.
3735    */
3736 
3737   void arm_fir_interpolate_f32(
3738   const arm_fir_interpolate_instance_f32 * S,
3739   float32_t * pSrc,
3740   float32_t * pDst,
3741   uint32_t blockSize);
3742 
3743   /**
3744    * @brief  Initialization function for the floating-point FIR interpolator.
3745    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3746    * @param[in]     L         upsample factor.
3747    * @param[in]     numTaps   number of coefficients in the filter; must be a multiple of <code>L</code>.
3748    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3749    * @param[in]     *pState   points to the state buffer.
3750    * @param[in]     blockSize number of input samples to process per call.
3751    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3752    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3753    */
3754 
3755   arm_status arm_fir_interpolate_init_f32(
3756   arm_fir_interpolate_instance_f32 * S,
3757   uint8_t L,
3758   uint16_t numTaps,
3759   float32_t * pCoeffs,
3760   float32_t * pState,
3761   uint32_t blockSize);
3762 
3763   /**
3764    * @brief Instance structure for the high precision Q31 Biquad cascade filter.
3765    */
3766 
3767   typedef struct
3768   {
3769     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3770     q63_t *pState;           /**< points to the array of state variables (q63_t).  The array is of length 4*numStages. */
3771     q31_t *pCoeffs;          /**< points to the array of coefficients.  The array is of length 5*numStages. */
3772     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3773 
3774   } arm_biquad_cas_df1_32x64_ins_q31;
3775 
3776 
3777   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3778    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
3779    * @param[in]  *pSrc     points to the block of input data.
3780    * @param[out] *pDst     points to the block of output data.
3781    * @param[in]  blockSize number of samples to process.
3782    * @return none.
3783    */
3784 
3785   void arm_biquad_cas_df1_32x64_q31(
3786   const arm_biquad_cas_df1_32x64_ins_q31 * S,
3787   q31_t * pSrc,
3788   q31_t * pDst,
3789   uint32_t blockSize);
3790 
3791 
3792   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3793    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3794    * @param[in]     numStages    number of 2nd order stages in the filter.
3795    * @param[in]     *pCoeffs     points to the filter coefficients.
3796    * @param[in]     *pState      points to the state buffer.
3797    * @param[in]     postShift    shift, in bits, to be applied to the output. Varies according to the coefficients format.
3798    * @return        none.
3799    */
3800 
3801   void arm_biquad_cas_df1_32x64_init_q31(
3802   arm_biquad_cas_df1_32x64_ins_q31 * S,
3803   uint8_t numStages,
3804   q31_t * pCoeffs,
3805   q63_t * pState,
3806   uint8_t postShift);
3807 
3808 
3809 
3810   /**
3811    * @brief Instance structure for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3812    */
3813 
3814   typedef struct
3815   {
3816     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3817     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3818     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3819   } arm_biquad_cascade_df2T_instance_f32;
3820 
3821 
3822 
3823   /**
3824    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter, 2 channels (stereo variant).
3825    */
3826 
3827   typedef struct
3828   {
3829     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3830     float32_t *pState;         /**< points to the array of state variables.  The array is of length 4*numStages. */
3831     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3832   } arm_biquad_cascade_stereo_df2T_instance_f32;
3833 
3834 
3835 
3836   /**
3837    * @brief Instance structure for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3838    */
3839 
3840   typedef struct
3841   {
3842     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3843     float64_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3844     float64_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3845   } arm_biquad_cascade_df2T_instance_f64;
3846 
3847 
3848   /**
3849    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3850    * @param[in]  *S        points to an instance of the filter data structure.
3851    * @param[in]  *pSrc     points to the block of input data.
3852    * @param[out] *pDst     points to the block of output data.
3853    * @param[in]  blockSize number of samples to process.
3854    * @return none.
3855    */
3856 
3857   void arm_biquad_cascade_df2T_f32(
3858   const arm_biquad_cascade_df2T_instance_f32 * S,
3859   float32_t * pSrc,
3860   float32_t * pDst,
3861   uint32_t blockSize);
3862 
3863 
3864   /**
3865    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter, 2 channels.
3866    * @param[in]  *S        points to an instance of the stereo filter data structure.
3867    * @param[in]  *pSrc     points to the block of input data.
3868    * @param[out] *pDst     points to the block of output data.
3869    * @param[in]  blockSize number of samples to process.
3870    * @return none.
3871    */
3872 
3873   void arm_biquad_cascade_stereo_df2T_f32(
3874   const arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3875   float32_t * pSrc,
3876   float32_t * pDst,
3877   uint32_t blockSize);
3878 
3879   /**
3880    * @brief Processing function for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3881    * @param[in]  *S        points to an instance of the filter data structure.
3882    * @param[in]  *pSrc     points to the block of input data.
3883    * @param[out] *pDst     points to the block of output data.
3884    * @param[in]  blockSize number of samples to process.
3885    * @return none.
3886    */
3887 
3888   void arm_biquad_cascade_df2T_f64(
3889   const arm_biquad_cascade_df2T_instance_f64 * S,
3890   float64_t * pSrc,
3891   float64_t * pDst,
3892   uint32_t blockSize);
3893 
3894 
3895   /**
3896    * @brief  Initialization function for the floating-point (float32_t) transposed direct form II Biquad cascade filter.
3897    * @param[in,out] *S           points to an instance of the filter data structure.
3898    * @param[in]     numStages    number of 2nd order stages in the filter.
3899    * @param[in]     *pCoeffs     points to the filter coefficients.
3900    * @param[in]     *pState      points to the state buffer.
3901    * @return        none.
3902    */
3903 
3904   void arm_biquad_cascade_df2T_init_f32(
3905   arm_biquad_cascade_df2T_instance_f32 * S,
3906   uint8_t numStages,
3907   float32_t * pCoeffs,
3908   float32_t * pState);
3909 
3910 
3911   /**
3912    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter, 2 channels (stereo variant).
3913    * @param[in,out] *S           points to an instance of the stereo filter data structure.
3914    * @param[in]     numStages    number of 2nd order stages in the filter.
3915    * @param[in]     *pCoeffs     points to the filter coefficients.
3916    * @param[in]     *pState      points to the state buffer.
3917    * @return        none.
3918    */
3919 
3920   void arm_biquad_cascade_stereo_df2T_init_f32(
3921   arm_biquad_cascade_stereo_df2T_instance_f32 * S,
3922   uint8_t numStages,
3923   float32_t * pCoeffs,
3924   float32_t * pState);
3925 
3926 
3927   /**
3928    * @brief  Initialization function for the 64-bit floating-point (float64_t) transposed direct form II Biquad cascade filter.
3929    * @param[in,out] *S           points to an instance of the filter data structure.
3930    * @param[in]     numStages    number of 2nd order stages in the filter.
3931    * @param[in]     *pCoeffs     points to the filter coefficients.
3932    * @param[in]     *pState      points to the state buffer.
3933    * @return        none.
3934    */
3935 
3936   void arm_biquad_cascade_df2T_init_f64(
3937   arm_biquad_cascade_df2T_instance_f64 * S,
3938   uint8_t numStages,
3939   float64_t * pCoeffs,
3940   float64_t * pState);
3941 
3942 
3943 
3944   /**
3945    * @brief Instance structure for the Q15 FIR lattice filter.
3946    */
3947 
3948   typedef struct
3949   {
3950     uint16_t numStages;                          /**< number of filter stages. */
3951     q15_t *pState;                               /**< points to the state variable array (q15_t). The array is of length numStages. */
3952     q15_t *pCoeffs;                              /**< points to the coefficient array (q15_t). The array is of length numStages. */
3953   } arm_fir_lattice_instance_q15;
3954 
3955   /**
3956    * @brief Instance structure for the Q31 FIR lattice filter.
3957    */
3958 
3959   typedef struct
3960   {
3961     uint16_t numStages;                          /**< number of filter stages. */
3962     q31_t *pState;                               /**< points to the state variable array (q31_t). The array is of length numStages. */
3963     q31_t *pCoeffs;                              /**< points to the coefficient array (q31_t). The array is of length numStages. */
3964   } arm_fir_lattice_instance_q31;
3965 
3966   /**
3967    * @brief Instance structure for the floating-point FIR lattice filter.
3968    */
3969 
3970   typedef struct
3971   {
3972     uint16_t numStages;                  /**< number of filter stages. */
3973     float32_t *pState;                   /**< points to the state variable array (float32_t). The array is of length numStages. */
3974     float32_t *pCoeffs;                  /**< points to the coefficient array (float32_t). The array is of length numStages. */
3975   } arm_fir_lattice_instance_f32;
3976 
3977   /**
3978    * @brief Initialization function for the Q15 FIR lattice filter.
3979    * @param[in,out] *S points to an instance of the Q15 FIR lattice structure.
3980    * @param[in] numStages  number of filter stages.
3981    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3982    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3983    * @return none.
3984    */
3985 
3986   void arm_fir_lattice_init_q15(
3987   arm_fir_lattice_instance_q15 * S,
3988   uint16_t numStages,
3989   q15_t * pCoeffs,
3990   q15_t * pState);
3991 
3992 
3993   /**
3994    * @brief Processing function for the Q15 FIR lattice filter.
3995    * @param[in]  *S        points to an instance of the Q15 FIR lattice structure.
3996    * @param[in]  *pSrc     points to the block of input data.
3997    * @param[out] *pDst     points to the block of output data.
3998    * @param[in]  blockSize number of samples to process.
3999    * @return none.
4000    */
4001   void arm_fir_lattice_q15(
4002   const arm_fir_lattice_instance_q15 * S,
4003   q15_t * pSrc,
4004   q15_t * pDst,
4005   uint32_t blockSize);
4006 
4007   /**
4008    * @brief Initialization function for the Q31 FIR lattice filter.
4009    * @param[in,out] *S points to an instance of the Q31 FIR lattice structure.
4010    * @param[in] numStages  number of filter stages.
4011    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
4012    * @param[in] *pState points to the state buffer.  The array is of length numStages.
4013    * @return none.
4014    */
4015 
4016   void arm_fir_lattice_init_q31(
4017   arm_fir_lattice_instance_q31 * S,
4018   uint16_t numStages,
4019   q31_t * pCoeffs,
4020   q31_t * pState);
4021 
4022 
4023   /**
4024    * @brief Processing function for the Q31 FIR lattice filter.
4025    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure.
4026    * @param[in]  *pSrc     points to the block of input data.
4027    * @param[out] *pDst     points to the block of output data.
4028    * @param[in]  blockSize number of samples to process.
4029    * @return none.
4030    */
4031 
4032   void arm_fir_lattice_q31(
4033   const arm_fir_lattice_instance_q31 * S,
4034   q31_t * pSrc,
4035   q31_t * pDst,
4036   uint32_t blockSize);
4037 
4038 /**
4039  * @brief Initialization function for the floating-point FIR lattice filter.
4040  * @param[in,out] *S points to an instance of the floating-point FIR lattice structure.
4041  * @param[in] numStages  number of filter stages.
4042  * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
4043  * @param[in] *pState points to the state buffer.  The array is of length numStages.
4044  * @return none.
4045  */
4046 
4047   void arm_fir_lattice_init_f32(
4048   arm_fir_lattice_instance_f32 * S,
4049   uint16_t numStages,
4050   float32_t * pCoeffs,
4051   float32_t * pState);
4052 
4053   /**
4054    * @brief Processing function for the floating-point FIR lattice filter.
4055    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure.
4056    * @param[in]  *pSrc     points to the block of input data.
4057    * @param[out] *pDst     points to the block of output data.
4058    * @param[in]  blockSize number of samples to process.
4059    * @return none.
4060    */
4061 
4062   void arm_fir_lattice_f32(
4063   const arm_fir_lattice_instance_f32 * S,
4064   float32_t * pSrc,
4065   float32_t * pDst,
4066   uint32_t blockSize);
4067 
4068   /**
4069    * @brief Instance structure for the Q15 IIR lattice filter.
4070    */
4071   typedef struct
4072   {
4073     uint16_t numStages;                         /**< number of stages in the filter. */
4074     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
4075     q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
4076     q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
4077   } arm_iir_lattice_instance_q15;
4078 
4079   /**
4080    * @brief Instance structure for the Q31 IIR lattice filter.
4081    */
4082   typedef struct
4083   {
4084     uint16_t numStages;                         /**< number of stages in the filter. */
4085     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
4086     q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
4087     q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
4088   } arm_iir_lattice_instance_q31;
4089 
4090   /**
4091    * @brief Instance structure for the floating-point IIR lattice filter.
4092    */
4093   typedef struct
4094   {
4095     uint16_t numStages;                         /**< number of stages in the filter. */
4096     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize, where blockSize is the number of samples processed per call. */
4097     float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
4098     float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
4099   } arm_iir_lattice_instance_f32;
4100 
4101   /**
4102    * @brief Processing function for the floating-point IIR lattice filter.
4103    * @param[in]  *S        points to an instance of the floating-point IIR lattice structure.
4104    * @param[in]  *pSrc     points to the block of input data.
4105    * @param[out] *pDst     points to the block of output data.
4106    * @param[in]  blockSize number of samples to process.
4107    * @return none.
4108    */
4109 
4110   void arm_iir_lattice_f32(
4111   const arm_iir_lattice_instance_f32 * S,
4112   float32_t * pSrc,
4113   float32_t * pDst,
4114   uint32_t blockSize);
4115 
4116   /**
4117    * @brief Initialization function for the floating-point IIR lattice filter.
4118    * @param[in,out] *S points to an instance of the floating-point IIR lattice structure.
4119    * @param[in] numStages number of stages in the filter.
4120    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
4121    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
4122    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize (matching the pState field of arm_iir_lattice_instance_f32).
4123    * @param[in] blockSize number of samples to process per call.
4124    * @return none.
4125    */
4126 
4127   void arm_iir_lattice_init_f32(
4128   arm_iir_lattice_instance_f32 * S,
4129   uint16_t numStages,
4130   float32_t * pkCoeffs,
4131   float32_t * pvCoeffs,
4132   float32_t * pState,
4133   uint32_t blockSize);
4134 
4135 
4136   /**
4137    * @brief Processing function for the Q31 IIR lattice filter.
4138    * @param[in]  *S        points to an instance of the Q31 IIR lattice structure.
4139    * @param[in]  *pSrc     points to the block of input data.
4140    * @param[out] *pDst     points to the block of output data.
4141    * @param[in]  blockSize number of samples to process.
4142    * @return none.
4143    */
4144 
4145   void arm_iir_lattice_q31(
4146   const arm_iir_lattice_instance_q31 * S,
4147   q31_t * pSrc,
4148   q31_t * pDst,
4149   uint32_t blockSize);
4150 
4151 
4152   /**
4153    * @brief Initialization function for the Q31 IIR lattice filter.
4154    * @param[in,out] *S points to an instance of the Q31 IIR lattice structure.
4155    * @param[in] numStages number of stages in the filter.
4156    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
4157    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
4158    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
4159    * @param[in] blockSize number of samples to process per call.
4160    * @return none.
4161    */
4162 
4163   void arm_iir_lattice_init_q31(
4164   arm_iir_lattice_instance_q31 * S,
4165   uint16_t numStages,
4166   q31_t * pkCoeffs,
4167   q31_t * pvCoeffs,
4168   q31_t * pState,
4169   uint32_t blockSize);
4170 
4171 
4172   /**
4173    * @brief Processing function for the Q15 IIR lattice filter.
4174    * @param[in]  *S        points to an instance of the Q15 IIR lattice structure.
4175    * @param[in]  *pSrc     points to the block of input data.
4176    * @param[out] *pDst     points to the block of output data.
4177    * @param[in]  blockSize number of samples to process.
4178    * @return none.
4179    */
4180 
4181   void arm_iir_lattice_q15(
4182   const arm_iir_lattice_instance_q15 * S,
4183   q15_t * pSrc,
4184   q15_t * pDst,
4185   uint32_t blockSize);
4186 
4187 
4188 /**
4189  * @brief Initialization function for the Q15 IIR lattice filter.
4190  * @param[in,out] *S points to an instance of the fixed-point Q15 IIR lattice structure.
4191  * @param[in] numStages  number of stages in the filter.
4192  * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
4193  * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
4194  * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
4195  * @param[in] blockSize number of samples to process per call.
4196  * @return none.
4197  */
4198 
4199   void arm_iir_lattice_init_q15(
4200   arm_iir_lattice_instance_q15 * S,
4201   uint16_t numStages,
4202   q15_t * pkCoeffs,
4203   q15_t * pvCoeffs,
4204   q15_t * pState,
4205   uint32_t blockSize);
4206 
4207   /**
4208    * @brief Instance structure for the floating-point LMS filter.
4209    */
4210 
4211   typedef struct
4212   {
4213     uint16_t numTaps;    /**< number of coefficients (taps) in the filter. */
4214     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4215     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4216     float32_t mu;        /**< step size that controls filter coefficient updates. */
4217   } arm_lms_instance_f32;
4218 
4219   /**
4220    * @brief Processing function for the floating-point LMS filter.
4221    * @param[in]  *S        points to an instance of the floating-point LMS filter structure (passed as const).
4222    * @param[in]  *pSrc     points to the block of input data.
4223    * @param[in]  *pRef     points to the block of reference data.
4224    * @param[out] *pOut     points to the block of output data.
4225    * @param[out] *pErr     points to the block of error data.
4226    * @param[in]  blockSize number of samples to process.
4227    * @return     none.
4228    */
4229 
4230   void arm_lms_f32(
4231   const arm_lms_instance_f32 * S,
4232   float32_t * pSrc,
4233   float32_t * pRef,
4234   float32_t * pOut,
4235   float32_t * pErr,
4236   uint32_t blockSize);
4237 
4238   /**
4239    * @brief Initialization function for the floating-point LMS filter.
4240    * @param[in] *S        points to an instance of the floating-point LMS filter structure.
4241    * @param[in] numTaps   number of filter coefficients.
4242    * @param[in] *pCoeffs  points to the coefficient buffer.  The array is of length numTaps (see arm_lms_instance_f32).
4243    * @param[in] *pState   points to the state buffer.  The array is of length numTaps+blockSize-1 (see arm_lms_instance_f32).
4244    * @param[in] mu        step size that controls filter coefficient updates.
4245    * @param[in] blockSize number of samples to process.
4246    * @return none.
4247    */
4248 
4249   void arm_lms_init_f32(
4250   arm_lms_instance_f32 * S,
4251   uint16_t numTaps,
4252   float32_t * pCoeffs,
4253   float32_t * pState,
4254   float32_t mu,
4255   uint32_t blockSize);
4256 
4257   /**
4258    * @brief Instance structure for the Q15 LMS filter.
4259    */
4260 
4261   typedef struct
4262   {
4263     uint16_t numTaps;    /**< number of coefficients (taps) in the filter. */
4264     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4265     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4266     q15_t mu;            /**< step size that controls filter coefficient updates. */
4267     uint32_t postShift;  /**< bit shift applied to coefficients. */
4268   } arm_lms_instance_q15;
4269 
4270 
4271   /**
4272    * @brief Initialization function for the Q15 LMS filter.
4273    * @param[in] *S        points to an instance of the Q15 LMS filter structure.
4274    * @param[in] numTaps   number of filter coefficients.
4275    * @param[in] *pCoeffs  points to the coefficient buffer.  The array is of length numTaps (see arm_lms_instance_q15).
4276    * @param[in] *pState   points to the state buffer.  The array is of length numTaps+blockSize-1 (see arm_lms_instance_q15).
4277    * @param[in] mu        step size that controls filter coefficient updates.
4278    * @param[in] blockSize number of samples to process.
4279    * @param[in] postShift bit shift applied to coefficients.
4280    * @return none.
4281    */
4282 
4283   void arm_lms_init_q15(
4284   arm_lms_instance_q15 * S,
4285   uint16_t numTaps,
4286   q15_t * pCoeffs,
4287   q15_t * pState,
4288   q15_t mu,
4289   uint32_t blockSize,
4290   uint32_t postShift);
4291 
4292   /**
4293    * @brief Processing function for the Q15 LMS filter.
4294    * @param[in]  *S        points to an instance of the Q15 LMS filter structure (passed as const).
4295    * @param[in]  *pSrc     points to the block of input data.
4296    * @param[in]  *pRef     points to the block of reference data.
4297    * @param[out] *pOut     points to the block of output data.
4298    * @param[out] *pErr     points to the block of error data.
4299    * @param[in]  blockSize number of samples to process.
4300    * @return     none.
4301    */
4302 
4303   void arm_lms_q15(
4304   const arm_lms_instance_q15 * S,
4305   q15_t * pSrc,
4306   q15_t * pRef,
4307   q15_t * pOut,
4308   q15_t * pErr,
4309   uint32_t blockSize);
4310 
4311 
4312   /**
4313    * @brief Instance structure for the Q31 LMS filter.
4314    */
4315 
4316   typedef struct
4317   {
4318     uint16_t numTaps;    /**< number of coefficients (taps) in the filter. */
4319     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4320     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4321     q31_t mu;            /**< step size that controls filter coefficient updates. */
4322     uint32_t postShift;  /**< bit shift applied to coefficients. */
4323 
4324   } arm_lms_instance_q31;
4325 
4326   /**
4327    * @brief Processing function for the Q31 LMS filter.
4328    * @param[in]  *S        points to an instance of the Q31 LMS filter structure (passed as const).
4329    * @param[in]  *pSrc     points to the block of input data.
4330    * @param[in]  *pRef     points to the block of reference data.
4331    * @param[out] *pOut     points to the block of output data.
4332    * @param[out] *pErr     points to the block of error data.
4333    * @param[in]  blockSize number of samples to process.
4334    * @return     none.
4335    */
4336 
4337   void arm_lms_q31(
4338   const arm_lms_instance_q31 * S,
4339   q31_t * pSrc,
4340   q31_t * pRef,
4341   q31_t * pOut,
4342   q31_t * pErr,
4343   uint32_t blockSize);
4344 
4345   /**
4346    * @brief Initialization function for the Q31 LMS filter.
4347    * @param[in] *S        points to an instance of the Q31 LMS filter structure.
4348    * @param[in] numTaps   number of filter coefficients.
4349    * @param[in] *pCoeffs  points to the coefficient buffer.  The array is of length numTaps (see arm_lms_instance_q31).
4350    * @param[in] *pState   points to the state buffer.  The array is of length numTaps+blockSize-1 (see arm_lms_instance_q31).
4351    * @param[in] mu        step size that controls filter coefficient updates.
4352    * @param[in] blockSize number of samples to process.
4353    * @param[in] postShift bit shift applied to coefficients.
4354    * @return none.
4355    */
4356 
4357   void arm_lms_init_q31(
4358   arm_lms_instance_q31 * S,
4359   uint16_t numTaps,
4360   q31_t * pCoeffs,
4361   q31_t * pState,
4362   q31_t mu,
4363   uint32_t blockSize,
4364   uint32_t postShift);
4365 
4366   /**
4367    * @brief Instance structure for the floating-point normalized LMS filter.
4368    */
4369 
4370   typedef struct
4371   {
4372     uint16_t numTaps;    /**< number of coefficients in the filter. */
4373     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4374     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4375     float32_t mu;        /**< step size that controls filter coefficient updates. */
4376     float32_t energy;    /**< saves previous frame energy. */
4377     float32_t x0;        /**< saves previous input sample. */
4378   } arm_lms_norm_instance_f32;
4379 
4380   /**
4381    * @brief Processing function for the floating-point normalized LMS filter.
4382    * @param[in]  *S        points to an instance of the floating-point normalized LMS filter structure.
4383    * @param[in]  *pSrc     points to the block of input data.
4384    * @param[in]  *pRef     points to the block of reference data.
4385    * @param[out] *pOut     points to the block of output data.
4386    * @param[out] *pErr     points to the block of error data.
4387    * @param[in]  blockSize number of samples to process.
4388    * @return     none.
4389    */
4390 
4391   void arm_lms_norm_f32(
4392   arm_lms_norm_instance_f32 * S,
4393   float32_t * pSrc,
4394   float32_t * pRef,
4395   float32_t * pOut,
4396   float32_t * pErr,
4397   uint32_t blockSize);
4398 
4399   /**
4400    * @brief Initialization function for the floating-point normalized LMS filter.
4401    * @param[in] *S        points to an instance of the floating-point normalized LMS filter structure.
4402    * @param[in] numTaps   number of filter coefficients.
4403    * @param[in] *pCoeffs  points to the coefficient buffer.
4404    * @param[in] *pState   points to the state buffer.
4405    * @param[in] mu        step size that controls filter coefficient updates.
4406    * @param[in] blockSize number of samples to process.
4407    * @return none.
4408    */
4409 
4410   void arm_lms_norm_init_f32(
4411   arm_lms_norm_instance_f32 * S,
4412   uint16_t numTaps,
4413   float32_t * pCoeffs,
4414   float32_t * pState,
4415   float32_t mu,
4416   uint32_t blockSize);
4417 
4418 
4419   /**
4420    * @brief Instance structure for the Q31 normalized LMS filter.
4421    */
4422   typedef struct
4423   {
4424     uint16_t numTaps;     /**< number of coefficients (taps) in the filter. */
4425     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4426     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4427     q31_t mu;             /**< step size that controls filter coefficient updates. */
4428     uint8_t postShift;    /**< bit shift applied to coefficients. */
4429     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4430     q31_t energy;         /**< saves previous frame energy. */
4431     q31_t x0;             /**< saves previous input sample. */
4432   } arm_lms_norm_instance_q31;
4433 
4434   /**
4435    * @brief Processing function for the Q31 normalized LMS filter.
4436    * @param[in]  *S        points to an instance of the Q31 normalized LMS filter structure.
4437    * @param[in]  *pSrc     points to the block of input data.
4438    * @param[in]  *pRef     points to the block of reference data.
4439    * @param[out] *pOut     points to the block of output data.
4440    * @param[out] *pErr     points to the block of error data.
4441    * @param[in]  blockSize number of samples to process.
4442    * @return     none.
4443    */
4444 
4445   void arm_lms_norm_q31(
4446   arm_lms_norm_instance_q31 * S,
4447   q31_t * pSrc,
4448   q31_t * pRef,
4449   q31_t * pOut,
4450   q31_t * pErr,
4451   uint32_t blockSize);
4452 
4453   /**
4454    * @brief Initialization function for the Q31 normalized LMS filter.
4455    * @param[in] *S        points to an instance of the Q31 normalized LMS filter structure.
4456    * @param[in] numTaps   number of filter coefficients.
4457    * @param[in] *pCoeffs  points to the coefficient buffer.
4458    * @param[in] *pState   points to the state buffer.
4459    * @param[in] mu        step size that controls filter coefficient updates.
4460    * @param[in] blockSize number of samples to process.
4461    * @param[in] postShift bit shift applied to coefficients.
4462    * @return none.
4463    */
4464 
4465   void arm_lms_norm_init_q31(
4466   arm_lms_norm_instance_q31 * S,
4467   uint16_t numTaps,
4468   q31_t * pCoeffs,
4469   q31_t * pState,
4470   q31_t mu,
4471   uint32_t blockSize,
4472   uint8_t postShift);
4473 
4474   /**
4475    * @brief Instance structure for the Q15 normalized LMS filter.
4476    */
4477 
4478   typedef struct
4479   {
4480     uint16_t numTaps;    /**< number of coefficients in the filter. */
4481     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4482     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4483     q15_t mu;            /**< step size that controls filter coefficient updates. */
4484     uint8_t postShift;   /**< bit shift applied to coefficients. */
4485     q15_t *recipTable;   /**< points to the reciprocal initial value table. */
4486     q15_t energy;        /**< saves previous frame energy. */
4487     q15_t x0;            /**< saves previous input sample. */
4488   } arm_lms_norm_instance_q15;
4489 
4490   /**
4491    * @brief Processing function for the Q15 normalized LMS filter.
4492    * @param[in]  *S        points to an instance of the Q15 normalized LMS filter structure.
4493    * @param[in]  *pSrc     points to the block of input data.
4494    * @param[in]  *pRef     points to the block of reference data.
4495    * @param[out] *pOut     points to the block of output data.
4496    * @param[out] *pErr     points to the block of error data.
4497    * @param[in]  blockSize number of samples to process.
4498    * @return     none.
4499    */
4500 
4501   void arm_lms_norm_q15(
4502   arm_lms_norm_instance_q15 * S,
4503   q15_t * pSrc,
4504   q15_t * pRef,
4505   q15_t * pOut,
4506   q15_t * pErr,
4507   uint32_t blockSize);
4508 
4509 
4510   /**
4511    * @brief Initialization function for the Q15 normalized LMS filter.
4512    * @param[in] *S        points to an instance of the Q15 normalized LMS filter structure.
4513    * @param[in] numTaps   number of filter coefficients.
4514    * @param[in] *pCoeffs  points to the coefficient buffer.
4515    * @param[in] *pState   points to the state buffer.
4516    * @param[in] mu        step size that controls filter coefficient updates.
4517    * @param[in] blockSize number of samples to process.
4518    * @param[in] postShift bit shift applied to coefficients.
4519    * @return none.
4520    */
4521 
4522   void arm_lms_norm_init_q15(
4523   arm_lms_norm_instance_q15 * S,
4524   uint16_t numTaps,
4525   q15_t * pCoeffs,
4526   q15_t * pState,
4527   q15_t mu,
4528   uint32_t blockSize,
4529   uint8_t postShift);
4530 
4531   /**
4532    * @brief Correlation of floating-point sequences.
4533    * @param[in]  *pSrcA  points to the first input sequence.
4534    * @param[in]  srcALen length of the first input sequence.
4535    * @param[in]  *pSrcB  points to the second input sequence.
4536    * @param[in]  srcBLen length of the second input sequence.
4537    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4538    * @return none.
4539    */
4540 
4541   void arm_correlate_f32(
4542   float32_t * pSrcA,
4543   uint32_t srcALen,
4544   float32_t * pSrcB,
4545   uint32_t srcBLen,
4546   float32_t * pDst);
4547 
4548 
4549   /**
4550    * @brief Correlation of Q15 sequences (variant that takes a caller-supplied scratch buffer).
4551    * @param[in]  *pSrcA    points to the first input sequence.
4552    * @param[in]  srcALen   length of the first input sequence.
4553    * @param[in]  *pSrcB    points to the second input sequence.
4554    * @param[in]  srcBLen   length of the second input sequence.
4555    * @param[out] *pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4556    * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4557    * @return none.
4558    */
4559   void arm_correlate_opt_q15(
4560   q15_t * pSrcA,
4561   uint32_t srcALen,
4562   q15_t * pSrcB,
4563   uint32_t srcBLen,
4564   q15_t * pDst,
4565   q15_t * pScratch);
4566 
4567 
4568   /**
4569    * @brief Correlation of Q15 sequences.
4570    * @param[in]  *pSrcA  points to the first input sequence.
4571    * @param[in]  srcALen length of the first input sequence.
4572    * @param[in]  *pSrcB  points to the second input sequence.
4573    * @param[in]  srcBLen length of the second input sequence.
4574    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4575    * @return none.
4576    */
4577 
4578   void arm_correlate_q15(
4579   q15_t * pSrcA,
4580   uint32_t srcALen,
4581   q15_t * pSrcB,
4582   uint32_t srcBLen,
4583   q15_t * pDst);
4584 
4585   /**
4586    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4587    * @param[in]  *pSrcA  points to the first input sequence.
4588    * @param[in]  srcALen length of the first input sequence.
4589    * @param[in]  *pSrcB  points to the second input sequence.
4590    * @param[in]  srcBLen length of the second input sequence.
4591    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4592    * @return none.
4593    */
4594 
4595   void arm_correlate_fast_q15(
4596 			       q15_t * pSrcA,
4597 			      uint32_t srcALen,
4598 			       q15_t * pSrcB,
4599 			      uint32_t srcBLen,
4600 			      q15_t * pDst);
4601 
4602 
4603 
4604   /**
4605    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4606    * @param[in]  *pSrcA    points to the first input sequence.
4607    * @param[in]  srcALen   length of the first input sequence.
4608    * @param[in]  *pSrcB    points to the second input sequence.
4609    * @param[in]  srcBLen   length of the second input sequence.
4610    * @param[out] *pDst     points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4611    * @param[in]  *pScratch points to scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4612    * @return none.
4613    */
4614 
4615   void arm_correlate_fast_opt_q15(
4616   q15_t * pSrcA,
4617   uint32_t srcALen,
4618   q15_t * pSrcB,
4619   uint32_t srcBLen,
4620   q15_t * pDst,
4621   q15_t * pScratch);
4622 
4623   /**
4624    * @brief Correlation of Q31 sequences.
4625    * @param[in]  *pSrcA  points to the first input sequence.
4626    * @param[in]  srcALen length of the first input sequence.
4627    * @param[in]  *pSrcB  points to the second input sequence.
4628    * @param[in]  srcBLen length of the second input sequence.
4629    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4630    * @return none.
4631    */
4632 
4633   void arm_correlate_q31(
4634   q31_t * pSrcA,
4635   uint32_t srcALen,
4636   q31_t * pSrcB,
4637   uint32_t srcBLen,
4638   q31_t * pDst);
4639 
4640   /**
4641    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4642    * @param[in]  *pSrcA  points to the first input sequence.
4643    * @param[in]  srcALen length of the first input sequence.
4644    * @param[in]  *pSrcB  points to the second input sequence.
4645    * @param[in]  srcBLen length of the second input sequence.
4646    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4647    * @return none.
4648    */
4649 
4650   void arm_correlate_fast_q31(
4651   q31_t * pSrcA,
4652   uint32_t srcALen,
4653   q31_t * pSrcB,
4654   uint32_t srcBLen,
4655   q31_t * pDst);
4656 
4657 
4658 
4659   /**
4660    * @brief Correlation of Q7 sequences (variant that takes caller-supplied scratch buffers).
4661    * @param[in]  *pSrcA     points to the first input sequence.
4662    * @param[in]  srcALen    length of the first input sequence.
4663    * @param[in]  *pSrcB     points to the second input sequence.
4664    * @param[in]  srcBLen    length of the second input sequence.
4665    * @param[out] *pDst      points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4666    * @param[in]  *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4667    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4668    * @return none.
4669    */
4670 
4671   void arm_correlate_opt_q7(
4672   q7_t * pSrcA,
4673   uint32_t srcALen,
4674   q7_t * pSrcB,
4675   uint32_t srcBLen,
4676   q7_t * pDst,
4677   q15_t * pScratch1,
4678   q15_t * pScratch2);
4679 
4680 
4681   /**
4682    * @brief Correlation of Q7 sequences.
4683    * @param[in]  *pSrcA  points to the first input sequence.
4684    * @param[in]  srcALen length of the first input sequence.
4685    * @param[in]  *pSrcB  points to the second input sequence.
4686    * @param[in]  srcBLen length of the second input sequence.
4687    * @param[out] *pDst   points to the block of output data.  Length 2 * max(srcALen, srcBLen) - 1.
4688    * @return none.
4689    */
4690 
4691   void arm_correlate_q7(
4692   q7_t * pSrcA,
4693   uint32_t srcALen,
4694   q7_t * pSrcB,
4695   uint32_t srcBLen,
4696   q7_t * pDst);
4697 
4698 
4699   /**
4700    * @brief Instance structure for the floating-point sparse FIR filter.
4701    */
4702   typedef struct
4703   {
4704     uint16_t numTaps;             /**< number of coefficients in the filter. */
4705     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4706     float32_t *pState;            /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4707     float32_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps. */
4708     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4709     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4710   } arm_fir_sparse_instance_f32;
4711 
4712   /**
4713    * @brief Instance structure for the Q31 sparse FIR filter.
4714    */
4715 
4716   typedef struct
4717   {
4718     uint16_t numTaps;             /**< number of coefficients in the filter. */
4719     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4720     q31_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4721     q31_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4722     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4723     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4724   } arm_fir_sparse_instance_q31;
4725 
4726   /**
4727    * @brief Instance structure for the Q15 sparse FIR filter.
4728    */
4729 
4730   typedef struct
4731   {
4732     uint16_t numTaps;             /**< number of coefficients in the filter. */
4733     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4734     q15_t *pState;                /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4735     q15_t *pCoeffs;               /**< points to the coefficient array. The array is of length numTaps. */
4736     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4737     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4738   } arm_fir_sparse_instance_q15;
4739 
4740   /**
4741    * @brief Instance structure for the Q7 sparse FIR filter.
4742    */
4743 
4744   typedef struct
4745   {
4746     uint16_t numTaps;             /**< number of coefficients in the filter. */
4747     uint16_t stateIndex;          /**< state buffer index.  Points to the oldest sample in the state buffer. */
4748     q7_t *pState;                 /**< points to the state buffer array. The array is of length maxDelay+blockSize-1. */
4749     q7_t *pCoeffs;                /**< points to the coefficient array. The array is of length numTaps. */
4750     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4751     int32_t *pTapDelay;           /**< points to the array of delay values.  The array is of length numTaps. */
4752   } arm_fir_sparse_instance_q7;
4753 
4754   /**
4755    * @brief Processing function for the floating-point sparse FIR filter.
4756    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4757    * @param[in]  *pSrc       points to the block of input data.
4758    * @param[out] *pDst       points to the block of output data.
4759    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4760    * @param[in]  blockSize   number of input samples to process per call.
4761    * @return none.
4762    */
4763 
4764   void arm_fir_sparse_f32(
4765   arm_fir_sparse_instance_f32 * S,
4766   float32_t * pSrc,
4767   float32_t * pDst,
4768   float32_t * pScratchIn,
4769   uint32_t blockSize);
4770 
4771   /**
4772    * @brief  Initialization function for the floating-point sparse FIR filter.
4773    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4774    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4775    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4776    * @param[in]     *pState    points to the state buffer.
4777    * @param[in]     *pTapDelay points to the array of offset times.
4778    * @param[in]     maxDelay   maximum offset time supported.
4779    * @param[in]     blockSize  number of samples that will be processed per block.
4780    * @return none.
4781    */
4782 
4783   void arm_fir_sparse_init_f32(
4784   arm_fir_sparse_instance_f32 * S,
4785   uint16_t numTaps,
4786   float32_t * pCoeffs,
4787   float32_t * pState,
4788   int32_t * pTapDelay,
4789   uint16_t maxDelay,
4790   uint32_t blockSize);
4791 
4792   /**
4793    * @brief Processing function for the Q31 sparse FIR filter.
4794    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4795    * @param[in]  *pSrc       points to the block of input data.
4796    * @param[out] *pDst       points to the block of output data.
4797    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4798    * @param[in]  blockSize   number of input samples to process per call.
4799    * @return none.
4800    */
4801 
4802   void arm_fir_sparse_q31(
4803   arm_fir_sparse_instance_q31 * S,
4804   q31_t * pSrc,
4805   q31_t * pDst,
4806   q31_t * pScratchIn,
4807   uint32_t blockSize);
4808 
4809   /**
4810    * @brief  Initialization function for the Q31 sparse FIR filter.
4811    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4812    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4813    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4814    * @param[in]     *pState    points to the state buffer.
4815    * @param[in]     *pTapDelay points to the array of offset times.
4816    * @param[in]     maxDelay   maximum offset time supported.
4817    * @param[in]     blockSize  number of samples that will be processed per block.
4818    * @return none.
4819    */
4820 
4821   void arm_fir_sparse_init_q31(
4822   arm_fir_sparse_instance_q31 * S,
4823   uint16_t numTaps,
4824   q31_t * pCoeffs,
4825   q31_t * pState,
4826   int32_t * pTapDelay,
4827   uint16_t maxDelay,
4828   uint32_t blockSize);
4829 
4830   /**
4831    * @brief Processing function for the Q15 sparse FIR filter.
4832    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4833    * @param[in]  *pSrc        points to the block of input data.
4834    * @param[out] *pDst        points to the block of output data.
4835    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize (q15_t elements).
4836    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize (q31_t elements).
4837    * @param[in]  blockSize    number of input samples to process per call.
4838    * @return none.
4839    */
4840 
4841   void arm_fir_sparse_q15(
4842   arm_fir_sparse_instance_q15 * S,
4843   q15_t * pSrc,
4844   q15_t * pDst,
4845   q15_t * pScratchIn,
4846   q31_t * pScratchOut,
4847   uint32_t blockSize);
4848 
4849 
4850   /**
4851    * @brief  Initialization function for the Q15 sparse FIR filter.
4852    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4853    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4854    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4855    * @param[in]     *pState    points to the state buffer.
4856    * @param[in]     *pTapDelay points to the array of offset times.
4857    * @param[in]     maxDelay   maximum offset time supported.
4858    * @param[in]     blockSize  number of samples that will be processed per block.
4859    * @return none.
4860    */
4861 
4862   void arm_fir_sparse_init_q15(
4863   arm_fir_sparse_instance_q15 * S,
4864   uint16_t numTaps,
4865   q15_t * pCoeffs,
4866   q15_t * pState,
4867   int32_t * pTapDelay,
4868   uint16_t maxDelay,
4869   uint32_t blockSize);
4870 
4871   /**
4872    * @brief Processing function for the Q7 sparse FIR filter.
4873    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4874    * @param[in]  *pSrc        points to the block of input data.
4875    * @param[out] *pDst        points to the block of output data.
4876    * @param[in]  *pScratchIn  points to a temporary buffer of size blockSize (q7_t elements).
4877    * @param[in]  *pScratchOut points to a temporary buffer of size blockSize (q31_t elements).
4878    * @param[in]  blockSize    number of input samples to process per call.
4879    * @return none.
4880    */
4881 
4882   void arm_fir_sparse_q7(
4883   arm_fir_sparse_instance_q7 * S,
4884   q7_t * pSrc,
4885   q7_t * pDst,
4886   q7_t * pScratchIn,
4887   q31_t * pScratchOut,
4888   uint32_t blockSize);
4889 
4890   /**
4891    * @brief  Initialization function for the Q7 sparse FIR filter.
4892    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4893    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4894    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4895    * @param[in]     *pState    points to the state buffer.
4896    * @param[in]     *pTapDelay points to the array of offset times.
4897    * @param[in]     maxDelay   maximum offset time supported.
4898    * @param[in]     blockSize  number of samples that will be processed per block.
4899    * @return none.
4900    */
4901 
4902   void arm_fir_sparse_init_q7(
4903   arm_fir_sparse_instance_q7 * S,
4904   uint16_t numTaps,
4905   q7_t * pCoeffs,
4906   q7_t * pState,
4907   int32_t * pTapDelay,
4908   uint16_t maxDelay,
4909   uint32_t blockSize);
4910 
4911 
4912   /**
4913    * @brief  Floating-point sin_cos function.
4914    * @param[in]  theta    input value in degrees.
4915    * @param[out] *pSinVal points to the processed sine output.
4916    * @param[out] *pCosVal points to the processed cosine output.
4917    * @return none.
4918    */
4919 
4920   void arm_sin_cos_f32(
4921   float32_t theta,
4922   float32_t * pSinVal,
4923   float32_t * pCosVal);
4924 
4925   /**
4926    * @brief  Q31 sin_cos function.
4927    * @param[in]  theta    scaled input value in degrees.
4928    * @param[out] *pSinVal points to the processed sine output.
4929    * @param[out] *pCosVal points to the processed cosine output.
4930    * @return none.
4931    */
4932 
4933   void arm_sin_cos_q31(
4934   q31_t theta,
4935   q31_t * pSinVal,
4936   q31_t * pCosVal);
4937 
4938 
4939   /**
4940    * @brief  Floating-point complex conjugate.
4941    * @param[in]  *pSrc      points to the input vector.
4942    * @param[out] *pDst      points to the output vector.
4943    * @param[in]  numSamples number of complex samples in each vector.
4944    * @return none.
4945    */
4946 
4947   void arm_cmplx_conj_f32(
4948   float32_t * pSrc,
4949   float32_t * pDst,
4950   uint32_t numSamples);
4951 
4952   /**
4953    * @brief  Q31 complex conjugate.
4954    * @param[in]  *pSrc      points to the input vector.
4955    * @param[out] *pDst      points to the output vector.
4956    * @param[in]  numSamples number of complex samples in each vector.
4957    * @return none.
4958    */
4959 
4960   void arm_cmplx_conj_q31(
4961   q31_t * pSrc,
4962   q31_t * pDst,
4963   uint32_t numSamples);
4964 
4965   /**
4966    * @brief  Q15 complex conjugate.
4967    * @param[in]  *pSrc      points to the input vector.
4968    * @param[out] *pDst      points to the output vector.
4969    * @param[in]  numSamples number of complex samples in each vector.
4970    * @return none.
4971    */
4972 
4973   void arm_cmplx_conj_q15(
4974   q15_t * pSrc,
4975   q15_t * pDst,
4976   uint32_t numSamples);
4977 
4978 
4979 
4980   /**
4981    * @brief  Floating-point complex magnitude squared
4982    * @param[in]  *pSrc points to the complex input vector
4983    * @param[out]  *pDst points to the real output vector
4984    * @param[in]  numSamples number of complex samples in the input vector
4985    * @return none.
4986    */
4987 
4988   void arm_cmplx_mag_squared_f32(
4989   float32_t * pSrc,
4990   float32_t * pDst,
4991   uint32_t numSamples);
4992 
4993   /**
4994    * @brief  Q31 complex magnitude squared
4995    * @param[in]  *pSrc points to the complex input vector
4996    * @param[out]  *pDst points to the real output vector
4997    * @param[in]  numSamples number of complex samples in the input vector
4998    * @return none.
4999    */
5000 
5001   void arm_cmplx_mag_squared_q31(
5002   q31_t * pSrc,
5003   q31_t * pDst,
5004   uint32_t numSamples);
5005 
5006   /**
5007    * @brief  Q15 complex magnitude squared
5008    * @param[in]  *pSrc points to the complex input vector
5009    * @param[out]  *pDst points to the real output vector
5010    * @param[in]  numSamples number of complex samples in the input vector
5011    * @return none.
5012    */
5013 
5014   void arm_cmplx_mag_squared_q15(
5015   q15_t * pSrc,
5016   q15_t * pDst,
5017   uint32_t numSamples);
5018 
5019 
5020  /**
5021    * @ingroup groupController
5022    */
5023 
5024   /**
5025    * @defgroup PID PID Motor Control
5026    *
5027    * A Proportional Integral Derivative (PID) controller is a generic feedback control
5028    * loop mechanism widely used in industrial control systems.
5029    * A PID controller is the most commonly used type of feedback controller.
5030    *
5031    * This set of functions implements (PID) controllers
5032    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
5033    * of data and each call to the function returns a single processed value.
5034    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
5035    * is the input sample value. The functions return the output value.
5036    *
5037    * \par Algorithm:
5038    * <pre>
5039    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
5040    *    A0 = Kp + Ki + Kd
5041    *    A1 = (-Kp ) - (2 * Kd )
5042    *    A2 = Kd  </pre>
5043    *
5044    * \par
5045    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
5046    *
5047    * \par
5048    * \image html PID.gif "Proportional Integral Derivative Controller"
5049    *
5050    * \par
5051    * The PID controller calculates an "error" value as the difference between
5052    * the measured output and the reference input.
5053    * The controller attempts to minimize the error by adjusting the process control inputs.
5054    * The proportional value determines the reaction to the current error,
5055    * the integral value determines the reaction based on the sum of recent errors,
5056    * and the derivative value determines the reaction based on the rate at which the error has been changing.
5057    *
5058    * \par Instance Structure
5059    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
5060    * A separate instance structure must be defined for each PID Controller.
5061    * There are separate instance structure declarations for each of the 3 supported data types.
5062    *
5063    * \par Reset Functions
5064    * There is also an associated reset function for each data type which clears the state array.
5065    *
5066    * \par Initialization Functions
5067    * There is also an associated initialization function for each data type.
5068    * The initialization function performs the following operations:
5069    * - Initializes the Gains A0, A1, A2 from Kp,Ki, Kd gains.
5070    * - Zeros out the values in the state buffer.
5071    *
5072    * \par
5073    * Instance structure cannot be placed into a const data section and it is recommended to use the initialization function.
5074    *
5075    * \par Fixed-Point Behavior
5076    * Care must be taken when using the fixed-point versions of the PID Controller functions.
5077    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
5078    * Refer to the function specific documentation below for usage guidelines.
5079    */
5080 
5081   /**
5082    * @addtogroup PID
5083    * @{
5084    */
5085 
5086   /**
5087    * @brief  Process function for the floating-point PID Control.
5088    * @param[in,out] *S is an instance of the floating-point PID Control structure
5089    * @param[in] in input sample to process
5090    * @return out processed output sample.
5091    */
5092 
5093 
arm_pid_f32(arm_pid_instance_f32 * S,float32_t in)5094   static __INLINE float32_t arm_pid_f32(
5095   arm_pid_instance_f32 * S,
5096   float32_t in)
5097   {
5098     float32_t out;
5099 
5100     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]  */
5101     out = (S->A0 * in) +
5102       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
5103 
5104     /* Update state */
5105     S->state[1] = S->state[0];
5106     S->state[0] = in;
5107     S->state[2] = out;
5108 
5109     /* return to application */
5110     return (out);
5111 
5112   }
5113 
5114   /**
5115    * @brief  Process function for the Q31 PID Control.
5116    * @param[in,out] *S points to an instance of the Q31 PID Control structure
5117    * @param[in] in input sample to process
5118    * @return out processed output sample.
5119    *
5120    * <b>Scaling and Overflow Behavior:</b>
5121    * \par
5122    * The function is implemented using an internal 64-bit accumulator.
5123    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
5124    * Thus, if the accumulator result overflows it wraps around rather than clip.
5125    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
5126    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format.
5127    */
5128 
arm_pid_q31(arm_pid_instance_q31 * S,q31_t in)5129   static __INLINE q31_t arm_pid_q31(
5130   arm_pid_instance_q31 * S,
5131   q31_t in)
5132   {
5133     q63_t acc;
5134     q31_t out;
5135 
5136     /* acc = A0 * x[n]  */
5137     acc = (q63_t) S->A0 * in;
5138 
5139     /* acc += A1 * x[n-1] */
5140     acc += (q63_t) S->A1 * S->state[0];
5141 
5142     /* acc += A2 * x[n-2]  */
5143     acc += (q63_t) S->A2 * S->state[1];
5144 
5145     /* convert output to 1.31 format to add y[n-1] */
5146     out = (q31_t) (acc >> 31u);
5147 
5148     /* out += y[n-1] */
5149     out += S->state[2];
5150 
5151     /* Update state */
5152     S->state[1] = S->state[0];
5153     S->state[0] = in;
5154     S->state[2] = out;
5155 
5156     /* return to application */
5157     return (out);
5158 
5159   }
5160 
5161   /**
5162    * @brief  Process function for the Q15 PID Control.
5163    * @param[in,out] *S points to an instance of the Q15 PID Control structure
5164    * @param[in] in input sample to process
5165    * @return out processed output sample.
5166    *
5167    * <b>Scaling and Overflow Behavior:</b>
5168    * \par
5169    * The function is implemented using a 64-bit internal accumulator.
5170    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
5171    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
5172    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
5173    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
5174    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
5175    */
5176 
arm_pid_q15(arm_pid_instance_q15 * S,q15_t in)5177   static __INLINE q15_t arm_pid_q15(
5178   arm_pid_instance_q15 * S,
5179   q15_t in)
5180   {
5181     q63_t acc;
5182     q15_t out;
5183 
5184 #ifndef ARM_MATH_CM0_FAMILY
5185     __SIMD32_TYPE *vstate;
5186 
5187     /* Implementation of PID controller */
5188 
5189     /* acc = A0 * x[n]  */
5190     acc = (q31_t) __SMUAD(S->A0, in);
5191 
5192     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5193     vstate = __SIMD32_CONST(S->state);
5194     acc = __SMLALD(S->A1, (q31_t) *vstate, acc);
5195 
5196 #else
5197     /* acc = A0 * x[n]  */
5198     acc = ((q31_t) S->A0) * in;
5199 
5200     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
5201     acc += (q31_t) S->A1 * S->state[0];
5202     acc += (q31_t) S->A2 * S->state[1];
5203 
5204 #endif
5205 
5206     /* acc += y[n-1] */
5207     acc += (q31_t) S->state[2] << 15;
5208 
5209     /* saturate the output */
5210     out = (q15_t) (__SSAT((acc >> 15), 16));
5211 
5212     /* Update state */
5213     S->state[1] = S->state[0];
5214     S->state[0] = in;
5215     S->state[2] = out;
5216 
5217     /* return to application */
5218     return (out);
5219 
5220   }
5221 
5222   /**
5223    * @} end of PID group
5224    */
5225 
5226 
5227   /**
5228    * @brief Floating-point matrix inverse.
5229    * @param[in]  *src points to the instance of the input floating-point matrix structure.
5230    * @param[out] *dst points to the instance of the output floating-point matrix structure.
5231    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
5232    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
5233    */
5234 
5235   arm_status arm_mat_inverse_f32(
5236   const arm_matrix_instance_f32 * src,
5237   arm_matrix_instance_f32 * dst);
5238 
5239 
5240   /**
5241    * @brief Floating-point matrix inverse.
5242    * @param[in]  *src points to the instance of the input floating-point matrix structure.
5243    * @param[out] *dst points to the instance of the output floating-point matrix structure.
5244    * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match.
5245    * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR.
5246    */
5247 
5248   arm_status arm_mat_inverse_f64(
5249   const arm_matrix_instance_f64 * src,
5250   arm_matrix_instance_f64 * dst);
5251 
5252 
5253 
5254   /**
5255    * @ingroup groupController
5256    */
5257 
5258 
5259   /**
5260    * @defgroup clarke Vector Clarke Transform
5261    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
5262    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
5263    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
5264    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
5265    * \image html clarke.gif Stator current space vector and its components in (a,b).
5266    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
5267    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
5268    *
5269    * The function operates on a single sample of data and each call to the function returns the processed output.
5270    * The library provides separate functions for Q31 and floating-point data types.
5271    * \par Algorithm
5272    * \image html clarkeFormula.gif
5273    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
5274    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
5275    * \par Fixed-Point Behavior
5276    * Care must be taken when using the Q31 version of the Clarke transform.
5277    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5278    * Refer to the function specific documentation below for usage guidelines.
5279    */
5280 
5281   /**
5282    * @addtogroup clarke
5283    * @{
5284    */
5285 
5286   /**
5287    *
5288    * @brief  Floating-point Clarke transform
5289    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5290    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5291    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5292    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5293    * @return none.
5294    */
5295 
arm_clarke_f32(float32_t Ia,float32_t Ib,float32_t * pIalpha,float32_t * pIbeta)5296   static __INLINE void arm_clarke_f32(
5297   float32_t Ia,
5298   float32_t Ib,
5299   float32_t * pIalpha,
5300   float32_t * pIbeta)
5301   {
5302     /* Calculate pIalpha using the equation, pIalpha = Ia */
5303     *pIalpha = Ia;
5304 
5305     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5306     *pIbeta =
5307       ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
5308 
5309   }
5310 
5311   /**
5312    * @brief  Clarke transform for Q31 version
5313    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5314    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5315    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5316    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5317    * @return none.
5318    *
5319    * <b>Scaling and Overflow Behavior:</b>
5320    * \par
5321    * The function is implemented using an internal 32-bit accumulator.
5322    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5323    * There is saturation on the addition, hence there is no risk of overflow.
5324    */
5325 
arm_clarke_q31(q31_t Ia,q31_t Ib,q31_t * pIalpha,q31_t * pIbeta)5326   static __INLINE void arm_clarke_q31(
5327   q31_t Ia,
5328   q31_t Ib,
5329   q31_t * pIalpha,
5330   q31_t * pIbeta)
5331   {
5332     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5333 
5334     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5335     *pIalpha = Ia;
5336 
5337     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5338     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5339 
5340     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5341     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5342 
5343     /* pIbeta is calculated by adding the intermediate products */
5344     *pIbeta = __QADD(product1, product2);
5345   }
5346 
5347   /**
5348    * @} end of clarke group
5349    */
5350 
5351   /**
5352    * @brief  Converts the elements of the Q7 vector to Q31 vector.
5353    * @param[in]  *pSrc     input pointer
5354    * @param[out]  *pDst    output pointer
5355    * @param[in]  blockSize number of samples to process
5356    * @return none.
5357    */
5358   void arm_q7_to_q31(
5359   q7_t * pSrc,
5360   q31_t * pDst,
5361   uint32_t blockSize);
5362 
5363 
5364 
5365 
5366   /**
5367    * @ingroup groupController
5368    */
5369 
5370   /**
5371    * @defgroup inv_clarke Vector Inverse Clarke Transform
5372    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5373    *
5374    * The function operates on a single sample of data and each call to the function returns the processed output.
5375    * The library provides separate functions for Q31 and floating-point data types.
5376    * \par Algorithm
5377    * \image html clarkeInvFormula.gif
5378    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5379    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5380    * \par Fixed-Point Behavior
5381    * Care must be taken when using the Q31 version of the Clarke transform.
5382    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5383    * Refer to the function specific documentation below for usage guidelines.
5384    */
5385 
5386   /**
5387    * @addtogroup inv_clarke
5388    * @{
5389    */
5390 
5391    /**
5392    * @brief  Floating-point Inverse Clarke transform
5393    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5394    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5395    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5396    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5397    * @return none.
5398    */
5399 
5400 
arm_inv_clarke_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pIa,float32_t * pIb)5401   static __INLINE void arm_inv_clarke_f32(
5402   float32_t Ialpha,
5403   float32_t Ibeta,
5404   float32_t * pIa,
5405   float32_t * pIb)
5406   {
5407     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5408     *pIa = Ialpha;
5409 
5410     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5411     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5412 
5413   }
5414 
5415   /**
5416    * @brief  Inverse Clarke transform for Q31 version
5417    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5418    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5419    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5420    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5421    * @return none.
5422    *
5423    * <b>Scaling and Overflow Behavior:</b>
5424    * \par
5425    * The function is implemented using an internal 32-bit accumulator.
5426    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5427    * There is saturation on the subtraction, hence there is no risk of overflow.
5428    */
5429 
arm_inv_clarke_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pIa,q31_t * pIb)5430   static __INLINE void arm_inv_clarke_q31(
5431   q31_t Ialpha,
5432   q31_t Ibeta,
5433   q31_t * pIa,
5434   q31_t * pIb)
5435   {
5436     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5437 
5438     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5439     *pIa = Ialpha;
5440 
5441     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5442     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5443 
5444     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5445     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5446 
5447     /* pIb is calculated by subtracting the products */
5448     *pIb = __QSUB(product2, product1);
5449 
5450   }
5451 
5452   /**
5453    * @} end of inv_clarke group
5454    */
5455 
5456   /**
5457    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5458    * @param[in]  *pSrc     input pointer
5459    * @param[out] *pDst     output pointer
5460    * @param[in]  blockSize number of samples to process
5461    * @return none.
5462    */
5463   void arm_q7_to_q15(
5464   q7_t * pSrc,
5465   q15_t * pDst,
5466   uint32_t blockSize);
5467 
5468 
5469 
5470   /**
5471    * @ingroup groupController
5472    */
5473 
5474   /**
5475    * @defgroup park Vector Park Transform
5476    *
5477    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5478    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5479    * from the stationary to the moving reference frame and control the spatial relationship between
5480    * the stator vector current and rotor flux vector.
5481    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5482    * current vector and the relationship from the two reference frames:
5483    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5484    *
5485    * The function operates on a single sample of data and each call to the function returns the processed output.
5486    * The library provides separate functions for Q31 and floating-point data types.
5487    * \par Algorithm
5488    * \image html parkFormula.gif
5489    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5490    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5491    * cosine and sine values of theta (rotor flux position).
5492    * \par Fixed-Point Behavior
5493    * Care must be taken when using the Q31 version of the Park transform.
5494    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5495    * Refer to the function specific documentation below for usage guidelines.
5496    */
5497 
5498   /**
5499    * @addtogroup park
5500    * @{
5501    */
5502 
5503   /**
5504    * @brief Floating-point Park transform
5505    * @param[in]       Ialpha input two-phase vector coordinate alpha
5506    * @param[in]       Ibeta  input two-phase vector coordinate beta
5507    * @param[out]      *pId   points to output	rotor reference frame d
5508    * @param[out]      *pIq   points to output	rotor reference frame q
5509    * @param[in]       sinVal sine value of rotation angle theta
5510    * @param[in]       cosVal cosine value of rotation angle theta
5511    * @return none.
5512    *
5513    * The function implements the forward Park transform.
5514    *
5515    */
5516 
arm_park_f32(float32_t Ialpha,float32_t Ibeta,float32_t * pId,float32_t * pIq,float32_t sinVal,float32_t cosVal)5517   static __INLINE void arm_park_f32(
5518   float32_t Ialpha,
5519   float32_t Ibeta,
5520   float32_t * pId,
5521   float32_t * pIq,
5522   float32_t sinVal,
5523   float32_t cosVal)
5524   {
5525     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5526     *pId = Ialpha * cosVal + Ibeta * sinVal;
5527 
5528     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5529     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5530 
5531   }
5532 
5533   /**
5534    * @brief  Park transform for Q31 version
5535    * @param[in]       Ialpha input two-phase vector coordinate alpha
5536    * @param[in]       Ibeta  input two-phase vector coordinate beta
5537    * @param[out]      *pId   points to output rotor reference frame d
5538    * @param[out]      *pIq   points to output rotor reference frame q
5539    * @param[in]       sinVal sine value of rotation angle theta
5540    * @param[in]       cosVal cosine value of rotation angle theta
5541    * @return none.
5542    *
5543    * <b>Scaling and Overflow Behavior:</b>
5544    * \par
5545    * The function is implemented using an internal 32-bit accumulator.
5546    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5547    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5548    */
5549 
5550 
arm_park_q31(q31_t Ialpha,q31_t Ibeta,q31_t * pId,q31_t * pIq,q31_t sinVal,q31_t cosVal)5551   static __INLINE void arm_park_q31(
5552   q31_t Ialpha,
5553   q31_t Ibeta,
5554   q31_t * pId,
5555   q31_t * pIq,
5556   q31_t sinVal,
5557   q31_t cosVal)
5558   {
5559     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5560     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5561 
5562     /* Intermediate product is calculated by (Ialpha * cosVal) */
5563     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5564 
5565     /* Intermediate product is calculated by (Ibeta * sinVal) */
5566     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5567 
5568 
5569     /* Intermediate product is calculated by (Ialpha * sinVal) */
5570     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5571 
5572     /* Intermediate product is calculated by (Ibeta * cosVal) */
5573     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5574 
5575     /* Calculate pId by adding the two intermediate products 1 and 2 */
5576     *pId = __QADD(product1, product2);
5577 
5578     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5579     *pIq = __QSUB(product4, product3);
5580   }
5581 
5582   /**
5583    * @} end of park group
5584    */
5585 
5586   /**
5587    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5588    * @param[in]  *pSrc is input pointer
5589    * @param[out]  *pDst is output pointer
5590    * @param[in]  blockSize is the number of samples to process
5591    * @return none.
5592    */
5593   void arm_q7_to_float(
5594   q7_t * pSrc,
5595   float32_t * pDst,
5596   uint32_t blockSize);
5597 
5598 
5599   /**
5600    * @ingroup groupController
5601    */
5602 
5603   /**
5604    * @defgroup inv_park Vector Inverse Park transform
5605    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5606    *
5607    * The function operates on a single sample of data and each call to the function returns the processed output.
5608    * The library provides separate functions for Q31 and floating-point data types.
5609    * \par Algorithm
5610    * \image html parkInvFormula.gif
5611    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5612    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5613    * cosine and sine values of theta (rotor flux position).
5614    * \par Fixed-Point Behavior
5615    * Care must be taken when using the Q31 version of the Park transform.
5616    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5617    * Refer to the function specific documentation below for usage guidelines.
5618    */
5619 
5620   /**
5621    * @addtogroup inv_park
5622    * @{
5623    */
5624 
5625    /**
5626    * @brief  Floating-point Inverse Park transform
5627    * @param[in]       Id        input coordinate of rotor reference frame d
5628    * @param[in]       Iq        input coordinate of rotor reference frame q
5629    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5630    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5631    * @param[in]       sinVal    sine value of rotation angle theta
5632    * @param[in]       cosVal    cosine value of rotation angle theta
5633    * @return none.
5634    */
5635 
arm_inv_park_f32(float32_t Id,float32_t Iq,float32_t * pIalpha,float32_t * pIbeta,float32_t sinVal,float32_t cosVal)5636   static __INLINE void arm_inv_park_f32(
5637   float32_t Id,
5638   float32_t Iq,
5639   float32_t * pIalpha,
5640   float32_t * pIbeta,
5641   float32_t sinVal,
5642   float32_t cosVal)
5643   {
5644     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5645     *pIalpha = Id * cosVal - Iq * sinVal;
5646 
5647     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5648     *pIbeta = Id * sinVal + Iq * cosVal;
5649 
5650   }
5651 
5652 
5653   /**
5654    * @brief  Inverse Park transform for	Q31 version
5655    * @param[in]       Id        input coordinate of rotor reference frame d
5656    * @param[in]       Iq        input coordinate of rotor reference frame q
5657    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5658    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5659    * @param[in]       sinVal    sine value of rotation angle theta
5660    * @param[in]       cosVal    cosine value of rotation angle theta
5661    * @return none.
5662    *
5663    * <b>Scaling and Overflow Behavior:</b>
5664    * \par
5665    * The function is implemented using an internal 32-bit accumulator.
5666    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5667    * There is saturation on the addition, hence there is no risk of overflow.
5668    */
5669 
5670 
arm_inv_park_q31(q31_t Id,q31_t Iq,q31_t * pIalpha,q31_t * pIbeta,q31_t sinVal,q31_t cosVal)5671   static __INLINE void arm_inv_park_q31(
5672   q31_t Id,
5673   q31_t Iq,
5674   q31_t * pIalpha,
5675   q31_t * pIbeta,
5676   q31_t sinVal,
5677   q31_t cosVal)
5678   {
5679     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5680     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5681 
5682     /* Intermediate product is calculated by (Id * cosVal) */
5683     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5684 
5685     /* Intermediate product is calculated by (Iq * sinVal) */
5686     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5687 
5688 
5689     /* Intermediate product is calculated by (Id * sinVal) */
5690     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5691 
5692     /* Intermediate product is calculated by (Iq * cosVal) */
5693     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5694 
5695     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5696     *pIalpha = __QSUB(product1, product2);
5697 
5698     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5699     *pIbeta = __QADD(product4, product3);
5700 
5701   }
5702 
5703   /**
5704    * @} end of Inverse park group
5705    */
5706 
5707 
5708   /**
5709    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5710    * @param[in]  *pSrc is input pointer
5711    * @param[out]  *pDst is output pointer
5712    * @param[in]  blockSize is the number of samples to process
5713    * @return none.
5714    */
5715   void arm_q31_to_float(
5716   q31_t * pSrc,
5717   float32_t * pDst,
5718   uint32_t blockSize);
5719 
5720   /**
5721    * @ingroup groupInterpolation
5722    */
5723 
5724   /**
5725    * @defgroup LinearInterpolate Linear Interpolation
5726    *
5727    * Linear interpolation is a method of curve fitting using linear polynomials.
5728    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5729    *
5730    * \par
5731    * \image html LinearInterp.gif "Linear interpolation"
5732    *
5733    * \par
5734    * A  Linear Interpolate function calculates an output value(y), for the input(x)
5735    * using linear interpolation of the input values x0, x1( nearest input values) and the output values y0 and y1(nearest output values)
5736    *
5737    * \par Algorithm:
5738    * <pre>
5739    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5740    *       where x0, x1 are nearest values of input x
5741    *             y0, y1 are nearest values to output y
5742    * </pre>
5743    *
5744    * \par
5745    * This set of functions implements Linear interpolation process
5746    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5747    * sample of data and each call to the function returns a single processed value.
5748    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
   * <code>x</code> is the input sample value. The functions return the output value.
5750    *
5751    * \par
   * If <code>x</code> is outside of the table boundary, linear interpolation returns the first value of the table
   * when <code>x</code> is below the input range and the last value of the table when <code>x</code> is above the range.
5754    */
5755 
5756   /**
5757    * @addtogroup LinearInterpolate
5758    * @{
5759    */
5760 
5761   /**
5762    * @brief  Process function for the floating-point Linear Interpolation Function.
5763    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5764    * @param[in] x input sample to process
5765    * @return y processed output sample.
5766    *
5767    */
5768 
arm_linear_interp_f32(arm_linear_interp_instance_f32 * S,float32_t x)5769   static __INLINE float32_t arm_linear_interp_f32(
5770   arm_linear_interp_instance_f32 * S,
5771   float32_t x)
5772   {
5773 
5774     float32_t y;
5775     float32_t x0, x1;                            /* Nearest input values */
5776     float32_t y0, y1;                            /* Nearest output values */
5777     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5778     int32_t i;                                   /* Index variable */
5779     float32_t *pYData = S->pYData;               /* pointer to output table */
5780 
5781     /* Calculation of index */
5782     i = (int32_t) ((x - S->x1) / xSpacing);
5783 
5784     if(i < 0)
5785     {
5786       /* Iniatilize output for below specified range as least output value of table */
5787       y = pYData[0];
5788     }
5789     else if((uint32_t)i >= S->nValues)
5790     {
5791       /* Iniatilize output for above specified range as last output value of table */
5792       y = pYData[S->nValues - 1];
5793     }
5794     else
5795     {
5796       /* Calculation of nearest input values */
5797       x0 = S->x1 + i * xSpacing;
5798       x1 = S->x1 + (i + 1) * xSpacing;
5799 
5800       /* Read of nearest output values */
5801       y0 = pYData[i];
5802       y1 = pYData[i + 1];
5803 
5804       /* Calculation of output */
5805       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5806 
5807     }
5808 
5809     /* returns output value */
5810     return (y);
5811   }
5812 
5813    /**
5814    *
5815    * @brief  Process function for the Q31 Linear Interpolation Function.
5816    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5817    * @param[in] x input sample to process
5818    * @param[in] nValues number of table values
5819    * @return y processed output sample.
5820    *
5821    * \par
5822    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5823    * This function can support maximum of table size 2^12.
5824    *
5825    */
5826 
5827 
arm_linear_interp_q31(q31_t * pYData,q31_t x,uint32_t nValues)5828   static __INLINE q31_t arm_linear_interp_q31(
5829   q31_t * pYData,
5830   q31_t x,
5831   uint32_t nValues)
5832   {
5833     q31_t y;                                     /* output */
5834     q31_t y0, y1;                                /* Nearest output values */
5835     q31_t fract;                                 /* fractional part */
5836     int32_t index;                               /* Index to read nearest output values */
5837 
5838     /* Input is in 12.20 format */
5839     /* 12 bits for the table index */
5840     /* Index value calculation */
5841     index = ((x & 0xFFF00000) >> 20);
5842 
5843     if(index >= (int32_t)(nValues - 1))
5844     {
5845       return (pYData[nValues - 1]);
5846     }
5847     else if(index < 0)
5848     {
5849       return (pYData[0]);
5850     }
5851     else
5852     {
5853 
5854       /* 20 bits for the fractional part */
5855       /* shift left by 11 to keep fract in 1.31 format */
5856       fract = (x & 0x000FFFFF) << 11;
5857 
5858       /* Read two nearest output values from the index in 1.31(q31) format */
5859       y0 = pYData[index];
5860       y1 = pYData[index + 1u];
5861 
5862       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5863       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5864 
5865       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5866       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5867 
5868       /* Convert y to 1.31 format */
5869       return (y << 1u);
5870 
5871     }
5872 
5873   }
5874 
5875   /**
5876    *
5877    * @brief  Process function for the Q15 Linear Interpolation Function.
5878    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5879    * @param[in] x input sample to process
5880    * @param[in] nValues number of table values
5881    * @return y processed output sample.
5882    *
5883    * \par
5884    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5885    * This function can support maximum of table size 2^12.
5886    *
5887    */
5888 
5889 
arm_linear_interp_q15(q15_t * pYData,q31_t x,uint32_t nValues)5890   static __INLINE q15_t arm_linear_interp_q15(
5891   q15_t * pYData,
5892   q31_t x,
5893   uint32_t nValues)
5894   {
5895     q63_t y;                                     /* output */
5896     q15_t y0, y1;                                /* Nearest output values */
5897     q31_t fract;                                 /* fractional part */
5898     int32_t index;                               /* Index to read nearest output values */
5899 
5900     /* Input is in 12.20 format */
5901     /* 12 bits for the table index */
5902     /* Index value calculation */
5903     index = ((x & 0xFFF00000) >> 20u);
5904 
5905     if(index >= (int32_t)(nValues - 1))
5906     {
5907       return (pYData[nValues - 1]);
5908     }
5909     else if(index < 0)
5910     {
5911       return (pYData[0]);
5912     }
5913     else
5914     {
5915       /* 20 bits for the fractional part */
5916       /* fract is in 12.20 format */
5917       fract = (x & 0x000FFFFF);
5918 
5919       /* Read two nearest output values from the index */
5920       y0 = pYData[index];
5921       y1 = pYData[index + 1u];
5922 
5923       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5924       y = ((q63_t) y0 * (0xFFFFF - fract));
5925 
5926       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5927       y += ((q63_t) y1 * (fract));
5928 
5929       /* convert y to 1.15 format */
5930       return (y >> 20);
5931     }
5932 
5933 
5934   }
5935 
5936   /**
5937    *
5938    * @brief  Process function for the Q7 Linear Interpolation Function.
5939    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5940    * @param[in] x input sample to process
5941    * @param[in] nValues number of table values
5942    * @return y processed output sample.
5943    *
5944    * \par
5945    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5946    * This function can support maximum of table size 2^12.
5947    */
5948 
5949 
arm_linear_interp_q7(q7_t * pYData,q31_t x,uint32_t nValues)5950   static __INLINE q7_t arm_linear_interp_q7(
5951   q7_t * pYData,
5952   q31_t x,
5953   uint32_t nValues)
5954   {
5955     q31_t y;                                     /* output */
5956     q7_t y0, y1;                                 /* Nearest output values */
5957     q31_t fract;                                 /* fractional part */
5958     uint32_t index;                              /* Index to read nearest output values */
5959 
5960     /* Input is in 12.20 format */
5961     /* 12 bits for the table index */
5962     /* Index value calculation */
5963     if (x < 0)
5964     {
5965       return (pYData[0]);
5966     }
5967     index = (x >> 20) & 0xfff;
5968 
5969 
5970     if(index >= (nValues - 1))
5971     {
5972       return (pYData[nValues - 1]);
5973     }
5974     else
5975     {
5976 
5977       /* 20 bits for the fractional part */
5978       /* fract is in 12.20 format */
5979       fract = (x & 0x000FFFFF);
5980 
5981       /* Read two nearest output values from the index and are in 1.7(q7) format */
5982       y0 = pYData[index];
5983       y1 = pYData[index + 1u];
5984 
5985       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5986       y = ((y0 * (0xFFFFF - fract)));
5987 
5988       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5989       y += (y1 * fract);
5990 
5991       /* convert y to 1.7(q7) format */
5992       return (y >> 20u);
5993 
5994     }
5995 
5996   }
5997   /**
5998    * @} end of LinearInterpolate group
5999    */
6000 
6001   /**
6002    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
6003    * @param[in] x input value in radians.
6004    * @return  sin(x).
6005    */
6006 
6007   float32_t arm_sin_f32(
6008   float32_t x);
6009 
6010   /**
6011    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
6012    * @param[in] x Scaled input value in radians.
6013    * @return  sin(x).
6014    */
6015 
6016   q31_t arm_sin_q31(
6017   q31_t x);
6018 
6019   /**
6020    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
6021    * @param[in] x Scaled input value in radians.
6022    * @return  sin(x).
6023    */
6024 
6025   q15_t arm_sin_q15(
6026   q15_t x);
6027 
6028   /**
6029    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
6030    * @param[in] x input value in radians.
6031    * @return  cos(x).
6032    */
6033 
6034   float32_t arm_cos_f32(
6035   float32_t x);
6036 
6037   /**
6038    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
6039    * @param[in] x Scaled input value in radians.
6040    * @return  cos(x).
6041    */
6042 
6043   q31_t arm_cos_q31(
6044   q31_t x);
6045 
6046   /**
6047    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
6048    * @param[in] x Scaled input value in radians.
6049    * @return  cos(x).
6050    */
6051 
6052   q15_t arm_cos_q15(
6053   q15_t x);
6054 
6055 
6056   /**
6057    * @ingroup groupFastMath
6058    */
6059 
6060 
6061   /**
6062    * @defgroup SQRT Square Root
6063    *
6064    * Computes the square root of a number.
6065    * There are separate functions for Q15, Q31, and floating-point data types.
6066    * The square root function is computed using the Newton-Raphson algorithm.
6067    * This is an iterative algorithm of the form:
6068    * <pre>
6069    *      x1 = x0 - f(x0)/f'(x0)
6070    * </pre>
6071    * where <code>x1</code> is the current estimate,
6072    * <code>x0</code> is the previous estimate, and
6073    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
6074    * For the square root function, the algorithm reduces to:
6075    * <pre>
6076    *     x0 = in/2                         [initial guess]
6077    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
6078    * </pre>
6079    */
6080 
6081 
6082   /**
6083    * @addtogroup SQRT
6084    * @{
6085    */
6086 
6087   /**
6088    * @brief  Floating-point square root function.
6089    * @param[in]  in     input value.
6090    * @param[out] *pOut  square root of input value.
6091    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
6092    * <code>in</code> is negative value and returns zero output for negative values.
6093    */
6094 
arm_sqrt_f32(float32_t in,float32_t * pOut)6095   static __INLINE arm_status arm_sqrt_f32(
6096   float32_t in,
6097   float32_t * pOut)
6098   {
6099     if(in > 0)
6100     {
6101 
6102 //      #if __FPU_USED
6103 #if (__FPU_USED == 1) && defined ( __CC_ARM   )
6104       *pOut = __sqrtf(in);
6105 #else
6106       *pOut = sqrtf(in);
6107 #endif
6108 
6109       return (ARM_MATH_SUCCESS);
6110     }
6111     else
6112     {
6113       *pOut = 0.0f;
6114       return (ARM_MATH_ARGUMENT_ERROR);
6115     }
6116 
6117   }
6118 
6119 
6120   /**
6121    * @brief Q31 square root function.
6122    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
6123    * @param[out]  *pOut square root of input value.
6124    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
6125    * <code>in</code> is negative value and returns zero output for negative values.
6126    */
6127   arm_status arm_sqrt_q31(
6128   q31_t in,
6129   q31_t * pOut);
6130 
6131   /**
6132    * @brief  Q15 square root function.
6133    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
6134    * @param[out]  *pOut  square root of input value.
6135    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
6136    * <code>in</code> is negative value and returns zero output for negative values.
6137    */
6138   arm_status arm_sqrt_q15(
6139   q15_t in,
6140   q15_t * pOut);
6141 
6142   /**
6143    * @} end of SQRT group
6144    */
6145 
6146 
6147 
6148 
6149 
6150 
6151   /**
6152    * @brief floating-point Circular write function.
6153    */
6154 
arm_circularWrite_f32(int32_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const int32_t * src,int32_t srcInc,uint32_t blockSize)6155   static __INLINE void arm_circularWrite_f32(
6156   int32_t * circBuffer,
6157   int32_t L,
6158   uint16_t * writeOffset,
6159   int32_t bufferInc,
6160   const int32_t * src,
6161   int32_t srcInc,
6162   uint32_t blockSize)
6163   {
6164     uint32_t i = 0u;
6165     int32_t wOffset;
6166 
6167     /* Copy the value of Index pointer that points
6168      * to the current location where the input samples to be copied */
6169     wOffset = *writeOffset;
6170 
6171     /* Loop over the blockSize */
6172     i = blockSize;
6173 
6174     while(i > 0u)
6175     {
6176       /* copy the input sample to the circular buffer */
6177       circBuffer[wOffset] = *src;
6178 
6179       /* Update the input pointer */
6180       src += srcInc;
6181 
6182       /* Circularly update wOffset.  Watch out for positive and negative value */
6183       wOffset += bufferInc;
6184       if(wOffset >= L)
6185         wOffset -= L;
6186 
6187       /* Decrement the loop counter */
6188       i--;
6189     }
6190 
6191     /* Update the index pointer */
6192     *writeOffset = wOffset;
6193   }
6194 
6195 
6196 
6197   /**
6198    * @brief floating-point Circular Read function.
6199    */
arm_circularRead_f32(int32_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,int32_t * dst,int32_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6200   static __INLINE void arm_circularRead_f32(
6201   int32_t * circBuffer,
6202   int32_t L,
6203   int32_t * readOffset,
6204   int32_t bufferInc,
6205   int32_t * dst,
6206   int32_t * dst_base,
6207   int32_t dst_length,
6208   int32_t dstInc,
6209   uint32_t blockSize)
6210   {
6211     uint32_t i = 0u;
6212     int32_t rOffset, dst_end;
6213 
6214     /* Copy the value of Index pointer that points
6215      * to the current location from where the input samples to be read */
6216     rOffset = *readOffset;
6217     dst_end = (int32_t) (dst_base + dst_length);
6218 
6219     /* Loop over the blockSize */
6220     i = blockSize;
6221 
6222     while(i > 0u)
6223     {
6224       /* copy the sample from the circular buffer to the destination buffer */
6225       *dst = circBuffer[rOffset];
6226 
6227       /* Update the input pointer */
6228       dst += dstInc;
6229 
6230       if(dst == (int32_t *) dst_end)
6231       {
6232         dst = dst_base;
6233       }
6234 
6235       /* Circularly update rOffset.  Watch out for positive and negative value  */
6236       rOffset += bufferInc;
6237 
6238       if(rOffset >= L)
6239       {
6240         rOffset -= L;
6241       }
6242 
6243       /* Decrement the loop counter */
6244       i--;
6245     }
6246 
6247     /* Update the index pointer */
6248     *readOffset = rOffset;
6249   }
6250 
6251   /**
6252    * @brief Q15 Circular write function.
6253    */
6254 
arm_circularWrite_q15(q15_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q15_t * src,int32_t srcInc,uint32_t blockSize)6255   static __INLINE void arm_circularWrite_q15(
6256   q15_t * circBuffer,
6257   int32_t L,
6258   uint16_t * writeOffset,
6259   int32_t bufferInc,
6260   const q15_t * src,
6261   int32_t srcInc,
6262   uint32_t blockSize)
6263   {
6264     uint32_t i = 0u;
6265     int32_t wOffset;
6266 
6267     /* Copy the value of Index pointer that points
6268      * to the current location where the input samples to be copied */
6269     wOffset = *writeOffset;
6270 
6271     /* Loop over the blockSize */
6272     i = blockSize;
6273 
6274     while(i > 0u)
6275     {
6276       /* copy the input sample to the circular buffer */
6277       circBuffer[wOffset] = *src;
6278 
6279       /* Update the input pointer */
6280       src += srcInc;
6281 
6282       /* Circularly update wOffset.  Watch out for positive and negative value */
6283       wOffset += bufferInc;
6284       if(wOffset >= L)
6285         wOffset -= L;
6286 
6287       /* Decrement the loop counter */
6288       i--;
6289     }
6290 
6291     /* Update the index pointer */
6292     *writeOffset = wOffset;
6293   }
6294 
6295 
6296 
6297   /**
6298    * @brief Q15 Circular Read function.
6299    */
arm_circularRead_q15(q15_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q15_t * dst,q15_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6300   static __INLINE void arm_circularRead_q15(
6301   q15_t * circBuffer,
6302   int32_t L,
6303   int32_t * readOffset,
6304   int32_t bufferInc,
6305   q15_t * dst,
6306   q15_t * dst_base,
6307   int32_t dst_length,
6308   int32_t dstInc,
6309   uint32_t blockSize)
6310   {
6311     uint32_t i = 0;
6312     int32_t rOffset, dst_end;
6313 
6314     /* Copy the value of Index pointer that points
6315      * to the current location from where the input samples to be read */
6316     rOffset = *readOffset;
6317 
6318     dst_end = (int32_t) (dst_base + dst_length);
6319 
6320     /* Loop over the blockSize */
6321     i = blockSize;
6322 
6323     while(i > 0u)
6324     {
6325       /* copy the sample from the circular buffer to the destination buffer */
6326       *dst = circBuffer[rOffset];
6327 
6328       /* Update the input pointer */
6329       dst += dstInc;
6330 
6331       if(dst == (q15_t *) dst_end)
6332       {
6333         dst = dst_base;
6334       }
6335 
6336       /* Circularly update wOffset.  Watch out for positive and negative value */
6337       rOffset += bufferInc;
6338 
6339       if(rOffset >= L)
6340       {
6341         rOffset -= L;
6342       }
6343 
6344       /* Decrement the loop counter */
6345       i--;
6346     }
6347 
6348     /* Update the index pointer */
6349     *readOffset = rOffset;
6350   }
6351 
6352 
6353   /**
6354    * @brief Q7 Circular write function.
6355    */
6356 
arm_circularWrite_q7(q7_t * circBuffer,int32_t L,uint16_t * writeOffset,int32_t bufferInc,const q7_t * src,int32_t srcInc,uint32_t blockSize)6357   static __INLINE void arm_circularWrite_q7(
6358   q7_t * circBuffer,
6359   int32_t L,
6360   uint16_t * writeOffset,
6361   int32_t bufferInc,
6362   const q7_t * src,
6363   int32_t srcInc,
6364   uint32_t blockSize)
6365   {
6366     uint32_t i = 0u;
6367     int32_t wOffset;
6368 
6369     /* Copy the value of Index pointer that points
6370      * to the current location where the input samples to be copied */
6371     wOffset = *writeOffset;
6372 
6373     /* Loop over the blockSize */
6374     i = blockSize;
6375 
6376     while(i > 0u)
6377     {
6378       /* copy the input sample to the circular buffer */
6379       circBuffer[wOffset] = *src;
6380 
6381       /* Update the input pointer */
6382       src += srcInc;
6383 
6384       /* Circularly update wOffset.  Watch out for positive and negative value */
6385       wOffset += bufferInc;
6386       if(wOffset >= L)
6387         wOffset -= L;
6388 
6389       /* Decrement the loop counter */
6390       i--;
6391     }
6392 
6393     /* Update the index pointer */
6394     *writeOffset = wOffset;
6395   }
6396 
6397 
6398 
6399   /**
6400    * @brief Q7 Circular Read function.
6401    */
arm_circularRead_q7(q7_t * circBuffer,int32_t L,int32_t * readOffset,int32_t bufferInc,q7_t * dst,q7_t * dst_base,int32_t dst_length,int32_t dstInc,uint32_t blockSize)6402   static __INLINE void arm_circularRead_q7(
6403   q7_t * circBuffer,
6404   int32_t L,
6405   int32_t * readOffset,
6406   int32_t bufferInc,
6407   q7_t * dst,
6408   q7_t * dst_base,
6409   int32_t dst_length,
6410   int32_t dstInc,
6411   uint32_t blockSize)
6412   {
6413     uint32_t i = 0;
6414     int32_t rOffset, dst_end;
6415 
6416     /* Copy the value of Index pointer that points
6417      * to the current location from where the input samples to be read */
6418     rOffset = *readOffset;
6419 
6420     dst_end = (int32_t) (dst_base + dst_length);
6421 
6422     /* Loop over the blockSize */
6423     i = blockSize;
6424 
6425     while(i > 0u)
6426     {
6427       /* copy the sample from the circular buffer to the destination buffer */
6428       *dst = circBuffer[rOffset];
6429 
6430       /* Update the input pointer */
6431       dst += dstInc;
6432 
6433       if(dst == (q7_t *) dst_end)
6434       {
6435         dst = dst_base;
6436       }
6437 
6438       /* Circularly update rOffset.  Watch out for positive and negative value */
6439       rOffset += bufferInc;
6440 
6441       if(rOffset >= L)
6442       {
6443         rOffset -= L;
6444       }
6445 
6446       /* Decrement the loop counter */
6447       i--;
6448     }
6449 
6450     /* Update the index pointer */
6451     *readOffset = rOffset;
6452   }
6453 
6454 
6455   /**
6456    * @brief  Sum of the squares of the elements of a Q31 vector.
6457    * @param[in]  *pSrc is input pointer
6458    * @param[in]  blockSize is the number of samples to process
6459    * @param[out]  *pResult is output value.
6460    * @return none.
6461    */
6462 
6463   void arm_power_q31(
6464   q31_t * pSrc,
6465   uint32_t blockSize,
6466   q63_t * pResult);
6467 
6468   /**
6469    * @brief  Sum of the squares of the elements of a floating-point vector.
6470    * @param[in]  *pSrc is input pointer
6471    * @param[in]  blockSize is the number of samples to process
6472    * @param[out]  *pResult is output value.
6473    * @return none.
6474    */
6475 
6476   void arm_power_f32(
6477   float32_t * pSrc,
6478   uint32_t blockSize,
6479   float32_t * pResult);
6480 
6481   /**
6482    * @brief  Sum of the squares of the elements of a Q15 vector.
6483    * @param[in]  *pSrc is input pointer
6484    * @param[in]  blockSize is the number of samples to process
6485    * @param[out]  *pResult is output value.
6486    * @return none.
6487    */
6488 
6489   void arm_power_q15(
6490   q15_t * pSrc,
6491   uint32_t blockSize,
6492   q63_t * pResult);
6493 
6494   /**
6495    * @brief  Sum of the squares of the elements of a Q7 vector.
6496    * @param[in]  *pSrc is input pointer
6497    * @param[in]  blockSize is the number of samples to process
6498    * @param[out]  *pResult is output value.
6499    * @return none.
6500    */
6501 
6502   void arm_power_q7(
6503   q7_t * pSrc,
6504   uint32_t blockSize,
6505   q31_t * pResult);
6506 
6507   /**
6508    * @brief  Mean value of a Q7 vector.
6509    * @param[in]  *pSrc is input pointer
6510    * @param[in]  blockSize is the number of samples to process
6511    * @param[out]  *pResult is output value.
6512    * @return none.
6513    */
6514 
6515   void arm_mean_q7(
6516   q7_t * pSrc,
6517   uint32_t blockSize,
6518   q7_t * pResult);
6519 
6520   /**
6521    * @brief  Mean value of a Q15 vector.
6522    * @param[in]  *pSrc is input pointer
6523    * @param[in]  blockSize is the number of samples to process
6524    * @param[out]  *pResult is output value.
6525    * @return none.
6526    */
6527   void arm_mean_q15(
6528   q15_t * pSrc,
6529   uint32_t blockSize,
6530   q15_t * pResult);
6531 
6532   /**
6533    * @brief  Mean value of a Q31 vector.
6534    * @param[in]  *pSrc is input pointer
6535    * @param[in]  blockSize is the number of samples to process
6536    * @param[out]  *pResult is output value.
6537    * @return none.
6538    */
6539   void arm_mean_q31(
6540   q31_t * pSrc,
6541   uint32_t blockSize,
6542   q31_t * pResult);
6543 
6544   /**
6545    * @brief  Mean value of a floating-point vector.
6546    * @param[in]  *pSrc is input pointer
6547    * @param[in]  blockSize is the number of samples to process
6548    * @param[out]  *pResult is output value.
6549    * @return none.
6550    */
6551   void arm_mean_f32(
6552   float32_t * pSrc,
6553   uint32_t blockSize,
6554   float32_t * pResult);
6555 
6556   /**
6557    * @brief  Variance of the elements of a floating-point vector.
6558    * @param[in]  *pSrc is input pointer
6559    * @param[in]  blockSize is the number of samples to process
6560    * @param[out]  *pResult is output value.
6561    * @return none.
6562    */
6563 
6564   void arm_var_f32(
6565   float32_t * pSrc,
6566   uint32_t blockSize,
6567   float32_t * pResult);
6568 
6569   /**
6570    * @brief  Variance of the elements of a Q31 vector.
6571    * @param[in]  *pSrc is input pointer
6572    * @param[in]  blockSize is the number of samples to process
6573    * @param[out]  *pResult is output value.
6574    * @return none.
6575    */
6576 
6577   void arm_var_q31(
6578   q31_t * pSrc,
6579   uint32_t blockSize,
6580   q31_t * pResult);
6581 
6582   /**
6583    * @brief  Variance of the elements of a Q15 vector.
6584    * @param[in]  *pSrc is input pointer
6585    * @param[in]  blockSize is the number of samples to process
6586    * @param[out]  *pResult is output value.
6587    * @return none.
6588    */
6589 
6590   void arm_var_q15(
6591   q15_t * pSrc,
6592   uint32_t blockSize,
6593   q15_t * pResult);
6594 
6595   /**
6596    * @brief  Root Mean Square of the elements of a floating-point vector.
6597    * @param[in]  *pSrc is input pointer
6598    * @param[in]  blockSize is the number of samples to process
6599    * @param[out]  *pResult is output value.
6600    * @return none.
6601    */
6602 
6603   void arm_rms_f32(
6604   float32_t * pSrc,
6605   uint32_t blockSize,
6606   float32_t * pResult);
6607 
6608   /**
6609    * @brief  Root Mean Square of the elements of a Q31 vector.
6610    * @param[in]  *pSrc is input pointer
6611    * @param[in]  blockSize is the number of samples to process
6612    * @param[out]  *pResult is output value.
6613    * @return none.
6614    */
6615 
6616   void arm_rms_q31(
6617   q31_t * pSrc,
6618   uint32_t blockSize,
6619   q31_t * pResult);
6620 
6621   /**
6622    * @brief  Root Mean Square of the elements of a Q15 vector.
6623    * @param[in]  *pSrc is input pointer
6624    * @param[in]  blockSize is the number of samples to process
6625    * @param[out]  *pResult is output value.
6626    * @return none.
6627    */
6628 
6629   void arm_rms_q15(
6630   q15_t * pSrc,
6631   uint32_t blockSize,
6632   q15_t * pResult);
6633 
6634   /**
6635    * @brief  Standard deviation of the elements of a floating-point vector.
6636    * @param[in]  *pSrc is input pointer
6637    * @param[in]  blockSize is the number of samples to process
6638    * @param[out]  *pResult is output value.
6639    * @return none.
6640    */
6641 
6642   void arm_std_f32(
6643   float32_t * pSrc,
6644   uint32_t blockSize,
6645   float32_t * pResult);
6646 
6647   /**
6648    * @brief  Standard deviation of the elements of a Q31 vector.
6649    * @param[in]  *pSrc is input pointer
6650    * @param[in]  blockSize is the number of samples to process
6651    * @param[out]  *pResult is output value.
6652    * @return none.
6653    */
6654 
6655   void arm_std_q31(
6656   q31_t * pSrc,
6657   uint32_t blockSize,
6658   q31_t * pResult);
6659 
6660   /**
6661    * @brief  Standard deviation of the elements of a Q15 vector.
6662    * @param[in]  *pSrc is input pointer
6663    * @param[in]  blockSize is the number of samples to process
6664    * @param[out]  *pResult is output value.
6665    * @return none.
6666    */
6667 
6668   void arm_std_q15(
6669   q15_t * pSrc,
6670   uint32_t blockSize,
6671   q15_t * pResult);
6672 
6673   /**
6674    * @brief  Floating-point complex magnitude
6675    * @param[in]  *pSrc points to the complex input vector
6676    * @param[out]  *pDst points to the real output vector
6677    * @param[in]  numSamples number of complex samples in the input vector
6678    * @return none.
6679    */
6680 
6681   void arm_cmplx_mag_f32(
6682   float32_t * pSrc,
6683   float32_t * pDst,
6684   uint32_t numSamples);
6685 
6686   /**
6687    * @brief  Q31 complex magnitude
6688    * @param[in]  *pSrc points to the complex input vector
6689    * @param[out]  *pDst points to the real output vector
6690    * @param[in]  numSamples number of complex samples in the input vector
6691    * @return none.
6692    */
6693 
6694   void arm_cmplx_mag_q31(
6695   q31_t * pSrc,
6696   q31_t * pDst,
6697   uint32_t numSamples);
6698 
6699   /**
6700    * @brief  Q15 complex magnitude
6701    * @param[in]  *pSrc points to the complex input vector
6702    * @param[out]  *pDst points to the real output vector
6703    * @param[in]  numSamples number of complex samples in the input vector
6704    * @return none.
6705    */
6706 
6707   void arm_cmplx_mag_q15(
6708   q15_t * pSrc,
6709   q15_t * pDst,
6710   uint32_t numSamples);
6711 
6712   /**
6713    * @brief  Q15 complex dot product
6714    * @param[in]  *pSrcA points to the first input vector
6715    * @param[in]  *pSrcB points to the second input vector
6716    * @param[in]  numSamples number of complex samples in each vector
6717    * @param[out]  *realResult real part of the result returned here
6718    * @param[out]  *imagResult imaginary part of the result returned here
6719    * @return none.
6720    */
6721 
6722   void arm_cmplx_dot_prod_q15(
6723   q15_t * pSrcA,
6724   q15_t * pSrcB,
6725   uint32_t numSamples,
6726   q31_t * realResult,
6727   q31_t * imagResult);
6728 
6729   /**
6730    * @brief  Q31 complex dot product
6731    * @param[in]  *pSrcA points to the first input vector
6732    * @param[in]  *pSrcB points to the second input vector
6733    * @param[in]  numSamples number of complex samples in each vector
6734    * @param[out]  *realResult real part of the result returned here
6735    * @param[out]  *imagResult imaginary part of the result returned here
6736    * @return none.
6737    */
6738 
6739   void arm_cmplx_dot_prod_q31(
6740   q31_t * pSrcA,
6741   q31_t * pSrcB,
6742   uint32_t numSamples,
6743   q63_t * realResult,
6744   q63_t * imagResult);
6745 
6746   /**
6747    * @brief  Floating-point complex dot product
6748    * @param[in]  *pSrcA points to the first input vector
6749    * @param[in]  *pSrcB points to the second input vector
6750    * @param[in]  numSamples number of complex samples in each vector
6751    * @param[out]  *realResult real part of the result returned here
6752    * @param[out]  *imagResult imaginary part of the result returned here
6753    * @return none.
6754    */
6755 
6756   void arm_cmplx_dot_prod_f32(
6757   float32_t * pSrcA,
6758   float32_t * pSrcB,
6759   uint32_t numSamples,
6760   float32_t * realResult,
6761   float32_t * imagResult);
6762 
6763   /**
6764    * @brief  Q15 complex-by-real multiplication
6765    * @param[in]  *pSrcCmplx points to the complex input vector
6766    * @param[in]  *pSrcReal points to the real input vector
6767    * @param[out]  *pCmplxDst points to the complex output vector
6768    * @param[in]  numSamples number of samples in each vector
6769    * @return none.
6770    */
6771 
6772   void arm_cmplx_mult_real_q15(
6773   q15_t * pSrcCmplx,
6774   q15_t * pSrcReal,
6775   q15_t * pCmplxDst,
6776   uint32_t numSamples);
6777 
6778   /**
6779    * @brief  Q31 complex-by-real multiplication
6780    * @param[in]  *pSrcCmplx points to the complex input vector
6781    * @param[in]  *pSrcReal points to the real input vector
6782    * @param[out]  *pCmplxDst points to the complex output vector
6783    * @param[in]  numSamples number of samples in each vector
6784    * @return none.
6785    */
6786 
6787   void arm_cmplx_mult_real_q31(
6788   q31_t * pSrcCmplx,
6789   q31_t * pSrcReal,
6790   q31_t * pCmplxDst,
6791   uint32_t numSamples);
6792 
6793   /**
6794    * @brief  Floating-point complex-by-real multiplication
6795    * @param[in]  *pSrcCmplx points to the complex input vector
6796    * @param[in]  *pSrcReal points to the real input vector
6797    * @param[out]  *pCmplxDst points to the complex output vector
6798    * @param[in]  numSamples number of samples in each vector
6799    * @return none.
6800    */
6801 
6802   void arm_cmplx_mult_real_f32(
6803   float32_t * pSrcCmplx,
6804   float32_t * pSrcReal,
6805   float32_t * pCmplxDst,
6806   uint32_t numSamples);
6807 
6808   /**
6809    * @brief  Minimum value of a Q7 vector.
6810    * @param[in]  *pSrc is input pointer
6811    * @param[in]  blockSize is the number of samples to process
6812    * @param[out]  *result is output pointer
   * @param[out]  *index is the array index of the minimum value in the input buffer.
6814    * @return none.
6815    */
6816 
6817   void arm_min_q7(
6818   q7_t * pSrc,
6819   uint32_t blockSize,
6820   q7_t * result,
6821   uint32_t * index);
6822 
6823   /**
6824    * @brief  Minimum value of a Q15 vector.
6825    * @param[in]  *pSrc is input pointer
6826    * @param[in]  blockSize is the number of samples to process
6827    * @param[out]  *pResult is output pointer
   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6829    * @return none.
6830    */
6831 
6832   void arm_min_q15(
6833   q15_t * pSrc,
6834   uint32_t blockSize,
6835   q15_t * pResult,
6836   uint32_t * pIndex);
6837 
6838   /**
6839    * @brief  Minimum value of a Q31 vector.
6840    * @param[in]  *pSrc is input pointer
6841    * @param[in]  blockSize is the number of samples to process
6842    * @param[out]  *pResult is output pointer
6843    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6844    * @return none.
6845    */
6846   void arm_min_q31(
6847   q31_t * pSrc,
6848   uint32_t blockSize,
6849   q31_t * pResult,
6850   uint32_t * pIndex);
6851 
6852   /**
6853    * @brief  Minimum value of a floating-point vector.
6854    * @param[in]  *pSrc is input pointer
6855    * @param[in]  blockSize is the number of samples to process
6856    * @param[out]  *pResult is output pointer
6857    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6858    * @return none.
6859    */
6860 
6861   void arm_min_f32(
6862   float32_t * pSrc,
6863   uint32_t blockSize,
6864   float32_t * pResult,
6865   uint32_t * pIndex);
6866 
6867 /**
6868  * @brief Maximum value of a Q7 vector.
6869  * @param[in]       *pSrc points to the input buffer
6870  * @param[in]       blockSize length of the input vector
6871  * @param[out]      *pResult maximum value returned here
6872  * @param[out]      *pIndex index of maximum value returned here
6873  * @return none.
6874  */
6875 
6876   void arm_max_q7(
6877   q7_t * pSrc,
6878   uint32_t blockSize,
6879   q7_t * pResult,
6880   uint32_t * pIndex);
6881 
6882 /**
6883  * @brief Maximum value of a Q15 vector.
6884  * @param[in]       *pSrc points to the input buffer
6885  * @param[in]       blockSize length of the input vector
6886  * @param[out]      *pResult maximum value returned here
6887  * @param[out]      *pIndex index of maximum value returned here
6888  * @return none.
6889  */
6890 
6891   void arm_max_q15(
6892   q15_t * pSrc,
6893   uint32_t blockSize,
6894   q15_t * pResult,
6895   uint32_t * pIndex);
6896 
6897 /**
6898  * @brief Maximum value of a Q31 vector.
6899  * @param[in]       *pSrc points to the input buffer
6900  * @param[in]       blockSize length of the input vector
6901  * @param[out]      *pResult maximum value returned here
6902  * @param[out]      *pIndex index of maximum value returned here
6903  * @return none.
6904  */
6905 
6906   void arm_max_q31(
6907   q31_t * pSrc,
6908   uint32_t blockSize,
6909   q31_t * pResult,
6910   uint32_t * pIndex);
6911 
6912 /**
6913  * @brief Maximum value of a floating-point vector.
6914  * @param[in]       *pSrc points to the input buffer
6915  * @param[in]       blockSize length of the input vector
6916  * @param[out]      *pResult maximum value returned here
6917  * @param[out]      *pIndex index of maximum value returned here
6918  * @return none.
6919  */
6920 
6921   void arm_max_f32(
6922   float32_t * pSrc,
6923   uint32_t blockSize,
6924   float32_t * pResult,
6925   uint32_t * pIndex);
6926 
6927   /**
6928    * @brief  Q15 complex-by-complex multiplication
6929    * @param[in]  *pSrcA points to the first input vector
6930    * @param[in]  *pSrcB points to the second input vector
6931    * @param[out]  *pDst  points to the output vector
6932    * @param[in]  numSamples number of complex samples in each vector
6933    * @return none.
6934    */
6935 
6936   void arm_cmplx_mult_cmplx_q15(
6937   q15_t * pSrcA,
6938   q15_t * pSrcB,
6939   q15_t * pDst,
6940   uint32_t numSamples);
6941 
6942   /**
6943    * @brief  Q31 complex-by-complex multiplication
6944    * @param[in]  *pSrcA points to the first input vector
6945    * @param[in]  *pSrcB points to the second input vector
6946    * @param[out]  *pDst  points to the output vector
6947    * @param[in]  numSamples number of complex samples in each vector
6948    * @return none.
6949    */
6950 
6951   void arm_cmplx_mult_cmplx_q31(
6952   q31_t * pSrcA,
6953   q31_t * pSrcB,
6954   q31_t * pDst,
6955   uint32_t numSamples);
6956 
6957   /**
6958    * @brief  Floating-point complex-by-complex multiplication
6959    * @param[in]  *pSrcA points to the first input vector
6960    * @param[in]  *pSrcB points to the second input vector
6961    * @param[out]  *pDst  points to the output vector
6962    * @param[in]  numSamples number of complex samples in each vector
6963    * @return none.
6964    */
6965 
6966   void arm_cmplx_mult_cmplx_f32(
6967   float32_t * pSrcA,
6968   float32_t * pSrcB,
6969   float32_t * pDst,
6970   uint32_t numSamples);
6971 
6972   /**
6973    * @brief Converts the elements of the floating-point vector to Q31 vector.
6974    * @param[in]       *pSrc points to the floating-point input vector
6975    * @param[out]      *pDst points to the Q31 output vector
6976    * @param[in]       blockSize length of the input vector
6977    * @return none.
6978    */
6979   void arm_float_to_q31(
6980   float32_t * pSrc,
6981   q31_t * pDst,
6982   uint32_t blockSize);
6983 
6984   /**
6985    * @brief Converts the elements of the floating-point vector to Q15 vector.
6986    * @param[in]       *pSrc points to the floating-point input vector
6987    * @param[out]      *pDst points to the Q15 output vector
6988    * @param[in]       blockSize length of the input vector
6989    * @return          none
6990    */
6991   void arm_float_to_q15(
6992   float32_t * pSrc,
6993   q15_t * pDst,
6994   uint32_t blockSize);
6995 
6996   /**
6997    * @brief Converts the elements of the floating-point vector to Q7 vector.
6998    * @param[in]       *pSrc points to the floating-point input vector
6999    * @param[out]      *pDst points to the Q7 output vector
7000    * @param[in]       blockSize length of the input vector
7001    * @return          none
7002    */
7003   void arm_float_to_q7(
7004   float32_t * pSrc,
7005   q7_t * pDst,
7006   uint32_t blockSize);
7007 
7008 
7009   /**
7010    * @brief  Converts the elements of the Q31 vector to Q15 vector.
7011    * @param[in]  *pSrc is input pointer
7012    * @param[out]  *pDst is output pointer
7013    * @param[in]  blockSize is the number of samples to process
7014    * @return none.
7015    */
7016   void arm_q31_to_q15(
7017   q31_t * pSrc,
7018   q15_t * pDst,
7019   uint32_t blockSize);
7020 
7021   /**
7022    * @brief  Converts the elements of the Q31 vector to Q7 vector.
7023    * @param[in]  *pSrc is input pointer
7024    * @param[out]  *pDst is output pointer
7025    * @param[in]  blockSize is the number of samples to process
7026    * @return none.
7027    */
7028   void arm_q31_to_q7(
7029   q31_t * pSrc,
7030   q7_t * pDst,
7031   uint32_t blockSize);
7032 
7033   /**
7034    * @brief  Converts the elements of the Q15 vector to floating-point vector.
7035    * @param[in]  *pSrc is input pointer
7036    * @param[out]  *pDst is output pointer
7037    * @param[in]  blockSize is the number of samples to process
7038    * @return none.
7039    */
7040   void arm_q15_to_float(
7041   q15_t * pSrc,
7042   float32_t * pDst,
7043   uint32_t blockSize);
7044 
7045 
7046   /**
7047    * @brief  Converts the elements of the Q15 vector to Q31 vector.
7048    * @param[in]  *pSrc is input pointer
7049    * @param[out]  *pDst is output pointer
7050    * @param[in]  blockSize is the number of samples to process
7051    * @return none.
7052    */
7053   void arm_q15_to_q31(
7054   q15_t * pSrc,
7055   q31_t * pDst,
7056   uint32_t blockSize);
7057 
7058 
7059   /**
7060    * @brief  Converts the elements of the Q15 vector to Q7 vector.
7061    * @param[in]  *pSrc is input pointer
7062    * @param[out]  *pDst is output pointer
7063    * @param[in]  blockSize is the number of samples to process
7064    * @return none.
7065    */
7066   void arm_q15_to_q7(
7067   q15_t * pSrc,
7068   q7_t * pDst,
7069   uint32_t blockSize);
7070 
7071 
7072   /**
7073    * @ingroup groupInterpolation
7074    */
7075 
7076   /**
7077    * @defgroup BilinearInterpolate Bilinear Interpolation
7078    *
7079    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
7080    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
7081    * determines values between the grid points.
7082    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
7083    * Bilinear interpolation is often used in image processing to rescale images.
7084    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
7085    *
7086    * <b>Algorithm</b>
7087    * \par
7088    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
7089    * For floating-point, the instance structure is defined as:
7090    * <pre>
7091    *   typedef struct
7092    *   {
7093    *     uint16_t numRows;
7094    *     uint16_t numCols;
7095    *     float32_t *pData;
7096    * } arm_bilinear_interp_instance_f32;
7097    * </pre>
7098    *
7099    * \par
7100    * where <code>numRows</code> specifies the number of rows in the table;
7101    * <code>numCols</code> specifies the number of columns in the table;
7102    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
   * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
   * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
7105    *
7106    * \par
7107    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
7108    * <pre>
7109    *     XF = floor(x)
7110    *     YF = floor(y)
7111    * </pre>
7112    * \par
7113    * The interpolated output point is computed as:
7114    * <pre>
7115    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
7116    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
7117    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
7118    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
7119    * </pre>
7120    * Note that the coordinates (x, y) contain integer and fractional components.
7121    * The integer components specify which portion of the table to use while the
   * fractional components control the interpolation process.
7123    *
7124    * \par
   * If (x,y) is outside of the table boundary, bilinear interpolation returns zero output.
7126    */
7127 
7128   /**
7129    * @addtogroup BilinearInterpolate
7130    * @{
7131    */
7132 
7133   /**
7134   *
7135   * @brief  Floating-point bilinear interpolation.
7136   * @param[in,out] *S points to an instance of the interpolation structure.
7137   * @param[in] X interpolation coordinate.
7138   * @param[in] Y interpolation coordinate.
7139   * @return out interpolated value.
7140   */
7141 
7142 
arm_bilinear_interp_f32(const arm_bilinear_interp_instance_f32 * S,float32_t X,float32_t Y)7143   static __INLINE float32_t arm_bilinear_interp_f32(
7144   const arm_bilinear_interp_instance_f32 * S,
7145   float32_t X,
7146   float32_t Y)
7147   {
7148     float32_t out;
7149     float32_t f00, f01, f10, f11;
7150     float32_t *pData = S->pData;
7151     int32_t xIndex, yIndex, index;
7152     float32_t xdiff, ydiff;
7153     float32_t b1, b2, b3, b4;
7154 
7155     xIndex = (int32_t) X;
7156     yIndex = (int32_t) Y;
7157 
7158     /* Care taken for table outside boundary */
7159     /* Returns zero output when values are outside table boundary */
7160     if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0
7161        || yIndex > (S->numCols - 1))
7162     {
7163       return (0);
7164     }
7165 
7166     /* Calculation of index for two nearest points in X-direction */
7167     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
7168 
7169 
7170     /* Read two nearest points in X-direction */
7171     f00 = pData[index];
7172     f01 = pData[index + 1];
7173 
7174     /* Calculation of index for two nearest points in Y-direction */
7175     index = (xIndex - 1) + (yIndex) * S->numCols;
7176 
7177 
7178     /* Read two nearest points in Y-direction */
7179     f10 = pData[index];
7180     f11 = pData[index + 1];
7181 
7182     /* Calculation of intermediate values */
7183     b1 = f00;
7184     b2 = f01 - f00;
7185     b3 = f10 - f00;
7186     b4 = f00 - f01 - f10 + f11;
7187 
7188     /* Calculation of fractional part in X */
7189     xdiff = X - xIndex;
7190 
7191     /* Calculation of fractional part in Y */
7192     ydiff = Y - yIndex;
7193 
7194     /* Calculation of bi-linear interpolated output */
7195     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
7196 
7197     /* return to application */
7198     return (out);
7199 
7200   }
7201 
7202   /**
7203   *
7204   * @brief  Q31 bilinear interpolation.
7205   * @param[in,out] *S points to an instance of the interpolation structure.
7206   * @param[in] X interpolation coordinate in 12.20 format.
7207   * @param[in] Y interpolation coordinate in 12.20 format.
7208   * @return out interpolated value.
7209   */
7210 
arm_bilinear_interp_q31(arm_bilinear_interp_instance_q31 * S,q31_t X,q31_t Y)7211   static __INLINE q31_t arm_bilinear_interp_q31(
7212   arm_bilinear_interp_instance_q31 * S,
7213   q31_t X,
7214   q31_t Y)
7215   {
7216     q31_t out;                                   /* Temporary output */
7217     q31_t acc = 0;                               /* output */
7218     q31_t xfract, yfract;                        /* X, Y fractional parts */
7219     q31_t x1, x2, y1, y2;                        /* Nearest output values */
7220     int32_t rI, cI;                              /* Row and column indices */
7221     q31_t *pYData = S->pData;                    /* pointer to output table values */
7222     uint32_t nCols = S->numCols;                 /* num of rows */
7223 
7224 
7225     /* Input is in 12.20 format */
7226     /* 12 bits for the table index */
7227     /* Index value calculation */
7228     rI = ((X & 0xFFF00000) >> 20u);
7229 
7230     /* Input is in 12.20 format */
7231     /* 12 bits for the table index */
7232     /* Index value calculation */
7233     cI = ((Y & 0xFFF00000) >> 20u);
7234 
7235     /* Care taken for table outside boundary */
7236     /* Returns zero output when values are outside table boundary */
7237     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7238     {
7239       return (0);
7240     }
7241 
7242     /* 20 bits for the fractional part */
7243     /* shift left xfract by 11 to keep 1.31 format */
7244     xfract = (X & 0x000FFFFF) << 11u;
7245 
7246     /* Read two nearest output values from the index */
7247     x1 = pYData[(rI) + nCols * (cI)];
7248     x2 = pYData[(rI) + nCols * (cI) + 1u];
7249 
7250     /* 20 bits for the fractional part */
7251     /* shift left yfract by 11 to keep 1.31 format */
7252     yfract = (Y & 0x000FFFFF) << 11u;
7253 
7254     /* Read two nearest output values from the index */
7255     y1 = pYData[(rI) + nCols * (cI + 1)];
7256     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7257 
7258     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
7259     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
7260     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
7261 
7262     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
7263     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
7264     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
7265 
7266     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
7267     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
7268     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7269 
7270     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
7271     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
7272     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7273 
7274     /* Convert acc to 1.31(q31) format */
7275     return (acc << 2u);
7276 
7277   }
7278 
7279   /**
7280   * @brief  Q15 bilinear interpolation.
7281   * @param[in,out] *S points to an instance of the interpolation structure.
7282   * @param[in] X interpolation coordinate in 12.20 format.
7283   * @param[in] Y interpolation coordinate in 12.20 format.
7284   * @return out interpolated value.
7285   */
7286 
arm_bilinear_interp_q15(arm_bilinear_interp_instance_q15 * S,q31_t X,q31_t Y)7287   static __INLINE q15_t arm_bilinear_interp_q15(
7288   arm_bilinear_interp_instance_q15 * S,
7289   q31_t X,
7290   q31_t Y)
7291   {
7292     q63_t acc = 0;                               /* output */
7293     q31_t out;                                   /* Temporary output */
7294     q15_t x1, x2, y1, y2;                        /* Nearest output values */
7295     q31_t xfract, yfract;                        /* X, Y fractional parts */
7296     int32_t rI, cI;                              /* Row and column indices */
7297     q15_t *pYData = S->pData;                    /* pointer to output table values */
7298     uint32_t nCols = S->numCols;                 /* num of rows */
7299 
7300     /* Input is in 12.20 format */
7301     /* 12 bits for the table index */
7302     /* Index value calculation */
7303     rI = ((X & 0xFFF00000) >> 20);
7304 
7305     /* Input is in 12.20 format */
7306     /* 12 bits for the table index */
7307     /* Index value calculation */
7308     cI = ((Y & 0xFFF00000) >> 20);
7309 
7310     /* Care taken for table outside boundary */
7311     /* Returns zero output when values are outside table boundary */
7312     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7313     {
7314       return (0);
7315     }
7316 
7317     /* 20 bits for the fractional part */
7318     /* xfract should be in 12.20 format */
7319     xfract = (X & 0x000FFFFF);
7320 
7321     /* Read two nearest output values from the index */
7322     x1 = pYData[(rI) + nCols * (cI)];
7323     x2 = pYData[(rI) + nCols * (cI) + 1u];
7324 
7325 
7326     /* 20 bits for the fractional part */
7327     /* yfract should be in 12.20 format */
7328     yfract = (Y & 0x000FFFFF);
7329 
7330     /* Read two nearest output values from the index */
7331     y1 = pYData[(rI) + nCols * (cI + 1)];
7332     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7333 
7334     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
7335 
7336     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
7337     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
7338     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
7339     acc = ((q63_t) out * (0xFFFFF - yfract));
7340 
7341     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
7342     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
7343     acc += ((q63_t) out * (xfract));
7344 
7345     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
7346     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
7347     acc += ((q63_t) out * (yfract));
7348 
7349     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
7350     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
7351     acc += ((q63_t) out * (yfract));
7352 
7353     /* acc is in 13.51 format and down shift acc by 36 times */
7354     /* Convert out to 1.15 format */
7355     return (acc >> 36);
7356 
7357   }
7358 
7359   /**
7360   * @brief  Q7 bilinear interpolation.
7361   * @param[in,out] *S points to an instance of the interpolation structure.
7362   * @param[in] X interpolation coordinate in 12.20 format.
7363   * @param[in] Y interpolation coordinate in 12.20 format.
7364   * @return out interpolated value.
7365   */
7366 
arm_bilinear_interp_q7(arm_bilinear_interp_instance_q7 * S,q31_t X,q31_t Y)7367   static __INLINE q7_t arm_bilinear_interp_q7(
7368   arm_bilinear_interp_instance_q7 * S,
7369   q31_t X,
7370   q31_t Y)
7371   {
7372     q63_t acc = 0;                               /* output */
7373     q31_t out;                                   /* Temporary output */
7374     q31_t xfract, yfract;                        /* X, Y fractional parts */
7375     q7_t x1, x2, y1, y2;                         /* Nearest output values */
7376     int32_t rI, cI;                              /* Row and column indices */
7377     q7_t *pYData = S->pData;                     /* pointer to output table values */
7378     uint32_t nCols = S->numCols;                 /* num of rows */
7379 
7380     /* Input is in 12.20 format */
7381     /* 12 bits for the table index */
7382     /* Index value calculation */
7383     rI = ((X & 0xFFF00000) >> 20);
7384 
7385     /* Input is in 12.20 format */
7386     /* 12 bits for the table index */
7387     /* Index value calculation */
7388     cI = ((Y & 0xFFF00000) >> 20);
7389 
7390     /* Care taken for table outside boundary */
7391     /* Returns zero output when values are outside table boundary */
7392     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7393     {
7394       return (0);
7395     }
7396 
7397     /* 20 bits for the fractional part */
7398     /* xfract should be in 12.20 format */
7399     xfract = (X & 0x000FFFFF);
7400 
7401     /* Read two nearest output values from the index */
7402     x1 = pYData[(rI) + nCols * (cI)];
7403     x2 = pYData[(rI) + nCols * (cI) + 1u];
7404 
7405 
7406     /* 20 bits for the fractional part */
7407     /* yfract should be in 12.20 format */
7408     yfract = (Y & 0x000FFFFF);
7409 
7410     /* Read two nearest output values from the index */
7411     y1 = pYData[(rI) + nCols * (cI + 1)];
7412     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7413 
7414     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7415     out = ((x1 * (0xFFFFF - xfract)));
7416     acc = (((q63_t) out * (0xFFFFF - yfract)));
7417 
7418     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7419     out = ((x2 * (0xFFFFF - yfract)));
7420     acc += (((q63_t) out * (xfract)));
7421 
7422     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7423     out = ((y1 * (0xFFFFF - xfract)));
7424     acc += (((q63_t) out * (yfract)));
7425 
7426     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7427     out = ((y2 * (yfract)));
7428     acc += (((q63_t) out * (xfract)));
7429 
7430     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7431     return (acc >> 40);
7432 
7433   }
7434 
7435   /**
7436    * @} end of BilinearInterpolate group
7437    */
7438 
7439 
/*
 * 32x32-bit multiply helpers that keep the upper 32 bits of the 64-bit
 * product.  Each macro expands to an assignment to 'a', which must be a
 * modifiable q31_t lvalue.  The operands 'x' and 'y' are parenthesized in
 * the expansion so that compound expressions (e.g. p + q) are multiplied as
 * a whole.  The "_R" variants add 0x80000000 before dropping the low word,
 * i.e. they round instead of truncating.
 */

/* SMMLAR: a = high word of ((a << 32) + x * y), rounded */
#define multAcc_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMLSR: a = high word of ((a << 32) - x * y), rounded */
#define multSub_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32)

/* SMMULR: a = high word of (x * y), rounded */
#define mult_32x32_keep32_R(a, x, y) \
    a = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32)

/* SMMLA: a += high word of (x * y) */
#define multAcc_32x32_keep32(a, x, y) \
    a += (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMLS: a -= high word of (x * y) */
#define multSub_32x32_keep32(a, x, y) \
    a -= (q31_t) (((q63_t) (x) * (y)) >> 32)

/* SMMUL: a = high word of (x * y) */
#define mult_32x32_keep32(a, x, y) \
    a = (q31_t) (((q63_t) (x) * (y) ) >> 32)
7463 
7464 
/*
 * Per-toolchain definitions of the optimization-region markers:
 *
 *   LOW_OPTIMIZATION_ENTER           place directly above a function definition
 *   LOW_OPTIMIZATION_EXIT            place directly after the end of that function
 *   IAR_ONLY_LOW_OPTIMIZATION_ENTER  like ENTER, but non-empty only for IAR
 *   IAR_ONLY_LOW_OPTIMIZATION_EXIT   counterpart of the above (always empty)
 *
 * All four macros are defined on every path, including unlisted compilers,
 * so sources that use them always compile.
 */
#if defined ( __CC_ARM ) /* Keil */

  /* Enter low optimization region - place directly above function definition */
  #ifdef ARM_MATH_CM4
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("push")         \
       _Pragma ("O1")
  #else
    #define LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #ifdef ARM_MATH_CM4
    #define LOW_OPTIMIZATION_EXIT \
       _Pragma ("pop")
  #else
    #define LOW_OPTIMIZATION_EXIT
  #endif

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__) /* IAR */

  /* Enter low optimization region - place directly above function definition */
  #ifdef ARM_MATH_CM4
    #define LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
  #else
    #define LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #define LOW_OPTIMIZATION_EXIT

  /* Enter low optimization region - place directly above function definition */
  #ifdef ARM_MATH_CM4
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
       _Pragma ("optimize=low")
  #else
    #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #endif

  /* Exit low optimization region - place directly after end of function definition */
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)

  /* Clang also defines __GNUC__ but does not implement the GCC "optimize" */
  /* function attribute, so the marker is left empty for it.                */
  #if defined(__clang__)
    #define LOW_OPTIMIZATION_ENTER
  #else
    #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))
  #endif

  #define LOW_OPTIMIZATION_EXIT

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__CSMC__) /* Cosmic */

  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#else /* Any other toolchain: markers expand to nothing */

  #define LOW_OPTIMIZATION_ENTER
  #define LOW_OPTIMIZATION_EXIT
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
7532 
7533 
7534 #ifdef	__cplusplus
7535 }
7536 #endif
7537 
7538 
7539 #endif /* _ARM_MATH_H */
7540 
7541 /**
7542  *
7543  * End of file.
7544  */
7545