1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_var_q15.c
4 * Description: Variance of an array of Q15 type
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @addtogroup variance
37 @{
38 */
39
40 /**
41 @brief Variance of the elements of a Q15 vector.
42 @param[in] pSrc points to the input vector
43 @param[in] blockSize number of samples in input vector
44 @param[out] pResult variance value returned here
45 @return none
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using a 64-bit internal accumulator.
49 The input is represented in 1.15 format.
50 Intermediate multiplication yields a 2.30 format, and this
51 result is added without saturation to a 64-bit accumulator in 34.30 format.
52 With 33 guard bits in the accumulator, there is no risk of overflow, and the
53 full precision of the intermediate multiplication is preserved.
54 Finally, the 34.30 result is truncated to 34.15 format by discarding the lower
55 15 bits, and then saturated to yield a result in 1.15 format.
56 */
57 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_var_q15(const q15_t * pSrc,uint32_t blockSize,q15_t * pResult)58 void arm_var_q15(
59 const q15_t * pSrc,
60 uint32_t blockSize,
61 q15_t * pResult)
62 {
63 uint32_t blkCnt; /* loop counters */
64 q15x8_t vecSrc;
65 q63_t sumOfSquares = 0LL;
66 q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
67 q63_t sum = 0LL;
68 q15_t in;
69
70 if (blockSize <= 1U) {
71 *pResult = 0;
72 return;
73 }
74
75
76 blkCnt = blockSize >> 3;
77 while (blkCnt > 0U)
78 {
79 vecSrc = vldrhq_s16(pSrc);
80 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
81 /* Compute Sum of squares of the input samples
82 * and then store the result in a temporary variable, sumOfSquares. */
83
84 sumOfSquares = vmlaldavaq_s16(sumOfSquares, vecSrc, vecSrc);
85 sum = vaddvaq_s16(sum, vecSrc);
86
87 blkCnt --;
88 pSrc += 8;
89 }
90
91 /* Tail */
92 blkCnt = blockSize & 7;
93 while (blkCnt > 0U)
94 {
95 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
96 /* C = A[0] + A[1] + ... + A[blockSize-1] */
97
98 in = *pSrc++;
99 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
100 #if defined (ARM_MATH_DSP)
101 sumOfSquares = __SMLALD(in, in, sumOfSquares);
102 #else
103 sumOfSquares += (in * in);
104 #endif /* #if defined (ARM_MATH_DSP) */
105 /* Compute sum and store result in a temporary variable, sum. */
106 sum += in;
107
108 /* Decrement loop counter */
109 blkCnt--;
110 }
111
112 /* Compute Mean of squares of the input samples
113 * and then store the result in a temporary variable, meanOfSquares. */
114 meanOfSquares = arm_div_q63_to_q31(sumOfSquares, (blockSize - 1U));
115
116 /* Compute square of mean */
117 squareOfMean = arm_div_q63_to_q31((q63_t)sum * sum, (q31_t)(blockSize * (blockSize - 1U)));
118
119 /* mean of the squares minus the square of the mean. */
120 *pResult = (meanOfSquares - squareOfMean) >> 15;
121 }
122 #else
arm_var_q15(const q15_t * pSrc,uint32_t blockSize,q15_t * pResult)123 void arm_var_q15(
124 const q15_t * pSrc,
125 uint32_t blockSize,
126 q15_t * pResult)
127 {
128 uint32_t blkCnt; /* Loop counter */
129 q31_t sum = 0; /* Accumulator */
130 q31_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
131 q63_t sumOfSquares = 0; /* Sum of squares */
132 q15_t in; /* Temporary variable to store input value */
133
134 #if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
135 q31_t in32; /* Temporary variable to store input value */
136 #endif
137
138 if (blockSize <= 1U)
139 {
140 *pResult = 0;
141 return;
142 }
143
144 #if defined (ARM_MATH_LOOPUNROLL)
145
146 /* Loop unrolling: Compute 4 outputs at a time */
147 blkCnt = blockSize >> 2U;
148
149 while (blkCnt > 0U)
150 {
151 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
152 /* C = A[0] + A[1] + ... + A[blockSize-1] */
153
154 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
155 /* Compute sum and store result in a temporary variable, sum. */
156 #if defined (ARM_MATH_DSP)
157 in32 = read_q15x2_ia ((q15_t **) &pSrc);
158 sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
159 sum += ((in32 << 16U) >> 16U);
160 sum += (in32 >> 16U);
161
162 in32 = read_q15x2_ia ((q15_t **) &pSrc);
163 sumOfSquares = __SMLALD(in32, in32, sumOfSquares);
164 sum += ((in32 << 16U) >> 16U);
165 sum += (in32 >> 16U);
166 #else
167 in = *pSrc++;
168 sumOfSquares += (in * in);
169 sum += in;
170
171 in = *pSrc++;
172 sumOfSquares += (in * in);
173 sum += in;
174
175 in = *pSrc++;
176 sumOfSquares += (in * in);
177 sum += in;
178
179 in = *pSrc++;
180 sumOfSquares += (in * in);
181 sum += in;
182 #endif /* #if defined (ARM_MATH_DSP) */
183
184 /* Decrement loop counter */
185 blkCnt--;
186 }
187
188 /* Loop unrolling: Compute remaining outputs */
189 blkCnt = blockSize % 0x4U;
190
191 #else
192
193 /* Initialize blkCnt with number of samples */
194 blkCnt = blockSize;
195
196 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
197
198 while (blkCnt > 0U)
199 {
200 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
201 /* C = A[0] + A[1] + ... + A[blockSize-1] */
202
203 in = *pSrc++;
204 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
205 #if defined (ARM_MATH_DSP)
206 sumOfSquares = __SMLALD(in, in, sumOfSquares);
207 #else
208 sumOfSquares += (in * in);
209 #endif /* #if defined (ARM_MATH_DSP) */
210 /* Compute sum and store result in a temporary variable, sum. */
211 sum += in;
212
213 /* Decrement loop counter */
214 blkCnt--;
215 }
216
217 /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
218 meanOfSquares = (q31_t) (sumOfSquares / (q63_t)(blockSize - 1U));
219
220 /* Compute square of mean */
221 squareOfMean = (q31_t) ((q63_t) sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
222
223 /* mean of squares minus the square of mean. */
224 *pResult = (meanOfSquares - squareOfMean) >> 15U;
225 }
226 #endif /* defined(ARM_MATH_MVEI) */
227
228 /**
229 @} end of variance group
230 */
231