1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_var_q31.c
4 * Description: Variance of an array of Q31 type
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35 /**
36 @addtogroup variance
37 @{
38 */
39
40 /**
41 @brief Variance of the elements of a Q31 vector.
42 @param[in] pSrc points to the input vector
43 @param[in] blockSize number of samples in input vector
44 @param[out] pResult variance value returned here
45 @return none
46
47 @par Scaling and Overflow Behavior
48 The function is implemented using an internal 64-bit accumulator.
49 The input is represented in 1.31 format, which is then downshifted by 8 bits
50 which yields 1.23, and intermediate multiplication yields a 2.46 format.
51 The accumulator maintains full precision of the intermediate multiplication results,
52 and as a consequence has only 16 guard bits.
53 There is no saturation on intermediate additions.
54 If the accumulator overflows it wraps around and distorts the result.
55 In order to avoid overflows completely the input signal must be scaled down by
56 log2(blockSize)-8 bits, as a total of blockSize additions are performed internally.
57 After division, internal variables should be Q18.46
58 Finally, the 18.46 accumulator is right shifted by 15 bits to yield a 1.31 format value.
59 */
60 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_var_q31(const q31_t * pSrc,uint32_t blockSize,q31_t * pResult)61 void arm_var_q31(
62 const q31_t * pSrc,
63 uint32_t blockSize,
64 q31_t * pResult)
65 {
66 uint32_t blkCnt; /* loop counters */
67 q31x4_t vecSrc;
68 q63_t sumOfSquares = 0LL;
69 q63_t meanOfSquares, squareOfMean; /* square of mean and mean of square */
70 q63_t sum = 0LL;
71 q31_t in;
72
73 if (blockSize <= 1U) {
74 *pResult = 0;
75 return;
76 }
77
78
79 /* Compute 4 outputs at a time */
80 blkCnt = blockSize >> 2U;
81 while (blkCnt > 0U)
82 {
83 vecSrc = vldrwq_s32(pSrc);
84 /* C = (A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1]) */
85 /* Compute Sum of squares of the input samples
86 * and then store the result in a temporary variable, sumOfSquares. */
87
88 /* downscale */
89 vecSrc = vshrq(vecSrc, 8);
90 sumOfSquares = vmlaldavaq(sumOfSquares, vecSrc, vecSrc);
91 sum = vaddlvaq(sum, vecSrc);
92
93 blkCnt --;
94 pSrc += 4;
95 }
96
97
98 /*
99 * tail
100 */
101 blkCnt = blockSize & 0x3;
102 while (blkCnt > 0U)
103 {
104 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
105 /* C = A[0] + A[1] + ... + A[blockSize-1] */
106
107 in = *pSrc++ >> 8U;
108 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
109 sumOfSquares += ((q63_t) (in) * (in));
110 /* Compute sum and store result in a temporary variable, sum. */
111 sum += in;
112
113 /* Decrement loop counter */
114 blkCnt--;
115 }
116
117 /* Compute Mean of squares of the input samples
118 * and then store the result in a temporary variable, meanOfSquares. */
119 meanOfSquares = sumOfSquares / (q63_t) (blockSize - 1U);
120
121 /* Compute square of mean */
122 squareOfMean = sum * sum / (q63_t) (blockSize * (blockSize - 1U));
123
124 /* Compute standard deviation and then store the result to the destination */
125 *pResult = asrl(meanOfSquares - squareOfMean, 15U);
126 }
127 #else
arm_var_q31(const q31_t * pSrc,uint32_t blockSize,q31_t * pResult)128 void arm_var_q31(
129 const q31_t * pSrc,
130 uint32_t blockSize,
131 q31_t * pResult)
132 {
133 uint32_t blkCnt; /* Loop counter */
134 q63_t sum = 0; /* Temporary result storage */
135 q63_t meanOfSquares, squareOfMean; /* Square of mean and mean of square */
136 q63_t sumOfSquares = 0; /* Sum of squares */
137 q31_t in; /* Temporary variable to store input value */
138
139 if (blockSize <= 1U)
140 {
141 *pResult = 0;
142 return;
143 }
144
145 #if defined (ARM_MATH_LOOPUNROLL)
146
147 /* Loop unrolling: Compute 4 outputs at a time */
148 blkCnt = blockSize >> 2U;
149
150 while (blkCnt > 0U)
151 {
152 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
153 /* C = A[0] + A[1] + ... + A[blockSize-1] */
154
155 in = *pSrc++ >> 8U;
156 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
157 sumOfSquares += ((q63_t) (in) * (in));
158 /* Compute sum and store result in a temporary variable, sum. */
159 sum += in;
160
161 in = *pSrc++ >> 8U;
162 sumOfSquares += ((q63_t) (in) * (in));
163 sum += in;
164
165 in = *pSrc++ >> 8U;
166 sumOfSquares += ((q63_t) (in) * (in));
167 sum += in;
168
169 in = *pSrc++ >> 8U;
170 sumOfSquares += ((q63_t) (in) * (in));
171 sum += in;
172
173 /* Decrement loop counter */
174 blkCnt--;
175 }
176
177 /* Loop unrolling: Compute remaining outputs */
178 blkCnt = blockSize % 0x4U;
179
180 #else
181
182 /* Initialize blkCnt with number of samples */
183 blkCnt = blockSize;
184
185 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
186
187 while (blkCnt > 0U)
188 {
189 /* C = A[0] * A[0] + A[1] * A[1] + ... + A[blockSize-1] * A[blockSize-1] */
190 /* C = A[0] + A[1] + ... + A[blockSize-1] */
191
192 in = *pSrc++ >> 8U;
193 /* Compute sum of squares and store result in a temporary variable, sumOfSquares. */
194 sumOfSquares += ((q63_t) (in) * (in));
195 /* Compute sum and store result in a temporary variable, sum. */
196 sum += in;
197
198 /* Decrement loop counter */
199 blkCnt--;
200 }
201
202 /* Compute Mean of squares and store result in a temporary variable, meanOfSquares. */
203 meanOfSquares = (sumOfSquares / (q63_t)(blockSize - 1U));
204
205 /* Compute square of mean */
206 squareOfMean = ( sum * sum / (q63_t)(blockSize * (blockSize - 1U)));
207
208 /* Compute variance and store result in destination */
209 *pResult = (meanOfSquares - squareOfMean) >> 15U;
210 }
211 #endif
212 /**
213 @} end of variance group
214 */
215