1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_mse_q15.c
4 * Description: Mean square error between two Q15 vectors
5 *
6 * $Date: 04 April 2022
7 * $Revision: V1.10.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions.h"
30
31 /**
32 @ingroup groupStats
33 */
34
35
36 /**
37 @addtogroup MSE
38 @{
39 */
40
41 /**
42 @brief Mean square error between two Q15 vectors.
43 @param[in] pSrcA points to the first input vector
44 @param[in] pSrcB points to the second input vector
45 @param[in] blockSize number of samples in input vector
46 @param[out] pResult mean square error
47 */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mse_q15(const q15_t * pSrcA,const q15_t * pSrcB,uint32_t blockSize,q15_t * pResult)49 ARM_DSP_ATTRIBUTE void arm_mse_q15(
50 const q15_t * pSrcA,
51 const q15_t * pSrcB,
52 uint32_t blockSize,
53 q15_t * pResult)
54 {
55 uint32_t blkCnt; /* loop counters */
56 q15x8_t vecSrcA,vecSrcB;
57 q63_t sum = 0LL;
58
59 blkCnt = blockSize >> 3U;
60 while (blkCnt > 0U)
61 {
62 vecSrcA = vld1q(pSrcA);
63 vecSrcB = vld1q(pSrcB);
64
65 vecSrcA = vshrq(vecSrcA,1);
66 vecSrcB = vshrq(vecSrcB,1);
67
68 vecSrcA = vqsubq(vecSrcA,vecSrcB);
69 /*
70 * sum lanes
71 */
72 sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
73
74 blkCnt--;
75 pSrcA += 8;
76 pSrcB += 8;
77 }
78
79 /*
80 * tail
81 */
82 blkCnt = blockSize & 7;
83 if (blkCnt > 0U)
84 {
85 mve_pred16_t p0 = vctp16q(blkCnt);
86 vecSrcA = vld1q(pSrcA);
87 vecSrcB = vld1q(pSrcB);
88
89 vecSrcA = vshrq(vecSrcA,1);
90 vecSrcB = vshrq(vecSrcB,1);
91
92 vecSrcA = vqsubq(vecSrcA,vecSrcB);
93
94 sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
95 }
96
97
98
99 *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
100 }
101 #else
arm_mse_q15(const q15_t * pSrcA,const q15_t * pSrcB,uint32_t blockSize,q15_t * pResult)102 ARM_DSP_ATTRIBUTE void arm_mse_q15(
103 const q15_t * pSrcA,
104 const q15_t * pSrcB,
105 uint32_t blockSize,
106 q15_t * pResult)
107 {
108 uint32_t blkCnt; /* Loop counter */
109 q63_t sum = 0; /* Temporary result storage */
110 q15_t inA,inB; /* Temporary variable to store input value */
111
112
113 #if defined (ARM_MATH_LOOPUNROLL)
114
115 /* Loop unrolling: Compute 4 outputs at a time */
116 blkCnt = blockSize >> 2U;
117
118 while (blkCnt > 0U)
119 {
120
121 inA = *pSrcA++ >> 1;
122 inB = *pSrcB++ >> 1;
123 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
124 sum += (q63_t)((q31_t) inA * inA);
125
126 inA = *pSrcA++ >> 1;
127 inB = *pSrcB++ >> 1;
128 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
129 sum += (q63_t)((q31_t) inA * inA);
130
131 inA = *pSrcA++ >> 1;
132 inB = *pSrcB++ >> 1;
133 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
134 sum += (q63_t)((q31_t) inA * inA);
135
136 inA = *pSrcA++ >> 1;
137 inB = *pSrcB++ >> 1;
138 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
139 sum += (q63_t)((q31_t) inA * inA);
140
141 /* Decrement loop counter */
142 blkCnt--;
143 }
144
145 /* Loop unrolling: Compute remaining outputs */
146 blkCnt = blockSize % 0x4U;
147
148 #else
149
150 /* Initialize blkCnt with number of samples */
151 blkCnt = blockSize;
152
153 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
154
155 while (blkCnt > 0U)
156 {
157
158 inA = *pSrcA++ >> 1;
159 inB = *pSrcB++ >> 1;
160 inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
161 sum += (q63_t)((q31_t) inA * inA);
162
163 /* Decrement loop counter */
164 blkCnt--;
165 }
166
167 /* Store result in q15 format */
168 *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
169 }
170 #endif /* defined(ARM_MATH_MVEI) */
171
172 /**
173 @} end of MSE group
174 */
175