1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mse_q15.c
4  * Description:  Mean square error between two Q15 vectors
5  *
6  * $Date:        04 April 2022
7  * $Revision:    V1.10.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 
36 /**
37   @addtogroup MSE
38   @{
39  */
40 
41 /**
42   @brief         Mean square error between two Q15 vectors.
43   @param[in]     pSrcA       points to the first input vector
44   @param[in]     pSrcB       points to the second input vector
45   @param[in]     blockSize   number of samples in input vector
46   @param[out]    pResult     mean square error
47  */
48 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_mse_q15(const q15_t * pSrcA,const q15_t * pSrcB,uint32_t blockSize,q15_t * pResult)49 ARM_DSP_ATTRIBUTE void arm_mse_q15(
50   const q15_t * pSrcA,
51   const q15_t * pSrcB,
52         uint32_t blockSize,
53         q15_t * pResult)
54 {
55     uint32_t  blkCnt;           /* loop counters */
56     q15x8_t vecSrcA,vecSrcB;
57     q63_t   sum = 0LL;
58 
59     blkCnt = blockSize >> 3U;
60     while (blkCnt > 0U)
61     {
62         vecSrcA = vld1q(pSrcA);
63         vecSrcB = vld1q(pSrcB);
64 
65         vecSrcA = vshrq(vecSrcA,1);
66         vecSrcB = vshrq(vecSrcB,1);
67 
68         vecSrcA = vqsubq(vecSrcA,vecSrcB);
69         /*
70          * sum lanes
71          */
72         sum = vmlaldavaq(sum, vecSrcA, vecSrcA);
73 
74         blkCnt--;
75         pSrcA += 8;
76         pSrcB += 8;
77     }
78 
79     /*
80      * tail
81      */
82     blkCnt = blockSize & 7;
83     if (blkCnt > 0U)
84     {
85         mve_pred16_t p0 = vctp16q(blkCnt);
86         vecSrcA = vld1q(pSrcA);
87         vecSrcB = vld1q(pSrcB);
88 
89         vecSrcA = vshrq(vecSrcA,1);
90         vecSrcB = vshrq(vecSrcB,1);
91 
92         vecSrcA = vqsubq(vecSrcA,vecSrcB);
93 
94         sum = vmlaldavaq_p(sum, vecSrcA, vecSrcA, p0);
95     }
96 
97 
98 
99     *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
100 }
101 #else
arm_mse_q15(const q15_t * pSrcA,const q15_t * pSrcB,uint32_t blockSize,q15_t * pResult)102 ARM_DSP_ATTRIBUTE void arm_mse_q15(
103   const q15_t * pSrcA,
104   const q15_t * pSrcB,
105         uint32_t blockSize,
106         q15_t * pResult)
107 {
108         uint32_t blkCnt;                               /* Loop counter */
109         q63_t sum = 0;                                 /* Temporary result storage */
110         q15_t inA,inB;                                       /* Temporary variable to store input value */
111 
112 
113 #if defined (ARM_MATH_LOOPUNROLL)
114 
115   /* Loop unrolling: Compute 4 outputs at a time */
116   blkCnt = blockSize >> 2U;
117 
118   while (blkCnt > 0U)
119   {
120 
121     inA = *pSrcA++ >> 1;
122     inB = *pSrcB++ >> 1;
123     inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
124     sum += (q63_t)((q31_t) inA * inA);
125 
126     inA = *pSrcA++ >> 1;
127     inB = *pSrcB++ >> 1;
128     inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
129     sum += (q63_t)((q31_t) inA * inA);
130 
131     inA = *pSrcA++ >> 1;
132     inB = *pSrcB++ >> 1;
133     inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
134     sum += (q63_t)((q31_t) inA * inA);
135 
136     inA = *pSrcA++ >> 1;
137     inB = *pSrcB++ >> 1;
138     inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
139     sum += (q63_t)((q31_t) inA * inA);
140 
141     /* Decrement loop counter */
142     blkCnt--;
143   }
144 
145   /* Loop unrolling: Compute remaining outputs */
146   blkCnt = blockSize % 0x4U;
147 
148 #else
149 
150   /* Initialize blkCnt with number of samples */
151   blkCnt = blockSize;
152 
153 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
154 
155   while (blkCnt > 0U)
156   {
157 
158     inA = *pSrcA++ >> 1;
159     inB = *pSrcB++ >> 1;
160     inA = (q15_t) __SSAT(((q31_t) inA - (q31_t)inB), 16);
161     sum += (q63_t)((q31_t) inA * inA);
162 
163     /* Decrement loop counter */
164     blkCnt--;
165   }
166 
167   /* Store result in q15 format */
168   *pResult = (q15_t) __SSAT((q31_t) (sum / blockSize)>>13, 16);
169 }
170 #endif /* defined(ARM_MATH_MVEI) */
171 
172 /**
173   @} end of MSE group
174  */
175