1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_mse_f64.c
4  * Description:  Double floating point mean square error
5  *
6  * $Date:        10 August 2022
7  * $Revision:    V1.10.1
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/statistics_functions.h"
30 
31 /**
32   @ingroup groupStats
33  */
34 
35 /**
36   @addtogroup MSE
37   @{
38  */
39 
40 /**
41   @brief         Mean square error between two double floating point vectors.
42   @param[in]     pSrcA       points to the first input vector
43   @param[in]     pSrcB       points to the second input vector
44   @param[in]     blockSize   number of samples in input vector
45   @param[out]    pResult      mean square error
46  */
47 
arm_mse_f64(const float64_t * pSrcA,const float64_t * pSrcB,uint32_t blockSize,float64_t * pResult)48 ARM_DSP_ATTRIBUTE void arm_mse_f64(
49     const float64_t * pSrcA,
50     const float64_t * pSrcB,
51     uint32_t blockSize,
52     float64_t * pResult)
53 
54 {
55 
56     uint32_t blkCnt;                               /* Loop counter */
57     float64_t inA, inB;
58     float64_t sum = 0.0;
59 #if defined(ARM_MATH_NEON) && defined(__aarch64__)
60 
61     float64x2_t inAV , inBV , subV, sumV;
62     sumV = vdupq_n_f64(0.0);
63 
64     blkCnt = blockSize >> 1U ;
65 
66     while (blkCnt > 0U)
67     {
68         inAV = vld1q_f64(pSrcA);
69         pSrcA+=2;
70         inBV = vld1q_f64(pSrcB);
71         pSrcB+=2;
72         subV = vsubq_f64(inAV, inBV);
73         sumV = vmlaq_f64(sumV, subV, subV);
74 
75         blkCnt--;
76 
77     }
78     sum = vaddvq_f64(sumV);
79     blkCnt = (blockSize) & 1;
80 
81 #else
82     /* Temporary return variable */
83 #if defined (ARM_MATH_LOOPUNROLL)
84     blkCnt = (blockSize) >> 1;
85 
86     while (blkCnt > 0U)
87     {
88 
89 
90         inA = *pSrcA++;
91         inB = *pSrcB++;
92         inA = inA - inB;
93         sum += inA * inA;
94 
95         inA = *pSrcA++;
96         inB = *pSrcB++;
97         inA = inA - inB;
98         sum += inA * inA;
99 
100         /* Decrement loop counter */
101         blkCnt--;
102     }
103 
104 
105     /* Loop unrolling: Compute remaining outputs */
106     blkCnt = (blockSize) & 1;
107 #else
108     /* Initialize blkCnt with number of samples */
109     blkCnt = blockSize;
110 #endif
111 #endif
112 
113 #if defined(__clang__) && defined(ARM_MATH_NEON) && defined(__aarch64__)
114     #pragma clang loop vectorize(enable) unroll(disable)
115 #endif
116     while (blkCnt > 0U)
117     {
118         inA = *pSrcA++;
119         inB = *pSrcB++;
120         inA = inA - inB;
121         sum += inA * inA;
122 
123         /* Decrement loop counter */
124         blkCnt--;
125     }
126 
127     /* Store result in destination buffer */
128     *pResult = sum / blockSize;
129 }
130 
131 
132 
133 /**
134   @} end of MSE group
135  */
136