1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_dot_prod_f64.c
4  * Description:  Floating-point dot product
5  *
6  * $Date:        03 June 2022
7  * $Revision:    V1.10.1
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 /**
36   @addtogroup BasicDotProd
37   @{
38  */
39 
40 /**
  @brief         Dot product of double-precision (64-bit) floating-point vectors.
42   @param[in]     pSrcA      points to the first input vector.
43   @param[in]     pSrcB      points to the second input vector.
44   @param[in]     blockSize  number of samples in each vector.
45   @param[out]    result     output result returned here.
46  */
47 #if defined(ARM_MATH_NEON) && defined(__aarch64__)
arm_dot_prod_f64(const float64_t * pSrcA,const float64_t * pSrcB,uint32_t blockSize,float64_t * result)48 ARM_DSP_ATTRIBUTE void arm_dot_prod_f64(
49     const float64_t * pSrcA,
50     const float64_t * pSrcB,
51     uint32_t blockSize,
52     float64_t * result)
53 {
54     uint32_t blkCnt;                               /* Loop counter */
55     float64_t sum = 0.;                            /* Temporary return variable */
56     /* Neon Buffer Initialisation */
57     float64x2_t sumV = vdupq_n_f64(0.0);                              /* Neon buffer for sum variable */
58 
59 
60     /* Neon Buffer for sources */
61     float64x2_t pSrcAV;
62     float64x2_t pSrcBV;
63 
64     /* Initialize blkCnt with number of samples */
65     blkCnt = blockSize >> 1U;
66 
67     while (blkCnt > 0U)
68     {
69         /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
70 
71         /* Load source value in Neon Buffer */
72         pSrcAV = vld1q_f64(pSrcA);
73         pSrcBV = vld1q_f64(pSrcB);
74         /* Calculate dot product and store result in a temporary buffer. */
75         sumV = vmlaq_f64(sumV, pSrcAV, pSrcBV);
76 
77         pSrcA+=2;
78         pSrcB+=2;
79         /* Decrement loop counter */
80         blkCnt--;
81     }
82     /* Sum both 64 bits part in the float64x2 */
83     sum = vaddvq_f64(sumV);
84 
85 
86     /* Tail */
87     blkCnt = blockSize & 1 ;
88 
89     while(blkCnt > 0U)
90     {
91         sum += (*pSrcA++) * (*pSrcB++);
92 
93         /* Decrement loop counter */
94         blkCnt--;
95     }
96 
97     /* Store result in destination buffer */
98     *result = sum;
99 }
100 #else
arm_dot_prod_f64(const float64_t * pSrcA,const float64_t * pSrcB,uint32_t blockSize,float64_t * result)101 ARM_DSP_ATTRIBUTE void arm_dot_prod_f64(
102     const float64_t * pSrcA,
103     const float64_t * pSrcB,
104     uint32_t blockSize,
105     float64_t * result)
106 {
107     uint32_t blkCnt;                               /* Loop counter */
108     float64_t sum = 0.;                            /* Temporary return variable */
109 
110     /* Initialize blkCnt with number of samples */
111     blkCnt = blockSize;
112 
113     while (blkCnt > 0U)
114     {
115         /* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
116 
117         /* Calculate dot product and store result in a temporary buffer. */
118         sum += (*pSrcA++) * (*pSrcB++);
119 
120         /* Decrement loop counter */
121         blkCnt--;
122     }
123 
124     /* Store result in destination buffer */
125     *result = sum;
126 }
127 #endif
128 
129 /**
130   @} end of BasicDotProd group
131  */
132