1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_euclidean_distance_f32.c
5  * Description:  Euclidean distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33 
34 
35 
36 /**
37   @addtogroup Euclidean
38   @{
39  */
40 
41 
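/**
 * @brief        Euclidean distance between two vectors
 * @param[in]    pA         Points to the first input vector
 * @param[in]    pB         Points to the second input vector
 * @param[in]    blockSize  Number of elements read from each vector
 * @return       Square root of the sum of squared element-wise
 *               differences between pA and pB
 *
 */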
50 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
52 #include "arm_helium_utils.h"
53 #include "arm_vec_math.h"
arm_euclidean_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)54 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
55 {
56     uint32_t        blkCnt;
57     float32_t       tmp;
58     f32x4_t         a, b, accumV, tempV;
59 
60     accumV = vdupq_n_f32(0.0f);
61 
62     blkCnt = blockSize >> 2;
63     while (blkCnt > 0U) {
64         a = vld1q(pA);
65         b = vld1q(pB);
66 
67         tempV = vsubq(a, b);
68         accumV = vfmaq(accumV, tempV, tempV);
69 
70         pA += 4;
71         pB += 4;
72         blkCnt--;
73     }
74 
75     /*
76      * tail
77      * (will be merged thru tail predication)
78      */
79     blkCnt = blockSize & 3;
80     if (blkCnt > 0U) {
81         mve_pred16_t    p0 = vctp32q(blkCnt);
82 
83         a = vldrwq_z_f32(pA, p0);
84         b = vldrwq_z_f32(pB, p0);
85 
86         tempV = vsubq(a, b);
87         accumV = vfmaq_m(accumV, tempV, tempV, p0);
88     }
89 
90     arm_sqrt_f32(vecAddAcrossF32Mve(accumV), &tmp);
91     return (tmp);
92 }
93 #else
94 #if defined(ARM_MATH_NEON)
95 
96 #include "NEMath.h"
97 
arm_euclidean_distance_f32(const float32_t * pA,const float32_t * pB,uint32_t blockSize)98 float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
99 {
100    float32_t accum=0.0f,tmp;
101    uint32_t blkCnt;
102    float32x4_t a,b,accumV;
103    float32x2_t accumV2;
104 
105    accumV = vdupq_n_f32(0.0f);
106    blkCnt = blockSize >> 2;
107    while(blkCnt > 0)
108    {
109         a = vld1q_f32(pA);
110         b = vld1q_f32(pB);
111 
112         a = vsubq_f32(a,b);
113         accumV = vmlaq_f32(accumV,a,a);
114         pA += 4;
115         pB += 4;
116         blkCnt --;
117    }
118    accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
119    accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
120 
121    blkCnt = blockSize & 3;
122    while(blkCnt > 0)
123    {
124       tmp = *pA++ - *pB++;
125       accum += ARM_SQ(tmp);
126       blkCnt --;
127    }
128    arm_sqrt_f32(accum,&tmp);
129    return(tmp);
130 }
131 
132 #else
float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
{
   /*
    * Portable scalar variant: sum the squared element-wise differences in
    * index order, then return the square root of that sum.
    */
   float32_t sumSq = 0.0f;
   float32_t diff;
   float32_t distance;
   uint32_t i;

   for (i = 0; i < blockSize; i++)
   {
      diff = pA[i] - pB[i];
      sumSq += ARM_SQ(diff);
   }

   arm_sqrt_f32(sumSq, &distance);
   return(distance);
}
146 #endif
147 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
148 
149 
150 /**
151  * @} end of Euclidean group
152  */
153