1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_minkowski_distance_f32.c
5  * Description:  Minkowski distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions.h"
31 #include <limits.h>
32 #include <math.h>
33 
34 
35 /**
36   @addtogroup Minkowski
37   @{
38  */
39 
40 /* 6.14 bug */
41 #if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
42 
__powisf2(float a,int b)43 __attribute__((weak)) float __powisf2(float a, int b)
44 {
45     const int recip = b < 0;
46     float r = 1;
47     while (1)
48     {
49         if (b & 1)
50             r *= a;
51         b /= 2;
52         if (b == 0)
53             break;
54         a *= a;
55     }
56     return recip ? 1/r : r;
57 }
58 #endif
59 
60 /**
61  * @brief        Minkowski distance between two vectors
62  *
63  * @param[in]    pA         First vector
64  * @param[in]    pB         Second vector
65  * @param[in]    order      Distance order
66  * @param[in]    blockSize  Number of samples
67  * @return distance
68  *
69  */
70 
71 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
72 
73 #include "arm_helium_utils.h"
74 #include "arm_vec_math.h"
75 
arm_minkowski_distance_f32(const float32_t * pA,const float32_t * pB,int32_t order,uint32_t blockSize)76 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
77 {
78     uint32_t        blkCnt;
79     f32x4_t         a, b, tmpV, sumV;
80 
81     sumV = vdupq_n_f32(0.0f);
82 
83     blkCnt = blockSize >> 2;
84     while (blkCnt > 0U) {
85         a = vld1q(pA);
86         b = vld1q(pB);
87 
88         tmpV = vabdq(a, b);
89         tmpV = vpowq_f32(tmpV, vdupq_n_f32(order));
90         sumV = vaddq(sumV, tmpV);
91 
92         pA += 4;
93         pB += 4;
94         blkCnt--;
95     }
96 
97     /*
98      * tail
99      * (will be merged thru tail predication)
100      */
101     blkCnt = blockSize & 3;
102     if (blkCnt > 0U) {
103         mve_pred16_t    p0 = vctp32q(blkCnt);
104 
105         a = vldrwq_z_f32(pA, p0);
106         b = vldrwq_z_f32(pB, p0);
107 
108         tmpV = vabdq(a, b);
109         tmpV = vpowq_f32(tmpV, vdupq_n_f32(order));
110         sumV = vaddq_m(sumV, sumV, tmpV, p0);
111     }
112 
113     return (powf(vecAddAcrossF32Mve(sumV), (1.0f / (float32_t) order)));
114 }
115 
116 #else
117 #if defined(ARM_MATH_NEON)
118 
119 #include "NEMath.h"
120 
arm_minkowski_distance_f32(const float32_t * pA,const float32_t * pB,int32_t order,uint32_t blockSize)121 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
122 {
123     float32_t sum;
124     uint32_t blkCnt;
125     float32x4_t sumV,aV,bV, tmpV, n;
126     float32x2_t sumV2;
127 
128     sum = 0.0f;
129     sumV = vdupq_n_f32(0.0f);
130     n = vdupq_n_f32(order);
131 
132     blkCnt = blockSize >> 2;
133     while(blkCnt > 0)
134     {
135        aV = vld1q_f32(pA);
136        bV = vld1q_f32(pB);
137        pA += 4;
138        pB += 4;
139 
140        tmpV = vabdq_f32(aV,bV);
141        tmpV = vpowq_f32(tmpV,n);
142        sumV = vaddq_f32(sumV, tmpV);
143 
144 
145        blkCnt --;
146     }
147 
148     sumV2 = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
149     sum = vget_lane_f32(sumV2, 0) + vget_lane_f32(sumV2, 1);
150 
151     blkCnt = blockSize & 3;
152     while(blkCnt > 0)
153     {
154        sum += powf(fabsf(*pA++ - *pB++),order);
155 
156        blkCnt --;
157     }
158 
159 
160     return(powf(sum,(1.0f/order)));
161 
162 }
163 
164 #else
165 
166 
arm_minkowski_distance_f32(const float32_t * pA,const float32_t * pB,int32_t order,uint32_t blockSize)167 float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
168 {
169     float32_t sum;
170     uint32_t i;
171 
172     sum = 0.0f;
173     for(i=0; i < blockSize; i++)
174     {
175        sum += powf(fabsf(pA[i] - pB[i]),order);
176     }
177 
178 
179     return(powf(sum,(1.0f/order)));
180 
181 }
182 #endif
183 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
184 
185 
186 /**
187  * @} end of Minkowski group
188  */
189