1
2 /* ----------------------------------------------------------------------
3 * Project: CMSIS DSP Library
4 * Title: arm_jensenshannon_distance_f16.c
5 * Description: Jensen-Shannon distance between two vectors
6 *
7 * $Date: 23 April 2021
8 * $Revision: V1.9.0
9 *
10 * Target Processor: Cortex-M and Cortex-A cores
11 * -------------------------------------------------------------------- */
12 /*
13 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14 *
15 * SPDX-License-Identifier: Apache-2.0
16 *
17 * Licensed under the Apache License, Version 2.0 (the License); you may
18 * not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 *
21 * www.apache.org/licenses/LICENSE-2.0
22 *
23 * Unless required by applicable law or agreed to in writing, software
24 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26 * See the License for the specific language governing permissions and
27 * limitations under the License.
28 */
29
30 #include "dsp/distance_functions_f16.h"
31
32 #if defined(ARM_FLOAT16_SUPPORTED)
33
34 #include <limits.h>
35 #include <math.h>
36
37 /**
38 @ingroup FloatDist
39 */
40
41 /**
42 @defgroup JensenShannon Jensen-Shannon distance
43
44 Jensen-Shannon distance
45 */
46
47
48 /**
49 @addtogroup JensenShannon
50 @{
51 */
52
53 #if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
54 /// @private
rel_entr(float16_t x,float16_t y)55 __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
56 {
57 return ((_Float16)x * (_Float16)logf((float32_t)((_Float16)x / (_Float16)y)));
58 }
59 #endif
60
61
62 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
63
64 #include "arm_helium_utils.h"
65 #include "arm_vec_math_f16.h"
66
arm_jensenshannon_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)67 float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
68 {
69 uint32_t blkCnt;
70 float16_t tmp;
71 f16x8_t a, b, t, tmpV, accumV;
72
73 accumV = vdupq_n_f16(0.0f);
74
75 blkCnt = blockSize >> 3;
76 while (blkCnt > 0U) {
77 a = vld1q(pA);
78 b = vld1q(pB);
79
80 t = vaddq(a, b);
81 t = vmulq(t, 0.5f);
82
83 tmpV = vmulq(a, vrecip_medprec_f16(t));
84 tmpV = vlogq_f16(tmpV);
85 accumV = vfmaq(accumV, a, tmpV);
86
87 tmpV = vmulq_f16(b, vrecip_medprec_f16(t));
88 tmpV = vlogq_f16(tmpV);
89 accumV = vfmaq(accumV, b, tmpV);
90
91 pA += 8;
92 pB += 8;
93 blkCnt--;
94 }
95
96 /*
97 * tail
98 * (will be merged thru tail predication)
99 */
100 blkCnt = blockSize & 7;
101 if (blkCnt > 0U) {
102 mve_pred16_t p0 = vctp16q(blkCnt);
103
104 a = vldrhq_z_f16(pA, p0);
105 b = vldrhq_z_f16(pB, p0);
106
107 t = vaddq(a, b);
108 t = vmulq(t, 0.5f);
109
110 tmpV = vmulq_f16(a, vrecip_medprec_f16(t));
111 tmpV = vlogq_f16(tmpV);
112 accumV = vfmaq_m_f16(accumV, a, tmpV, p0);
113
114 tmpV = vmulq_f16(b, vrecip_medprec_f16(t));
115 tmpV = vlogq_f16(tmpV);
116 accumV = vfmaq_m_f16(accumV, b, tmpV, p0);
117
118 }
119
120 arm_sqrt_f16((_Float16)vecAddAcrossF16Mve(accumV) / 2.0f16, &tmp);
121 return (tmp);
122 }
123
124 #else
125
126
127 /**
128 * @brief Jensen-Shannon distance between two vectors
129 *
130 * This function is assuming that elements of second vector are > 0
131 * and 0 only when the corresponding element of first vector is 0.
132 * Otherwise the result of the computation does not make sense
133 * and for speed reasons, the cases returning NaN or Infinity are not
134 * managed.
135 *
136 * When the function is computing x log (x / y) with x == 0 and y == 0,
137 * it will compute the right result (0) but a division by zero will occur
138 * and should be ignored in client code.
139 *
140 * @param[in] pA First vector
141 * @param[in] pB Second vector
142 * @param[in] blockSize vector length
143 * @return distance
144 *
145 */
146
147
arm_jensenshannon_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)148 float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
149 {
150 _Float16 left, right,sum, tmp;
151 float16_t result;
152 uint32_t i;
153
154 left = 0.0f16;
155 right = 0.0f16;
156 for(i=0; i < blockSize; i++)
157 {
158 tmp = ((_Float16)pA[i] + (_Float16)pB[i]) / 2.0f16;
159 left += (_Float16)rel_entr(pA[i], tmp);
160 right += (_Float16)rel_entr(pB[i], tmp);
161 }
162
163
164 sum = left + right;
165 arm_sqrt_f16((_Float16)sum/2.0f16, &result);
166 return(result);
167
168 }
169
170 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
171
172 /**
173 * @} end of JensenShannon group
174 */
175
176 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
177
178