1 
2 /* ----------------------------------------------------------------------
3  * Project:      CMSIS DSP Library
4  * Title:        arm_jensenshannon_distance_f16.c
5  * Description:  Jensen-Shannon distance between two vectors
6  *
7  * $Date:        23 April 2021
8  * $Revision:    V1.9.0
9  *
10  * Target Processor: Cortex-M and Cortex-A cores
11  * -------------------------------------------------------------------- */
12 /*
13  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
14  *
15  * SPDX-License-Identifier: Apache-2.0
16  *
17  * Licensed under the Apache License, Version 2.0 (the License); you may
18  * not use this file except in compliance with the License.
19  * You may obtain a copy of the License at
20  *
21  * www.apache.org/licenses/LICENSE-2.0
22  *
23  * Unless required by applicable law or agreed to in writing, software
24  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
25  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
26  * See the License for the specific language governing permissions and
27  * limitations under the License.
28  */
29 
30 #include "dsp/distance_functions_f16.h"
31 
32 #if defined(ARM_FLOAT16_SUPPORTED)
33 
34 #include <limits.h>
35 #include <math.h>
36 
37 /**
38   @ingroup FloatDist
39  */
40 
41 /**
42   @defgroup JensenShannon Jensen-Shannon distance
43 
44   Jensen-Shannon distance
45  */
46 
47 
48 /**
49   @addtogroup JensenShannon
50   @{
51  */
52 
53 #if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
54 /// @private
rel_entr(float16_t x,float16_t y)55 __STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
56 {
57     return (x * logf(x / y));
58 }
59 #endif
60 
61 
62 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
63 
64 #include "arm_helium_utils.h"
65 #include "arm_vec_math_f16.h"
66 
arm_jensenshannon_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)67 float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
68 {
69     uint32_t        blkCnt;
70     float16_t       tmp;
71     f16x8_t         a, b, t, tmpV, accumV;
72 
73     accumV = vdupq_n_f16(0.0f);
74 
75     blkCnt = blockSize >> 3;
76     while (blkCnt > 0U) {
77         a = vld1q(pA);
78         b = vld1q(pB);
79 
80         t = vaddq(a, b);
81         t = vmulq(t, 0.5f);
82 
83         tmpV = vmulq(a, vrecip_medprec_f16(t));
84         tmpV = vlogq_f16(tmpV);
85         accumV = vfmaq(accumV, a, tmpV);
86 
87         tmpV = vmulq_f16(b, vrecip_medprec_f16(t));
88         tmpV = vlogq_f16(tmpV);
89         accumV = vfmaq(accumV, b, tmpV);
90 
91         pA += 8;
92         pB += 8;
93         blkCnt--;
94     }
95 
96     /*
97      * tail
98      * (will be merged thru tail predication)
99      */
100     blkCnt = blockSize & 7;
101     if (blkCnt > 0U) {
102         mve_pred16_t    p0 = vctp16q(blkCnt);
103 
104         a = vldrhq_z_f16(pA, p0);
105         b = vldrhq_z_f16(pB, p0);
106 
107         t = vaddq(a, b);
108         t = vmulq(t, 0.5f);
109 
110         tmpV = vmulq_f16(a, vrecip_medprec_f16(t));
111         tmpV = vlogq_f16(tmpV);
112         accumV = vfmaq_m_f16(accumV, a, tmpV, p0);
113 
114         tmpV = vmulq_f16(b, vrecip_medprec_f16(t));
115         tmpV = vlogq_f16(tmpV);
116         accumV = vfmaq_m_f16(accumV, b, tmpV, p0);
117 
118     }
119 
120     arm_sqrt_f16(vecAddAcrossF16Mve(accumV) / 2.0f, &tmp);
121     return (tmp);
122 }
123 
124 #else
125 
126 
127 /**
128  * @brief        Jensen-Shannon distance between two vectors
129  *
130  * This function is assuming that elements of second vector are > 0
131  * and 0 only when the corresponding element of first vector is 0.
132  * Otherwise the result of the computation does not make sense
133  * and for speed reasons, the cases returning NaN or Infinity are not
134  * managed.
135  *
136  * When the function is computing x log (x / y) with x == 0 and y == 0,
137  * it will compute the right result (0) but a division by zero will occur
138  * and should be ignored in client code.
139  *
140  * @param[in]    pA         First vector
141  * @param[in]    pB         Second vector
142  * @param[in]    blockSize  vector length
143  * @return distance
144  *
145  */
146 
147 
arm_jensenshannon_distance_f16(const float16_t * pA,const float16_t * pB,uint32_t blockSize)148 float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
149 {
150     _Float16 left, right,sum, tmp;
151     float16_t result;
152     uint32_t i;
153 
154     left = 0.0f16;
155     right = 0.0f16;
156     for(i=0; i < blockSize; i++)
157     {
158       tmp = ((_Float16)pA[i] + (_Float16)pB[i]) / 2.0f16;
159       left  += (_Float16)rel_entr(pA[i], tmp);
160       right += (_Float16)rel_entr(pB[i], tmp);
161     }
162 
163 
164     sum = left + right;
165     arm_sqrt_f16(sum/2.0f, &result);
166     return(result);
167 
168 }
169 
170 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
171 
172 /**
173  * @} end of JensenShannon group
174  */
175 
176 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
177 
178