1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_logsumexp_f16.c
4 * Description: LogSumExp
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/statistics_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 #include <limits.h>
34 #include <math.h>
35
36
37 /**
38 * @addtogroup LogSumExp
39 * @{
40 */
41
42
43 /**
44 * @brief Computation of the LogSumExp
45 *
46 * In probabilistic computations, the dynamic of the probability values can be very
47 * wide because they come from gaussian functions.
48 * To avoid underflow and overflow issues, the values are represented by their log.
49 * In this representation, multiplying the original exp values is easy : their logs are added.
50 * But adding the original exp values is requiring some special handling and it is the
51 * goal of the LogSumExp function.
52 *
53 * If the values are x1...xn, the function is computing:
54 *
55 * ln(exp(x1) + ... + exp(xn)) and the computation is done in such a way that
56 * rounding issues are minimised.
57 *
58 * The max xm of the values is extracted and the function is computing:
59 * xm + ln(exp(x1 - xm) + ... + exp(xn - xm))
60 *
61 * @param[in] *in Pointer to an array of input values.
62 * @param[in] blockSize Number of samples in the input array.
63 * @return LogSumExp
64 *
65 */
66
67 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
68
69 #include "arm_helium_utils.h"
70 #include "arm_vec_math_f16.h"
71
arm_logsumexp_f16(const float16_t * in,uint32_t blockSize)72 float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
73 {
74 float16_t maxVal;
75 const float16_t *pIn;
76 int32_t blkCnt;
77 _Float16 accum=0.0f16;
78 _Float16 tmp;
79
80
81 arm_max_no_idx_f16((float16_t *) in, blockSize, &maxVal);
82
83
84 blkCnt = blockSize;
85 pIn = in;
86
87
88 f16x8_t vSum = vdupq_n_f16(0.0f16);
89 blkCnt = blockSize >> 3;
90 while(blkCnt > 0)
91 {
92 f16x8_t vecIn = vld1q(pIn);
93 f16x8_t vecExp;
94
95 vecExp = vexpq_f16(vsubq_n_f16(vecIn, maxVal));
96
97 vSum = vaddq_f16(vSum, vecExp);
98
99 /*
100 * Decrement the blockSize loop counter
101 * Advance vector source and destination pointers
102 */
103 pIn += 8;
104 blkCnt --;
105 }
106
107 /* sum + log */
108 accum = vecAddAcrossF16Mve(vSum);
109
110 blkCnt = blockSize & 0x7;
111 while(blkCnt > 0)
112 {
113 tmp = *pIn++;
114 accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
115 blkCnt--;
116
117 }
118
119 accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
120
121 return (accum);
122 }
123
124 #else
arm_logsumexp_f16(const float16_t * in,uint32_t blockSize)125 float16_t arm_logsumexp_f16(const float16_t *in, uint32_t blockSize)
126 {
127 _Float16 maxVal;
128 _Float16 tmp;
129 const float16_t *pIn;
130 uint32_t blkCnt;
131 _Float16 accum;
132
133 pIn = in;
134 blkCnt = blockSize;
135
136 maxVal = *pIn++;
137 blkCnt--;
138
139 while(blkCnt > 0)
140 {
141 tmp = *pIn++;
142
143 if (tmp > maxVal)
144 {
145 maxVal = tmp;
146 }
147 blkCnt--;
148
149 }
150
151 blkCnt = blockSize;
152 pIn = in;
153 accum = 0;
154 while(blkCnt > 0)
155 {
156 tmp = *pIn++;
157 accum += (_Float16)expf((float32_t)((_Float16)tmp - (_Float16)maxVal));
158 blkCnt--;
159
160 }
161 accum = (_Float16)maxVal + (_Float16)logf((float32_t)accum);
162
163 return(accum);
164 }
165 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
166
167 /**
168 * @} end of LogSumExp group
169 */
170
171 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
172
173