1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_gaussian_naive_bayes_predict_f16
4  * Description:  Naive Gaussian Bayesian Estimator
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/bayes_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 #include <limits.h>
34 #include <math.h>
35 
36 
37 /**
38  * @addtogroup groupBayes
39  * @{
40  */
41 
42 /**
43  * @brief Naive Gaussian Bayesian Estimator
44  *
45  * @param[in]  *S                       points to a naive bayes instance structure
46  * @param[in]  *in                      points to the elements of the input vector.
47  * @param[out] *pOutputProbabilities    points to a buffer of length numberOfClasses containing estimated probabilities
48  * @param[out] *pBufferB                points to a temporary buffer of length numberOfClasses
49  * @return The predicted class
50  *
51  *
52  */
53 
54 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
55 
56 #include "arm_helium_utils.h"
57 #include "arm_vec_math_f16.h"
58 
/*
 * Helium (MVE) half-precision implementation.
 *
 * For each class, the feature dimensions are processed 8 lanes at a time and
 * two sums are accumulated:
 *   acc1 = sum( log(2 * PI * (sigma + epsilon)) )
 *   acc2 = sum( (in - theta)^2 * recip(sigma + epsilon) )
 * The value stored for the class is  -0.5 * acc1 - 0.5 * acc2 + log(prior).
 *
 * S                     instance structure (theta, sigma, classPriors, epsilon,
 *                       vectorDimension and numberOfClasses are read)
 * in                    input vector; vectorDimension elements are consumed
 * pOutputProbabilities  receives one value per class (numberOfClasses writes)
 * pBufferB              scratch buffer; receives the log of every class prior
 *                       (numberOfClasses writes)
 *
 * Returns the index reported by arm_max_f16 over pOutputProbabilities.
 *
 * theta and sigma are walked contiguously: vectorDimension entries for the
 * first class, followed by vectorDimension entries for the next, and so on.
 */
uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S,
   const float16_t * in,
   float16_t *pOutputProbabilities,
   float16_t *pBufferB
   )
{
    uint32_t         nbClass;
    const float16_t *pTheta = S->theta;
    const float16_t *pSigma = S->sigma;
    float16_t       *buffer = pOutputProbabilities;
    const float16_t *pIn;
    float16_t        result;
    f16x8_t          vsigma;
    _Float16         tmp;
    f16x8_t          vacc1, vacc2;
    uint32_t         index;
    float16_t       *logclassPriors = pBufferB;
    const float16_t *pLogPrior = logclassPriors;

    /* Log of all class priors is computed once, up front, into the scratch buffer. */
    arm_vlog_f16((float16_t *) S->classPriors, logclassPriors, S->numberOfClasses);

    for (nbClass = 0; nbClass < S->numberOfClasses; nbClass++) {
        /* Every class is evaluated against the same input vector. */
        pIn = in;

        vacc1 = vdupq_n_f16(0.0f16);
        vacc2 = vdupq_n_f16(0.0f16);

        /* Full groups of 8 half-precision lanes. */
        uint32_t         blkCnt = S->vectorDimension >> 3;
        while (blkCnt > 0U) {
            f16x8_t         vinvSigma, vtmp;

            vsigma = vaddq_n_f16(vld1q(pSigma), S->epsilon);
            vacc1 = vaddq(vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI)));

            vinvSigma = vrecip_medprec_f16(vsigma);

            vtmp = vsubq(vld1q(pIn), vld1q(pTheta));
            /* squaring */
            vtmp = vmulq(vtmp, vtmp);

            vacc2 = vfmaq(vacc2, vtmp, vinvSigma);

            pIn += 8;
            pTheta += 8;
            pSigma += 8;
            blkCnt--;
        }

        /* Remaining 1..7 lanes. */
        blkCnt = S->vectorDimension & 7;
        if (blkCnt > 0U) {
            mve_pred16_t    p0 = vctp16q(blkCnt);
            f16x8_t         vinvSigma, vtmp;

            /*
             * Zeroing predicated loads: lanes at or beyond blkCnt are not
             * fetched, so nothing is read past the end of in / theta / sigma.
             * Those lanes are excluded from both accumulators by the
             * predicated add / fma below, so their contents do not matter.
             */
            vsigma = vaddq_n_f16(vld1q_z_f16(pSigma, p0), S->epsilon);
            vacc1 =
                vaddq_m_f16(vacc1, vacc1, vlogq_f16(vmulq_n_f16(vsigma, 2.0f16 * (_Float16)PI)), p0);

            vinvSigma = vrecip_medprec_f16(vsigma);

            vtmp = vsubq(vld1q_z_f16(pIn, p0), vld1q_z_f16(pTheta, p0));
            /* squaring */
            vtmp = vmulq(vtmp, vtmp);

            vacc2 = vfmaq_m_f16(vacc2, vtmp, vinvSigma, p0);

            /* pIn is reset at the top of the class loop, so only the
             * per-class parameter pointers need to advance here. */
            pTheta += blkCnt;
            pSigma += blkCnt;
        }

        /* Horizontal reduction of both accumulators, each scaled by -0.5. */
        tmp = -0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc1);
        tmp -= 0.5f16 * (_Float16)vecAddAcrossF16Mve(vacc2);

        *buffer = (_Float16)tmp + (_Float16)*pLogPrior++;
        buffer++;
    }

    /* The predicted class is the position of the largest stored value. */
    arm_max_f16(pOutputProbabilities, S->numberOfClasses, &result, &index);

    return (index);
}
142 
143 #else
144 
/*
 * Portable scalar half-precision implementation.
 *
 * For each class the feature dimensions are visited one by one and two sums
 * are accumulated:
 *   logSum  = sum( log(2 * PI * (sigma + epsilon)) )
 *   distSum = sum( (in - theta)^2 / (sigma + epsilon) )
 * The value stored for the class is
 *   -0.5 * logSum - 0.5 * distSum + log(prior).
 *
 * S                     instance structure (theta, sigma, classPriors, epsilon,
 *                       vectorDimension and numberOfClasses are read)
 * in                    input vector; vectorDimension elements are consumed
 * pOutputProbabilities  receives one value per class (numberOfClasses writes)
 * pBufferB              not used by this variant
 *
 * Returns the index reported by arm_max_f16 over pOutputProbabilities.
 */
uint32_t arm_gaussian_naive_bayes_predict_f16(const arm_gaussian_naive_bayes_instance_f16 *S,
   const float16_t * in,
   float16_t *pOutputProbabilities,
   float16_t *pBufferB)
{
    const float16_t *priorPtr = S->classPriors;
    const float16_t *thetaPtr = S->theta;
    const float16_t *sigmaPtr = S->sigma;
    float16_t *scorePtr = pOutputProbabilities;
    float16_t maxValue;
    uint32_t maxIndex;
    uint32_t cls;

    /* The scratch buffer is only needed by the vectorised variant. */
    (void)pBufferB;

    for (cls = 0; cls < S->numberOfClasses; cls++)
    {
        /* Every class is evaluated against the same input vector. */
        const float16_t *samplePtr = in;
        _Float16 logSum = 0.0f16;
        _Float16 distSum = 0.0f16;
        _Float16 classScore;
        uint32_t dim;

        /* thetaPtr / sigmaPtr keep advancing across classes: the parameters
         * of consecutive classes are read back to back. */
        for (dim = 0; dim < S->vectorDimension; dim++)
        {
            const _Float16 x = (_Float16)*samplePtr;
            const _Float16 theta = (_Float16)*thetaPtr;
            const _Float16 sigmaEps = (_Float16)*sigmaPtr + (_Float16)S->epsilon;

            /* The logarithm is evaluated in single precision, then narrowed. */
            logSum += (_Float16)logf(2.0f * PI * (float32_t)sigmaEps);
            distSum += (x - theta) * (x - theta) / sigmaEps;

            samplePtr++;
            thetaPtr++;
            sigmaPtr++;
        }

        /* Two separate statements so the intermediate is narrowed to
         * half precision before the second term is subtracted. */
        classScore = -0.5f16 * logSum;
        classScore -= 0.5f16 * distSum;

        *scorePtr = classScore + (_Float16)logf((float32_t)*priorPtr);
        scorePtr++;
        priorPtr++;
    }

    /* The predicted class is the position of the largest stored value. */
    arm_max_f16(pOutputProbabilities, S->numberOfClasses, &maxValue, &maxIndex);

    return maxIndex;
}
199 
200 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
201 
202 /**
203  * @} end of groupBayes group
204  */
205 
206 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
207 
208