1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_vlog_q31
4  * Description:  Q31 vector log
5  *
6  * $Date:        19 July 2021
7  * $Revision:    V1.10.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/fast_math_functions.h"
30 
31 #define LOG_Q31_ACCURACY 31
32 
33 /* Bit to represent the normalization factor
34    It is Ceiling[Log2[LOG_Q31_ACCURACY]] of the previous value.
35    The Log2 algorithm is assuming that the value x is
36    1 <= x < 2.
37 
38    But input value could be as small a 2^-LOG_Q31_ACCURACY
39    which would give an integer part of -31.
40 */
41 #define LOG_Q31_INTEGER_PART 5
42 
43 /* 2.0 in Q30 */
44 #define LOQ_Q31_THRESHOLD (1u << LOG_Q31_ACCURACY)
45 
46 /* HALF */
47 #define LOQ_Q31_Q32_HALF LOQ_Q31_THRESHOLD
48 #define LOQ_Q31_Q30_HALF (LOQ_Q31_Q32_HALF >> 2)
49 
50 
51 /* 1.0 / Log2[Exp[1]] in Q31 */
52 #define LOG_Q31_INVLOG2EXP 0x58b90bfbuL
53 
54 /* Clay Turner algorithm */
arm_scalar_log_q31(uint32_t src)55 static uint32_t arm_scalar_log_q31(uint32_t src)
56 {
57    int32_t i;
58 
59    int32_t c = __CLZ(src);
60    int32_t normalization=0;
61 
62    /* 0.5 in q26 */
63    uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
64 
65    /* Will compute y = log2(x) for 1 <= x < 2.0 */
66    uint32_t x;
67 
68    /* q26 */
69    uint32_t y=0;
70 
71    /* q26 */
72    int32_t tmp;
73 
74 
75    /* Normalize and convert to q30 format */
76    x = src;
77    if ((c-1) < 0)
78    {
79      x = x >> (1-c);
80    }
81    else
82    {
83      x = x << (c-1);
84    }
85    normalization = c;
86 
87    /* Compute the Log2. Result is in q26
88       because we know 0 <= y < 1.0 but
89       do not want to use q32 to allow
90       following computation with less instructions.
91    */
92    for(i = 0; i < LOG_Q31_ACCURACY ; i++)
93    {
94       x = ((int64_t)x*x)  >> (LOG_Q31_ACCURACY - 1);
95 
96       if (x >= LOQ_Q31_THRESHOLD)
97       {
98          y += inc ;
99          x = x >> 1;
100       }
101       inc = inc >> 1;
102    }
103 
104    /*
105       Convert the Log2 to Log and apply normalization.
106       We compute (y - normalisation) * (1 / Log2[e]).
107 
108    */
109 
110    /* q26 */
111    tmp = (int32_t)y - (normalization << (LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART));
112 
113 
114    /* q5.26 */
115    y = ((int64_t)tmp * LOG_Q31_INVLOG2EXP) >> 31;
116 
117 
118 
119    return(y);
120 
121 }
122 
123 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
124 
125 
vlogq_q31(q31x4_t src)126 q31x4_t vlogq_q31(q31x4_t src)
127 {
128 
129    int32_t i;
130 
131    int32x4_t c = vclzq_s32(src);
132    int32x4_t normalization = c;
133 
134 
135    /* 0.5 in q11 */
136    uint32_t inc  = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
137 
138    /* Will compute y = log2(x) for 1 <= x < 2.0 */
139    uint32x4_t x;
140 
141 
142    /* q11 */
143    uint32x4_t y = vdupq_n_u32(0);
144 
145 
146    /* q11 */
147    int32x4_t vtmp;
148 
149 
150    mve_pred16_t p;
151 
152    /* Normalize and convert to q14 format */
153 
154 
155    vtmp = vsubq_n_s32(c,1);
156    x = vshlq_u32((uint32x4_t)src,vtmp);
157 
158 
159     /* Compute the Log2. Result is in Q26
160       because we know 0 <= y < 1.0 but
161       do not want to use Q32 to allow
162       following computation with less instructions.
163    */
164    for(i = 0; i < LOG_Q31_ACCURACY ; i++)
165    {
166       x = vmulhq_u32(x,x);
167       x = vshlq_n_u32(x,2);
168 
169 
170       p = vcmphiq_u32(x,vdupq_n_u32(LOQ_Q31_THRESHOLD));
171       y = vaddq_m_n_u32(y, y,inc,p);
172       x = vshrq_m_n_u32(x,x,1,p);
173 
174       inc = inc >> 1;
175    }
176 
177 
178    /*
179       Convert the Log2 to Log and apply normalization.
180       We compute (y - normalisation) * (1 / Log2[e]).
181 
182    */
183 
184    /* q11 */
185    // tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
186    vtmp = vshlq_n_s32(normalization,LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART);
187    vtmp = vsubq_s32((int32x4_t)y,vtmp);
188 
189 
190 
191    /* q4.11 */
192    // y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
193    vtmp = vqdmulhq_n_s32(vtmp,LOG_Q31_INVLOG2EXP);
194 
195    return(vtmp);
196 }
197 #endif
198 
199 /**
200   @ingroup groupFastMath
201  */
202 
203 /**
204   @addtogroup vlog
205   @{
206  */
207 
208 /**
209   @brief         q31 vector of log values.
210   @param[in]     pSrc       points to the input vector in q31
211   @param[out]    pDst       points to the output vector q5.26
212   @param[in]     blockSize  number of samples in each vector
213  */
arm_vlog_q31(const q31_t * pSrc,q31_t * pDst,uint32_t blockSize)214 void arm_vlog_q31(
215   const q31_t * pSrc,
216         q31_t * pDst,
217         uint32_t blockSize)
218 {
219   uint32_t  blkCnt;           /* loop counters */
220 
221   #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
222 
223   q31x4_t src;
224   q31x4_t dst;
225 
226   blkCnt = blockSize >> 2;
227 
228   while (blkCnt > 0U)
229   {
230       src = vld1q(pSrc);
231       dst = vlogq_q31(src);
232       vst1q(pDst, dst);
233 
234       pSrc += 4;
235       pDst += 4;
236       /* Decrement loop counter */
237       blkCnt--;
238   }
239 
240   blkCnt = blockSize & 3;
241   #else
242   blkCnt = blockSize;
243   #endif
244 
245   while (blkCnt > 0U)
246   {
247      *pDst++=arm_scalar_log_q31(*pSrc++);
248 
249      blkCnt--;
250   }
251 
252 }
253 
254 /**
255   @} end of vlog group
256  */
257