1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_vlog_q15
4 * Description: Q15 vector log
5 *
6 * $Date: 19 July 2021
7 * $Revision: V1.10.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29
30 #include "dsp/fast_math_functions.h"
31
32
33 #define LOG_Q15_ACCURACY 15
34
35 /* Bit to represent the normalization factor
36 It is Ceiling[Log2[LOG_Q15_ACCURACY]] of the previous value.
37 The Log2 algorithm is assuming that the value x is
38 1 <= x < 2.
39
40 But input value could be as small a 2^-LOG_Q15_ACCURACY
41 which would give an integer part of -15.
42 */
43 #define LOG_Q15_INTEGER_PART 4
44
45 /* 2.0 in q14 */
46 #define LOQ_Q15_THRESHOLD (1u << LOG_Q15_ACCURACY)
47
48 /* HALF */
49 #define LOQ_Q15_Q16_HALF LOQ_Q15_THRESHOLD
50 #define LOQ_Q15_Q14_HALF (LOQ_Q15_Q16_HALF >> 2)
51
52
53 /* 1.0 / Log2[Exp[1]] in q15 */
54 #define LOG_Q15_INVLOG2EXP 0x58b9u
55
56
57 /* Clay Turner algorithm */
arm_scalar_log_q15(uint16_t src)58 static uint16_t arm_scalar_log_q15(uint16_t src)
59 {
60 int i;
61
62 int16_t c = __CLZ(src)-16;
63 int16_t normalization=0;
64
65 /* 0.5 in q11 */
66 uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
67
68 /* Will compute y = log2(x) for 1 <= x < 2.0 */
69 uint16_t x;
70
71 /* q11 */
72 uint16_t y=0;
73
74 /* q11 */
75 int16_t tmp;
76
77
78 /* Normalize and convert to q14 format */
79 x = src;
80 if ((c-1) < 0)
81 {
82 x = x >> (1-c);
83 }
84 else
85 {
86 x = x << (c-1);
87 }
88 normalization = c;
89
90
91
92 /* Compute the Log2. Result is in q11 instead of q16
93 because we know 0 <= y < 1.0 but
94 we want a result allowing to do a
95 product on int16 rather than having to go
96 through int32
97 */
98 for(i = 0; i < LOG_Q15_ACCURACY ; i++)
99 {
100 x = (((int32_t)x*x)) >> (LOG_Q15_ACCURACY - 1);
101
102 if (x >= LOQ_Q15_THRESHOLD)
103 {
104 y += inc ;
105 x = x >> 1;
106 }
107 inc = inc >> 1;
108 }
109
110
111 /*
112 Convert the Log2 to Log and apply normalization.
113 We compute (y - normalisation) * (1 / Log2[e]).
114
115 */
116
117 /* q11 */
118 //tmp = y - ((int32_t)normalization << (LOG_Q15_ACCURACY + 1));
119 tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
120
121 /* q4.11 */
122 y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
123
124 return(y);
125
126 }
127
128 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
129
130
vlogq_q15(q15x8_t src)131 static q15x8_t vlogq_q15(q15x8_t src)
132 {
133
134 int i;
135
136 int16x8_t c = vclzq_s16(src);
137 int16x8_t normalization = c;
138
139
140 /* 0.5 in q11 */
141 uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
142
143 /* Will compute y = log2(x) for 1 <= x < 2.0 */
144 uint16x8_t x;
145
146
147 /* q11 */
148 uint16x8_t y = vdupq_n_u16(0);
149
150
151 /* q11 */
152 int16x8_t vtmp;
153
154
155 mve_pred16_t p;
156
157 /* Normalize and convert to q14 format */
158
159
160 vtmp = vsubq_n_s16(c,1);
161 x = vshlq_u16((uint16x8_t)src,vtmp);
162
163
164 /* Compute the Log2. Result is in q11 instead of q16
165 because we know 0 <= y < 1.0 but
166 we want a result allowing to do a
167 product on int16 rather than having to go
168 through int32
169 */
170 for(i = 0; i < LOG_Q15_ACCURACY ; i++)
171 {
172 x = vmulhq_u16(x,x);
173 x = vshlq_n_u16(x,2);
174
175
176 p = vcmphiq_u16(x,vdupq_n_u16(LOQ_Q15_THRESHOLD));
177 y = vaddq_m_n_u16(y, y,inc,p);
178 x = vshrq_m_n_u16(x,x,1,p);
179
180 inc = inc >> 1;
181 }
182
183
184 /*
185 Convert the Log2 to Log and apply normalization.
186 We compute (y - normalisation) * (1 / Log2[e]).
187
188 */
189
190 /* q11 */
191 // tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
192 vtmp = vshlq_n_s16(normalization,LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART);
193 vtmp = vsubq_s16((int16x8_t)y,vtmp);
194
195
196
197 /* q4.11 */
198 // y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
199 vtmp = vqdmulhq_n_s16(vtmp,LOG_Q15_INVLOG2EXP);
200
201 return(vtmp);
202 }
203 #endif
204
205 /**
206 @ingroup groupFastMath
207 */
208
209 /**
210 @addtogroup vlog
211 @{
212 */
213
214 /**
215 @brief q15 vector of log values.
216 @param[in] pSrc points to the input vector in q15
217 @param[out] pDst points to the output vector in q4.11
218 @param[in] blockSize number of samples in each vector
219 */
220
arm_vlog_q15(const q15_t * pSrc,q15_t * pDst,uint32_t blockSize)221 ARM_DSP_ATTRIBUTE void arm_vlog_q15(
222 const q15_t * pSrc,
223 q15_t * pDst,
224 uint32_t blockSize)
225 {
226 uint32_t blkCnt; /* loop counters */
227
228 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
229 q15x8_t src;
230 q15x8_t dst;
231
232 blkCnt = blockSize >> 3;
233
234 while (blkCnt > 0U)
235 {
236 src = vld1q(pSrc);
237 dst = vlogq_q15(src);
238 vst1q(pDst, dst);
239
240 pSrc += 8;
241 pDst += 8;
242 /* Decrement loop counter */
243 blkCnt--;
244 }
245
246 blkCnt = blockSize & 7;
247 #else
248 blkCnt = blockSize;
249 #endif
250
251 while (blkCnt > 0U)
252 {
253 *pDst++ = arm_scalar_log_q15(*pSrc++);
254
255 /* Decrement loop counter */
256 blkCnt--;
257 }
258 }
259
260 /**
261 @} end of vlog group
262 */
263