1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_float_to_q7.c
4  * Description:  Converts the elements of the floating-point vector to Q7 vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/support_functions.h"
30 
31 /**
32   @ingroup groupSupport
33  */
34 
35 /**
36   @addtogroup float_to_x
37   @{
38  */
39 
40 /**
41  * @brief Converts the elements of the floating-point vector to Q7 vector.
42  * @param[in]       *pSrc points to the floating-point input vector
43  * @param[out]      *pDst points to the Q7 output vector
44  * @param[in]       blockSize length of the input vector
45  *
46  *\par Description:
47  * \par
48  * The equation used for the conversion process is:
49  * <pre>
50  * 	pDst[n] = (q7_t)(pSrc[n] * 128);   0 <= n < blockSize.
51  * </pre>
52  * \par Scaling and Overflow Behavior:
53  * \par
54  * The function uses saturating arithmetic.
55  * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
56  * \note
 * In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
 * defined in the preprocessor section of project options.
59  */
60 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
arm_float_to_q7(const float32_t * pSrc,q7_t * pDst,uint32_t blockSize)61 ARM_DSP_ATTRIBUTE void arm_float_to_q7(
62   const float32_t * pSrc,
63   q7_t * pDst,
64   uint32_t blockSize)
65 {
66     uint32_t         blkCnt;     /* loop counters */
67     float32_t       maxQ = powf(2.0, 7);
68     f32x4x4_t       tmp;
69     q15x8_t         evVec = { 0 }, oddVec = { 0 };
70     q7x16_t         vecDst = { 0 };
71     float32_t const *pSrcVec;
72 #ifdef ARM_MATH_ROUNDING
73     float32_t in;
74 #endif
75 
76     pSrcVec = (float32_t const *) pSrc;
77     blkCnt = blockSize >> 4;
78     while (blkCnt > 0U) {
79         tmp = vld4q(pSrcVec);
80         pSrcVec += 16;
81         /*
82          * C = A * 128.0
83          * convert from float to q7 and then store the results in the destination buffer
84          */
85         tmp.val[0] = vmulq(tmp.val[0], maxQ);
86         tmp.val[1] = vmulq(tmp.val[1], maxQ);
87         tmp.val[2] = vmulq(tmp.val[2], maxQ);
88         tmp.val[3] = vmulq(tmp.val[3], maxQ);
89 
90         /*
91          * convert and pack evens
92          */
93         evVec = vqmovnbq(evVec, vcvtaq_s32_f32(tmp.val[0]));
94         evVec = vqmovntq(evVec, vcvtaq_s32_f32(tmp.val[2]));
95         /*
96          * convert and pack odds
97          */
98         oddVec = vqmovnbq(oddVec, vcvtaq_s32_f32(tmp.val[1]));
99         oddVec = vqmovntq(oddVec, vcvtaq_s32_f32(tmp.val[3]));
100         /*
101          * merge
102          */
103         vecDst = vqmovnbq(vecDst, evVec);
104         vecDst = vqmovntq(vecDst, oddVec);
105 
106         vst1q(pDst, vecDst);
107         pDst += 16;
108         /*
109          * Decrement the blockSize loop counter
110          */
111         blkCnt--;
112     }
113 
114   blkCnt = blockSize & 0xF;
115   while (blkCnt > 0U)
116   {
117     /* C = A * 128 */
118 
119     /* Convert from float to q7 and store result in destination buffer */
120 #ifdef ARM_MATH_ROUNDING
121 
122     in = (*pSrcVec++ * 128);
123     in += in > 0.0f ? 0.5f : -0.5f;
124     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
125 
126 #else
127 
128     *pDst++ = (q7_t) __SSAT((q31_t) (*pSrcVec++ * 128.0f), 8);
129 
130 #endif /* #ifdef ARM_MATH_ROUNDING */
131 
132     /* Decrement loop counter */
133     blkCnt--;
134   }
135 
136 }
137 #else
138 #if defined(ARM_MATH_NEON)
arm_float_to_q7(const float32_t * pSrc,q7_t * pDst,uint32_t blockSize)139 ARM_DSP_ATTRIBUTE void arm_float_to_q7(
140   const float32_t * pSrc,
141   q7_t * pDst,
142   uint32_t blockSize)
143 {
144   const float32_t *pIn = pSrc;                         /* Src pointer */
145   uint32_t blkCnt;                               /* loop counter */
146 
147   float32x4_t inV;
148   #ifdef ARM_MATH_ROUNDING
149   float32_t in;
150   float32x4_t zeroV = vdupq_n_f32(0.0f);
151   float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
152   float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
153   float32x4_t r;
154   uint32x4_t cmp;
155   #endif
156 
157   int16x4_t cvt1,cvt2;
158   int8x8_t outV;
159 
160   blkCnt = blockSize >> 3U;
161 
162   /* Compute 8 outputs at a time.
163    ** a second loop below computes the remaining 1 to 7 samples. */
164   while (blkCnt > 0U)
165   {
166 
167 #ifdef ARM_MATH_ROUNDING
168     /* C = A * 128 */
169     /* Convert from float to q7 and then store the results in the destination buffer */
170     inV = vld1q_f32(pIn);
171     cmp = vcgtq_f32(inV,zeroV);
172     r = vbslq_f32(cmp,pHalf,mHalf);
173     inV = vaddq_f32(inV, r);
174     cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
175     pIn += 4;
176 
177     inV = vld1q_f32(pIn);
178     cmp = vcgtq_f32(inV,zeroV);
179     r = vbslq_f32(cmp,pHalf,mHalf);
180     inV = vaddq_f32(inV, r);
181     cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
182     pIn += 4;
183 
184     outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
185     vst1_s8(pDst, outV);
186     pDst += 8;
187 
188 #else
189 
190     /* C = A * 128 */
191     /* Convert from float to q7 and then store the results in the destination buffer */
192     inV = vld1q_f32(pIn);
193     cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
194     pIn += 4;
195 
196     inV = vld1q_f32(pIn);
197     cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
198     pIn += 4;
199 
200     outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
201 
202     vst1_s8(pDst, outV);
203     pDst += 8;
204 #endif /*      #ifdef ARM_MATH_ROUNDING        */
205 
206     /* Decrement the loop counter */
207     blkCnt--;
208   }
209 
210   /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
211    ** No loop unrolling is used. */
212   blkCnt = blockSize & 7;
213 
214   while (blkCnt > 0U)
215   {
216 
217 #ifdef ARM_MATH_ROUNDING
218     /* C = A * 128 */
219     /* Convert from float to q7 and then store the results in the destination buffer */
220     in = *pIn++;
221     in = (in * 128);
222     in += in > 0.0f ? 0.5f : -0.5f;
223     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
224 
225 #else
226 
227     /* C = A * 128 */
228     /* Convert from float to q7 and then store the results in the destination buffer */
229     *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
230 
231 #endif /*      #ifdef ARM_MATH_ROUNDING        */
232 
233     /* Decrement the loop counter */
234     blkCnt--;
235   }
236 
237 }
238 #else
arm_float_to_q7(const float32_t * pSrc,q7_t * pDst,uint32_t blockSize)239 ARM_DSP_ATTRIBUTE void arm_float_to_q7(
240   const float32_t * pSrc,
241         q7_t * pDst,
242         uint32_t blockSize)
243 {
244         uint32_t blkCnt;                               /* Loop counter */
245   const float32_t *pIn = pSrc;                         /* Source pointer */
246 
247 #ifdef ARM_MATH_ROUNDING
248         float32_t in;
249 #endif /* #ifdef ARM_MATH_ROUNDING */
250 
251 #if defined (ARM_MATH_LOOPUNROLL)
252 
253   /* Loop unrolling: Compute 4 outputs at a time */
254   blkCnt = blockSize >> 2U;
255 
256   while (blkCnt > 0U)
257   {
258     /* C = A * 128 */
259 
260     /* Convert from float to q7 and store result in destination buffer */
261 #ifdef ARM_MATH_ROUNDING
262 
263     in = (*pIn++ * 128);
264     in += in > 0.0f ? 0.5f : -0.5f;
265     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
266 
267     in = (*pIn++ * 128);
268     in += in > 0.0f ? 0.5f : -0.5f;
269     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
270 
271     in = (*pIn++ * 128);
272     in += in > 0.0f ? 0.5f : -0.5f;
273     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
274 
275     in = (*pIn++ * 128);
276     in += in > 0.0f ? 0.5f : -0.5f;
277     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
278 
279 #else
280 
281     *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
282     *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
283     *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
284     *pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
285 
286 #endif /* #ifdef ARM_MATH_ROUNDING */
287 
288     /* Decrement loop counter */
289     blkCnt--;
290   }
291 
292   /* Loop unrolling: Compute remaining outputs */
293   blkCnt = blockSize % 0x4U;
294 
295 #else
296 
297   /* Initialize blkCnt with number of samples */
298   blkCnt = blockSize;
299 
300 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
301 
302   while (blkCnt > 0U)
303   {
304     /* C = A * 128 */
305 
306     /* Convert from float to q7 and store result in destination buffer */
307 #ifdef ARM_MATH_ROUNDING
308 
309     in = (*pIn++ * 128);
310     in += in > 0.0f ? 0.5f : -0.5f;
311     *pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
312 
313 #else
314 
315     *pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
316 
317 #endif /* #ifdef ARM_MATH_ROUNDING */
318 
319     /* Decrement loop counter */
320     blkCnt--;
321   }
322 
323 }
324 #endif /* #if defined(ARM_MATH_NEON) */
325 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
326 
327 /**
328   @} end of float_to_x group
329  */
330