1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_float_to_q15.c
4  * Description:  Converts the elements of the floating-point vector to Q15 vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/support_functions.h"
30 
31 /**
32   @ingroup groupSupport
33  */
34 
35 /**
36   @addtogroup float_to_x
37   @{
38  */
39 
40 /**
41   @brief         Converts the elements of the floating-point vector to Q15 vector.
42   @param[in]     pSrc       points to the floating-point input vector
43   @param[out]    pDst       points to the Q15 output vector
44   @param[in]     blockSize  number of samples in each vector
45 
46   @par           Details
47                    The equation used for the conversion process is:
48   <pre>
49       pDst[n] = (q15_t)(pSrc[n] * 32768);   0 <= n < blockSize.
50   </pre>
51 
52   @par           Scaling and Overflow Behavior
53                    The function uses saturating arithmetic.
54                    Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
55 
56   @note
                   In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
                   defined in the preprocessor section of project options.
59  */
60 
61 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Converts a single float sample to Q15.
 *
 * The sample is scaled by 32768 and, when ARM_MATH_ROUNDING is defined,
 * biased by half an LSB so that the truncating conversion rounds half away
 * from zero. The scaled value is saturated to [-32768, 32767] while it is
 * still a float; NaN is mapped to 0.
 */
static q15_t float_to_q15_sat(float32_t in)
{
    float32_t scaled = in * 32768.0f;

#ifdef ARM_MATH_ROUNDING
    scaled += (scaled > 0.0f) ? 0.5f : -0.5f;
#endif

    /* Common case: the value fits, the conversion truncates toward zero. */
    if ((scaled > -32768.0f) && (scaled < 32767.0f))
    {
        return (q15_t) scaled;
    }

    /* Saturate before converting: casting a float whose value does not fit
       the destination integer type is undefined behaviour in C. */
    if (scaled >= 32767.0f)
    {
        return (q15_t) 0x7FFF;
    }
    if (scaled <= -32768.0f)
    {
        return (q15_t) (-32768);
    }

    /* Only NaN fails every comparison above. */
    return (q15_t) 0;
}

/**
  @brief         Converts the elements of the floating-point vector to Q15 vector (MVE variant).
  @param[in]     pSrc       points to the floating-point input vector
  @param[out]    pDst       points to the Q15 output vector
  @param[in]     blockSize  number of samples in each vector

  @par           Details
                   Samples are processed eight at a time with MVE intrinsics;
                   the remaining (blockSize & 7) samples are converted one by one.
                   Both paths scale by 32768 and saturate to the Q15 range.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
    float32_t  scale = 32768.0f;      /* 2^15: same factor as the scalar tail */
    uint32_t   blkCnt;
    f32x4x2_t  tmp;
    q15x8_t    vecDst = { 0 };

    /* Vector body: 8 samples per iteration */
    blkCnt = blockSize >> 3;
    while (blkCnt > 0U)
    {
        /* C = A * 32768 */
        /* vld2q de-interleaves the 8 floats: val[0] holds the even-indexed
           samples, val[1] the odd-indexed ones. */
        tmp = vld2q(pSrc);

        tmp.val[0] = vmulq(tmp.val[0], scale);
        tmp.val[1] = vmulq(tmp.val[1], scale);

        /* Convert to 32-bit integers, then narrow with saturation into the
           bottom (even) and top (odd) 16-bit lanes, which restores the
           original sample order.
           NOTE(review): vcvtaq rounds to nearest (ties away from zero) whether
           or not ARM_MATH_ROUNDING is defined, whereas the scalar tail below
           truncates unless ARM_MATH_ROUNDING is defined. */
        vecDst = vqmovnbq(vecDst, vcvtaq_s32_f32(tmp.val[0]));
        vecDst = vqmovntq(vecDst, vcvtaq_s32_f32(tmp.val[1]));
        vst1q(pDst, vecDst);

        pSrc += 8;
        pDst += 8;
        blkCnt--;
    }

    /* Scalar tail: remaining 0..7 samples */
    blkCnt = blockSize & 7;
    while (blkCnt > 0U)
    {
        /* C = A * 32768 */
        *pDst++ = float_to_q15_sat(*pSrc++);

        blkCnt--;
    }
}
121 
122 #else
123 #if defined(ARM_MATH_NEON_EXPERIMENTAL)
/*
 * Converts a single float sample to Q15.
 *
 * The sample is scaled by 32768 and, when ARM_MATH_ROUNDING is defined,
 * biased by half an LSB so that the truncating conversion rounds half away
 * from zero. The scaled value is saturated to [-32768, 32767] while it is
 * still a float; NaN is mapped to 0.
 */
static q15_t float_to_q15_sat(float32_t in)
{
  float32_t scaled = in * 32768.0f;

#ifdef ARM_MATH_ROUNDING
  scaled += (scaled > 0.0f) ? 0.5f : -0.5f;
#endif

  /* Common case: the value fits, the conversion truncates toward zero. */
  if ((scaled > -32768.0f) && (scaled < 32767.0f))
  {
    return (q15_t) scaled;
  }

  /* Saturate before converting: casting a float whose value does not fit
     the destination integer type is undefined behaviour in C. */
  if (scaled >= 32767.0f)
  {
    return (q15_t) 0x7FFF;
  }
  if (scaled <= -32768.0f)
  {
    return (q15_t) (-32768);
  }

  /* Only NaN fails every comparison above. */
  return (q15_t) 0;
}

/**
  @brief         Converts the elements of the floating-point vector to Q15 vector (Neon variant).
  @param[in]     pSrc       points to the floating-point input vector
  @param[out]    pDst       points to the Q15 output vector
  @param[in]     blockSize  number of samples in each vector

  @par           Details
                   Samples are processed four at a time with Neon intrinsics;
                   the remaining (blockSize & 3) samples are converted one by one.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
  q15_t * pDst,
  uint32_t blockSize)
{
  const float32_t *pIn = pSrc;                   /* Source pointer */
  uint32_t blkCnt;                               /* Loop counter */

  float32x4_t inV;
#ifdef ARM_MATH_ROUNDING
  /* The bias is added BEFORE scaling, hence half an LSB expressed in the
     unscaled domain: +/- 0.5 / 32768. */
  float32x4_t zeroV = vdupq_n_f32(0.0f);
  float32x4_t pHalf = vdupq_n_f32(0.5f / 32768.0f);
  float32x4_t mHalf = vdupq_n_f32(-0.5f / 32768.0f);
  uint32x4_t cmp;
#endif

  int32x4_t cvt;
  int16x4_t outV;

  /* Vector body: 4 samples per iteration */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* C = A * 32768 */
    inV = vld1q_f32(pIn);
    pIn += 4;

#ifdef ARM_MATH_ROUNDING
    /* Select +half for strictly positive lanes, -half otherwise */
    cmp = vcgtq_f32(inV, zeroV);
    inV = vaddq_f32(inV, vbslq_f32(cmp, pHalf, mHalf));
#endif

    /* Float -> fixed point with 15 fractional bits (i.e. scaled by 2^15),
       then saturating narrow from 32 to 16 bits */
    cvt = vcvtq_n_s32_f32(inV, 15);
    outV = vqmovn_s32(cvt);

    vst1_s16(pDst, outV);
    pDst += 4;

    /* Decrement the loop counter */
    blkCnt--;
  }

  /* Scalar tail: remaining 0..3 samples, no unrolling */
  blkCnt = blockSize & 3;

  while (blkCnt > 0U)
  {
    /* C = A * 32768 */
    *pDst++ = float_to_q15_sat(*pIn++);

    /* Decrement the loop counter */
    blkCnt--;
  }
}
214 #else
/*
 * Converts a single float sample to Q15.
 *
 * The sample is scaled by 32768 and, when ARM_MATH_ROUNDING is defined,
 * biased by half an LSB so that the truncating conversion rounds half away
 * from zero. The scaled value is saturated to [-32768, 32767] while it is
 * still a float; NaN is mapped to 0.
 */
static q15_t float_to_q15_sat(float32_t in)
{
  float32_t scaled = in * 32768.0f;

#ifdef ARM_MATH_ROUNDING
  scaled += (scaled > 0.0f) ? 0.5f : -0.5f;
#endif

  /* Common case: the value fits, the conversion truncates toward zero. */
  if ((scaled > -32768.0f) && (scaled < 32767.0f))
  {
    return (q15_t) scaled;
  }

  /* Saturate before converting: casting a float whose value does not fit
     the destination integer type is undefined behaviour in C, so a later
     integer saturation cannot be relied upon to repair the result. */
  if (scaled >= 32767.0f)
  {
    return (q15_t) 0x7FFF;
  }
  if (scaled <= -32768.0f)
  {
    return (q15_t) (-32768);
  }

  /* Only NaN fails every comparison above. */
  return (q15_t) 0;
}

/**
  @brief         Converts the elements of the floating-point vector to Q15 vector.
  @param[in]     pSrc       points to the floating-point input vector
  @param[out]    pDst       points to the Q15 output vector
  @param[in]     blockSize  number of samples in each vector

  @par           Details
                   The equation used for the conversion process is:
  <pre>
      pDst[n] = (q15_t)(pSrc[n] * 32768);   0 <= n < blockSize.
  </pre>

  @par           Scaling and Overflow Behavior
                   Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
                   Rounding is applied only when ARM_MATH_ROUNDING is defined;
                   otherwise the scaled value is truncated toward zero.
 */
void arm_float_to_q15(
  const float32_t * pSrc,
        q15_t * pDst,
        uint32_t blockSize)
{
        uint32_t blkCnt;                               /* Loop counter */
  const float32_t *pIn = pSrc;                         /* Source pointer */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = blockSize >> 2U;

  while (blkCnt > 0U)
  {
    /* C = A * 32768 */
    *pDst++ = float_to_q15_sat(*pIn++);
    *pDst++ = float_to_q15_sat(*pIn++);
    *pDst++ = float_to_q15_sat(*pIn++);
    *pDst++ = float_to_q15_sat(*pIn++);

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = blockSize % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = blockSize;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C = A * 32768 */
    *pDst++ = float_to_q15_sat(*pIn++);

    /* Decrement loop counter */
    blkCnt--;
  }

}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
303 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
304 
305 /**
306   @} end of float_to_x group
307  */
308