1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_float_to_q15.c
4  * Description:  Converts the elements of the floating-point vector to Q15 vector
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/support_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 
34 /**
35   @ingroup groupSupport
36  */
37 
38 /**
39   @addtogroup f16_to_x
40   @{
41  */
42 
43 /**
44   @brief         Converts the elements of the f16 vector to Q15 vector.
45   @param[in]     pSrc       points to the f16 input vector
46   @param[out]    pDst       points to the Q15 output vector
47   @param[in]     blockSize  number of samples in each vector
48 
49   @par           Details
50                    The equation used for the conversion process is:
51   <pre>
52       pDst[n] = (q15_t)(pSrc[n] * 32768);   0 <= n < blockSize.
53   </pre>
54 
55   @par           Scaling and Overflow Behavior
56                    The function uses saturating arithmetic.
57                    Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
58 
59   @note
60                    In order to apply rounding in scalar version, the library should be rebuilt with the ROUNDING macro
61                    defined in the preprocessor section of project options.
62  */
63 
64 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
65 
arm_f16_to_q15(const float16_t * pSrc,q15_t * pDst,uint32_t blockSize)66 ARM_DSP_ATTRIBUTE void arm_f16_to_q15(
67   const float16_t * pSrc,
68   q15_t * pDst,
69   uint32_t blockSize)
70 {
71     float16_t       maxQ = (float16_t) Q15_MAX;
72     float16x8_t         vecDst;
73 
74 
75     do {
76         mve_pred16_t    p = vctp16q(blockSize);
77 
78         vecDst = vldrhq_z_f16((float16_t const *) pSrc, p);
79         /* C = A * 32767 */
80         /* convert from float to Q15 and then store the results in the destination buffer */
81         vecDst = vmulq_m(vuninitializedq_f16(), vecDst, maxQ, p);
82 
83         vstrhq_p_s16(pDst,
84             vcvtaq_m(vuninitializedq_s16(), vecDst, p), p);
85         /*
86          * Decrement the blockSize loop counter
87          * Advance vector source and destination pointers
88          */
89         pSrc += 8;
90         pDst += 8;
91         blockSize -= 8;
92     }
93     while ((int32_t) blockSize > 0);
94 }
95 
96 #else
97 
arm_f16_to_q15(const float16_t * pSrc,q15_t * pDst,uint32_t blockSize)98 ARM_DSP_ATTRIBUTE void arm_f16_to_q15(
99   const float16_t * pSrc,
100         q15_t * pDst,
101         uint32_t blockSize)
102 {
103     const float16_t *pIn = pSrc;      /* Src pointer */
104     uint32_t  blkCnt;           /* loop counter */
105 #ifdef ARM_MATH_ROUNDING
106     float16_t in;
107 #endif                          /*      #ifdef ARM_MATH_ROUNDING        */
108 
109     /*
110      * Loop over blockSize number of values
111      */
112     blkCnt = blockSize;
113 
114     while (blkCnt > 0U)
115     {
116 
117 #ifdef ARM_MATH_ROUNDING
118 
119         /*
120          * C = A * 65536
121          */
122         /*
123          * convert from float to Q31 and then store the results in the destination buffer
124          */
125         in = *pIn++;
126         in = ((_Float16)in * (_Float16)32768.0f16);
127         in += (_Float16)in > 0.0f16 ? 0.5f16 : -0.5f16;
128         *pDst++ = clip_q31_to_q15((q31_t) (in));
129 
130 #else
131 
132         /*
133          * C = A * 32768
134          */
135         /*
136          * convert from float to Q31 and then store the results in the destination buffer
137          */
138         *pDst++ = clip_q31_to_q15((q31_t) ((_Float16)*pIn++ * 32768.0f16));
139 
140 #endif                          /*      #ifdef ARM_MATH_ROUNDING        */
141 
142         /*
143          * Decrement the loop counter
144          */
145         blkCnt--;
146     }
147 
148 }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
150 
151 /**
152   @} end of f16_to_x group
153  */
154 
155 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
156 
157