1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_scale_q31.c
4  * Description:  Multiplies a Q31 vector by a scalar
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/basic_math_functions.h"
30 
31 /**
32   @ingroup groupMath
33  */
34 
35 /**
36   @addtogroup BasicScale
37   @{
38  */
39 
40 /**
41   @brief         Multiplies a Q31 vector by a scalar.
42   @param[in]     pSrc       points to the input vector
43   @param[in]     scaleFract fractional portion of the scale value
44   @param[in]     shift      number of bits to shift the result by
45   @param[out]    pDst       points to the output vector
46   @param[in]     blockSize  number of samples in each vector
47 
48   @par           Scaling and Overflow Behavior
49                    The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
50                    These are multiplied to yield a 2.62 intermediate result and this is shifted
51                    with saturation to 1.31 format.
52                    There is an intermediate shift by 32 to go from the
53                    2.62 to 1.31 format.
54                    The shift argument is applied on the 1.31 result and not to the intermediate
55                    2.62 format.
56  */
57 
58 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
59 
60 #include "arm_helium_utils.h"
61 
arm_scale_q31(const q31_t * pSrc,q31_t scaleFract,int8_t shift,q31_t * pDst,uint32_t blockSize)62 ARM_DSP_ATTRIBUTE void arm_scale_q31(
63     const q31_t * pSrc,
64     q31_t   scaleFract,
65     int8_t  shift,
66     q31_t * pDst,
67     uint32_t blockSize)
68 {
69     uint32_t  blkCnt;           /* loop counters */
70     q31x4_t vecSrc;
71     q31x4_t vecDst;
72 
73     /* Compute 4 outputs at a time */
74     blkCnt = blockSize >> 2;
75     while (blkCnt > 0U)
76     {
77         /*
78          * C = A * scale
79          * Scale the input and then store the result in the destination buffer.
80          */
81         vecSrc = vld1q(pSrc);
82         vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
83         vecDst = vqshlq_r(vecDst, shift + 1);
84         vst1q(pDst, vecDst);
85         /*
86          * Decrement the blockSize loop counter
87          */
88         blkCnt--;
89         /*
90          * advance vector source and destination pointers
91          */
92         pSrc += 4;
93         pDst += 4;
94     }
95     /*
96      * tail
97      */
98     blkCnt = blockSize & 3;
99     if (blkCnt > 0U)
100     {
101         mve_pred16_t p0 = vctp32q(blkCnt);
102         vecSrc = vld1q(pSrc);
103         vecDst = vmulhq(vecSrc, vdupq_n_s32(scaleFract));
104         vecDst = vqshlq_r(vecDst, shift + 1);
105         vstrwq_p(pDst, vecDst, p0);
106     }
107 }
108 
109 #else
arm_scale_q31(const q31_t * pSrc,q31_t scaleFract,int8_t shift,q31_t * pDst,uint32_t blockSize)110 ARM_DSP_ATTRIBUTE void arm_scale_q31(
111   const q31_t *pSrc,
112         q31_t scaleFract,
113         int8_t shift,
114         q31_t *pDst,
115         uint32_t blockSize)
116 {
117         uint32_t blkCnt;                               /* Loop counter */
118         q31_t in, out;                                 /* Temporary variables */
119         int8_t kShift = shift + 1;                     /* Shift to apply after scaling */
120         int8_t sign = (kShift & 0x80);
121 
122 #if defined (ARM_MATH_LOOPUNROLL)
123 
124   /* Loop unrolling: Compute 4 outputs at a time */
125   blkCnt = blockSize >> 2U;
126 
127   if (sign == 0U)
128   {
129     while (blkCnt > 0U)
130     {
131       /* C = A * scale */
132 
133       /* Scale input and store result in destination buffer. */
134       in = *pSrc++;                                /* read input from source */
135       in = ((q63_t) in * scaleFract) >> 32;        /* multiply input with scaler value */
136       out = in << kShift;                          /* apply shifting */
137       if (in != (out >> kShift))                   /* saturate the result */
138         out = 0x7FFFFFFF ^ (in >> 31);
139       *pDst++ = out;                               /* Store result destination */
140 
141       in = *pSrc++;
142       in = ((q63_t) in * scaleFract) >> 32;
143       out = in << kShift;
144       if (in != (out >> kShift))
145         out = 0x7FFFFFFF ^ (in >> 31);
146       *pDst++ = out;
147 
148       in = *pSrc++;
149       in = ((q63_t) in * scaleFract) >> 32;
150       out = in << kShift;
151       if (in != (out >> kShift))
152         out = 0x7FFFFFFF ^ (in >> 31);
153       *pDst++ = out;
154 
155       in = *pSrc++;
156       in = ((q63_t) in * scaleFract) >> 32;
157       out = in << kShift;
158       if (in != (out >> kShift))
159         out = 0x7FFFFFFF ^ (in >> 31);
160       *pDst++ = out;
161 
162       /* Decrement loop counter */
163       blkCnt--;
164     }
165   }
166   else
167   {
168     while (blkCnt > 0U)
169     {
170       /* C = A * scale */
171 
172       /* Scale input and store result in destination buffer. */
173       in = *pSrc++;                                /* read four inputs from source */
174       in = ((q63_t) in * scaleFract) >> 32;        /* multiply input with scaler value */
175       out = in >> -kShift;                         /* apply shifting */
176       *pDst++ = out;                               /* Store result destination */
177 
178       in = *pSrc++;
179       in = ((q63_t) in * scaleFract) >> 32;
180       out = in >> -kShift;
181       *pDst++ = out;
182 
183       in = *pSrc++;
184       in = ((q63_t) in * scaleFract) >> 32;
185       out = in >> -kShift;
186       *pDst++ = out;
187 
188       in = *pSrc++;
189       in = ((q63_t) in * scaleFract) >> 32;
190       out = in >> -kShift;
191       *pDst++ = out;
192 
193       /* Decrement loop counter */
194       blkCnt--;
195     }
196   }
197 
198   /* Loop unrolling: Compute remaining outputs */
199   blkCnt = blockSize % 0x4U;
200 
201 #else
202 
203   /* Initialize blkCnt with number of samples */
204   blkCnt = blockSize;
205 
206 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
207 
208   if (sign == 0U)
209   {
210     while (blkCnt > 0U)
211     {
212       /* C = A * scale */
213 
214       /* Scale input and store result in destination buffer. */
215       in = *pSrc++;
216       in = ((q63_t) in * scaleFract) >> 32;
217       out = in << kShift;
218       if (in != (out >> kShift))
219           out = 0x7FFFFFFF ^ (in >> 31);
220       *pDst++ = out;
221 
222       /* Decrement loop counter */
223       blkCnt--;
224     }
225   }
226   else
227   {
228     while (blkCnt > 0U)
229     {
230       /* C = A * scale */
231 
232       /* Scale input and store result in destination buffer. */
233       in = *pSrc++;
234       in = ((q63_t) in * scaleFract) >> 32;
235       out = in >> -kShift;
236       *pDst++ = out;
237 
238       /* Decrement loop counter */
239       blkCnt--;
240     }
241   }
242 
243 }
244 #endif /* defined(ARM_MATH_MVEI) */
245 
246 /**
247   @} end of BasicScale group
248  */
249