1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_fast_q15.c
4  * Description:  Q15 complex magnitude
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @addtogroup cmplx_mag
37   @{
38  */
39 
40 /**
41   @brief         Q15 complex magnitude.
42   @param[in]     pSrc        points to input vector
43   @param[out]    pDst        points to output vector
44   @param[in]     numSamples  number of samples in each vector
45 
46   @par           Scaling and Overflow Behavior
47                    The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
                   Fast functions are less accurate. This function tends to clamp values that are
                   too small to 0, so sqrt(x*x) = x will not always hold.
50  */
51 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
52 
53 #include "arm_helium_utils.h"
54 
/*
 * Helium (MVE) implementation of the fast Q15 complex magnitude.
 *
 * pSrc holds numSamples interleaved (real, imag) pairs, i.e. 2 * numSamples
 * q15_t values; pDst receives numSamples magnitudes (2.14 format).
 * Eight complex samples are processed per vector iteration; the remaining
 * (numSamples % 8) samples are processed one at a time in the tail loop.
 */
void arm_cmplx_mag_fast_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
    uint32_t  blkCnt;           /* Loop counter */
    q15x8x2_t vecSrc;           /* val[0] = real parts, val[1] = imaginary parts */
    q15x8_t sum;                /* Per-lane re*re + im*im, then its square root */
    q31_t in;                   /* One packed (real, imag) pair for the tail loop */
    q31_t acc0;                 /* Tail loop accumulator */

    /*
     * Vector part: 8 complex samples per iteration.
     * The count is derived directly from the unsigned numSamples so that no
     * signed conversion / arithmetic shift can corrupt it for large values.
     */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        /* De-interleaving load of 8 (real, imag) pairs = 16 q15_t values */
        vecSrc = vld2q(pSrc);
        pSrc += 16;

        /* High halves of re*re and im*im, combined with a saturating add */
        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
                     vmulhq(vecSrc.val[1], vecSrc.val[1]));

        /* Scale down by one more bit before taking the square root */
        sum = vshrq(sum, 1);

        /* NOTE(review): FAST_VSQRT_Q15 is defined outside this file
           (presumably in arm_helium_utils.h) - vector square root */
        sum = FAST_VSQRT_Q15(sum);

        vst1q(pDst, sum);
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /*
     * Tail: remaining (numSamples % 8) samples, one at a time.
     */
    blkCnt = numSamples & 7U;

    while (blkCnt > 0U)
    {
      /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

      in = read_q15x2_ia ((q15_t **) &pSrc);
      acc0 = __SMUAD(in, in);

      /*
       * Store result in 2.14 format in destination buffer.
       * A sum of two squares is never negative, but for the sample
       * (-32768, -32768) it equals 2^31, which is negative when viewed as a
       * q31_t. Shifting the value as unsigned keeps that case correct
       * (2^31 >> 17 = 16384) and is identical for every other input.
       */
      arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

      /* Decrement loop counter */
      blkCnt--;
    }
}
108 
109 #else
/*
 * Scalar implementation of the fast Q15 complex magnitude.
 *
 * pSrc holds numSamples interleaved (real, imag) pairs, i.e. 2 * numSamples
 * q15_t values; pDst receives numSamples magnitudes (2.14 format).
 *
 * With ARM_MATH_DSP each pair is read as one packed 32-bit word and squared /
 * summed with __SMUAD; otherwise the two squares are computed separately and
 * summed in 64 bits. With ARM_MATH_LOOPUNROLL four outputs are produced per
 * iteration of the main loop, followed by a remainder loop.
 *
 * Note on the DSP path: a sum of two squares is never negative, but for the
 * sample (-32768, -32768) it equals 2^31, which is negative when viewed as a
 * q31_t. The accumulator is therefore shifted as an unsigned value
 * (2^31 >> 17 = 16384), which matches the non-DSP path for that input and is
 * identical to a signed shift for every other input.
 */
void arm_cmplx_mag_fast_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
        q31_t in;                                      /* Packed (real, imag) pair */
        q31_t acc0;                                    /* Accumulator */
#else
        q15_t real, imag;                              /* Temporary input variables */
        q31_t acc0, acc1;                              /* Accumulators */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    /* store result in 2.14 format in destination buffer
       (unsigned shift: see function header comment). */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer.
       The sum is formed in 64 bits so that it cannot overflow. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);

    /* store result in 2.14 format in destination buffer
       (unsigned shift: see function header comment). */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }

}
218 #endif /* defined(ARM_MATH_MVEI) */
219 
220 /**
221   @} end of cmplx_mag group
222  */
223