1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_q15.c
4  * Description:  Q15 complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @addtogroup cmplx_mag_squared
37   @{
38  */
39 
40 /**
41   @brief         Q15 complex magnitude squared.
42   @param[in]     pSrc        points to input vector
43   @param[out]    pDst        points to output vector
44   @param[in]     numSamples  number of samples in each vector
45 
46   @par           Scaling and Overflow Behavior
                   The function implements 1.15 by 1.15 multiplications, and the final output is converted into 3.13 format.
48  */
49 
50 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
/*
 * MVE (Helium) variant of the Q15 complex magnitude squared.
 *
 * pSrc       interleaved input: 2 * numSamples q15 values, read as
 *            (real, imag) pairs
 * pDst       output: numSamples q15 values
 * numSamples number of complex samples to process
 *
 * Blocks of 8 complex samples go through the vector loop; the remaining
 * (numSamples & 7) samples go through a scalar tail.
 */
void arm_cmplx_mag_squared_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t  blkCnt;           /* Loop counter */
  q31_t in;                   /* One complex sample packed as two q15 values */
  q31_t acc0;                 /* Sum of squares of the packed pair */
  q15x8x2_t vecSrc;           /* 16 loaded input values, split into val[0] / val[1] */
  q15x8_t vReal, vImag;       /* Per-lane squares of val[0] and val[1] */
  q15x8_t vMagSq;             /* Per-lane result */

  /*
   * Vector loop: 8 complex samples (16 q15 inputs, 8 q15 outputs) per pass.
   * The count is derived from the unsigned numSamples directly so that no
   * signed conversion or signed shift is involved.
   */
  blkCnt = numSamples >> 3U;
  while (blkCnt > 0U)
  {
    vecSrc = vld2q(pSrc);

    /* Square each half separately, add with saturation, then halve */
    vReal = vmulhq(vecSrc.val[0], vecSrc.val[0]);
    vImag = vmulhq(vecSrc.val[1], vecSrc.val[1]);
    vMagSq = vqaddq(vReal, vImag);
    vMagSq = vshrq(vMagSq, 1);

    vst1q(pDst, vMagSq);

    /* Advance source and destination pointers past the processed block */
    pSrc += 16;
    pDst += 8;

    /* Decrement loop counter */
    blkCnt--;
  }

  /*
   * Tail: remaining samples, one complex sample per pass
   */
  blkCnt = numSamples & 7U;
  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);

    /*
     * Store result in 3.13 format in destination buffer.
     * A sum of two squares is never negative, but it reaches 2^31 when both
     * parts are -32768; shifting it as an unsigned value keeps that case
     * positive (16384) instead of turning it into -16384.
     */
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    /* Decrement loop counter */
    blkCnt--;
  }

}
106 
107 #else
/*
 * Scalar variant of the Q15 complex magnitude squared.
 *
 * pSrc       interleaved input: 2 * numSamples q15 values, read as
 *            (real, imag) pairs
 * pDst       output: numSamples q15 values
 * numSamples number of complex samples to process
 *
 * Each output is (real * real + imag * imag) >> 17.
 *
 * With ARM_MATH_DSP the sum of squares is produced by __SMUAD on a packed
 * pair. That sum is never negative, but it reaches 2^31 when both parts are
 * -32768, so it is shifted as an unsigned value. This yields 16384 for that
 * input, the same value the non-DSP path obtains through its q63_t sum.
 */
void arm_cmplx_mag_squared_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
  uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
  q31_t in;                                      /* One complex sample packed as two q15 values */
  q31_t acc0;                                    /* Sum of squares of the packed pair */
#else
  q15_t real, imag;                              /* Temporary input variables */
  q31_t acc0, acc1;                              /* Squares of real and imag parts */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    /* Store result in 3.13 format; unsigned shift keeps a 2^31 sum positive */
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    /* Store result in 3.13 format; the sum is formed in 64 bits before shifting */
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);

    /* Store result in 3.13 format; unsigned shift keeps a 2^31 sum positive */
    *pDst++ = (q15_t) ((uint32_t) acc0 >> 17);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* Store result in 3.13 format; the sum is formed in 64 bits before shifting */
    *pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }

}
215 
216 #endif /* defined(ARM_MATH_MVEI) */
217 
218 /**
219   @} end of cmplx_mag_squared group
220  */
221