1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_q31.c
4  * Description:  Q31 complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @addtogroup cmplx_mag_squared
37   @{
38  */
39 
/**
  @brief         Q31 complex magnitude squared.
  @param[in]     pSrc        points to input vector
  @param[out]    pDst        points to output vector
  @param[in]     numSamples  number of samples in each vector

  @par           Scaling and Overflow Behavior
                   The function performs 1.31 by 1.31 multiplications, and the final output is converted to 3.29 format.
                   Input down scaling is not required.
 */
50 
51 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
52 
/*
 * MVE (Helium) build of arm_cmplx_mag_squared_q31.
 *
 * For every complex sample (re, im), read from pSrc as two consecutive q31
 * values, stores ((re * re) >> 33) + ((im * im) >> 33) to pDst.
 *
 *   pSrc        input: 2 * numSamples q31 values (real, imaginary, real, ...)
 *   pDst        output: numSamples q31 values
 *   numSamples  number of complex samples to process
 *
 * The vector loop and the scalar tail produce identical values for the same
 * input, so a sample's result does not depend on its position in the buffer.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;                /* Loop counter */
    q31x4x2_t vecSrc;               /* val[0]: real parts, val[1]: imaginary parts */
    q31x4_t vReal, vImag;           /* Per-lane squared components */
    q31x4_t vMagSq;                 /* Per-lane magnitude squared */
    q31_t real, imag;               /* Temporary input variables (tail loop) */
    q31_t acc0, acc1;               /* Accumulators (tail loop) */

    /*
     * Compute 4 complex samples at a time.
     * The count is taken straight from the unsigned numSamples; routing it
     * through a signed copy would make the shift implementation-defined for
     * counts that do not fit in int32_t.
     */
    blkCnt = numSamples >> 2U;
    while (blkCnt > 0U)
    {
        /* De-interleaving load of 4 complex samples (8 q31 values) */
        vecSrc = vld2q(pSrc);

        /* High 32 bits of each 64-bit square: (x * x) >> 32 */
        vReal = vmulhq(vecSrc.val[0], vecSrc.val[0]);
        vImag = vmulhq(vecSrc.val[1], vecSrc.val[1]);

        /*
         * Halve each square BEFORE adding, giving (x * x) >> 33 per term as
         * in the tail loop. Adding first and halving afterwards rounds
         * differently (by one LSB when both terms are odd) and saturates when
         * both components are 0x80000000.
         */
        vReal = vshrq(vReal, 1);
        vImag = vshrq(vImag, 1);

        /* Each term is at most 2^29, so this addition never saturates */
        vMagSq = vqaddq(vReal, vImag);

        /* Store 4 results in 3.29 format */
        vst1q(pDst, vMagSq);

        /* Advance source and destination pointers, decrement loop counter */
        pSrc += 8;
        pDst += 4;
        blkCnt--;
    }

    /* Tail: remaining 0..3 samples, one at a time */
    blkCnt = numSamples & 3U;
    while (blkCnt > 0U)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */

        real = *pSrc++;
        imag = *pSrc++;
        acc0 = (q31_t) (((q63_t) real * real) >> 33);
        acc1 = (q31_t) (((q63_t) imag * imag) >> 33);

        /* Store result in 3.29 format in destination buffer. */
        *pDst++ = acc0 + acc1;

        /* Decrement loop counter */
        blkCnt--;
    }
}
105 
106 #else
/*
 * Scalar build of arm_cmplx_mag_squared_q31.
 *
 * For every complex sample (re, im), read from pSrc as two consecutive q31
 * values, stores ((re * re) >> 33) + ((im * im) >> 33) to pDst.
 *
 *   pSrc        input: 2 * numSamples q31 values (real, imaginary, real, ...)
 *   pDst        output: numSamples q31 values
 *   numSamples  number of complex samples to process
 *
 * With ARM_MATH_LOOPUNROLL defined, the first loop handles four samples per
 * iteration and the final loop handles the remaining numSamples % 4 samples;
 * otherwise the final loop handles all of them.
 */
void arm_cmplx_mag_squared_q31(
  const q31_t * pSrc,
        q31_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;     /* Samples left for the current loop */
  q31_t re, im;           /* Real and imaginary part of the current sample */
  q31_t reSq, imSq;       /* Squared components, each shifted right by 33 */

#if defined (ARM_MATH_LOOPUNROLL)

  /* Unrolled section: four outputs per iteration */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* Output 0 */
    re = pSrc[0];
    im = pSrc[1];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[0] = reSq + imSq;

    /* Output 1 */
    re = pSrc[2];
    im = pSrc[3];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[1] = reSq + imSq;

    /* Output 2 */
    re = pSrc[4];
    im = pSrc[5];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[2] = reSq + imSq;

    /* Output 3 */
    re = pSrc[6];
    im = pSrc[7];
    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);
    pDst[3] = reSq + imSq;

    /* Step past the four samples just processed */
    pSrc += 8;
    pDst += 4;
  }

  /* Samples left over after the unrolled section */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: the loop below handles every sample */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  for (; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = pSrc[0];
    im = pSrc[1];
    pSrc += 2;

    reSq = (q31_t) (((q63_t) re * re) >> 33);
    imSq = (q31_t) (((q63_t) im * im) >> 33);

    /* Result is stored in 3.29 format */
    *pDst = reSq + imSq;
    pDst++;
  }
}
181 
182 #endif /* defined(ARM_MATH_MVEI) */
183 
184 /**
185   @} end of cmplx_mag_squared group
186  */
187