1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_squared_f32.c
4 * Description: Floating-point complex magnitude squared
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @defgroup cmplx_mag_squared Complex Magnitude Squared
37
38 Computes the magnitude squared of the elements of a complex data vector.
39
40 The <code>pSrc</code> points to the source data and
  <code>pDst</code> points to where the result should be written.
42 <code>numSamples</code> specifies the number of complex samples
43 in the input array and the data is stored in an interleaved fashion
44 (real, imag, real, imag, ...).
45 The input array has a total of <code>2*numSamples</code> values;
46 the output array has a total of <code>numSamples</code> values.
47
  The underlying algorithm is:
49
50 <pre>
51 for (n = 0; n < numSamples; n++) {
52 pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
53 }
54 </pre>
55
56 There are separate functions for floating-point, Q15, and Q31 data types.
57 */
58
59 /**
60 @addtogroup cmplx_mag_squared
61 @{
62 */
63
64 /**
65 @brief Floating-point complex magnitude squared.
66 @param[in] pSrc points to input vector
67 @param[out] pDst points to output vector
68 @param[in] numSamples number of samples in each vector
69 */
70
71 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
72
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)73 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f32(
74 const float32_t * pSrc,
75 float32_t * pDst,
76 uint32_t numSamples)
77 {
78 int32_t blockSize = numSamples; /* loop counters */
79 uint32_t blkCnt; /* loop counters */
80 f32x4x2_t vecSrc;
81 f32x4_t sum;
82 float32_t real, imag; /* Temporary input variables */
83
84 /* Compute 4 complex samples at a time */
85 blkCnt = blockSize >> 2;
86 while (blkCnt > 0U)
87 {
88 vecSrc = vld2q(pSrc);
89 sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
90 sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
91 vst1q(pDst, sum);
92
93 pSrc += 8;
94 pDst += 4;
95
96 /*
97 * Decrement the blockSize loop counter
98 */
99 blkCnt--;
100 }
101
102 /* Tail */
103 blkCnt = blockSize & 3;
104 while (blkCnt > 0U)
105 {
106 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
107
108 real = *pSrc++;
109 imag = *pSrc++;
110
111 /* store result in destination buffer. */
112 *pDst++ = (real * real) + (imag * imag);
113
114 /* Decrement loop counter */
115 blkCnt--;
116 }
117
118 }
119
120 #else
arm_cmplx_mag_squared_f32(const float32_t * pSrc,float32_t * pDst,uint32_t numSamples)121 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f32(
122 const float32_t * pSrc,
123 float32_t * pDst,
124 uint32_t numSamples)
125 {
126 uint32_t blkCnt; /* Loop counter */
127 float32_t real, imag; /* Temporary input variables */
128
129 #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
130 float32x4x2_t vecA;
131 float32x4_t vRealA;
132 float32x4_t vImagA;
133 float32x4_t vMagSqA;
134
135 float32x4x2_t vecB;
136 float32x4_t vRealB;
137 float32x4_t vImagB;
138 float32x4_t vMagSqB;
139
140 /* Loop unrolling: Compute 8 outputs at a time */
141 blkCnt = numSamples >> 3;
142
143 while (blkCnt > 0U)
144 {
145 /* out = sqrt((real * real) + (imag * imag)) */
146
147 vecA = vld2q_f32(pSrc);
148 pSrc += 8;
149
150 vRealA = vmulq_f32(vecA.val[0], vecA.val[0]);
151 vImagA = vmulq_f32(vecA.val[1], vecA.val[1]);
152 vMagSqA = vaddq_f32(vRealA, vImagA);
153
154 vecB = vld2q_f32(pSrc);
155 pSrc += 8;
156
157 vRealB = vmulq_f32(vecB.val[0], vecB.val[0]);
158 vImagB = vmulq_f32(vecB.val[1], vecB.val[1]);
159 vMagSqB = vaddq_f32(vRealB, vImagB);
160
161 /* Store the result in the destination buffer. */
162 vst1q_f32(pDst, vMagSqA);
163 pDst += 4;
164
165 vst1q_f32(pDst, vMagSqB);
166 pDst += 4;
167
168 /* Decrement the loop counter */
169 blkCnt--;
170 }
171
172 blkCnt = numSamples & 7;
173
174 #else
175 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
176
177 /* Loop unrolling: Compute 4 outputs at a time */
178 blkCnt = numSamples >> 2U;
179
180 while (blkCnt > 0U)
181 {
182 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
183
184 real = *pSrc++;
185 imag = *pSrc++;
186 *pDst++ = (real * real) + (imag * imag);
187
188 real = *pSrc++;
189 imag = *pSrc++;
190 *pDst++ = (real * real) + (imag * imag);
191
192 real = *pSrc++;
193 imag = *pSrc++;
194 *pDst++ = (real * real) + (imag * imag);
195
196 real = *pSrc++;
197 imag = *pSrc++;
198 *pDst++ = (real * real) + (imag * imag);
199
200 /* Decrement loop counter */
201 blkCnt--;
202 }
203
204 /* Loop unrolling: Compute remaining outputs */
205 blkCnt = numSamples % 0x4U;
206
207 #else
208
209 /* Initialize blkCnt with number of samples */
210 blkCnt = numSamples;
211
212 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
213 #endif /* #if defined(ARM_MATH_NEON) */
214
215 while (blkCnt > 0U)
216 {
217 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
218
219 real = *pSrc++;
220 imag = *pSrc++;
221
222 /* store result in destination buffer. */
223 *pDst++ = (real * real) + (imag * imag);
224
225 /* Decrement loop counter */
226 blkCnt--;
227 }
228
229 }
230 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
231
232 /**
233 @} end of cmplx_mag_squared group
234 */
235