1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_q15.c
4 * Description: Q15 complex magnitude
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @addtogroup cmplx_mag
37 @{
38 */
39
40 /**
41 @brief Q15 complex magnitude.
42 @param[in] pSrc points to input vector
43 @param[out] pDst points to output vector
44 @param[in] numSamples number of samples in each vector
45 @return none
46
47 @par Scaling and Overflow Behavior
48 The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
49 */
50 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
51
52 #include "arm_helium_utils.h"
53
/*
 * Q15 complex magnitude, Helium (MVE) implementation.
 *
 * pSrc       points to the interleaved input vector (real, imag, real, ...);
 *            2 * numSamples q15 values are read.
 * pDst       points to the output vector; numSamples q15 values are written,
 *            in 2.14 format.
 * numSamples number of complex samples to process.
 */
void arm_cmplx_mag_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
    uint32_t  blkCnt;   /* Loop counter */
    q15x8x2_t vecSrc;   /* Eight complex inputs, split into val[0] / val[1] */
    q15x8_t   sum;      /* Per-lane sum of squares, then the magnitude */
    q31_t     in;       /* One packed complex sample (scalar tail) */
    q31_t     acc0;     /* Sum of squares (scalar tail) */

    /*
     * Vector loop: eight complex samples per iteration.
     * The count is derived from the unsigned numSamples directly so that
     * very large counts are never pushed through a signed right shift.
     */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        /* 16 q15 values consumed: two 8-lane vectors of components */
        vecSrc = vld2q(pSrc);
        pSrc += 16;

        /* Saturating sum of the two squared components */
        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
                     vmulhq(vecSrc.val[1], vecSrc.val[1]));

        sum = vshrq(sum, 1);

        sum = FAST_VSQRT_Q15(sum);

        vst1q(pDst, sum);
        pDst += 8;

        /* Decrement loop counter */
        blkCnt--;
    }

    /*
     * Tail: remaining (numSamples % 8) samples, one at a time.
     */
    blkCnt = numSamples & 7U;

    while (blkCnt > 0U)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

        in = read_q15x2_ia ((q15_t **) &pSrc);
        acc0 = __SMUAD(in, in);

        /*
         * A sum of two squares is never negative, but it reaches 2^31 when
         * both components are -32768, which does not fit in a signed q31_t.
         * Shifting the accumulator as an unsigned value keeps that case
         * positive (16384) instead of handing a negative number to the
         * square root.
         */
        /* store result in 2.14 format in destination buffer. */
        arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

        /* Decrement loop counter */
        blkCnt--;
    }
}
107
108 #else
/*
 * Q15 complex magnitude, scalar implementation (optionally using the DSP
 * extension and/or 4x loop unrolling).
 *
 * pSrc       points to the interleaved input vector (real, imag, real, ...);
 *            2 * numSamples q15 values are read.
 * pDst       points to the output vector; numSamples q15 values are written,
 *            in 2.14 format.
 * numSamples number of complex samples to process.
 *
 * Note on the DSP path: a sum of two squares is never negative, but it
 * reaches 2^31 when both components are -32768, which does not fit in a
 * signed q31_t. The accumulator is therefore shifted as an unsigned value,
 * which yields the same square-root argument (16384) as the 64-bit addition
 * used by the non-DSP path.
 */
void arm_cmplx_mag_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
        q31_t in;                                      /* One packed complex sample */
        q31_t acc0;                                    /* Accumulators */
#else
        q15_t real, imag;                              /* Temporary input variables */
        q31_t acc0, acc1;                              /* Accumulators */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    /* store result in 2.14 format in destination buffer
       (unsigned shift: see the overflow note in the function header). */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer
       (the squares are added in 64 bits so the sum cannot overflow). */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia ((q15_t **) &pSrc);
    acc0 = __SMUAD(in, in);

    /* store result in 2.14 format in destination buffer
       (unsigned shift: see the overflow note in the function header). */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }

}
217 #endif /* defined(ARM_MATH_MVEI) */
218
219 /**
220 @} end of cmplx_mag group
221 */
222