1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_fast_q15.c
4 * Description: Q15 complex magnitude
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions.h"
30
31 /**
32 @ingroup groupCmplxMath
33 */
34
35 /**
36 @addtogroup cmplx_mag
37 @{
38 */
39
40 /**
41 @brief Q15 complex magnitude.
42 @param[in] pSrc points to input vector
43 @param[out] pDst points to output vector
44 @param[in] numSamples number of samples in each vector
45
46 @par Scaling and Overflow Behavior
47 The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
48 Fast functions are less accurate. This function tends to flush very small
49 values to 0, so sqrt(x*x) = x does not always hold.
50 */
51 #if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
52
53 #include "arm_helium_utils.h"
54
/*
 * Helium (MVE) variant of the fast Q15 complex magnitude.
 *
 * pSrc        interleaved input; 2 * numSamples q15 values are consumed
 * pDst        output; numSamples q15 values are written (2.14 format)
 * numSamples  number of complex samples to process
 *
 * Eight outputs are produced per vector iteration; the remaining
 * (numSamples & 7) outputs are produced one at a time by a scalar loop.
 */
void arm_cmplx_mag_fast_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
    uint32_t  blkCnt;           /* loop counter */
    q15x8x2_t vecSrc;           /* 16 input values split over two 8-lane vectors */
    q15x8_t   sum;              /* per-lane sum of squares, then its square root */
    q31_t     in;               /* one packed pair of input values (tail loop) */
    q31_t     acc0;             /* A[0] * A[0] + A[1] * A[1] (tail loop) */

    /*
     * Vector part. The counts are derived from the unsigned numSamples
     * directly: going through a signed 32-bit copy would turn counts of
     * 2^31 or more into a negative value before the shift.
     */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld2q(pSrc);
        pSrc += 16;

        /*
         * Both halves are squared and added with a saturating add, so the
         * result does not depend on which half holds the real parts.
         */
        sum = vqaddq(vmulhq(vecSrc.val[0], vecSrc.val[0]),
                     vmulhq(vecSrc.val[1], vecSrc.val[1]));

        sum = vshrq(sum, 1);

        /* NOTE(review): FAST_VSQRT_Q15 is defined outside this file; presumably
         * a per-lane approximate Q15 square root - confirm in arm_helium_utils.h */
        sum = FAST_VSQRT_Q15(sum);

        vst1q(pDst, sum);
        pDst += 8;

        /* Decrement the vector loop counter */
        blkCnt--;
    }

    /*
     * Tail: the 0..7 samples that do not fill a whole vector.
     */
    blkCnt = numSamples & 7U;

    while (blkCnt > 0U)
    {
        /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

        in = read_q15x2_ia ((q15_t **) &pSrc);
        acc0 = __SMUAD(in, in);

        /*
         * A sum of two squares is never negative; acc0 only looks negative
         * when both inputs are -32768 and the sum 2^31 wraps around. Shifting
         * the unsigned bit pattern yields 16384 in that case, where an
         * arithmetic shift would hand arm_sqrt_q15 a negative argument
         * (-16384). All other inputs give the same value either way.
         *
         * Store result in 2.14 format in destination buffer.
         */
        arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

        /* Decrement loop counter */
        blkCnt--;
    }
}
108
109 #else
/*
 * Scalar variant of the fast Q15 complex magnitude.
 *
 * pSrc        interleaved input; 2 * numSamples q15 values are consumed
 * pDst        output; numSamples q15 values are written (2.14 format)
 * numSamples  number of complex samples to process
 *
 * With ARM_MATH_LOOPUNROLL, four outputs are computed per iteration and a
 * second loop handles the remaining (numSamples % 4) outputs. With
 * ARM_MATH_DSP, each pair of input values is read as one packed word and
 * squared/summed with __SMUAD; otherwise plain 16x16 multiplies are used
 * and the two squares are added in 64 bits.
 */
void arm_cmplx_mag_fast_q15(
  const q15_t * pSrc,
        q15_t * pDst,
        uint32_t numSamples)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined (ARM_MATH_DSP)
        q31_t in;                                      /* One packed pair of input values */
        q31_t acc0;                                    /* A[0] * A[0] + A[1] * A[1] */
#else
        q15_t real, imag;                              /* Temporary input variables */
        q31_t acc0, acc1;                              /* Squares of real and imag */
#endif

#if defined (ARM_MATH_LOOPUNROLL)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    /*
     * A sum of two squares is never negative; acc0 only looks negative when
     * both inputs are -32768 and the sum 2^31 wraps around. Shifting the
     * unsigned bit pattern yields 16384 in that case, which is what the
     * 64-bit addition in the non-DSP branch produces; an arithmetic shift
     * would hand arm_sqrt_q15 a negative argument (-16384) instead. All
     * other inputs give the same value either way.
     */
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);

    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* The squares are added in 64 bits so that 2^30 + 2^30 cannot overflow.
       Store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);

    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif /* #if defined (ARM_MATH_DSP) */

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

  while (blkCnt > 0U)
  {
    /* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */

#if defined (ARM_MATH_DSP)
    in = read_q15x2_ia (&pSrc);
    acc0 = __SMUAD(in, in);

    /* Unsigned shift: keeps the wrapped 2^31 case (both inputs -32768)
       positive; identical to a signed shift for every other input.
       Store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) ((uint32_t) acc0 >> 17), pDst++);
#else
    real = *pSrc++;
    imag = *pSrc++;
    acc0 = ((q31_t) real * real);
    acc1 = ((q31_t) imag * imag);

    /* store result in 2.14 format in destination buffer. */
    arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
#endif

    /* Decrement loop counter */
    blkCnt--;
  }

}
218 #endif /* defined(ARM_MATH_MVEI) */
219
220 /**
221 @} end of cmplx_mag group
222 */
223