1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_squared_f16.c
4 * Description: Floating-point complex magnitude squared
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 /**
34 @ingroup groupCmplxMath
35 */
36
37
38 /**
39 @addtogroup cmplx_mag_squared
40 @{
41 */
42
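/**
  @brief         Half-precision floating-point complex magnitude squared.
  @param[in]     pSrc        points to input vector of interleaved (real, imag) pairs, 2 * numSamples values in total
  @param[out]    pDst        points to output vector, numSamples real values (real*real + imag*imag per pair)
  @param[in]     numSamples  number of complex samples to process
 */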
49
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51
arm_cmplx_mag_squared_f16(const float16_t * pSrc,float16_t * pDst,uint32_t numSamples)52 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f16(
53 const float16_t * pSrc,
54 float16_t * pDst,
55 uint32_t numSamples)
56 {
57 int32_t blockSize = numSamples; /* loop counters */
58 f16x8x2_t vecSrc;
59 f16x8_t sum;
60
61 /* Compute 4 complex samples at a time */
62 while (blockSize > 0)
63 {
64 mve_pred16_t p = vctp16q(blockSize);
65 vecSrc = vld2q(pSrc);
66 sum = vmulq_m(vuninitializedq_f16(),vecSrc.val[0], vecSrc.val[0],p);
67 sum = vfmaq_m(sum, vecSrc.val[1], vecSrc.val[1],p);
68 vstrhq_p_f16(pDst, sum,p);
69
70 pSrc += 16;
71 pDst += 8;
72
73 /*
74 * Decrement the blockSize loop counter
75 */
76 blockSize-= 8;
77 }
78
79 }
80
81 #else
arm_cmplx_mag_squared_f16(const float16_t * pSrc,float16_t * pDst,uint32_t numSamples)82 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f16(
83 const float16_t * pSrc,
84 float16_t * pDst,
85 uint32_t numSamples)
86 {
87 uint32_t blkCnt; /* Loop counter */
88 _Float16 real, imag; /* Temporary input variables */
89
90 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
91
92 /* Loop unrolling: Compute 4 outputs at a time */
93 blkCnt = numSamples >> 2U;
94
95 while (blkCnt > 0U)
96 {
97 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
98
99 real = *pSrc++;
100 imag = *pSrc++;
101 *pDst++ = (real * real) + (imag * imag);
102
103 real = *pSrc++;
104 imag = *pSrc++;
105 *pDst++ = (real * real) + (imag * imag);
106
107 real = *pSrc++;
108 imag = *pSrc++;
109 *pDst++ = (real * real) + (imag * imag);
110
111 real = *pSrc++;
112 imag = *pSrc++;
113 *pDst++ = (real * real) + (imag * imag);
114
115 /* Decrement loop counter */
116 blkCnt--;
117 }
118
119 /* Loop unrolling: Compute remaining outputs */
120 blkCnt = numSamples % 0x4U;
121
122 #else
123
124 /* Initialize blkCnt with number of samples */
125 blkCnt = numSamples;
126
127 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
128
129 while (blkCnt > 0U)
130 {
131 /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
132
133 real = *pSrc++;
134 imag = *pSrc++;
135
136 /* store result in destination buffer. */
137 *pDst++ = (real * real) + (imag * imag);
138
139 /* Decrement loop counter */
140 blkCnt--;
141 }
142
143 }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
145
146 /**
147 @} end of cmplx_mag_squared group
148 */
149
150 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
151