1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mag_squared_f16.c
4 * Description: Floating-point complex magnitude squared
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 /**
34 @ingroup groupCmplxMath
35 */
36
37
38 /**
39 @addtogroup cmplx_mag_squared
40 @{
41 */
42
43 /**
44 @brief Floating-point complex magnitude squared.
45 @param[in] pSrc points to input vector
46 @param[out] pDst points to output vector
47 @param[in] numSamples number of samples in each vector
48 @return none
49 */
50
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52
/*
 * Helium (MVE) float16 implementation.
 *
 * pSrc holds numSamples complex values stored as interleaved
 * (real, imag) half-precision pairs, i.e. 2 * numSamples elements.
 * pDst receives numSamples values, each real*real + imag*imag.
 *
 * Full groups of 8 complex samples use the de-interleaving vld2q load.
 * vld2q cannot be predicated and always reads 16 halfwords, so the
 * remaining 1..7 samples are fetched with predicated gather loads, which
 * touch memory only for active lanes. This keeps every read inside the
 * 2 * numSamples input elements.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;        /* loop counter (unsigned: no wrap for large counts) */
    f16x8x2_t vecSrc;       /* val[0] = real parts, val[1] = imaginary parts */
    f16x8_t sum;

    /* Compute 8 complex samples at a time */
    blkCnt = numSamples >> 3U;
    while (blkCnt > 0U)
    {
        vecSrc = vld2q(pSrc);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        vst1q(pDst, sum);

        pSrc += 16;
        pDst += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 7 complex samples left */
    blkCnt = numSamples & 7U;
    if (blkCnt > 0U)
    {
        mve_pred16_t p = vctp16q(blkCnt);

        /* Element offsets 0, 2, 4, ... 14 select every other halfword */
        uint16x8_t offset = vidupq_n_u16(0U, 2);

        /* Predicated gathers: inactive lanes are not read and are zeroed */
        f16x8_t vecReal = vldrhq_gather_shifted_offset_z_f16(pSrc, offset, p);
        f16x8_t vecImag = vldrhq_gather_shifted_offset_z_f16(pSrc + 1, offset, p);

        sum = vmulq_m(vuninitializedq_f16(), vecReal, vecReal, p);
        sum = vfmaq_m(sum, vecImag, vecImag, p);
        vstrhq_p_f16(pDst, sum, p);
    }
}
81
82 #else
/*
 * Scalar float16 implementation.
 *
 * Reads numSamples complex values from pSrc as consecutive
 * (real, imag) pairs and writes real*real + imag*imag for each of
 * them to pDst. Arithmetic is carried out in _Float16.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t remaining;     /* complex samples left for the final loop */
    _Float16 re, im;        /* current real / imaginary input */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint32_t quads;         /* number of 4-sample groups left */

    /* Unrolled part: four outputs per pass, each stored right after
       its own pair of inputs has been read */
    for (quads = numSamples >> 2U; quads != 0U; quads--)
    {
        re = pSrc[0];
        im = pSrc[1];
        pDst[0] = (re * re) + (im * im);

        re = pSrc[2];
        im = pSrc[3];
        pDst[1] = (re * re) + (im * im);

        re = pSrc[4];
        im = pSrc[5];
        pDst[2] = (re * re) + (im * im);

        re = pSrc[6];
        im = pSrc[7];
        pDst[3] = (re * re) + (im * im);

        pSrc += 8;
        pDst += 4;
    }

    /* Samples that did not fill a group of four */
    remaining = numSamples & 0x3U;

#else

    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;

#endif /* defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

    for (; remaining != 0U; remaining--)
    {
        /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
        re = pSrc[0];
        im = pSrc[1];
        pSrc += 2;

        *pDst++ = (re * re) + (im * im);
    }
}
145 #endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
146
147 /**
148 @} end of cmplx_mag_squared group
149 */
150
151 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
152