1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_f16.c
4  * Description:  Floating-point complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 /**
34   @ingroup groupCmplxMath
35  */
36 
37 
38 /**
39   @addtogroup cmplx_mag_squared
40   @{
41  */
42 
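/**
  @brief         Floating-point (half-precision) complex magnitude squared.
  @param[in]     pSrc        points to input vector of interleaved complex samples
                             (real, imag, real, imag, ...), 2 * numSamples values in total
  @param[out]    pDst        points to output vector, numSamples real values
                             (real * real + imag * imag for each complex sample)
  @param[in]     numSamples  number of complex samples in the input vector,
                             which is also the number of values written to the output vector
 */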
49 
50 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
51 
arm_cmplx_mag_squared_f16(const float16_t * pSrc,float16_t * pDst,uint32_t numSamples)52 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f16(
53   const float16_t * pSrc,
54         float16_t * pDst,
55         uint32_t numSamples)
56 {
57     int32_t blockSize = numSamples;  /* loop counters */
58     f16x8x2_t vecSrc;
59     f16x8_t sum;
60 
61     /* Compute 4 complex samples at a time */
62     while (blockSize > 0)
63     {
64         mve_pred16_t p = vctp16q(blockSize);
65         vecSrc = vld2q(pSrc);
66         sum = vmulq_m(vuninitializedq_f16(),vecSrc.val[0], vecSrc.val[0],p);
67         sum = vfmaq_m(sum, vecSrc.val[1], vecSrc.val[1],p);
68         vstrhq_p_f16(pDst, sum,p);
69 
70         pSrc += 16;
71         pDst += 8;
72 
73         /*
74          * Decrement the blockSize loop counter
75          */
76         blockSize-= 8;
77     }
78 
79 }
80 
81 #else
arm_cmplx_mag_squared_f16(const float16_t * pSrc,float16_t * pDst,uint32_t numSamples)82 ARM_DSP_ATTRIBUTE void arm_cmplx_mag_squared_f16(
83   const float16_t * pSrc,
84         float16_t * pDst,
85         uint32_t numSamples)
86 {
87         uint32_t blkCnt;                               /* Loop counter */
88         _Float16 real, imag;                          /* Temporary input variables */
89 
90 #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)
91 
92   /* Loop unrolling: Compute 4 outputs at a time */
93   blkCnt = numSamples >> 2U;
94 
95   while (blkCnt > 0U)
96   {
97     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
98 
99     real = *pSrc++;
100     imag = *pSrc++;
101     *pDst++ = (real * real) + (imag * imag);
102 
103     real = *pSrc++;
104     imag = *pSrc++;
105     *pDst++ = (real * real) + (imag * imag);
106 
107     real = *pSrc++;
108     imag = *pSrc++;
109     *pDst++ = (real * real) + (imag * imag);
110 
111     real = *pSrc++;
112     imag = *pSrc++;
113     *pDst++ = (real * real) + (imag * imag);
114 
115     /* Decrement loop counter */
116     blkCnt--;
117   }
118 
119   /* Loop unrolling: Compute remaining outputs */
120   blkCnt = numSamples % 0x4U;
121 
122 #else
123 
124   /* Initialize blkCnt with number of samples */
125   blkCnt = numSamples;
126 
127 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */
128 
129   while (blkCnt > 0U)
130   {
131     /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
132 
133     real = *pSrc++;
134     imag = *pSrc++;
135 
136     /* store result in destination buffer. */
137     *pDst++ = (real * real) + (imag * imag);
138 
139     /* Decrement loop counter */
140     blkCnt--;
141   }
142 
143 }
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
145 
146 /**
147   @} end of cmplx_mag_squared group
148  */
149 
150 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
151