1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mag_squared_f16.c
4  * Description:  Floating-point complex magnitude squared
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 /**
34   @ingroup groupCmplxMath
35  */
36 
37 
38 /**
39   @addtogroup cmplx_mag_squared
40   @{
41  */
42 
/**
  @brief         Floating-point complex magnitude squared.
  @param[in]     pSrc        points to input vector (interleaved real, imaginary pairs; 2 * numSamples values)
  @param[out]    pDst        points to output vector (numSamples real values)
  @param[in]     numSamples  number of complex samples in each vector
  @return        none
 */
50 
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52 
/*
 * MVE (Helium) implementation.
 *
 * For each complex sample (real, imag) stored interleaved in pSrc, writes
 * real*real + imag*imag to pDst. Eight complex samples are handled per
 * vector operation.
 *
 * The de-interleaving load (vld2q) has no predicated form and always fetches
 * 16 half-precision values. To avoid reading past the end of pSrc, only whole
 * blocks of 8 samples are loaded directly from the input; a final partial
 * block is first copied into a zero-padded local buffer and loaded from there.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
    uint32_t blkCnt;            /* Loop counter */
    f16x8x2_t vecSrc;           /* De-interleaved input: val[0] = even (real) elements, val[1] = odd (imaginary) elements */
    f16x8_t sum;                /* real^2 + imag^2 for up to 8 samples */

    /* Full blocks: 8 complex samples (16 interleaved values) per iteration */
    blkCnt = numSamples >> 3U;

    while (blkCnt > 0U)
    {
        vecSrc = vld2q(pSrc);
        sum = vmulq(vecSrc.val[0], vecSrc.val[0]);
        sum = vfmaq(sum, vecSrc.val[1], vecSrc.val[1]);
        vst1q(pDst, sum);

        pSrc += 16;
        pDst += 8;

        /* Decrement loop counter */
        blkCnt--;
    }

    /* Tail: 1 to 7 remaining complex samples */
    blkCnt = numSamples & 7U;

    if (blkCnt > 0U)
    {
        float16_t tail[16] = { 0 };           /* Zero-padded copy of the remaining input */
        const float16_t *pTail = tail;
        mve_pred16_t p = vctp16q(blkCnt);     /* Enables only the first blkCnt lanes */
        uint32_t i;

        /* Copy exactly the 2 * blkCnt values that belong to the input */
        for (i = 0U; i < (2U * blkCnt); i++)
        {
            tail[i] = pSrc[i];
        }

        vecSrc = vld2q(pTail);
        sum = vmulq_m(vuninitializedq_f16(), vecSrc.val[0], vecSrc.val[0], p);
        sum = vfmaq_m(sum, vecSrc.val[1], vecSrc.val[1], p);

        /* Predicated store: only blkCnt results are written to pDst */
        vstrhq_p_f16(pDst, sum, p);
    }
}
81 
82 #else
/*
 * Scalar implementation.
 *
 * Reads numSamples interleaved (real, imaginary) pairs from pSrc and writes
 * real*real + imag*imag for each pair to pDst.
 */
void arm_cmplx_mag_squared_f16(
  const float16_t * pSrc,
        float16_t * pDst,
        uint32_t numSamples)
{
  uint32_t remaining;            /* Complex samples left for the current loop */
  _Float16 re, im;               /* Real and imaginary part of the current sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled section: four outputs per pass */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = *pSrc++;
    im = *pSrc++;
    *pDst++ = (re * re) + (im * im);

    re = *pSrc++;
    im = *pSrc++;
    *pDst++ = (re * re) + (im * im);

    re = *pSrc++;
    im = *pSrc++;
    *pDst++ = (re * re) + (im * im);

    re = *pSrc++;
    im = *pSrc++;
    *pDst++ = (re * re) + (im * im);
  }

  /* Samples left over after the unrolled section (0 to 3) */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  /* One output per pass */
  for (; remaining > 0U; remaining--)
  {
    /* C[0] = (A[0] * A[0] + A[1] * A[1]) */
    re = *pSrc++;
    im = *pSrc++;

    /* Store result in destination buffer */
    *pDst++ = (re * re) + (im * im);
  }

}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
146 
147 /**
148   @} end of cmplx_mag_squared group
149  */
150 
151 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
152