1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_mult_real_f16.c
4  * Description:  Floating-point complex by real multiplication
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions_f16.h"
30 
31 #if defined(ARM_FLOAT16_SUPPORTED)
32 
33 /**
34   @ingroup groupCmplxMath
35  */
36 
37 
38 /**
39   @addtogroup CmplxByRealMult
40   @{
41  */
42 
43 /**
44   @brief         Floating-point complex-by-real multiplication.
45   @param[in]     pSrcCmplx   points to complex input vector
46   @param[in]     pSrcReal    points to real input vector
47   @param[out]    pCmplxDst   points to complex output vector
  @param[in]     numSamples  number of complex samples to process; pSrcReal holds numSamples values,
                             pSrcCmplx and pCmplxDst each hold 2 * numSamples values
49  */
50 
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52 
/*
 * Helium (MVE) half-precision implementation.
 *
 * For every sample i:
 *   pCmplxDst[2 * i    ] = pSrcCmplx[2 * i    ] * pSrcReal[i]
 *   pCmplxDst[2 * i + 1] = pSrcCmplx[2 * i + 1] * pSrcReal[i]
 *
 * The main loop handles 8 half-precision values (4 complex samples) per
 * iteration. The tail is fully predicated: both the loads and the store only
 * touch the remaining elements, so nothing beyond the end of the input or
 * output buffers is accessed.
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /* Gather offsets (in elements) that place each real value in two
       adjacent lanes, matching the two parts of one complex sample. */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
        };
    uint32_t blockSizeC = numSamples * CMPLX_DIM;   /* total number of f16 values in the complex vectors */
    uint32_t blkCnt;                                /* loop counter */
    f16x8_t rVec;                                   /* duplicated real factors */
    f16x8_t cmplxVec;                               /* complex input values */
    f16x8_t dstVec;                                 /* products */
    uint16x8_t strideVec;                           /* gather offsets */


    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Compute 4 complex outputs at a time */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);

        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: 1 to 3 remaining complex samples (2, 4 or 6 f16 values) */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        /* Predicate enabling only the first blkCnt lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /* Zeroing-predicated loads: inactive lanes are not read from memory
           (they are set to zero), which avoids reading past the end of
           pSrcCmplx and pSrcReal. */
        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
}
98 
99 #else
/*
 * Portable scalar half-precision implementation.
 *
 * For every sample i:
 *   pCmplxDst[2 * i    ] = pSrcCmplx[2 * i    ] * pSrcReal[i]
 *   pCmplxDst[2 * i + 1] = pSrcCmplx[2 * i + 1] * pSrcReal[i]
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
  uint32_t remaining;                            /* Complex samples still to process */
  float16_t scale;                               /* Real factor of the current sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Unrolled part: four complex samples (eight outputs) per iteration */
  for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
  {
    /* Sample 0 */
    scale = pSrcReal[0];
    pCmplxDst[0] = (_Float16)pSrcCmplx[0] * (_Float16)scale;
    pCmplxDst[1] = (_Float16)pSrcCmplx[1] * (_Float16)scale;

    /* Sample 1 */
    scale = pSrcReal[1];
    pCmplxDst[2] = (_Float16)pSrcCmplx[2] * (_Float16)scale;
    pCmplxDst[3] = (_Float16)pSrcCmplx[3] * (_Float16)scale;

    /* Sample 2 */
    scale = pSrcReal[2];
    pCmplxDst[4] = (_Float16)pSrcCmplx[4] * (_Float16)scale;
    pCmplxDst[5] = (_Float16)pSrcCmplx[5] * (_Float16)scale;

    /* Sample 3 */
    scale = pSrcReal[3];
    pCmplxDst[6] = (_Float16)pSrcCmplx[6] * (_Float16)scale;
    pCmplxDst[7] = (_Float16)pSrcCmplx[7] * (_Float16)scale;

    /* Step all three vectors past the four samples just handled */
    pSrcReal  += 4;
    pSrcCmplx += 8;
    pCmplxDst += 8;
  }

  /* Samples left over after the unrolled part (0 to 3) */
  remaining = numSamples & 0x3U;

#else

  /* No unrolling: every sample goes through the loop below */
  remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE) */

  /* One complex sample per iteration */
  for (; remaining > 0U; remaining--)
  {
    scale = pSrcReal[0];
    pCmplxDst[0] = (_Float16)pSrcCmplx[0] * (_Float16)scale;
    pCmplxDst[1] = (_Float16)pSrcCmplx[1] * (_Float16)scale;

    pSrcReal  += 1;
    pSrcCmplx += 2;
    pCmplxDst += 2;
  }

}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
166 
167 /**
168   @} end of CmplxByRealMult group
169  */
170 
171 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
172