1 /* ----------------------------------------------------------------------
2 * Project: CMSIS DSP Library
3 * Title: arm_cmplx_mult_real_f16.c
4 * Description: Floating-point complex by real multiplication
5 *
6 * $Date: 23 April 2021
7 * $Revision: V1.9.0
8 *
9 * Target Processor: Cortex-M and Cortex-A cores
10 * -------------------------------------------------------------------- */
11 /*
12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13 *
14 * SPDX-License-Identifier: Apache-2.0
15 *
16 * Licensed under the Apache License, Version 2.0 (the License); you may
17 * not use this file except in compliance with the License.
18 * You may obtain a copy of the License at
19 *
20 * www.apache.org/licenses/LICENSE-2.0
21 *
22 * Unless required by applicable law or agreed to in writing, software
23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 * See the License for the specific language governing permissions and
26 * limitations under the License.
27 */
28
29 #include "dsp/complex_math_functions_f16.h"
30
31 #if defined(ARM_FLOAT16_SUPPORTED)
32
33 /**
34 @ingroup groupCmplxMath
35 */
36
37
38 /**
39 @addtogroup CmplxByRealMult
40 @{
41 */
42
43 /**
44 @brief Floating-point complex-by-real multiplication.
45 @param[in] pSrcCmplx points to complex input vector
46 @param[in] pSrcReal points to real input vector
47 @param[out] pCmplxDst points to complex output vector
48 @param[in] numSamples number of samples in each vector
49 */
50
51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
52
/*
 * MVE (Helium) half-precision implementation.
 *
 * Multiplies every element of the interleaved complex vector pSrcCmplx by the
 * matching element of the real vector pSrcReal:
 *
 *   pCmplxDst[2 * i    ] = pSrcCmplx[2 * i    ] * pSrcReal[i]
 *   pCmplxDst[2 * i + 1] = pSrcCmplx[2 * i + 1] * pSrcReal[i]
 *
 * pSrcCmplx   points to the complex input vector
 * pSrcReal    points to the real input vector
 * pCmplxDst   points to the complex output vector
 * numSamples  number of samples in each vector
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    /*
     * Element offsets for the gather load: every real sample is fetched into
     * two adjacent lanes so that it lines up with one complex element pair.
     */
    static const uint16_t stride_cmplx_x_real_16[8] = {
        0, 0, 1, 1, 2, 2, 3, 3
    };
    /* Total number of float16 elements to process.
       NOTE(review): CMPLX_DIM is defined outside this file; presumably 2. */
    uint32_t blockSizeC = numSamples * CMPLX_DIM;
    uint32_t blkCnt;                    /* Loop counter */
    f16x8_t rVec;                       /* Duplicated real samples */
    f16x8_t cmplxVec;                   /* Complex input elements */
    f16x8_t dstVec;                     /* Products */
    uint16x8_t strideVec;               /* Gather offsets */

    /* stride vector for pairs of real generation */
    strideVec = vld1q(stride_cmplx_x_real_16);

    /* Full vectors: 8 float16 elements (4 real samples) per iteration */
    blkCnt = blockSizeC >> 3;
    while (blkCnt > 0U)
    {
        cmplxVec = vld1q(pSrcCmplx);
        rVec = vldrhq_gather_shifted_offset_f16(pSrcReal, strideVec);
        dstVec = vmulq(cmplxVec, rVec);
        vst1q(pCmplxDst, dstVec);

        pSrcReal += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
        blkCnt--;
    }

    /* Tail: fewer than 8 elements remain */
    blkCnt = blockSizeC & 7;
    if (blkCnt > 0U) {
        /* Predicate enabling only the first blkCnt lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Both loads are predicated with the same mask as the store so that
         * lanes beyond the remaining elements are never fetched. The previous
         * unpredicated loads read past the end of both input buffers.
         */
        cmplxVec = vldrhq_z_f16(pSrcCmplx, p0);
        rVec = vldrhq_gather_shifted_offset_z_f16(pSrcReal, strideVec, p0);
        dstVec = vmulq(cmplxVec, rVec);
        vstrhq_p_f16(pCmplxDst, dstVec, p0);
    }
}
98
99 #else
/*
 * Scalar half-precision implementation.
 *
 * For each sample i, both elements of the complex input pair are multiplied
 * by the real input sample:
 *
 *   pCmplxDst[2 * i    ] = pSrcCmplx[2 * i    ] * pSrcReal[i]
 *   pCmplxDst[2 * i + 1] = pSrcCmplx[2 * i + 1] * pSrcReal[i]
 *
 * pSrcCmplx   points to the complex input vector
 * pSrcReal    points to the real input vector
 * pCmplxDst   points to the complex output vector
 * numSamples  number of samples in each vector
 */
void arm_cmplx_mult_real_f16(
  const float16_t * pSrcCmplx,
  const float16_t * pSrcReal,
        float16_t * pCmplxDst,
        uint32_t numSamples)
{
    uint32_t remaining;                 /* Samples still to be processed */
    float16_t scale;                    /* Current real input sample */

#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

    /* Unrolled part: four real samples (eight output elements) per pass */
    for (remaining = numSamples >> 2U; remaining > 0U; remaining--)
    {
        /* Each real sample is read immediately before the pair it scales */
        scale = pSrcReal[0];
        pCmplxDst[0] = (_Float16)pSrcCmplx[0] * (_Float16)scale;
        pCmplxDst[1] = (_Float16)pSrcCmplx[1] * (_Float16)scale;

        scale = pSrcReal[1];
        pCmplxDst[2] = (_Float16)pSrcCmplx[2] * (_Float16)scale;
        pCmplxDst[3] = (_Float16)pSrcCmplx[3] * (_Float16)scale;

        scale = pSrcReal[2];
        pCmplxDst[4] = (_Float16)pSrcCmplx[4] * (_Float16)scale;
        pCmplxDst[5] = (_Float16)pSrcCmplx[5] * (_Float16)scale;

        scale = pSrcReal[3];
        pCmplxDst[6] = (_Float16)pSrcCmplx[6] * (_Float16)scale;
        pCmplxDst[7] = (_Float16)pSrcCmplx[7] * (_Float16)scale;

        /* Advance all three streams past the processed group */
        pSrcReal  += 4;
        pSrcCmplx += 8;
        pCmplxDst += 8;
    }

    /* Samples left over after the unrolled passes (0..3) */
    remaining = numSamples & 0x3U;

#else

    /* No unrolling: every sample goes through the loop below */
    remaining = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

    /* One real sample (two output elements) per pass */
    for (; remaining > 0U; remaining--)
    {
        scale = *pSrcReal;
        pSrcReal += 1;

        pCmplxDst[0] = (_Float16)pSrcCmplx[0] * (_Float16)scale;
        pCmplxDst[1] = (_Float16)pSrcCmplx[1] * (_Float16)scale;

        pSrcCmplx += 2;
        pCmplxDst += 2;
    }

}
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
166
167 /**
168 @} end of CmplxByRealMult group
169 */
170
171 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
172