1 /* ----------------------------------------------------------------------
2  * Project:      CMSIS DSP Library
3  * Title:        arm_cmplx_conj_f32.c
4  * Description:  Floating-point complex conjugate
5  *
6  * $Date:        23 April 2021
7  * $Revision:    V1.9.0
8  *
9  * Target Processor: Cortex-M and Cortex-A cores
10  * -------------------------------------------------------------------- */
11 /*
12  * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
13  *
14  * SPDX-License-Identifier: Apache-2.0
15  *
16  * Licensed under the Apache License, Version 2.0 (the License); you may
17  * not use this file except in compliance with the License.
18  * You may obtain a copy of the License at
19  *
20  * www.apache.org/licenses/LICENSE-2.0
21  *
22  * Unless required by applicable law or agreed to in writing, software
23  * distributed under the License is distributed on an AS IS BASIS, WITHOUT
24  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25  * See the License for the specific language governing permissions and
26  * limitations under the License.
27  */
28 
29 #include "dsp/complex_math_functions.h"
30 
31 /**
32   @ingroup groupCmplxMath
33  */
34 
35 /**
36   @defgroup cmplx_conj Complex Conjugate
37 
38   Conjugates the elements of a complex data vector.
39 
40   The <code>pSrc</code> points to the source data and
41   <code>pDst</code> points to the destination data where the result should be written.
42   <code>numSamples</code> specifies the number of complex samples
43   and the data in each array is stored in an interleaved fashion
44   (real, imag, real, imag, ...).
45   Each array has a total of <code>2*numSamples</code> values.
46 
  The underlying algorithm is:
48   <pre>
49   for (n = 0; n < numSamples; n++) {
50       pDst[(2*n)  ] =  pSrc[(2*n)  ];    // real part
51       pDst[(2*n)+1] = -pSrc[(2*n)+1];    // imag part
52   }
53   </pre>
54 
55   There are separate functions for floating-point, Q15, and Q31 data types.
56  */
57 
58 /**
59   @addtogroup cmplx_conj
60   @{
61  */
62 
63 /**
64   @brief         Floating-point complex conjugate.
65   @param[in]     pSrc        points to the input vector
66   @param[out]    pDst        points to the output vector
  @param[in]     numSamples  number of complex samples in each vector
68   @return        none
69  */
70 
71 #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
72 
/*
 * MVE variant: conjugates interleaved complex data by multiplying every
 * group of four floats with the lane pattern {+1, -1, +1, -1}, which keeps
 * the real lanes and negates the imaginary lanes.
 *
 * pSrc        points to the interleaved (real, imag, ...) input vector
 * pDst        points to the interleaved output vector
 * numSamples  number of complex samples to process
 */
void arm_cmplx_conj_f32(
    const float32_t * pSrc,
    float32_t * pDst,
    uint32_t numSamples)
{
    /* Per-lane multipliers: even lanes (real) unchanged, odd lanes (imag) negated. */
    static const float32_t cmplx_conj_sign[4] = { 1.0f, -1.0f, 1.0f, -1.0f };
    /* Total number of float values to process.
     * NOTE(review): CMPLX_DIM is defined outside this file; the tail
     * computation below presumes it is 2 - confirm. */
    uint32_t blockSize = numSamples * CMPLX_DIM;
    uint32_t blkCnt;                               /* loop counter */
    f32x4_t vecSrc;
    f32x4_t vecSign;

    /*
     * Load the sign vector through the vector-load intrinsic rather than by
     * casting the const float array to a vector pointer: this keeps the
     * const qualifier and does not assume vector alignment of the table.
     */
    vecSign = vld1q(cmplx_conj_sign);

    /* Process 4 float values (two complex samples) per iteration */
    blkCnt = blockSize >> 2U;

    while (blkCnt > 0U)
    {
        vecSrc = vld1q(pSrc);
        vst1q(pDst, vmulq(vecSrc, vecSign));

        /* Advance source and destination pointers past the 4 floats handled */
        pSrc += 4;
        pDst += 4;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: floats left over after the vector loop, counted in complex samples */
    blkCnt = (blockSize & 0x3) >> 1;

    while (blkCnt > 0U)
    {
      /* C[0] + jC[1] = A[0] + j(-1)A[1] */

      /* Copy the real part, negate the imaginary part. */
      *pDst++ =  *pSrc++;
      *pDst++ = -*pSrc++;

      /* Decrement loop counter */
      blkCnt--;
    }

}
121 
122 #else
/*
 * Generic variant: conjugates interleaved complex data.  Depending on the
 * build configuration the bulk of the work is done by a NEON loop, by a
 * 4x unrolled scalar loop, or entirely by the plain scalar loop at the end,
 * which also handles the leftover samples of the first two.
 *
 * pSrc        points to the interleaved (real, imag, ...) input vector
 * pDst        points to the interleaved output vector
 * numSamples  number of complex samples to process
 */
void arm_cmplx_conj_f32(
  const float32_t * pSrc,
        float32_t * pDst,
        uint32_t numSamples)
{
        uint32_t blkCnt;                               /* Loop counter */

#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
   float32x4x2_t vec;

   /* Compute 4 complex outputs at a time */
   blkCnt = numSamples >> 2U;

   while (blkCnt > 0U)
   {
     /* C[0]+jC[1] = A[0]+(-1)*jA[1] */

     /* De-interleaving load: val[0] holds 4 real parts, val[1] 4 imaginary parts. */
     vec = vld2q_f32(pSrc);

     /* Negate with vnegq_f32 so the result matches the scalar tail (-x)
      * bit for bit; computing 0 - x would leave an imaginary part of
      * +0.0 as +0.0 instead of producing -0.0. */
     vec.val[1] = vnegq_f32(vec.val[1]);

     /* Re-interleave and store the 4 conjugated samples. */
     vst2q_f32(pDst, vec);

     /* Increment pointers by 4 complex samples (8 floats) */
     pSrc += 8;
     pDst += 8;

     /* Decrement the loop counter */
     blkCnt--;
   }

   /* Tail: samples not covered by the vector loop */
   blkCnt = numSamples & 0x3;

#else
#if defined (ARM_MATH_LOOPUNROLL) && !defined(ARM_MATH_AUTOVECTORIZE)

  /* Loop unrolling: Compute 4 outputs at a time */
  blkCnt = numSamples >> 2U;

  while (blkCnt > 0U)
  {
    /* C[0] + jC[1] = A[0]+ j(-1)A[1] */

    /* Calculate Complex Conjugate and store result in destination buffer. */
    *pDst++ =  *pSrc++;
    *pDst++ = -*pSrc++;

    *pDst++ =  *pSrc++;
    *pDst++ = -*pSrc++;

    *pDst++ =  *pSrc++;
    *pDst++ = -*pSrc++;

    *pDst++ =  *pSrc++;
    *pDst++ = -*pSrc++;

    /* Decrement loop counter */
    blkCnt--;
  }

  /* Loop unrolling: Compute remaining outputs */
  blkCnt = numSamples % 0x4U;

#else

  /* Initialize blkCnt with number of samples */
  blkCnt = numSamples;

#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
#endif /* #if defined (ARM_MATH_NEON) */

  /* Scalar loop: handles the tail of the paths above, or every sample when
   * neither the NEON nor the unrolled path is compiled in. */
  while (blkCnt > 0U)
  {
    /* C[0] + jC[1] = A[0]+ j(-1)A[1] */

    /* Copy the real part, negate the imaginary part. */
    *pDst++ =  *pSrc++;
    *pDst++ = -*pSrc++;

    /* Decrement loop counter */
    blkCnt--;
  }

}
209 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
210 
211 /**
212   @} end of cmplx_conj group
213  */
214